/*	$OpenBSD: ifq.c,v 1.20 2018/01/02 07:08:10 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

#define TASK_ONQUEUE 0x1

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(4, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}
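/*
 * ifq_start() above mitigates the cost of small bursts: once at least
 * four packets (or ifq_maxlen, if smaller) are pending, the bundle
 * task is cancelled and the driver's start routine runs immediately
 * via the serialiser; otherwise the start is deferred to the softnet
 * task queue so several enqueues can share one call. The start and
 * restart tasks below are only entered via ifq_serialize(), so at
 * most one of them runs against a given ifq at any time.
 */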
void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifq->ifq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	ifq_barrier(ifq); /* ensure nothing is running with the ifq */

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}
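/*
 * ifq_enqueue() below hands the mbuf to the active discipline's
 * enqueue op, which either takes it (returns NULL), rejects it
 * (returns the mbuf itself, reported as ENOBUFS), or takes it but
 * hands back a different mbuf to drop in its place. Dequeue is split
 * into begin/commit/rollback so a driver can look at the next packet,
 * attempt to set it up for transmission, and put it back without loss
 * if that fails.
 */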
struct mbuf *m) 304{ 305 struct mbuf *dm; 306 307 mtx_enter(&ifq->ifq_mtx); 308 dm = ifq->ifq_ops->ifqop_enq(ifq, m); 309 if (dm != m) { 310 ifq->ifq_packets++; 311 ifq->ifq_bytes += m->m_pkthdr.len; 312 if (ISSET(m->m_flags, M_MCAST)) 313 ifq->ifq_mcasts++; 314 } 315 316 if (dm == NULL) 317 ifq->ifq_len++; 318 else 319 ifq->ifq_qdrops++; 320 mtx_leave(&ifq->ifq_mtx); 321 322 if (dm != NULL) 323 m_freem(dm); 324 325 return (dm == m ? ENOBUFS : 0); 326} 327 328static inline void 329ifq_deq_enter(struct ifqueue *ifq) 330{ 331 mtx_enter(&ifq->ifq_mtx); 332} 333 334static inline void 335ifq_deq_leave(struct ifqueue *ifq) 336{ 337 struct mbuf_list ml; 338 339 ml = ifq->ifq_free; 340 ml_init(&ifq->ifq_free); 341 342 mtx_leave(&ifq->ifq_mtx); 343 344 if (!ml_empty(&ml)) 345 ml_purge(&ml); 346} 347 348struct mbuf * 349ifq_deq_begin(struct ifqueue *ifq) 350{ 351 struct mbuf *m = NULL; 352 void *cookie; 353 354 ifq_deq_enter(ifq); 355 if (ifq->ifq_len == 0 || 356 (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) { 357 ifq_deq_leave(ifq); 358 return (NULL); 359 } 360 361 m->m_pkthdr.ph_cookie = cookie; 362 363 return (m); 364} 365 366void 367ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m) 368{ 369 void *cookie; 370 371 KASSERT(m != NULL); 372 cookie = m->m_pkthdr.ph_cookie; 373 374 ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); 375 ifq->ifq_len--; 376 ifq_deq_leave(ifq); 377} 378 379void 380ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m) 381{ 382 KASSERT(m != NULL); 383 384 ifq_deq_leave(ifq); 385} 386 387struct mbuf * 388ifq_dequeue(struct ifqueue *ifq) 389{ 390 struct mbuf *m; 391 392 m = ifq_deq_begin(ifq); 393 if (m == NULL) 394 return (NULL); 395 396 ifq_deq_commit(ifq, m); 397 398 return (m); 399} 400 401unsigned int 402ifq_purge(struct ifqueue *ifq) 403{ 404 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 405 unsigned int rv; 406 407 mtx_enter(&ifq->ifq_mtx); 408 ifq->ifq_ops->ifqop_purge(ifq, &ml); 409 rv = ifq->ifq_len; 410 ifq->ifq_len = 0; 411 ifq->ifq_qdrops += rv; 412 mtx_leave(&ifq->ifq_mtx); 413 414 KASSERT(rv == ml_len(&ml)); 415 416 ml_purge(&ml); 417 418 return (rv); 419} 420 421void * 422ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops) 423{ 424 mtx_enter(&ifq->ifq_mtx); 425 if (ifq->ifq_ops == ops) 426 return (ifq->ifq_q); 427 428 mtx_leave(&ifq->ifq_mtx); 429 430 return (NULL); 431} 432 433void 434ifq_q_leave(struct ifqueue *ifq, void *q) 435{ 436 KASSERT(q == ifq->ifq_q); 437 mtx_leave(&ifq->ifq_mtx); 438} 439 440void 441ifq_mfreem(struct ifqueue *ifq, struct mbuf *m) 442{ 443 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 444 445 ifq->ifq_len--; 446 ifq->ifq_qdrops++; 447 ml_enqueue(&ifq->ifq_free, m); 448} 449 450void 451ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml) 452{ 453 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 454 455 ifq->ifq_len -= ml_len(ml); 456 ifq->ifq_qdrops += ml_len(ml); 457 ml_enlist(&ifq->ifq_free, ml); 458} 459 460/* 461 * ifiq 462 */ 463 464static void ifiq_process(void *); 465 466void 467ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) 468{ 469 ifiq->ifiq_if = ifp; 470 ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ 471 ifiq->ifiq_softc = NULL; 472 473 mtx_init(&ifiq->ifiq_mtx, IPL_NET); 474 ml_init(&ifiq->ifiq_ml); 475 task_set(&ifiq->ifiq_task, ifiq_process, ifiq); 476 477 ifiq->ifiq_qdrops = 0; 478 ifiq->ifiq_packets = 0; 479 ifiq->ifiq_bytes = 0; 480 ifiq->ifiq_qdrops = 0; 481 ifiq->ifiq_errors = 0; 482 483 ifiq->ifiq_idx = idx; 484} 485 486void 487ifiq_destroy(struct ifiqueue *ifiq) 488{ 489 if 
void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifiq->ifiq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int rv = 1;

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	if (ifiq_len(ifiq) >= cwm * 5)
		ifiq->ifiq_qdrops += ml_len(ml);
	else {
		rv = (ifiq_len(ifiq) >= cwm * 3);
		ml_enlist(&ifiq->ifiq_ml, ml);
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (rv);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

void
ifiq_barrier(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}
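/*
 * priq_enq() below implements the priq drop policy: when the queue is
 * full, the oldest packet is dropped from the lowest non-empty list
 * whose priority is below that of the new packet. If no such list
 * exists, the new packet itself is rejected and handed back to the
 * caller.
 */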
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}
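/*
 * A sketch of how a driver consumes an ifq from its if_qstart routine
 * (illustrative only; "exdrv" and its helpers are hypothetical and
 * not part of this file). If setting a packet up for transmission
 * fails, the rollback leaves it queued for the next start call:
 *
 *	void
 *	exdrv_start(struct ifqueue *ifq)
 *	{
 *		struct exdrv_softc *sc = ifq->ifq_if->if_softc;
 *		struct mbuf *m;
 *
 *		for (;;) {
 *			if (exdrv_txring_full(sc)) {
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			m = ifq_deq_begin(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (exdrv_encap(sc, m) != 0) {
 *				ifq_deq_rollback(ifq, m);
 *				break;
 *			}
 *
 *			ifq_deq_commit(ifq, m);
 *		}
 *	}
 */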