ifq.c revision 1.22
/*	$OpenBSD: ifq.c,v 1.22 2018/01/25 14:04:36 mpi Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);

#define TASK_ONQUEUE 0x1

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}
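/*
 * The serializer above is what the ifq_start and ifq_restart tasks
 * (set up in ifq_init() below) run through: ifq_serialize() executes
 * the queue's pending tasks on at most one CPU at a time, in the
 * order they were queued, so a driver's if_qstart routine is never
 * entered concurrently for the same queue.  Each task is copied to
 * the stack (work) before the mutex is dropped, so the caller's task
 * structure may be re-queued while its handler is still running.
 */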
void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}
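/*
 * The functions below implement a two-stage dequeue: ifq_deq_begin()
 * returns the next packet but leaves it on the queue (stashing the
 * discipline's cookie in ph_cookie), ifq_deq_commit() actually removes
 * it, and ifq_deq_rollback() abandons the attempt.  A driver start
 * routine might use them like this (a sketch only; foo_encap() is a
 * hypothetical routine that loads a packet onto the tx ring, sc the
 * driver's softc):
 *
 *	while ((m = ifq_deq_begin(ifq)) != NULL) {
 *		if (foo_encap(sc, m) != 0) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *	}
 */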
static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) {
		NET_ASSERT_UNLOCKED();
		taskq_barrier(ifiq->ifiq_softnet);
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}
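/*
 * ifiq_input() hands a driver's freshly received packets to the stack
 * via the softnet taskq.  cwm is the driver's current rx ring
 * watermark: if more than five times that many packets are already
 * pending, the new list is dropped wholesale, and a return value of 1
 * (a backlog of three times cwm or more, or a drop) tells the driver
 * to back off.
 */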
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int rv = 1;

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	if (ifiq_len(ifiq) >= cwm * 5)
		ifiq->ifiq_qdrops += ml_len(ml);
	else {
		rv = (ifiq_len(ifiq) >= cwm * 3);
		ml_enlist(&ifiq->ifiq_ml, ml);
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (rv);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

void
ifiq_barrier(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}
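/*
 * priq_enq() never grows the queue past ifq_maxlen.  When the queue
 * is full it tries to make room by dropping the head of the lowest
 * priority non-empty list below the new packet's priority; if there
 * is no such list, the new packet itself is handed back to be dropped.
 */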
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}
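/*
 * Note that priq_deq_begin() only peeks: the mbuf stays on its list
 * (handed back through the cookie) until priq_deq_commit() unlinks
 * it, which is what lets ifq_deq_rollback() abandon a dequeue without
 * disturbing the queue.
 */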