/* ifq.c revision 1.18 */
1/* $OpenBSD: ifq.c,v 1.18 2017/12/15 01:37:30 dlg Exp $ */ 2 3/* 4 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include "bpfilter.h" 20 21#include <sys/param.h> 22#include <sys/systm.h> 23#include <sys/socket.h> 24#include <sys/mbuf.h> 25#include <sys/proc.h> 26 27#include <net/if.h> 28#include <net/if_var.h> 29 30#if NBPFILTER > 0 31#include <net/bpf.h> 32#endif 33 34/* 35 * priq glue 36 */ 37unsigned int priq_idx(unsigned int, const struct mbuf *); 38struct mbuf *priq_enq(struct ifqueue *, struct mbuf *); 39struct mbuf *priq_deq_begin(struct ifqueue *, void **); 40void priq_deq_commit(struct ifqueue *, struct mbuf *, void *); 41void priq_purge(struct ifqueue *, struct mbuf_list *); 42 43void *priq_alloc(unsigned int, void *); 44void priq_free(unsigned int, void *); 45 46const struct ifq_ops priq_ops = { 47 priq_idx, 48 priq_enq, 49 priq_deq_begin, 50 priq_deq_commit, 51 priq_purge, 52 priq_alloc, 53 priq_free, 54}; 55 56const struct ifq_ops * const ifq_priq_ops = &priq_ops; 57 58/* 59 * priq internal structures 60 */ 61 62struct priq { 63 struct mbuf_list pq_lists[IFQ_NQUEUES]; 64}; 65 66/* 67 * ifqueue serialiser 68 */ 69 70void ifq_start_task(void *); 71void ifq_restart_task(void *); 72void 
ifq_barrier_task(void *); 73 74#define TASK_ONQUEUE 0x1 75 76void 77ifq_serialize(struct ifqueue *ifq, struct task *t) 78{ 79 struct task work; 80 81 if (ISSET(t->t_flags, TASK_ONQUEUE)) 82 return; 83 84 mtx_enter(&ifq->ifq_task_mtx); 85 if (!ISSET(t->t_flags, TASK_ONQUEUE)) { 86 SET(t->t_flags, TASK_ONQUEUE); 87 TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry); 88 } 89 90 if (ifq->ifq_serializer == NULL) { 91 ifq->ifq_serializer = curcpu(); 92 93 while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) { 94 TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry); 95 CLR(t->t_flags, TASK_ONQUEUE); 96 work = *t; /* copy to caller to avoid races */ 97 98 mtx_leave(&ifq->ifq_task_mtx); 99 100 (*work.t_func)(work.t_arg); 101 102 mtx_enter(&ifq->ifq_task_mtx); 103 } 104 105 ifq->ifq_serializer = NULL; 106 } 107 mtx_leave(&ifq->ifq_task_mtx); 108} 109 110int 111ifq_is_serialized(struct ifqueue *ifq) 112{ 113 return (ifq->ifq_serializer == curcpu()); 114} 115 116void 117ifq_start_task(void *p) 118{ 119 struct ifqueue *ifq = p; 120 struct ifnet *ifp = ifq->ifq_if; 121 122 if (!ISSET(ifp->if_flags, IFF_RUNNING) || 123 ifq_empty(ifq) || ifq_is_oactive(ifq)) 124 return; 125 126 ifp->if_qstart(ifq); 127} 128 129void 130ifq_restart_task(void *p) 131{ 132 struct ifqueue *ifq = p; 133 struct ifnet *ifp = ifq->ifq_if; 134 135 ifq_clr_oactive(ifq); 136 ifp->if_qstart(ifq); 137} 138 139void 140ifq_barrier(struct ifqueue *ifq) 141{ 142 struct cond c = COND_INITIALIZER(); 143 struct task t = TASK_INITIALIZER(ifq_barrier_task, &c); 144 145 /* this should only be called from converted drivers */ 146 KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE)); 147 148 if (ifq->ifq_serializer == NULL) 149 return; 150 151 ifq_serialize(ifq, &t); 152 153 cond_wait(&c, "ifqbar"); 154} 155 156void 157ifq_barrier_task(void *p) 158{ 159 struct cond *c = p; 160 161 cond_signal(c); 162} 163 164/* 165 * ifqueue mbuf queue API 166 */ 167 168void 169ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) 
170{ 171 ifq->ifq_if = ifp; 172 ifq->ifq_softc = NULL; 173 174 mtx_init(&ifq->ifq_mtx, IPL_NET); 175 ifq->ifq_qdrops = 0; 176 177 /* default to priq */ 178 ifq->ifq_ops = &priq_ops; 179 ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL); 180 181 ml_init(&ifq->ifq_free); 182 ifq->ifq_len = 0; 183 184 ifq->ifq_packets = 0; 185 ifq->ifq_bytes = 0; 186 ifq->ifq_qdrops = 0; 187 ifq->ifq_errors = 0; 188 ifq->ifq_mcasts = 0; 189 190 mtx_init(&ifq->ifq_task_mtx, IPL_NET); 191 TAILQ_INIT(&ifq->ifq_task_list); 192 ifq->ifq_serializer = NULL; 193 194 task_set(&ifq->ifq_start, ifq_start_task, ifq); 195 task_set(&ifq->ifq_restart, ifq_restart_task, ifq); 196 197 if (ifq->ifq_maxlen == 0) 198 ifq_set_maxlen(ifq, IFQ_MAXLEN); 199 200 ifq->ifq_idx = idx; 201} 202 203void 204ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg) 205{ 206 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 207 struct mbuf_list free_ml = MBUF_LIST_INITIALIZER(); 208 struct mbuf *m; 209 const struct ifq_ops *oldops; 210 void *newq, *oldq; 211 212 newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg); 213 214 mtx_enter(&ifq->ifq_mtx); 215 ifq->ifq_ops->ifqop_purge(ifq, &ml); 216 ifq->ifq_len = 0; 217 218 oldops = ifq->ifq_ops; 219 oldq = ifq->ifq_q; 220 221 ifq->ifq_ops = newops; 222 ifq->ifq_q = newq; 223 224 while ((m = ml_dequeue(&ml)) != NULL) { 225 m = ifq->ifq_ops->ifqop_enq(ifq, m); 226 if (m != NULL) { 227 ifq->ifq_qdrops++; 228 ml_enqueue(&free_ml, m); 229 } else 230 ifq->ifq_len++; 231 } 232 mtx_leave(&ifq->ifq_mtx); 233 234 oldops->ifqop_free(ifq->ifq_idx, oldq); 235 236 ml_purge(&free_ml); 237} 238 239void 240ifq_destroy(struct ifqueue *ifq) 241{ 242 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 243 244 /* don't need to lock because this is the last use of the ifq */ 245 246 ifq->ifq_ops->ifqop_purge(ifq, &ml); 247 ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q); 248 249 ml_purge(&ml); 250} 251 252void 253ifq_add_data(struct ifqueue *ifq, struct if_data *data) 254{ 255 
mtx_enter(&ifq->ifq_mtx); 256 data->ifi_opackets += ifq->ifq_packets; 257 data->ifi_obytes += ifq->ifq_bytes; 258 data->ifi_oqdrops += ifq->ifq_qdrops; 259 data->ifi_omcasts += ifq->ifq_mcasts; 260 /* ifp->if_data.ifi_oerrors */ 261 mtx_leave(&ifq->ifq_mtx); 262} 263 264int 265ifq_enqueue(struct ifqueue *ifq, struct mbuf *m) 266{ 267 struct mbuf *dm; 268 269 mtx_enter(&ifq->ifq_mtx); 270 dm = ifq->ifq_ops->ifqop_enq(ifq, m); 271 if (dm != m) { 272 ifq->ifq_packets++; 273 ifq->ifq_bytes += m->m_pkthdr.len; 274 if (ISSET(m->m_flags, M_MCAST)) 275 ifq->ifq_mcasts++; 276 } 277 278 if (dm == NULL) 279 ifq->ifq_len++; 280 else 281 ifq->ifq_qdrops++; 282 mtx_leave(&ifq->ifq_mtx); 283 284 if (dm != NULL) 285 m_freem(dm); 286 287 return (dm == m ? ENOBUFS : 0); 288} 289 290static inline void 291ifq_deq_enter(struct ifqueue *ifq) 292{ 293 mtx_enter(&ifq->ifq_mtx); 294} 295 296static inline void 297ifq_deq_leave(struct ifqueue *ifq) 298{ 299 struct mbuf_list ml; 300 301 ml = ifq->ifq_free; 302 ml_init(&ifq->ifq_free); 303 304 mtx_leave(&ifq->ifq_mtx); 305 306 if (!ml_empty(&ml)) 307 ml_purge(&ml); 308} 309 310struct mbuf * 311ifq_deq_begin(struct ifqueue *ifq) 312{ 313 struct mbuf *m = NULL; 314 void *cookie; 315 316 ifq_deq_enter(ifq); 317 if (ifq->ifq_len == 0 || 318 (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) { 319 ifq_deq_leave(ifq); 320 return (NULL); 321 } 322 323 m->m_pkthdr.ph_cookie = cookie; 324 325 return (m); 326} 327 328void 329ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m) 330{ 331 void *cookie; 332 333 KASSERT(m != NULL); 334 cookie = m->m_pkthdr.ph_cookie; 335 336 ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); 337 ifq->ifq_len--; 338 ifq_deq_leave(ifq); 339} 340 341void 342ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m) 343{ 344 KASSERT(m != NULL); 345 346 ifq_deq_leave(ifq); 347} 348 349struct mbuf * 350ifq_dequeue(struct ifqueue *ifq) 351{ 352 struct mbuf *m; 353 354 m = ifq_deq_begin(ifq); 355 if (m == NULL) 356 return (NULL); 357 
358 ifq_deq_commit(ifq, m); 359 360 return (m); 361} 362 363unsigned int 364ifq_purge(struct ifqueue *ifq) 365{ 366 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 367 unsigned int rv; 368 369 mtx_enter(&ifq->ifq_mtx); 370 ifq->ifq_ops->ifqop_purge(ifq, &ml); 371 rv = ifq->ifq_len; 372 ifq->ifq_len = 0; 373 ifq->ifq_qdrops += rv; 374 mtx_leave(&ifq->ifq_mtx); 375 376 KASSERT(rv == ml_len(&ml)); 377 378 ml_purge(&ml); 379 380 return (rv); 381} 382 383void * 384ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops) 385{ 386 mtx_enter(&ifq->ifq_mtx); 387 if (ifq->ifq_ops == ops) 388 return (ifq->ifq_q); 389 390 mtx_leave(&ifq->ifq_mtx); 391 392 return (NULL); 393} 394 395void 396ifq_q_leave(struct ifqueue *ifq, void *q) 397{ 398 KASSERT(q == ifq->ifq_q); 399 mtx_leave(&ifq->ifq_mtx); 400} 401 402void 403ifq_mfreem(struct ifqueue *ifq, struct mbuf *m) 404{ 405 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 406 407 ifq->ifq_len--; 408 ifq->ifq_qdrops++; 409 ml_enqueue(&ifq->ifq_free, m); 410} 411 412void 413ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml) 414{ 415 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 416 417 ifq->ifq_len -= ml_len(ml); 418 ifq->ifq_qdrops += ml_len(ml); 419 ml_enlist(&ifq->ifq_free, ml); 420} 421 422/* 423 * ifiq 424 */ 425 426static void ifiq_process(void *); 427 428void 429ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) 430{ 431 ifiq->ifiq_if = ifp; 432 ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ 433 ifiq->ifiq_softc = NULL; 434 435 mtx_init(&ifiq->ifiq_mtx, IPL_NET); 436 ml_init(&ifiq->ifiq_ml); 437 task_set(&ifiq->ifiq_task, ifiq_process, ifiq); 438 439 ifiq->ifiq_qdrops = 0; 440 ifiq->ifiq_packets = 0; 441 ifiq->ifiq_bytes = 0; 442 ifiq->ifiq_qdrops = 0; 443 ifiq->ifiq_errors = 0; 444 445 ifiq->ifiq_idx = idx; 446} 447 448void 449ifiq_destroy(struct ifiqueue *ifiq) 450{ 451 if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) { 452 int netlocked = (rw_status(&netlock) == RW_WRITE); 453 454 if (netlocked) /* XXXSMP 
breaks atomicity */ 455 NET_UNLOCK(); 456 457 taskq_barrier(ifiq->ifiq_softnet); 458 459 if (netlocked) 460 NET_LOCK(); 461 } 462 463 /* don't need to lock because this is the last use of the ifiq */ 464 ml_purge(&ifiq->ifiq_ml); 465} 466 467int 468ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm) 469{ 470 struct ifnet *ifp = ifiq->ifiq_if; 471 struct mbuf *m; 472 uint64_t packets; 473 uint64_t bytes = 0; 474#if NBPFILTER > 0 475 caddr_t if_bpf; 476#endif 477 int rv = 1; 478 479 if (ml_empty(ml)) 480 return (0); 481 482 MBUF_LIST_FOREACH(ml, m) { 483 m->m_pkthdr.ph_ifidx = ifp->if_index; 484 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 485 bytes += m->m_pkthdr.len; 486 } 487 packets = ml_len(ml); 488 489#if NBPFILTER > 0 490 if_bpf = ifp->if_bpf; 491 if (if_bpf) { 492 struct mbuf_list ml0 = *ml; 493 494 ml_init(ml); 495 496 while ((m = ml_dequeue(&ml0)) != NULL) { 497 if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) 498 m_freem(m); 499 else 500 ml_enqueue(ml, m); 501 } 502 503 if (ml_empty(ml)) { 504 mtx_enter(&ifiq->ifiq_mtx); 505 ifiq->ifiq_packets += packets; 506 ifiq->ifiq_bytes += bytes; 507 mtx_leave(&ifiq->ifiq_mtx); 508 509 return (0); 510 } 511 } 512#endif 513 514 mtx_enter(&ifiq->ifiq_mtx); 515 ifiq->ifiq_packets += packets; 516 ifiq->ifiq_bytes += bytes; 517 518 if (ifiq_len(ifiq) >= cwm * 5) 519 ifiq->ifiq_qdrops += ml_len(ml); 520 else { 521 rv = (ifiq_len(ifiq) >= cwm * 3); 522 ml_enlist(&ifiq->ifiq_ml, ml); 523 } 524 mtx_leave(&ifiq->ifiq_mtx); 525 526 if (ml_empty(ml)) 527 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); 528 else 529 ml_purge(ml); 530 531 return (rv); 532} 533 534void 535ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data) 536{ 537 mtx_enter(&ifiq->ifiq_mtx); 538 data->ifi_ipackets += ifiq->ifiq_packets; 539 data->ifi_ibytes += ifiq->ifiq_bytes; 540 data->ifi_iqdrops += ifiq->ifiq_qdrops; 541 mtx_leave(&ifiq->ifiq_mtx); 542} 543 544void 545ifiq_barrier(struct ifiqueue *ifiq) 546{ 547 if 
(!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) 548 taskq_barrier(ifiq->ifiq_softnet); 549} 550 551int 552ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m) 553{ 554 mtx_enter(&ifiq->ifiq_mtx); 555 ml_enqueue(&ifiq->ifiq_ml, m); 556 mtx_leave(&ifiq->ifiq_mtx); 557 558 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); 559 560 return (0); 561} 562 563static void 564ifiq_process(void *arg) 565{ 566 struct ifiqueue *ifiq = arg; 567 struct mbuf_list ml; 568 569 if (ifiq_empty(ifiq)) 570 return; 571 572 mtx_enter(&ifiq->ifiq_mtx); 573 ml = ifiq->ifiq_ml; 574 ml_init(&ifiq->ifiq_ml); 575 mtx_leave(&ifiq->ifiq_mtx); 576 577 if_input_process(ifiq->ifiq_if, &ml); 578} 579 580/* 581 * priq implementation 582 */ 583 584unsigned int 585priq_idx(unsigned int nqueues, const struct mbuf *m) 586{ 587 unsigned int flow = 0; 588 589 if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) 590 flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK; 591 592 return (flow % nqueues); 593} 594 595void * 596priq_alloc(unsigned int idx, void *null) 597{ 598 struct priq *pq; 599 int i; 600 601 pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK); 602 for (i = 0; i < IFQ_NQUEUES; i++) 603 ml_init(&pq->pq_lists[i]); 604 return (pq); 605} 606 607void 608priq_free(unsigned int idx, void *pq) 609{ 610 free(pq, M_DEVBUF, sizeof(struct priq)); 611} 612 613struct mbuf * 614priq_enq(struct ifqueue *ifq, struct mbuf *m) 615{ 616 struct priq *pq; 617 struct mbuf_list *pl; 618 struct mbuf *n = NULL; 619 unsigned int prio; 620 621 pq = ifq->ifq_q; 622 KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO); 623 624 /* Find a lower priority queue to drop from */ 625 if (ifq_len(ifq) >= ifq->ifq_maxlen) { 626 for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) { 627 pl = &pq->pq_lists[prio]; 628 if (ml_len(pl) > 0) { 629 n = ml_dequeue(pl); 630 goto enqueue; 631 } 632 } 633 /* 634 * There's no lower priority queue that we can 635 * drop from so don't enqueue this one. 
636 */ 637 return (m); 638 } 639 640 enqueue: 641 pl = &pq->pq_lists[m->m_pkthdr.pf.prio]; 642 ml_enqueue(pl, m); 643 644 return (n); 645} 646 647struct mbuf * 648priq_deq_begin(struct ifqueue *ifq, void **cookiep) 649{ 650 struct priq *pq = ifq->ifq_q; 651 struct mbuf_list *pl; 652 unsigned int prio = nitems(pq->pq_lists); 653 struct mbuf *m; 654 655 do { 656 pl = &pq->pq_lists[--prio]; 657 m = MBUF_LIST_FIRST(pl); 658 if (m != NULL) { 659 *cookiep = pl; 660 return (m); 661 } 662 } while (prio > 0); 663 664 return (NULL); 665} 666 667void 668priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie) 669{ 670 struct mbuf_list *pl = cookie; 671 672 KASSERT(MBUF_LIST_FIRST(pl) == m); 673 674 ml_dequeue(pl); 675} 676 677void 678priq_purge(struct ifqueue *ifq, struct mbuf_list *ml) 679{ 680 struct priq *pq = ifq->ifq_q; 681 struct mbuf_list *pl; 682 unsigned int prio = nitems(pq->pq_lists); 683 684 do { 685 pl = &pq->pq_lists[--prio]; 686 ml_enlist(ml, pl); 687 } while (prio > 0); 688} 689