int nmbcnt;
u_long m_mballoc_wid = 0;
u_long m_clalloc_wid = 0;

/*
 * freelist header structures...
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
struct mtx mbuf_mtx;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
	   &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
	   sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
	   "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
	   "Maximum number of ext_buf counters available");
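/*
 * A minimal usage sketch (assumed userland consumer, not part of this
 * file): the objects declared above live under the kern.ipc sysctl tree,
 * so their values can be read with sysctlbyname(3).
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
	int clusters;
	size_t len = sizeof(clusters);

	/* kern.ipc.nmbclusters is the read-only cluster limit above. */
	if (sysctlbyname("kern.ipc.nmbclusters", &clusters, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("nmbclusters: %d\n", clusters);
	return (0);
}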
	/*
	 * Set up the mb_map, allocate requested VM space.
	 */
	mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt));
	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and set up locks for lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_unlock(&mbuf_mtx);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(u_int nmb, int how)
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		if (1 /* XXX: how == M_TRYWAIT */)
			mtx_lock(&mbuf_mtx);
		return (0);
	}
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}
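/*
 * A minimal sketch of the worst-case sizing argument made above: mb_map
 * is dimensioned in mbinit() so that every mbuf, every cluster and every
 * reference counter can exist at once, which is why m_alloc_ref() needs
 * no wait path of its own.  The tunable values below are assumed
 * placeholders, not the kernel defaults.
 */
static vm_size_t
example_worst_case_map_size(void)
{
	int ex_nmbufs = 4096, ex_nmbclusters = 1024, ex_nmbcnt = 1024;
	vm_size_t sz;

	/* Same arithmetic as mbinit(), truncated to a page boundary. */
	sz = (vm_size_t)(ex_nmbufs * MSIZE + ex_nmbclusters * MCLBYTES +
	    ex_nmbcnt * sizeof(union mext_refcnt));
	return (rounddown(sz, PAGE_SIZE));
}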
/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(int nmb, int how)
{
	caddr_t p;
	int i;
	int nbytes;

	nbytes = round_page(nmb * MSIZE);
	nmb = nbytes / MSIZE;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */) {
		mtx_lock(&mbuf_mtx);
		if (p == NULL)
			mbstat.m_wait++;
	}

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}

/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we ask the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time, at most.
 *
 * Must be called with the mmbfree mutex held.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines. Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (See comment above m_reclaim()).
	 */
	mtx_unlock(&mbuf_mtx);
	m_reclaim();

	mtx_lock(&mbuf_mtx);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		int retval;

		m_mballoc_wid++;
		retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
		    mbuf_wait);
		m_mballoc_wid--;

		/*
		 * If we got signaled (i.e. didn't time out), allocate.
		 */
		if (retval == 0)
			_MGET(p, M_DONTWAIT);
	}

	if (p != NULL) {
		mbstat.m_wait++;
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
	}

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(int ncl, int how)
{
	caddr_t p;
	int i;
	int npg_sz;

	npg_sz = round_page(ncl * MCLBYTES);
	ncl = npg_sz / MCLBYTES;

	/*
	 * If the map is full (nothing will ever be freed back to it), or
	 * if we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * block on a cv for a designated amount of time (mbuf_wait) or until we're
 * signaled due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;
	int retval;

	m_clalloc_wid++;
	retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
	m_clalloc_wid--;

	/*
	 * Now that we (think) we've got something, try again.
	 */
	if (retval == 0)
		_MCLALLOC(p, M_DONTWAIT);

	if (p != NULL) {
		mbstat.m_wait++;
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
	}

	return (p);
}

/*
 * m_reclaim: drain protocols in hopes of freeing up some resources...
 *
 * XXX: No locks should be held going in here. The drain routines have
 * to presently acquire some locks, which raises the possibility of a lock
 * order violation if we're holding any mutex that is acquired in
 * reverse order relative to one of the locks in the drain routines.
 */
static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(curproc) == 0,
	    ("m_reclaim called with locks held"));
#endif

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * Some of these are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	if (m != NULL)
		bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}
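/*
 * A minimal usage sketch (assumed caller, not from this file): the
 * wrappers above are the non-macro entry points for single allocations.
 * Assumes <sys/param.h> and <sys/mbuf.h>.
 */
static struct mbuf *
example_alloc_one(void)
{
	struct mbuf *m;

	m = m_gethdr(M_DONTWAIT, MT_DATA);	/* may return NULL */
	if (m == NULL)
		return (NULL);
	MCLGET(m, M_DONTWAIT);			/* optionally attach a cluster */
	if ((m->m_flags & M_EXT) == 0) {
		(void)m_free(m);		/* cluster attach failed */
		return (NULL);
	}
	m->m_len = 0;
	return (m);
}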
/*
 * struct mbuf *
 * m_getm(m, len, how, type)
 *
 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain. If m is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, int type)
{
	struct mbuf *top, *tail, *mp, *mtail = NULL;

	KASSERT(len >= 0, ("len is < 0 in m_getm"));

	MGET(mp, how, type);
	if (mp == NULL)
		return (NULL);
	else if (len > MINCLSIZE) {
		MCLGET(mp, how);
		if ((mp->m_flags & M_EXT) == 0) {
			m_free(mp);
			return (NULL);
		}
	}
	mp->m_len = 0;
	len -= M_TRAILINGSPACE(mp);

	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		m = mp;

	top = tail = mp;
	while (len > 0) {
		MGET(mp, how, type);
		if (mp == NULL)
			goto failed;

		tail->m_next = mp;
		tail = mp;
		if (len > MINCLSIZE) {
			MCLGET(mp, how);
			if ((mp->m_flags & M_EXT) == 0)
				goto failed;
		}

		mp->m_len = 0;
		len -= M_TRAILINGSPACE(mp);
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return (m);

failed:
	m_freem(top);
	return (NULL);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We do need to check the non-first mbuf, since some existing
		 * code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
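/*
 * A minimal usage sketch (assumed caller): M_PREPEND falls back to
 * m_prepend() above when there is no leading space in the first mbuf.
 * "struct example_hdr" is hypothetical.
 */
struct example_hdr {
	u_int32_t	eh_tag;
};

static struct mbuf *
example_add_header(struct mbuf *m, u_int32_t tag)
{
	struct example_hdr *eh;

	M_PREPEND(m, sizeof(struct example_hdr), M_DONTWAIT);
	if (m == NULL)			/* the chain was freed on failure */
		return (NULL);
	eh = mtod(m, struct example_hdr *);
	eh->eh_tag = tag;
	return (m);
}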
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL) {
		mtx_lock(&mbuf_mtx);
		mbstat.m_mcfail++;
		mtx_unlock(&mbuf_mtx);
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
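/*
 * A minimal usage sketch (assumed caller): both m_copym() and
 * m_copypacket() bump cluster reference counts instead of copying data,
 * so the returned chain must be treated as read-only.
 */
static struct mbuf *
example_keep_snapshot(struct mbuf *m)
{
	struct mbuf *copy;

	copy = m_copypacket(m, M_DONTWAIT);	/* read-only, shares clusters */
	if (copy == NULL)
		return (NULL);			/* m itself is left intact */
	return (copy);
}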
/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters. Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail. Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return. Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len). Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mpfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
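/*
 * A minimal usage sketch (assumed protocol-input caller): m_pullup() is
 * typically used to make a header contiguous before casting it with
 * mtod().  "struct example_proto_hdr" is hypothetical.
 */
struct example_proto_hdr {
	u_int16_t	ep_sport;
	u_int16_t	ep_dport;
};

static struct mbuf *
example_pullup_hdr(struct mbuf *m)
{
	if (m->m_len < (int)sizeof(struct example_proto_hdr)) {
		m = m_pullup(m, sizeof(struct example_proto_hdr));
		if (m == NULL)		/* the chain was already freed */
			return (NULL);
	}
	/* mtod(m, struct example_proto_hdr *) is now safe. */
	return (m);
}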
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes. In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0;	/* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == NULL && len +
				    max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
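/*
 * A minimal usage sketch (assumed driver receive path): m_devget() turns
 * a packet sitting in device memory into an mbuf chain.  The identifiers
 * ifp, rx_buf and rx_len are assumptions of this sketch.
 */
static struct mbuf *
example_rx_to_mbuf(struct ifnet *ifp, char *rx_buf, int rx_len)
{
	/* off = 0 and a NULL copy routine, so plain bcopy() is used. */
	return (m_devget(rx_buf, rx_len, 0, ifp, NULL));
}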
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}
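/*
 * A minimal usage sketch (assumed caller): m_copydata() and m_copyback()
 * are the usual pair for moving bytes between a chain and a flat buffer,
 * e.g. to patch a field in place.  The offset, length and buffer are
 * assumptions of this sketch.
 */
static void
example_patch_bytes(struct mbuf *m0, int off, int len, caddr_t buf)
{
	/* Pull the current bytes out... */
	m_copydata(m0, off, len, buf);
	/* ...modify buf as needed, then write it back at the same offset. */
	m_copyback(m0, off, len, buf);
}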