/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/uipc_mbuf.c 77544 2001-05-31 21:56:44Z jesper $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit(void *);
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	nmbcnt;
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;

/*
 * freelist header structures...
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
struct mtx mbuf_mtx;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
    &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
    &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
    &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
    &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
    sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
    "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
    "Maximum number of ext_buf counters available");

TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);

static void m_reclaim(void);

/* Initial allocation numbers */
#define NCL_INIT	2
#define NMB_INIT	16
#define REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization done here.
 *
 * XXX: If ever we have system specific map setups to do, then move them to
 *      machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(void *dummy)
{
	vm_offset_t maxaddr;
	vm_size_t mb_map_size;

	/*
	 * Setup the mb_map, allocate requested VM space.
	 */
	mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt));
	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and setup locks for lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_unlock(&mbuf_mtx);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}
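
/*
 * The nmbclusters/nmbufs/nmbcnt limits above are boot-time tunables
 * exported as read-only sysctls, and mb_map is sized from all three in
 * mbinit(), so they cannot grow once the system is up.  As an
 * illustrative sketch (the value is hypothetical), an administrator
 * could raise the cluster limit from loader.conf(5):
 *
 *	kern.ipc.nmbclusters="8192"
 *
 * and inspect the running value with sysctl(8):
 *
 *	sysctl kern.ipc.nmbclusters
 */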

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(u_int nmb, int how)
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't need wait routines for the counters.)
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		if (1 /* XXX: how == M_TRYWAIT */)
			mtx_lock(&mbuf_mtx);
		return (0);
	}
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(int nmb, int how)
{
	caddr_t p;
	int i;
	int nbytes;

	nbytes = round_page(nmb * MSIZE);
	nmb = nbytes / MSIZE;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */) {
		mtx_lock(&mbuf_mtx);
		if (p == NULL)
			mbstat.m_wait++;
	}

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}
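
/*
 * Usage sketch (illustrative only, not part of this file): callers
 * normally reach m_mballoc() through the MGET()/MGETHDR() macros rather
 * than directly.  M_DONTWAIT is required where sleeping is forbidden
 * (e.g. interrupt context) and may fail; M_TRYWAIT may sleep and falls
 * back on m_mballoc_wait() below:
 *
 *	struct mbuf *m;
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */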

/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we request for the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time, at most.
 *
 * Must be called with the mmbfree mutex held.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines. Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (See comment above m_reclaim()).
	 */
	mtx_unlock(&mbuf_mtx);
	m_reclaim();

	mtx_lock(&mbuf_mtx);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		int retval;

		m_mballoc_wid++;
		retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
		    mbuf_wait);
		m_mballoc_wid--;

		/*
		 * If we got signaled (i.e. didn't time out), allocate.
		 */
		if (retval == 0)
			_MGET(p, M_DONTWAIT);
	}

	if (p != NULL) {
		mbstat.m_wait++;
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
	}

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(int ncl, int how)
{
	caddr_t p;
	int i;
	int npg_sz;

	npg_sz = round_page(ncl * MCLBYTES);
	ncl = npg_sz / MCLBYTES;

	/*
	 * Once the map is full, nothing will ever be freed back to it.
	 * If we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * block on a cv for a designated amount of time (mbuf_wait) or until we're
 * signaled due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;
	int retval;

	m_clalloc_wid++;
	retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
	m_clalloc_wid--;

	/*
	 * Now that we (think) we've got something, try again.
	 */
	if (retval == 0)
		_MCLALLOC(p, M_DONTWAIT);

	if (p != NULL) {
		mbstat.m_wait++;
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
	}

	return (p);
}
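
/*
 * Usage sketch (illustrative only): cluster storage is attached via the
 * MCLGET() macro, which draws on m_clalloc()/m_clalloc_wait() above when
 * the free list runs dry.  The same pattern appears in m_getm() below;
 * note that MCLGET() signals failure by leaving M_EXT clear, not by
 * returning NULL:
 *
 *	MGET(m, M_TRYWAIT, MT_DATA);
 *	if (m != NULL) {
 *		MCLGET(m, M_TRYWAIT);
 *		if ((m->m_flags & M_EXT) == 0) {
 *			m_free(m);
 *			m = NULL;
 *		}
 *	}
 */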

/*
 * m_reclaim: drain protocols in hopes to free up some resources...
 *
 * XXX: No locks should be held going in here.  The drain routines have
 * to presently acquire some locks, which raises the possibility of lock
 * order violation if we're holding any mutex that is acquired in
 * reverse order relative to one of the locks in the drain routines.
 */
static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(curproc) == 0,
	    ("m_reclaim called with locks held"));
#endif

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * Some of these are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	if (m != NULL)
		bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

/*
 * struct mbuf *
 * m_getm(m, len, how, type)
 *
 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain. If m is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, int type)
{
	struct mbuf *top, *tail, *mp, *mtail = NULL;

	KASSERT(len >= 0, ("len is < 0 in m_getm"));

	MGET(mp, how, type);
	if (mp == NULL)
		return (NULL);
	else if (len > MINCLSIZE) {
		MCLGET(mp, how);
		if ((mp->m_flags & M_EXT) == 0) {
			m_free(mp);
			return (NULL);
		}
	}
	mp->m_len = 0;
	len -= M_TRAILINGSPACE(mp);

	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		m = mp;

	top = tail = mp;
	while (len > 0) {
		MGET(mp, how, type);
		if (mp == NULL)
			goto failed;

		tail->m_next = mp;
		tail = mp;
		if (len > MINCLSIZE) {
			MCLGET(mp, how);
			if ((mp->m_flags & M_EXT) == 0)
				goto failed;
		}

		mp->m_len = 0;
		len -= M_TRAILINGSPACE(mp);
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return (m);

failed:
	m_freem(top);
	return (NULL);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We need to check non-first mbufs too, since some existing
		 * code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}
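
/*
 * Usage sketch (illustrative only): growing an existing chain m by
 * `len' bytes worth of mbufs and/or clusters with m_getm().  On
 * failure m_getm() frees only what it allocated itself; the caller
 * decides what to do with the original chain:
 *
 *	if (m_getm(m, len, M_TRYWAIT, MT_DATA) == NULL) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 */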

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL) {
		mtx_lock(&mbuf_mtx);
		mbstat.m_mcfail++;
		mtx_unlock(&mbuf_mtx);
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
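
/*
 * Usage sketch (illustrative only): a read-only copy of a whole packet
 * shares cluster storage with the original through reference counting,
 * so it is cheap but must not be written to:
 *
 *	struct mbuf *copy;
 *
 *	copy = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (copy == NULL)
 *		...	(out of mbufs; mbstat.m_mcfail was bumped)
 */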

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
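
/*
 * Usage sketch (illustrative only; struct tcphdr stands in for any
 * fixed-size header): m_copydata() extracts bytes into caller-owned
 * storage without restructuring the chain, so it works even when the
 * header straddles mbuf boundaries:
 *
 *	struct tcphdr th;
 *
 *	m_copydata(m, off, sizeof(th), (caddr_t)&th);
 */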

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
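
/*
 * Usage sketch (illustrative only): unlike m_copym()/m_copypacket(),
 * m_dup() copies cluster contents instead of bumping reference counts,
 * so the result may be modified freely:
 *
 *	struct mbuf *wcopy;
 *
 *	wcopy = m_dup(m, M_TRYWAIT);
 *	if (wcopy != NULL)
 *		...	(wcopy is fully writable)
 */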

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mpfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
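
/*
 * Usage sketch (illustrative only; struct ip stands in for any header):
 * the classic m_pullup() idiom before dereferencing mtod().  Note that
 * m_pullup() frees the chain on failure, so the caller must not touch
 * the old pointer afterwards:
 *
 *	struct ip *ip;
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */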

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == NULL && len +
				    max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
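
/*
 * Usage sketch (illustrative only; `reclen' is hypothetical): peeling
 * the first reclen bytes off a chain, e.g. one record out of a socket
 * buffer.  m itself keeps the first reclen bytes:
 *
 *	struct mbuf *rest;
 *
 *	rest = m_split(m, reclen, M_TRYWAIT);
 *	if (rest == NULL)
 *		...	(chain was left as it was found)
 */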

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}
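
/*
 * Usage sketch (illustrative only; the field and value are hypothetical):
 * m_copyback() overwrites bytes in place, extending the chain only if
 * `off + len' runs past its end, e.g. rewriting a checksum field:
 *
 *	u_short sum = 0;
 *
 *	m_copyback(m, offsetof(struct ip, ip_sum), sizeof(sum),
 *	    (caddr_t)&sum);
 */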