uipc_mbuf.c revision 75105
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/uipc_mbuf.c 75105 2001-04-03 03:15:11Z alfred $
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit(void *);
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	nmbcnt;
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;

/*
 * freelist header structures...
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
struct mtx mbuf_mtx;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
    &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
    &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
    &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
    &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
    sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
    "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
    "Maximum number of ext_buf counters available");

#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif

TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);

static void	m_reclaim(void);

/* Initial allocation numbers */
#define NCL_INIT	2
#define NMB_INIT	16
#define REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization done here.
 *
 * XXX: If ever we have system specific map setups to do, then move them to
 *      machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(void *dummy)
{
	vm_offset_t maxaddr, mb_map_size;

	/*
	 * Set up the mb_map, allocate requested VM space.
	 */
	mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt);
	mb_map_size = roundup2(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and set up locks for the lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_unlock(&mbuf_mtx);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}
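
/*
 * Worked sizing example (illustrative; not part of the original file):
 * with the defaults above on a MAXUSERS=32 kernel, NMBCLUSTERS comes to
 * 512 + 32 * 16 = 1024 and nmbufs defaults to 4 * nmbclusters = 4096,
 * so mbinit() reserves roughly
 *
 *	4096 * MSIZE + 1024 * MCLBYTES + nmbcnt * sizeof(union mext_refcnt)
 *
 * bytes of kernel VM, rounded up to a page, for the mb_map submap.
 * With the usual MSIZE of 256 and MCLBYTES of 2048 that is about 1 MB of
 * mbufs plus 2 MB of clusters, plus the reference counter space.
 */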

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(u_int nmb, int how)
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		if (1 /* XXX: how == M_TRYWAIT */)
			mtx_lock(&mbuf_mtx);
		return (0);
	}
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(int nmb, int how)
{
	caddr_t p;
	int i;
	int nbytes;

	nbytes = round_page(nmb * MSIZE);
	nmb = nbytes / MSIZE;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */) {
		mtx_lock(&mbuf_mtx);
		if (p == NULL)
			mbstat.m_wait++;
	}

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}
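
/*
 * Caller sketch (illustrative only, under the locking rules stated above):
 * an allocation path such as the _MGET() macro is expected to do roughly
 * the following, with mbuf_mtx standing in for the mmbfree lock:
 *
 *	mtx_lock(&mbuf_mtx);
 *	if (mmbfree.m_head == NULL)
 *		(void)m_mballoc(1, M_DONTWAIT);
 *	m = mmbfree.m_head;
 *	if (m != NULL)
 *		mmbfree.m_head = m->m_next;	(dequeue from free list)
 *	mtx_unlock(&mbuf_mtx);
 *
 * This is why m_mballoc() leaves the mutex held on return: the caller's
 * dequeue from mmbfree must happen under the same lock hold.
 */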

/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we ask the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time.
 *
 * Must be called with the mmbfree mutex held.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines. Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (see comment above m_reclaim()).
	 */
	mtx_unlock(&mbuf_mtx);
	m_reclaim();

	mtx_lock(&mbuf_mtx);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		m_mballoc_wid++;
		msleep(&m_mballoc_wid, &mbuf_mtx, PVM, "mballc",
		    mbuf_wait);
		m_mballoc_wid--;

		/*
		 * Try again (one last time).
		 *
		 * We retry to fetch _even_ if the sleep timed out. This
		 * is left this way, purposely, in the [unlikely] case
		 * that an mbuf was freed but the sleep was not awoken
		 * in time.
		 *
		 * If the sleep didn't time out (i.e. we got woken up) then
		 * we have the lock so we just grab an mbuf, hopefully.
		 */
		_MGET(p, M_DONTWAIT);
	}

	/* If we waited and got something... */
	if (p != NULL) {
		mbstat.m_wait++;
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid);
	}

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(int ncl, int how)
{
	caddr_t p;
	int i;
	int npg_sz;

	npg_sz = round_page(ncl * MCLBYTES);
	ncl = npg_sz / MCLBYTES;

	/*
	 * Stop allocating from mb_map if the map is now full (nothing
	 * will ever be freed back to it) or if we've hit the mcluster
	 * number limit.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * sleep for a designated amount of time (mbuf_wait) or until we're woken up
 * due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;

	m_clalloc_wid++;
	msleep(&m_clalloc_wid, &mbuf_mtx, PVM, "mclalc", mbuf_wait);
	m_clalloc_wid--;

	/*
	 * Now that we (think) we've got something, try again.
	 */
	_MCLALLOC(p, M_DONTWAIT);

	/* If we waited and got something ... */
	if (p != NULL) {
		mbstat.m_wait++;
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid);
	}

	return (p);
}
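
/*
 * Wakeup-protocol note (illustrative; not part of the original file):
 * both *_wait() routines above count sleepers in m_mballoc_wid and
 * m_clalloc_wid and sleep on those counters' addresses.  The freeing
 * side (assumed here to be the MFREE()/MCLFREE() macros in sys/mbuf.h)
 * is then expected to hand buffers over with the same pattern the
 * routines above use internally, e.g.
 *
 *	if (mmbfree.m_head != NULL)
 *		MBWAKEUP(m_mballoc_wid);
 *
 * so that one sleeper at a time is retried against the free list.
 */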

/*
 * m_reclaim: drain protocols in the hope of freeing up some resources...
 *
 * XXX: No locks should be held going in here. The drain routines have
 * to presently acquire some locks, which raises the possibility of a lock
 * order violation if we're holding any mutex that is acquired in reverse
 * order relative to one of the locks in the drain routines.
 */
static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(CURPROC) == 0,
	    ("m_reclaim called with locks held"));
#endif

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * Some of these are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	if (m != NULL)
		bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

/*
 * struct mbuf *
 * m_getm(m, len, how, type)
 *
 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain. If m is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, int type)
{
	struct mbuf *top, *tail, *mp, *mtail = NULL;

	KASSERT(len >= 0, ("len is < 0 in m_getm"));

	MGET(mp, how, type);
	if (mp == NULL)
		return (NULL);
	else if (len > MINCLSIZE) {
		MCLGET(mp, how);
		if ((mp->m_flags & M_EXT) == 0) {
			m_free(mp);
			return (NULL);
		}
	}
	mp->m_len = 0;
	len -= M_TRAILINGSPACE(mp);

	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
			;
	else
		m = mp;

	top = tail = mp;
	while (len > 0) {
		MGET(mp, how, type);
		if (mp == NULL)
			goto failed;

		tail->m_next = mp;
		tail = mp;
		if (len > MINCLSIZE) {
			MCLGET(mp, how);
			if ((mp->m_flags & M_EXT) == 0)
				goto failed;
		}

		mp->m_len = 0;
		len -= M_TRAILINGSPACE(mp);
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return (m);

failed:
	m_freem(top);
	return (NULL);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We do need to check non-first mbufs, since some existing
		 * code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}
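
/*
 * Usage sketch for m_getm() (illustrative only): extending an existing
 * chain m0 by 4 KB of buffer space, letting the routine choose between
 * plain mbufs and clusters:
 *
 *	m = m_getm(m0, 4096, M_TRYWAIT, MT_DATA);
 *	if (m == NULL)
 *		... allocation failed; m0 is left untouched ...
 *
 * On success the return value is m0 itself (or a fresh chain when m0 was
 * NULL), with at least 4096 bytes of trailing space appended.
 */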

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL) {
		mtx_lock(&mbuf_mtx);
		mbstat.m_mcfail++;
		mtx_unlock(&mbuf_mtx);
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
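
/*
 * Usage sketch for m_copym() (illustrative only): taking a read-only copy
 * of a whole packet, e.g. for a tap/monitor path, before the original is
 * consumed:
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n != NULL)
 *		... hand n off; cluster data is shared with m, so
 *		    neither chain may modify it in place ...
 */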

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
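
/*
 * Usage sketch for m_copydata() (illustrative only): pulling a fixed-size
 * header out of a chain into local storage, regardless of how the chain
 * happens to be fragmented:
 *
 *	struct ip iphdr;
 *
 *	m_copydata(m, 0, sizeof(iphdr), (caddr_t)&iphdr);
 *
 * Unlike m_pullup() further below, this copies into caller-supplied
 * memory and never reshapes or frees the chain.
 */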

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
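
/*
 * Note (illustrative; not part of the original file): m_dup() is the
 * deep-copy counterpart of m_copym()/m_copypacket() above.  A caller that
 * intends to modify packet data in place would use
 *
 *	n = m_dup(m, M_DONTWAIT);
 *
 * since the shallow copies share cluster storage with the original and
 * are therefore read-only.
 */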

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
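
/*
 * Usage sketch for m_pullup() (illustrative only): the classic
 * protocol-input idiom, making sure a header can be dereferenced
 * through mtod():
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;		(the chain was freed by m_pullup)
 *	ip = mtod(m, struct ip *);
 */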

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0;	/* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == NULL &&
				    len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
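
/*
 * Usage sketch for m_devget() (illustrative only): a receive interrupt
 * handler copying a frame out of board-local memory into a fresh chain:
 *
 *	m = m_devget(sc_rxbuf, pkt_len, 0, &sc_if, NULL);
 *	if (m == NULL)
 *		... drop the frame; no mbufs available ...
 *
 * (sc_rxbuf, pkt_len and sc_if are hypothetical driver softc fields.
 * Passing a NULL copy routine falls back to bcopy(), as above.)
 */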

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}
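
/*
 * Debugging note for m_print() (illustrative; not part of the original
 * file): the routine walks m_pkthdr.len bytes, so it assumes a
 * packet-header mbuf with an accurate length, e.g. from a debug hook:
 *
 *	m_print(m);		(dumps each mbuf's data as hex bytes)
 */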