uipc_mbuf.c revision 75686
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/uipc_mbuf.c 75686 2001-04-18 23:54:13Z bmilekic $
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit(void *);
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	nmbcnt;
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;
/*
 * Free list header structures:
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
struct mtx mbuf_mtx;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
    &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
    &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
    &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
    &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
    sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
    "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
    "Maximum number of ext_buf counters available");

#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif

TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);

static void	m_reclaim(void);

/* Initial allocation numbers */
#define NCL_INIT	2
#define NMB_INIT	16
#define REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization is done here.
 *
 * XXX: If we ever have system-specific map setups to do, move them to
 * machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(void *dummy)
{
	vm_offset_t maxaddr;
	vm_size_t mb_map_size;

	/*
	 * Set up the mb_map, allocating the requested VM space.
	 */
	mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt));
	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers and set up the locks for the lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");

	/*
	 * Initialize the mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_unlock(&mbuf_mtx);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}
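
/*
 * Illustrative note (not part of the original file): because of the
 * TUNABLE_INT_DECL() entries above, the sizing inputs to mbinit()'s
 * mb_map computation can be overridden from the boot loader before this
 * SYSINIT runs.  A sketch, assuming a standard /boot/loader.conf setup:
 *
 *	# /boot/loader.conf
 *	kern.ipc.nmbclusters="8192"
 *	kern.ipc.nmbufs="32768"
 */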
/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(u_int nmb, int how)
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		if (1 /* XXX: how == M_TRYWAIT */)
			mtx_lock(&mbuf_mtx);
		return (0);
	}
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}

/*
 * Allocate at least nmb mbufs and place them on the mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(int nmb, int how)
{
	caddr_t p;
	int i;
	int nbytes;

	nbytes = round_page(nmb * MSIZE);
	nmb = nbytes / MSIZE;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */) {
		mtx_lock(&mbuf_mtx);
		if (p == NULL)
			mbstat.m_wait++;
	}

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}
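
/*
 * Illustrative sketch (not part of the original file): both allocators
 * above are entered with mbuf_mtx held, exactly as mbinit() does for the
 * initial allocations; a caller growing the mbuf pool would follow the
 * same lock discipline:
 *
 *	mtx_lock(&mbuf_mtx);
 *	if (m_mballoc(32, M_DONTWAIT) == 0)
 *		(free list was not grown; fall back or fail)
 *	mtx_unlock(&mbuf_mtx);
 */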
/*
 * Once mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we request that the protocols free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time, at most.
 *
 * Must be called with the mmbfree mutex held.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines. Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (See comment above m_reclaim()).
	 */
	mtx_unlock(&mbuf_mtx);
	m_reclaim();

	mtx_lock(&mbuf_mtx);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		int retval;

		m_mballoc_wid++;
		retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
		    mbuf_wait);
		m_mballoc_wid--;

		/*
		 * If we got signaled (i.e. didn't time out), allocate.
		 */
		if (retval == 0)
			_MGET(p, M_DONTWAIT);
	}

	if (p != NULL) {
		mbstat.m_wait++;
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
	}

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place them on the cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(int ncl, int how)
{
	caddr_t p;
	int i;
	int npg_sz;

	npg_sz = round_page(ncl * MCLBYTES);
	ncl = npg_sz / MCLBYTES;

	/*
	 * If the map is now full (nothing will ever be freed to it) or
	 * if we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * block on a cv for a designated amount of time (mbuf_wait) or until we're
 * signaled due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;
	int retval;

	m_clalloc_wid++;
	retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
	m_clalloc_wid--;

	/*
	 * Now that we think we've got something, try again.
	 */
	if (retval == 0)
		_MCLALLOC(p, M_DONTWAIT);

	if (p != NULL) {
		mbstat.m_wait++;
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
	}

	return (p);
}
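
/*
 * Illustrative sketch (not part of the original file): this is roughly
 * the shape of the M_TRYWAIT fallback path that leads into the two wait
 * routines above; the real logic lives in the allocation macros in
 * <sys/mbuf.h>, so this simplification is an assumption:
 *
 *	mtx_lock(&mbuf_mtx);
 *	_MGET(m, M_DONTWAIT);
 *	if (m == NULL)
 *		m = m_mballoc_wait();	(entered with mmbfree mutex held)
 *	mtx_unlock(&mbuf_mtx);
 */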
/*
 * m_reclaim: drain protocols in hopes to free up some resources...
 *
 * XXX: No locks should be held going in here. The drain routines presently
 * have to acquire some locks, which raises the possibility of a lock order
 * violation if we're holding a mutex that one of the drain routines acquires
 * in reverse order relative to its own locks.
 */
static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(curproc) == 0,
	    ("m_reclaim called with locks held"));
#endif

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * Some of these are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	if (m != NULL)
		bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

/*
 * struct mbuf *
 * m_getm(m, len, how, type)
 *
 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain. If m is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, int type)
{
	struct mbuf *top, *tail, *mp, *mtail = NULL;

	KASSERT(len >= 0, ("len is < 0 in m_getm"));

	MGET(mp, how, type);
	if (mp == NULL)
		return (NULL);
	else if (len > MINCLSIZE) {
		MCLGET(mp, how);
		if ((mp->m_flags & M_EXT) == 0) {
			m_free(mp);
			return (NULL);
		}
	}
	mp->m_len = 0;
	len -= M_TRAILINGSPACE(mp);

	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		m = mp;

	top = tail = mp;
	while (len > 0) {
		MGET(mp, how, type);
		if (mp == NULL)
			goto failed;

		tail->m_next = mp;
		tail = mp;
		if (len > MINCLSIZE) {
			MCLGET(mp, how);
			if ((mp->m_flags & M_EXT) == 0)
				goto failed;
		}

		mp->m_len = 0;
		len -= M_TRAILINGSPACE(mp);
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return (m);

failed:
	m_freem(top);
	return (NULL);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We do need to check non-first mbufs, since some existing
		 * code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}
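
/*
 * Illustrative sketch (not part of the original file): a caller that
 * needs a page's worth of buffer space could use m_getm() above to get a
 * cluster-backed chain in one call; the error value shown is an
 * assumption about caller convention:
 *
 *	struct mbuf *m;
 *
 *	m = m_getm(NULL, 4096, M_TRYWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);	(nothing to free; m_getm cleaned up)
 */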
/*
 * Lesser-used path for M_PREPEND:
 * allocate a new mbuf to prepend to the chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL) {
		mtx_lock(&mbuf_mtx);
		mbstat.m_mcfail++;
		mtx_unlock(&mbuf_mtx);
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
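
/*
 * Illustrative sketch (not part of the original file): a cheap read-only
 * copy of an entire packet, e.g. for a tap consumer, leans on the
 * reference-counted cluster sharing described above:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		(copy failed; the original chain m is untouched)
 */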
/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
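
/*
 * Illustrative sketch (not part of the original file): m_copydata() is
 * the usual way to pull a fixed-size header into local storage without
 * caring how the chain is fragmented; `struct ip' stands in for any
 * header type here:
 *
 *	struct ip iph;
 *
 *	if (m->m_pkthdr.len >= sizeof(iph))
 *		m_copydata(m, 0, sizeof(iph), (caddr_t)&iph);
 */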
/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
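
/*
 * Illustrative sketch (not part of the original file): when the copy is
 * going to be written to (headers rewritten in place, say), the deep
 * copy made by m_dup() above is needed rather than the cluster-sharing
 * m_copypacket():
 *
 *	struct mbuf *w;
 *
 *	w = m_dup(m, M_DONTWAIT);
 *	if (w != NULL)
 *		(w may be modified without disturbing m)
 */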
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
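
/*
 * Illustrative sketch (not part of the original file): the canonical
 * m_pullup() caller pattern, making a header contiguous before casting
 * it via mtod(); `struct ip' stands in for any header type:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;			(chain was freed by m_pullup)
 *	ip = mtod(m, struct ip *);
 */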
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == NULL && len +
				    max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
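
/*
 * Illustrative sketch (not part of the original file): a driver receive
 * path might use m_devget() to copy a frame out of board memory into a
 * fresh chain; `sc' and its fields are hypothetical driver state:
 *
 *	m = m_devget(sc->rx_buf, frame_len, 0, &sc->arpcom.ac_if, NULL);
 *	if (m == NULL)
 *		ifp->if_ierrors++;
 */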
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}
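
/*
 * Illustrative sketch (not part of the original file): patching a field
 * at a known offset with m_copyback() and dumping the result with
 * m_print(); `off' and `csum' are hypothetical:
 *
 *	u_short csum = 0;
 *
 *	m_copyback(m, off, sizeof(csum), (caddr_t)&csum);
 *	m_print(m);		(requires a valid m_pkthdr.len)
 */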