uipc_mbuf.c revision 78064
1/* 2 * Copyright (c) 1982, 1986, 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 34 * $FreeBSD: head/sys/kern/uipc_mbuf.c 78064 2001-06-11 12:39:29Z ume $ 35 */ 36 37#include "opt_param.h" 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/condvar.h> 41#include <sys/kernel.h> 42#include <sys/lock.h> 43#include <sys/malloc.h> 44#include <sys/mbuf.h> 45#include <sys/mutex.h> 46#include <sys/sysctl.h> 47#include <sys/domain.h> 48#include <sys/protosw.h> 49 50#include <vm/vm.h> 51#include <vm/vm_kern.h> 52#include <vm/vm_extern.h> 53 54#ifndef NMBCLUSTERS 55#define NMBCLUSTERS (512 + MAXUSERS * 16) 56#endif 57 58static void mbinit(void *); 59SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) 60 61struct mbuf *mbutl; 62struct mbstat mbstat; 63u_long mbtypes[MT_NTYPES]; 64int max_linkhdr; 65int max_protohdr; 66int max_hdr; 67int max_datalen; 68int nmbclusters = NMBCLUSTERS; 69int nmbufs = NMBCLUSTERS * 4; 70int nmbcnt; 71u_long m_mballoc_wid = 0; 72u_long m_clalloc_wid = 0; 73 74/* 75 * freelist header structures... 
76 * mbffree_lst, mclfree_lst, mcntfree_lst 77 */ 78struct mbffree_lst mmbfree; 79struct mclfree_lst mclfree; 80struct mcntfree_lst mcntfree; 81struct mtx mbuf_mtx; 82 83/* 84 * sysctl(8) exported objects 85 */ 86SYSCTL_DECL(_kern_ipc); 87SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, 88 &max_linkhdr, 0, ""); 89SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, 90 &max_protohdr, 0, ""); 91SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); 92SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, 93 &max_datalen, 0, ""); 94SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, 95 &mbuf_wait, 0, ""); 96SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, ""); 97SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes, 98 sizeof(mbtypes), "LU", ""); 99SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, 100 &nmbclusters, 0, "Maximum number of mbuf clusters available"); 101SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, 102 "Maximum number of mbufs available"); 103SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0, 104 "Maximum number of ext_buf counters available"); 105 106TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters); 107TUNABLE_INT("kern.ipc.nmbufs", &nmbufs); 108TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt); 109 110static void m_reclaim(void); 111 112/* Initial allocation numbers */ 113#define NCL_INIT 2 114#define NMB_INIT 16 115#define REF_INIT NMBCLUSTERS 116 117/* 118 * Full mbuf subsystem initialization done here. 119 * 120 * XXX: If ever we have system specific map setups to do, then move them to 121 * machdep.c - for now, there is no reason for this stuff to go there. 
 */
static void
mbinit(void *dummy)
{
	vm_offset_t maxaddr;
	vm_size_t mb_map_size;

	/*
	 * Sanity checks and pre-initialization for non-constants.
	 * Keep at least two mbufs per cluster so a cluster can always
	 * be anchored; fall back to the compile-time EXT_COUNTERS
	 * default when no tunable was supplied.
	 */
	if (nmbufs < nmbclusters * 2)
		nmbufs = nmbclusters * 2;
	if (nmbcnt == 0)
		nmbcnt = EXT_COUNTERS;

	/*
	 * Setup the mb_map, allocate requested VM space.
	 * The submap is sized to hold the worst case of mbufs, clusters,
	 * and external-buffer reference counters all fully allocated.
	 */
	mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt));
	/* Round down so we never ask for more than fits in whole pages. */
	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and setup locks for lists.
	 * All three free lists share the single mbuf_mtx; the condvars
	 * are signalled when a starved list gains members.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.  These are done M_DONTWAIT
	 * because it is too early in boot to sleep; any failure here is
	 * unrecoverable, hence the panic.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_unlock(&mbuf_mtx);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(u_int nmb, int how)
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accomodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * succesfully, as long as there is physical memory to accomodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	/*
	 * Drop the free-list mutex around kmem_malloc(), which may sleep.
	 * The "if (1 ...)" guard is deliberate: it unconditionally drops
	 * the lock for now; the XXX records the intent to only do so for
	 * M_TRYWAIT callers once that is proven safe.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		if (1 /* XXX: how == M_TRYWAIT */)
			mtx_lock(&mbuf_mtx);
		return (0);
	}
	/* We may have gotten whole extra pages; recompute the real count. */
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);
	/* Thread every counter in the new page(s) onto the free list. */
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(int nmb, int how)
{
	caddr_t p;
	int i;
	int nbytes;

	/* Round the request up to whole pages and recompute the count. */
	nbytes = round_page(nmb * MSIZE);
	nmb = nbytes / MSIZE;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
		return (0);

	/* Drop the list mutex across kmem_malloc(), which may sleep. */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */) {
		mtx_lock(&mbuf_mtx);
		if (p == NULL)
			mbstat.m_wait++;
	}

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}

/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we request for the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time, at most.
 *
 * Must be called with the mmbfree mutex held.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines.  Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (See comment above m_reclaim()).
	 */
	mtx_unlock(&mbuf_mtx);
	m_reclaim();

	mtx_lock(&mbuf_mtx);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		int retval;

		/* Record one more waiter while we sleep on the condvar. */
		m_mballoc_wid++;
		retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
		    mbuf_wait);
		m_mballoc_wid--;

		/*
		 * If we got signaled (i.e. didn't time out), allocate.
		 */
		if (retval == 0)
			_MGET(p, M_DONTWAIT);
	}

	if (p != NULL) {
		mbstat.m_wait++;
		/* Pass the wakeup on if there is still something on the list. */
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
	}

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(int ncl, int how)
{
	caddr_t p;
	int i;
	int npg_sz;

	/* Round the request up to whole pages and recompute the count. */
	npg_sz = round_page(ncl * MCLBYTES);
	ncl = npg_sz / MCLBYTES;

	/*
	 * If the map is now full (nothing will ever be freed to it).
	 * If we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
		return (0);

	/* Drop the list mutex across kmem_malloc(), which may sleep. */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * block on a cv for a designated amount of time (mbuf_wait) or until we're
 * signaled due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;
	int retval;

	m_clalloc_wid++;
	retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
	m_clalloc_wid--;

	/*
	 * Now that we (think) that we've got something, try again.
	 */
	if (retval == 0)
		_MCLALLOC(p, M_DONTWAIT);

	if (p != NULL) {
		mbstat.m_wait++;
		/* Pass the wakeup on if clusters remain available. */
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
	}

	return (p);
}

/*
 * m_reclaim: drain protocols in hopes to free up some resources...
 *
 * XXX: No locks should be held going in here. The drain routines have
 * to presently acquire some locks which raises the possibility of lock
 * order violation if we're holding any mutex if that mutex is acquired in
 * reverse order relative to one of the locks in the drain routines.
 */
static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(curproc) == 0,
	    ("m_reclaim called with locks held"));
#endif

	/*
	 * Walk every protocol of every domain and invoke its drain hook.
	 * (dom_protoswNPROTOSW is the historical BSD name for the
	 * one-past-the-end protosw pointer, not a typo.)
	 */
	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
458 * Some of these are also available as macros 459 * for critical paths. 460 */ 461struct mbuf * 462m_get(int how, int type) 463{ 464 struct mbuf *m; 465 466 MGET(m, how, type); 467 return (m); 468} 469 470struct mbuf * 471m_gethdr(int how, int type) 472{ 473 struct mbuf *m; 474 475 MGETHDR(m, how, type); 476 return (m); 477} 478 479struct mbuf * 480m_getclr(int how, int type) 481{ 482 struct mbuf *m; 483 484 MGET(m, how, type); 485 if (m != NULL) 486 bzero(mtod(m, caddr_t), MLEN); 487 return (m); 488} 489 490struct mbuf * 491m_free(struct mbuf *m) 492{ 493 struct mbuf *n; 494 495 MFREE(m, n); 496 return (n); 497} 498 499/* 500 * struct mbuf * 501 * m_getm(m, len, how, type) 502 * 503 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits 504 * best) and return a pointer to the top of the allocated chain. If m is 505 * non-null, then we assume that it is a single mbuf or an mbuf chain to 506 * which we want len bytes worth of mbufs and/or clusters attached, and so 507 * if we succeed in allocating it, we will just return a pointer to m. 508 * 509 * If we happen to fail at any point during the allocation, we will free 510 * up everything we have already allocated and return NULL. 
 *
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, int type)
{
	struct mbuf *top, *tail, *mp, *mtail = NULL;

	KASSERT(len >= 0, ("len is < 0 in m_getm"));

	/* Allocate the first mbuf, with a cluster if len warrants it. */
	MGET(mp, how, type);
	if (mp == NULL)
		return (NULL);
	else if (len > MINCLSIZE) {
		MCLGET(mp, how);
		if ((mp->m_flags & M_EXT) == 0) {
			m_free(mp);
			return (NULL);
		}
	}
	mp->m_len = 0;
	len -= M_TRAILINGSPACE(mp);

	/* Find the tail of the caller's chain, if one was supplied. */
	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		m = mp;

	/* Keep appending mbufs (clustered when possible) until len is met. */
	top = tail = mp;
	while (len > 0) {
		MGET(mp, how, type);
		if (mp == NULL)
			goto failed;

		tail->m_next = mp;
		tail = mp;
		if (len > MINCLSIZE) {
			MCLGET(mp, how);
			if ((mp->m_flags & M_EXT) == 0)
				goto failed;
		}

		mp->m_len = 0;
		len -= M_TRAILINGSPACE(mp);
	}

	/* Splice the newly allocated chain onto the caller's tail. */
	if (mtail != NULL)
		mtail->m_next = top;
	return (m);

failed:
	/* Free only what we allocated; the caller's chain is untouched. */
	m_freem(top);
	return (NULL);
}

/*
 * Free an entire mbuf chain (NULL is a harmless no-op).
 */
void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 *
 * On allocation failure the whole chain m is freed and NULL returned.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	/* Move the packet header (if any) to the new head mbuf. */
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	/* Copy the packet header only for a full from-the-start copy. */
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	/* Skip over the first off bytes of the chain. */
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			/* Running off the end is only legal for M_COPYALL. */
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			/* Share the cluster: bump its refcount, don't copy. */
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	/* An empty result is counted as a copy failure in the stats. */
	if (top == NULL) {
		mtx_lock(&mbuf_mtx);
		mbstat.m_mcfail++;
		mtx_unlock(&mbuf_mtx);
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	/* First mbuf carries the packet header; handled specially below. */
	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		/* Share the cluster read-only; just bump its refcount. */
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		/* Preserve the original data offset within the first mbuf. */
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	/* Remaining mbufs: same logic, minus header and alignment work. */
	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	/* Skip whole mbufs until off falls within the current one. */
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	/* Copy out piecewise; off is only non-zero for the first mbuf. */
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters. Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			/* Advance to the next source mbuf when drained. */
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		/*
		 * Cannot compact into a read-only cluster mbuf or past the
		 * end of the internal data area, so...
		 */
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

/*
 * Trim req_len bytes from the chain: from the head if req_len >= 0,
 * from the tail if req_len < 0.  Adjusts m_pkthdr.len when present.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		/* len holds whatever could not be trimmed (short chain). */
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail. Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return. Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		/* Zero the lengths of all trailing mbufs (not freed). */
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len). Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		/* Opportunistically pull up to max_protohdr bytes if room. */
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mpfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes. In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain;

	/* Find the mbuf containing the split point. */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		/* Tail becomes a new packet: give it its own header. */
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			/* Recurse to split within m; header n carries no data. */
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		/* Split falls exactly on an mbuf boundary: just unlink. */
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		/* Share the cluster between both halves via the refcount. */
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If off is non-zero, the buffer is a ring: skip the offset
		 * plus trailer space (2 u_shorts — historical trailer
		 * protocol convention; see 4.4BSD if_ethersubr).
		 */
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				/* Cluster failed; fall back to plain mbuf. */
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == NULL && len +
				    max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		/* Wrap around the ring buffer when we hit its end. */
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	/* Walk to the target offset, growing the chain with zeroed mbufs. */
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			/* Extension in the gap is zero-filled (m_getclr). */
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	/* Overwrite in place, appending plain mbufs if len runs past the end. */
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
	/* Stretch the packet header length if the chain grew. */
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

/*
 * Debugging aid: hex-dump each mbuf of a packet-header chain to the
 * console.  NOTE(review): trusts m_pkthdr.len to match the chain; a
 * chain shorter than its header length would walk off the end.
 */
void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		/* %*D is the kernel printf hex-dump conversion. */
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}