uipc_mbuf.c revision 77066
1/* 2 * Copyright (c) 1982, 1986, 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 34 * $FreeBSD: head/sys/kern/uipc_mbuf.c 77066 2001-05-23 20:44:54Z bmilekic $ 35 */ 36 37#include "opt_param.h" 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/condvar.h> 41#include <sys/kernel.h> 42#include <sys/lock.h> 43#include <sys/malloc.h> 44#include <sys/mbuf.h> 45#include <sys/mutex.h> 46#include <sys/sysctl.h> 47#include <sys/domain.h> 48#include <sys/protosw.h> 49 50#include <vm/vm.h> 51#include <vm/vm_kern.h> 52#include <vm/vm_extern.h> 53 54static void mbinit(void *); 55SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) 56 57struct mbuf *mbutl; 58struct mbstat mbstat; 59u_long mbtypes[MT_NTYPES]; 60int max_linkhdr; 61int max_protohdr; 62int max_hdr; 63int max_datalen; 64int nmbclusters; 65int nmbufs; 66int nmbcnt; 67u_long m_mballoc_wid = 0; 68u_long m_clalloc_wid = 0; 69 70/* 71 * freelist header structures... 72 * mbffree_lst, mclfree_lst, mcntfree_lst 73 */ 74struct mbffree_lst mmbfree; 75struct mclfree_lst mclfree; 76struct mcntfree_lst mcntfree; 77struct mtx mbuf_mtx; 78 79/* 80 * sysctl(8) exported objects 81 */ 82SYSCTL_DECL(_kern_ipc); 83SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, 84 &max_linkhdr, 0, ""); 85SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, 86 &max_protohdr, 0, ""); 87SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); 88SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, 89 &max_datalen, 0, ""); 90SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, 91 &mbuf_wait, 0, ""); 92SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, ""); 93SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes, 94 sizeof(mbtypes), "LU", ""); 95SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, 96 &nmbclusters, 0, "Maximum number of mbuf clusters available"); 97SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, 98 "Maximum number of mbufs available"); 99SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0, 100 "Maximum number of ext_buf counters available"); 101 102#ifndef NMBCLUSTERS 103#define NMBCLUSTERS (512 + MAXUSERS * 16) 104#endif 105 106TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters); 107TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs); 108TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt); 109 110static void m_reclaim(void); 111 112/* Initial allocation numbers */ 113#define NCL_INIT 2 114#define NMB_INIT 16 115#define REF_INIT NMBCLUSTERS 116 117/* 118 * Full mbuf subsystem initialization done here. 119 * 120 * XXX: If ever we have system specific map setups to do, then move them to 121 * machdep.c - for now, there is no reason for this stuff to go there. 122 */ 123static void 124mbinit(void *dummy) 125{ 126 vm_offset_t maxaddr; 127 vm_size_t mb_map_size; 128 129 /* 130 * Setup the mb_map, allocate requested VM space. 131 */ 132 mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES + 133 nmbcnt * sizeof(union mext_refcnt)); 134 mb_map_size = rounddown(mb_map_size, PAGE_SIZE); 135 mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, 136 mb_map_size); 137 /* XXX XXX XXX: mb_map->system_map = 1; */ 138 139 /* 140 * Initialize the free list headers, and setup locks for lists. 141 */ 142 mmbfree.m_head = NULL; 143 mclfree.m_head = NULL; 144 mcntfree.m_head = NULL; 145 mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF); 146 cv_init(&mmbfree.m_starved, "mbuf free list starved cv"); 147 cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv"); 148 149 /* 150 * Initialize mbuf subsystem (sysctl exported) statistics structure. 151 */ 152 mbstat.m_msize = MSIZE; 153 mbstat.m_mclbytes = MCLBYTES; 154 mbstat.m_minclsize = MINCLSIZE; 155 mbstat.m_mlen = MLEN; 156 mbstat.m_mhlen = MHLEN; 157 158 /* 159 * Perform some initial allocations. 160 */ 161 mtx_lock(&mbuf_mtx); 162 if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0) 163 goto bad; 164 if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) 165 goto bad; 166 if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) 167 goto bad; 168 mtx_unlock(&mbuf_mtx); 169 170 return; 171bad: 172 panic("mbinit: failed to initialize mbuf subsystem!"); 173} 174 175/* 176 * Allocate at least nmb reference count structs and place them 177 * on the ref cnt free list. 178 * 179 * Must be called with the mcntfree lock held. 180 */ 181int 182m_alloc_ref(u_int nmb, int how) 183{ 184 caddr_t p; 185 u_int nbytes; 186 int i; 187 188 /* 189 * We don't cap the amount of memory that can be used 190 * by the reference counters, like we do for mbufs and 191 * mbuf clusters. In fact, we're absolutely sure that we 192 * won't ever be going over our allocated space. We keep enough 193 * space in mb_map to accomodate maximum values of allocatable 194 * external buffers including, but not limited to, clusters. 195 * (That's also why we won't have to have wait routines for 196 * counters). 197 * 198 * If we're in here, we're absolutely certain to be returning 199 * succesfully, as long as there is physical memory to accomodate 200 * us. And if there isn't, but we're willing to wait, then 201 * kmem_malloc() will do the only waiting needed. 202 */ 203 204 nbytes = round_page(nmb * sizeof(union mext_refcnt)); 205 if (1 /* XXX: how == M_TRYWAIT */) 206 mtx_unlock(&mbuf_mtx); 207 if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ? 208 M_WAITOK : M_NOWAIT)) == NULL) { 209 if (1 /* XXX: how == M_TRYWAIT */) 210 mtx_lock(&mbuf_mtx); 211 return (0); 212 } 213 nmb = nbytes / sizeof(union mext_refcnt); 214 215 /* 216 * We don't let go of the mutex in order to avoid a race. 217 * It is up to the caller to let go of the mutex. 218 */ 219 if (1 /* XXX: how == M_TRYWAIT */) 220 mtx_lock(&mbuf_mtx); 221 for (i = 0; i < nmb; i++) { 222 ((union mext_refcnt *)p)->next_ref = mcntfree.m_head; 223 mcntfree.m_head = (union mext_refcnt *)p; 224 p += sizeof(union mext_refcnt); 225 mbstat.m_refree++; 226 } 227 mbstat.m_refcnt += nmb; 228 229 return (1); 230} 231 232/* 233 * Allocate at least nmb mbufs and place on mbuf free list. 234 * 235 * Must be called with the mmbfree lock held. 236 */ 237int 238m_mballoc(int nmb, int how) 239{ 240 caddr_t p; 241 int i; 242 int nbytes; 243 244 nbytes = round_page(nmb * MSIZE); 245 nmb = nbytes / MSIZE; 246 247 /* 248 * If we've hit the mbuf limit, stop allocating from mb_map. 249 * Also, once we run out of map space, it will be impossible to 250 * get any more (nothing is ever freed back to the map). 251 */ 252 if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) 253 return (0); 254 255 if (1 /* XXX: how == M_TRYWAIT */) 256 mtx_unlock(&mbuf_mtx); 257 p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ? 258 M_WAITOK : M_NOWAIT); 259 if (1 /* XXX: how == M_TRYWAIT */) { 260 mtx_lock(&mbuf_mtx); 261 if (p == NULL) 262 mbstat.m_wait++; 263 } 264 265 /* 266 * Either the map is now full, or `how' is M_DONTWAIT and there 267 * are no pages left. 268 */ 269 if (p == NULL) 270 return (0); 271 272 /* 273 * We don't let go of the mutex in order to avoid a race. 274 * It is up to the caller to let go of the mutex when done 275 * with grabbing the mbuf from the free list. 276 */ 277 for (i = 0; i < nmb; i++) { 278 ((struct mbuf *)p)->m_next = mmbfree.m_head; 279 mmbfree.m_head = (struct mbuf *)p; 280 p += MSIZE; 281 } 282 mbstat.m_mbufs += nmb; 283 mbtypes[MT_FREE] += nmb; 284 return (1); 285} 286 287/* 288 * Once the mb_map has been exhausted and if the call to the allocation macros 289 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to 290 * rely solely on reclaimed mbufs. 291 * 292 * Here we request for the protocols to free up some resources and, if we 293 * still cannot get anything, then we wait for an mbuf to be freed for a 294 * designated (mbuf_wait) time, at most. 295 * 296 * Must be called with the mmbfree mutex held. 297 */ 298struct mbuf * 299m_mballoc_wait(void) 300{ 301 struct mbuf *p = NULL; 302 303 /* 304 * See if we can drain some resources out of the protocols. 305 * We drop the mmbfree mutex to avoid recursing into it in some of 306 * the drain routines. Clearly, we're faced with a race here because 307 * once something is freed during the drain, it may be grabbed right 308 * from under us by some other thread. But we accept this possibility 309 * in order to avoid a potentially large lock recursion and, more 310 * importantly, to avoid a potential lock order reversal which may 311 * result in deadlock (See comment above m_reclaim()). 312 */ 313 mtx_unlock(&mbuf_mtx); 314 m_reclaim(); 315 316 mtx_lock(&mbuf_mtx); 317 _MGET(p, M_DONTWAIT); 318 319 if (p == NULL) { 320 int retval; 321 322 m_mballoc_wid++; 323 retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx, 324 mbuf_wait); 325 m_mballoc_wid--; 326 327 /* 328 * If we got signaled (i.e. didn't time out), allocate. 329 */ 330 if (retval == 0) 331 _MGET(p, M_DONTWAIT); 332 } 333 334 if (p != NULL) { 335 mbstat.m_wait++; 336 if (mmbfree.m_head != NULL) 337 MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved); 338 } 339 340 return (p); 341} 342 343/* 344 * Allocate some number of mbuf clusters 345 * and place on cluster free list. 346 * 347 * Must be called with the mclfree lock held. 348 */ 349int 350m_clalloc(int ncl, int how) 351{ 352 caddr_t p; 353 int i; 354 int npg_sz; 355 356 npg_sz = round_page(ncl * MCLBYTES); 357 ncl = npg_sz / MCLBYTES; 358 359 /* 360 * If the map is now full (nothing will ever be freed to it). 361 * If we've hit the mcluster number limit, stop allocating from 362 * mb_map. 363 */ 364 if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) 365 return (0); 366 367 if (1 /* XXX: how == M_TRYWAIT */) 368 mtx_unlock(&mbuf_mtx); 369 p = (caddr_t)kmem_malloc(mb_map, npg_sz, 370 how == M_TRYWAIT ? M_WAITOK : M_NOWAIT); 371 if (1 /* XXX: how == M_TRYWAIT */) 372 mtx_lock(&mbuf_mtx); 373 374 /* 375 * Either the map is now full, or `how' is M_DONTWAIT and there 376 * are no pages left. 377 */ 378 if (p == NULL) 379 return (0); 380 381 for (i = 0; i < ncl; i++) { 382 ((union mcluster *)p)->mcl_next = mclfree.m_head; 383 mclfree.m_head = (union mcluster *)p; 384 p += MCLBYTES; 385 mbstat.m_clfree++; 386 } 387 mbstat.m_clusters += ncl; 388 return (1); 389} 390 391/* 392 * Once the mb_map submap has been exhausted and the allocation is called with 393 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will 394 * block on a cv for a designated amount of time (mbuf_wait) or until we're 395 * signaled due to sudden mcluster availability. 396 * 397 * Must be called with the mclfree lock held. 398 */ 399caddr_t 400m_clalloc_wait(void) 401{ 402 caddr_t p = NULL; 403 int retval; 404 405 m_clalloc_wid++; 406 retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait); 407 m_clalloc_wid--; 408 409 /* 410 * Now that we (think) that we've got something, try again. 411 */ 412 if (retval == 0) 413 _MCLALLOC(p, M_DONTWAIT); 414 415 if (p != NULL) { 416 mbstat.m_wait++; 417 if (mclfree.m_head != NULL) 418 MBWAKEUP(m_clalloc_wid, &mclfree.m_starved); 419 } 420 421 return (p); 422} 423 424/* 425 * m_reclaim: drain protocols in hopes to free up some resources... 426 * 427 * XXX: No locks should be held going in here. The drain routines have 428 * to presently acquire some locks which raises the possibility of lock 429 * order violation if we're holding any mutex if that mutex is acquired in 430 * reverse order relative to one of the locks in the drain routines. 431 */ 432static void 433m_reclaim(void) 434{ 435 struct domain *dp; 436 struct protosw *pr; 437 438#ifdef WITNESS 439 KASSERT(witness_list(curproc) == 0, 440 ("m_reclaim called with locks held")); 441#endif 442 443 for (dp = domains; dp; dp = dp->dom_next) 444 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 445 if (pr->pr_drain) 446 (*pr->pr_drain)(); 447 mbstat.m_drain++; 448} 449 450/* 451 * Space allocation routines. 452 * Some of these are also available as macros 453 * for critical paths. 454 */ 455struct mbuf * 456m_get(int how, int type) 457{ 458 struct mbuf *m; 459 460 MGET(m, how, type); 461 return (m); 462} 463 464struct mbuf * 465m_gethdr(int how, int type) 466{ 467 struct mbuf *m; 468 469 MGETHDR(m, how, type); 470 return (m); 471} 472 473struct mbuf * 474m_getclr(int how, int type) 475{ 476 struct mbuf *m; 477 478 MGET(m, how, type); 479 if (m != NULL) 480 bzero(mtod(m, caddr_t), MLEN); 481 return (m); 482} 483 484struct mbuf * 485m_free(struct mbuf *m) 486{ 487 struct mbuf *n; 488 489 MFREE(m, n); 490 return (n); 491} 492 493/* 494 * struct mbuf * 495 * m_getm(m, len, how, type) 496 * 497 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits 498 * best) and return a pointer to the top of the allocated chain. If m is 499 * non-null, then we assume that it is a single mbuf or an mbuf chain to 500 * which we want len bytes worth of mbufs and/or clusters attached, and so 501 * if we succeed in allocating it, we will just return a pointer to m. 502 * 503 * If we happen to fail at any point during the allocation, we will free 504 * up everything we have already allocated and return NULL. 505 * 506 */ 507struct mbuf * 508m_getm(struct mbuf *m, int len, int how, int type) 509{ 510 struct mbuf *top, *tail, *mp, *mtail = NULL; 511 512 KASSERT(len >= 0, ("len is < 0 in m_getm")); 513 514 MGET(mp, how, type); 515 if (mp == NULL) 516 return (NULL); 517 else if (len > MINCLSIZE) { 518 MCLGET(mp, how); 519 if ((mp->m_flags & M_EXT) == 0) { 520 m_free(mp); 521 return (NULL); 522 } 523 } 524 mp->m_len = 0; 525 len -= M_TRAILINGSPACE(mp); 526 527 if (m != NULL) 528 for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); 529 else 530 m = mp; 531 532 top = tail = mp; 533 while (len > 0) { 534 MGET(mp, how, type); 535 if (mp == NULL) 536 goto failed; 537 538 tail->m_next = mp; 539 tail = mp; 540 if (len > MINCLSIZE) { 541 MCLGET(mp, how); 542 if ((mp->m_flags & M_EXT) == 0) 543 goto failed; 544 } 545 546 mp->m_len = 0; 547 len -= M_TRAILINGSPACE(mp); 548 } 549 550 if (mtail != NULL) 551 mtail->m_next = top; 552 return (m); 553 554failed: 555 m_freem(top); 556 return (NULL); 557} 558 559void 560m_freem(struct mbuf *m) 561{ 562 struct mbuf *n; 563 564 if (m == NULL) 565 return; 566 do { 567 /* 568 * we do need to check non-first mbuf, since some of existing 569 * code does not call M_PREPEND properly. 570 * (example: call to bpf_mtap from drivers) 571 */ 572 if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) { 573 m_freem(m->m_pkthdr.aux); 574 m->m_pkthdr.aux = NULL; 575 } 576 MFREE(m, n); 577 m = n; 578 } while (m); 579} 580 581/* 582 * Lesser-used path for M_PREPEND: 583 * allocate new mbuf to prepend to chain, 584 * copy junk along. 585 */ 586struct mbuf * 587m_prepend(struct mbuf *m, int len, int how) 588{ 589 struct mbuf *mn; 590 591 MGET(mn, how, m->m_type); 592 if (mn == NULL) { 593 m_freem(m); 594 return (NULL); 595 } 596 if (m->m_flags & M_PKTHDR) { 597 M_COPY_PKTHDR(mn, m); 598 m->m_flags &= ~M_PKTHDR; 599 } 600 mn->m_next = m; 601 m = mn; 602 if (len < MHLEN) 603 MH_ALIGN(m, len); 604 m->m_len = len; 605 return (m); 606} 607 608/* 609 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 610 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 611 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. 612 * Note that the copy is read-only, because clusters are not copied, 613 * only their reference counts are incremented. 614 */ 615struct mbuf * 616m_copym(struct mbuf *m, int off0, int len, int wait) 617{ 618 struct mbuf *n, **np; 619 int off = off0; 620 struct mbuf *top; 621 int copyhdr = 0; 622 623 KASSERT(off >= 0, ("m_copym, negative off %d", off)); 624 KASSERT(len >= 0, ("m_copym, negative len %d", len)); 625 if (off == 0 && m->m_flags & M_PKTHDR) 626 copyhdr = 1; 627 while (off > 0) { 628 KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); 629 if (off < m->m_len) 630 break; 631 off -= m->m_len; 632 m = m->m_next; 633 } 634 np = ⊤ 635 top = 0; 636 while (len > 0) { 637 if (m == NULL) { 638 KASSERT(len == M_COPYALL, 639 ("m_copym, length > size of mbuf chain")); 640 break; 641 } 642 MGET(n, wait, m->m_type); 643 *np = n; 644 if (n == NULL) 645 goto nospace; 646 if (copyhdr) { 647 M_COPY_PKTHDR(n, m); 648 if (len == M_COPYALL) 649 n->m_pkthdr.len -= off0; 650 else 651 n->m_pkthdr.len = len; 652 copyhdr = 0; 653 } 654 n->m_len = min(len, m->m_len - off); 655 if (m->m_flags & M_EXT) { 656 n->m_data = m->m_data + off; 657 n->m_ext = m->m_ext; 658 n->m_flags |= M_EXT; 659 MEXT_ADD_REF(m); 660 } else 661 bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), 662 (unsigned)n->m_len); 663 if (len != M_COPYALL) 664 len -= n->m_len; 665 off = 0; 666 m = m->m_next; 667 np = &n->m_next; 668 } 669 if (top == NULL) { 670 mtx_lock(&mbuf_mtx); 671 mbstat.m_mcfail++; 672 mtx_unlock(&mbuf_mtx); 673 } 674 return (top); 675nospace: 676 m_freem(top); 677 mtx_lock(&mbuf_mtx); 678 mbstat.m_mcfail++; 679 mtx_unlock(&mbuf_mtx); 680 return (NULL); 681} 682 683/* 684 * Copy an entire packet, including header (which must be present). 685 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 686 * Note that the copy is read-only, because clusters are not copied, 687 * only their reference counts are incremented. 688 * Preserve alignment of the first mbuf so if the creator has left 689 * some room at the beginning (e.g. for inserting protocol headers) 690 * the copies still have the room available. 691 */ 692struct mbuf * 693m_copypacket(struct mbuf *m, int how) 694{ 695 struct mbuf *top, *n, *o; 696 697 MGET(n, how, m->m_type); 698 top = n; 699 if (n == NULL) 700 goto nospace; 701 702 M_COPY_PKTHDR(n, m); 703 n->m_len = m->m_len; 704 if (m->m_flags & M_EXT) { 705 n->m_data = m->m_data; 706 n->m_ext = m->m_ext; 707 n->m_flags |= M_EXT; 708 MEXT_ADD_REF(m); 709 } else { 710 n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); 711 bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 712 } 713 714 m = m->m_next; 715 while (m) { 716 MGET(o, how, m->m_type); 717 if (o == NULL) 718 goto nospace; 719 720 n->m_next = o; 721 n = n->m_next; 722 723 n->m_len = m->m_len; 724 if (m->m_flags & M_EXT) { 725 n->m_data = m->m_data; 726 n->m_ext = m->m_ext; 727 n->m_flags |= M_EXT; 728 MEXT_ADD_REF(m); 729 } else { 730 bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 731 } 732 733 m = m->m_next; 734 } 735 return top; 736nospace: 737 m_freem(top); 738 mtx_lock(&mbuf_mtx); 739 mbstat.m_mcfail++; 740 mtx_unlock(&mbuf_mtx); 741 return (NULL); 742} 743 744/* 745 * Copy data from an mbuf chain starting "off" bytes from the beginning, 746 * continuing for "len" bytes, into the indicated buffer. 747 */ 748void 749m_copydata(struct mbuf *m, int off, int len, caddr_t cp) 750{ 751 unsigned count; 752 753 KASSERT(off >= 0, ("m_copydata, negative off %d", off)); 754 KASSERT(len >= 0, ("m_copydata, negative len %d", len)); 755 while (off > 0) { 756 KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); 757 if (off < m->m_len) 758 break; 759 off -= m->m_len; 760 m = m->m_next; 761 } 762 while (len > 0) { 763 KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); 764 count = min(m->m_len - off, len); 765 bcopy(mtod(m, caddr_t) + off, cp, count); 766 len -= count; 767 cp += count; 768 off = 0; 769 m = m->m_next; 770 } 771} 772 773/* 774 * Copy a packet header mbuf chain into a completely new chain, including 775 * copying any mbuf clusters. Use this instead of m_copypacket() when 776 * you need a writable copy of an mbuf chain. 777 */ 778struct mbuf * 779m_dup(struct mbuf *m, int how) 780{ 781 struct mbuf **p, *top = NULL; 782 int remain, moff, nsize; 783 784 /* Sanity check */ 785 if (m == NULL) 786 return (NULL); 787 KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__)); 788 789 /* While there's more data, get a new mbuf, tack it on, and fill it */ 790 remain = m->m_pkthdr.len; 791 moff = 0; 792 p = ⊤ 793 while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ 794 struct mbuf *n; 795 796 /* Get the next new mbuf */ 797 MGET(n, how, m->m_type); 798 if (n == NULL) 799 goto nospace; 800 if (top == NULL) { /* first one, must be PKTHDR */ 801 M_COPY_PKTHDR(n, m); 802 nsize = MHLEN; 803 } else /* not the first one */ 804 nsize = MLEN; 805 if (remain >= MINCLSIZE) { 806 MCLGET(n, how); 807 if ((n->m_flags & M_EXT) == 0) { 808 (void)m_free(n); 809 goto nospace; 810 } 811 nsize = MCLBYTES; 812 } 813 n->m_len = 0; 814 815 /* Link it into the new chain */ 816 *p = n; 817 p = &n->m_next; 818 819 /* Copy data from original mbuf(s) into new mbuf */ 820 while (n->m_len < nsize && m != NULL) { 821 int chunk = min(nsize - n->m_len, m->m_len - moff); 822 823 bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 824 moff += chunk; 825 n->m_len += chunk; 826 remain -= chunk; 827 if (moff == m->m_len) { 828 m = m->m_next; 829 moff = 0; 830 } 831 } 832 833 /* Check correct total mbuf length */ 834 KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), 835 ("%s: bogus m_pkthdr.len", __FUNCTION__)); 836 } 837 return (top); 838 839nospace: 840 m_freem(top); 841 mtx_lock(&mbuf_mtx); 842 mbstat.m_mcfail++; 843 mtx_unlock(&mbuf_mtx); 844 return (NULL); 845} 846 847/* 848 * Concatenate mbuf chain n to m. 849 * Both chains must be of the same type (e.g. MT_DATA). 850 * Any m_pkthdr is not updated. 851 */ 852void 853m_cat(struct mbuf *m, struct mbuf *n) 854{ 855 while (m->m_next) 856 m = m->m_next; 857 while (n) { 858 if (m->m_flags & M_EXT || 859 m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { 860 /* just join the two chains */ 861 m->m_next = n; 862 return; 863 } 864 /* splat the data from one into the other */ 865 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 866 (u_int)n->m_len); 867 m->m_len += n->m_len; 868 n = m_free(n); 869 } 870} 871 872void 873m_adj(struct mbuf *mp, int req_len) 874{ 875 int len = req_len; 876 struct mbuf *m; 877 int count; 878 879 if ((m = mp) == NULL) 880 return; 881 if (len >= 0) { 882 /* 883 * Trim from head. 884 */ 885 while (m != NULL && len > 0) { 886 if (m->m_len <= len) { 887 len -= m->m_len; 888 m->m_len = 0; 889 m = m->m_next; 890 } else { 891 m->m_len -= len; 892 m->m_data += len; 893 len = 0; 894 } 895 } 896 m = mp; 897 if (mp->m_flags & M_PKTHDR) 898 m->m_pkthdr.len -= (req_len - len); 899 } else { 900 /* 901 * Trim from tail. Scan the mbuf chain, 902 * calculating its length and finding the last mbuf. 903 * If the adjustment only affects this mbuf, then just 904 * adjust and return. Otherwise, rescan and truncate 905 * after the remaining size. 906 */ 907 len = -len; 908 count = 0; 909 for (;;) { 910 count += m->m_len; 911 if (m->m_next == (struct mbuf *)0) 912 break; 913 m = m->m_next; 914 } 915 if (m->m_len >= len) { 916 m->m_len -= len; 917 if (mp->m_flags & M_PKTHDR) 918 mp->m_pkthdr.len -= len; 919 return; 920 } 921 count -= len; 922 if (count < 0) 923 count = 0; 924 /* 925 * Correct length for chain is "count". 926 * Find the mbuf with last data, adjust its length, 927 * and toss data from remaining mbufs on chain. 928 */ 929 m = mp; 930 if (m->m_flags & M_PKTHDR) 931 m->m_pkthdr.len = count; 932 for (; m; m = m->m_next) { 933 if (m->m_len >= count) { 934 m->m_len = count; 935 break; 936 } 937 count -= m->m_len; 938 } 939 while (m->m_next) 940 (m = m->m_next) ->m_len = 0; 941 } 942} 943 944/* 945 * Rearange an mbuf chain so that len bytes are contiguous 946 * and in the data area of an mbuf (so that mtod and dtom 947 * will work for a structure of size len). Returns the resulting 948 * mbuf chain on success, frees it and returns null on failure. 949 * If there is room, it will add up to max_protohdr-len extra bytes to the 950 * contiguous region in an attempt to avoid being called next time. 951 */ 952struct mbuf * 953m_pullup(struct mbuf *n, int len) 954{ 955 struct mbuf *m; 956 int count; 957 int space; 958 959 /* 960 * If first mbuf has no cluster, and has room for len bytes 961 * without shifting current data, pullup into it, 962 * otherwise allocate a new mbuf to prepend to the chain. 963 */ 964 if ((n->m_flags & M_EXT) == 0 && 965 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 966 if (n->m_len >= len) 967 return (n); 968 m = n; 969 n = n->m_next; 970 len -= m->m_len; 971 } else { 972 if (len > MHLEN) 973 goto bad; 974 MGET(m, M_DONTWAIT, n->m_type); 975 if (m == NULL) 976 goto bad; 977 m->m_len = 0; 978 if (n->m_flags & M_PKTHDR) { 979 M_COPY_PKTHDR(m, n); 980 n->m_flags &= ~M_PKTHDR; 981 } 982 } 983 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 984 do { 985 count = min(min(max(len, max_protohdr), space), n->m_len); 986 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 987 (unsigned)count); 988 len -= count; 989 m->m_len += count; 990 n->m_len -= count; 991 space -= count; 992 if (n->m_len) 993 n->m_data += count; 994 else 995 n = m_free(n); 996 } while (len > 0 && n); 997 if (len > 0) { 998 (void) m_free(m); 999 goto bad; 1000 } 1001 m->m_next = n; 1002 return (m); 1003bad: 1004 m_freem(n); 1005 mtx_lock(&mbuf_mtx); 1006 mbstat.m_mpfail++; 1007 mtx_unlock(&mbuf_mtx); 1008 return (NULL); 1009} 1010 1011/* 1012 * Partition an mbuf chain in two pieces, returning the tail -- 1013 * all but the first len0 bytes. In case of failure, it returns NULL and 1014 * attempts to restore the chain to its original state. 1015 */ 1016struct mbuf * 1017m_split(struct mbuf *m0, int len0, int wait) 1018{ 1019 struct mbuf *m, *n; 1020 unsigned len = len0, remain; 1021 1022 for (m = m0; m && len > m->m_len; m = m->m_next) 1023 len -= m->m_len; 1024 if (m == NULL) 1025 return (NULL); 1026 remain = m->m_len - len; 1027 if (m0->m_flags & M_PKTHDR) { 1028 MGETHDR(n, wait, m0->m_type); 1029 if (n == NULL) 1030 return (NULL); 1031 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 1032 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1033 m0->m_pkthdr.len = len0; 1034 if (m->m_flags & M_EXT) 1035 goto extpacket; 1036 if (remain > MHLEN) { 1037 /* m can't be the lead packet */ 1038 MH_ALIGN(n, 0); 1039 n->m_next = m_split(m, len, wait); 1040 if (n->m_next == NULL) { 1041 (void) m_free(n); 1042 return (NULL); 1043 } else 1044 return (n); 1045 } else 1046 MH_ALIGN(n, remain); 1047 } else if (remain == 0) { 1048 n = m->m_next; 1049 m->m_next = NULL; 1050 return (n); 1051 } else { 1052 MGET(n, wait, m->m_type); 1053 if (n == NULL) 1054 return (NULL); 1055 M_ALIGN(n, remain); 1056 } 1057extpacket: 1058 if (m->m_flags & M_EXT) { 1059 n->m_flags |= M_EXT; 1060 n->m_ext = m->m_ext; 1061 MEXT_ADD_REF(m); 1062 m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ 1063 n->m_data = m->m_data + len; 1064 } else { 1065 bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 1066 } 1067 n->m_len = remain; 1068 m->m_len = len; 1069 n->m_next = m->m_next; 1070 m->m_next = NULL; 1071 return (n); 1072} 1073/* 1074 * Routine to copy from device local memory into mbufs. 1075 */ 1076struct mbuf * 1077m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 1078 void (*copy)(char *from, caddr_t to, u_int len)) 1079{ 1080 struct mbuf *m; 1081 struct mbuf *top = 0, **mp = ⊤ 1082 int off = off0, len; 1083 char *cp; 1084 char *epkt; 1085 1086 cp = buf; 1087 epkt = cp + totlen; 1088 if (off) { 1089 cp += off + 2 * sizeof(u_short); 1090 totlen -= 2 * sizeof(u_short); 1091 } 1092 MGETHDR(m, M_DONTWAIT, MT_DATA); 1093 if (m == NULL) 1094 return (NULL); 1095 m->m_pkthdr.rcvif = ifp; 1096 m->m_pkthdr.len = totlen; 1097 m->m_len = MHLEN; 1098 1099 while (totlen > 0) { 1100 if (top) { 1101 MGET(m, M_DONTWAIT, MT_DATA); 1102 if (m == NULL) { 1103 m_freem(top); 1104 return (NULL); 1105 } 1106 m->m_len = MLEN; 1107 } 1108 len = min(totlen, epkt - cp); 1109 if (len >= MINCLSIZE) { 1110 MCLGET(m, M_DONTWAIT); 1111 if (m->m_flags & M_EXT) 1112 m->m_len = len = min(len, MCLBYTES); 1113 else 1114 len = m->m_len; 1115 } else { 1116 /* 1117 * Place initial small packet/header at end of mbuf. 1118 */ 1119 if (len < m->m_len) { 1120 if (top == NULL && len + 1121 max_linkhdr <= m->m_len) 1122 m->m_data += max_linkhdr; 1123 m->m_len = len; 1124 } else 1125 len = m->m_len; 1126 } 1127 if (copy) 1128 copy(cp, mtod(m, caddr_t), (unsigned)len); 1129 else 1130 bcopy(cp, mtod(m, caddr_t), (unsigned)len); 1131 cp += len; 1132 *mp = m; 1133 mp = &m->m_next; 1134 totlen -= len; 1135 if (cp == epkt) 1136 cp = buf; 1137 } 1138 return (top); 1139} 1140 1141/* 1142 * Copy data from a buffer back into the indicated mbuf chain, 1143 * starting "off" bytes from the beginning, extending the mbuf 1144 * chain if necessary. 1145 */ 1146void 1147m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) 1148{ 1149 int mlen; 1150 struct mbuf *m = m0, *n; 1151 int totlen = 0; 1152 1153 if (m0 == NULL) 1154 return; 1155 while (off > (mlen = m->m_len)) { 1156 off -= mlen; 1157 totlen += mlen; 1158 if (m->m_next == NULL) { 1159 n = m_getclr(M_DONTWAIT, m->m_type); 1160 if (n == NULL) 1161 goto out; 1162 n->m_len = min(MLEN, len + off); 1163 m->m_next = n; 1164 } 1165 m = m->m_next; 1166 } 1167 while (len > 0) { 1168 mlen = min (m->m_len - off, len); 1169 bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); 1170 cp += mlen; 1171 len -= mlen; 1172 mlen += off; 1173 off = 0; 1174 totlen += mlen; 1175 if (len == 0) 1176 break; 1177 if (m->m_next == NULL) { 1178 n = m_get(M_DONTWAIT, m->m_type); 1179 if (n == NULL) 1180 break; 1181 n->m_len = min(MLEN, len); 1182 m->m_next = n; 1183 } 1184 m = m->m_next; 1185 } 1186out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 1187 m->m_pkthdr.len = totlen; 1188} 1189 1190void 1191m_print(const struct mbuf *m) 1192{ 1193 int len; 1194 const struct mbuf *m2; 1195 1196 len = m->m_pkthdr.len; 1197 m2 = m; 1198 while (len) { 1199 printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); 1200 len -= m2->m_len; 1201 m2 = m2->m_next; 1202 } 1203 return; 1204} 1205