1139804Simp/*- 21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 4. Neither the name of the University nor the names of its contributors 141541Srgrimes * may be used to endorse or promote products derived from this software 151541Srgrimes * without specific prior written permission. 161541Srgrimes * 171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271541Srgrimes * SUCH DAMAGE. 281541Srgrimes * 291541Srgrimes * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 301541Srgrimes */ 311541Srgrimes 32116182Sobrien#include <sys/cdefs.h> 33116182Sobrien__FBSDID("$FreeBSD: stable/11/sys/kern/uipc_mbuf.c 364163 2020-08-12 12:11:44Z ae $"); 34116182Sobrien 3577572Sobrien#include "opt_param.h" 36113490Ssilby#include "opt_mbuf_stress_test.h" 37178674Sjulian#include "opt_mbuf_profiling.h" 38101007Srwatson 391541Srgrimes#include <sys/param.h> 401541Srgrimes#include <sys/systm.h> 4176166Smarkm#include <sys/kernel.h> 42125296Ssilby#include <sys/limits.h> 4376166Smarkm#include <sys/lock.h> 4432036Sbde#include <sys/malloc.h> 451541Srgrimes#include <sys/mbuf.h> 4623081Swollman#include <sys/sysctl.h> 471541Srgrimes#include <sys/domain.h> 481541Srgrimes#include <sys/protosw.h> 49125296Ssilby#include <sys/uio.h> 50297188Sgnn#include <sys/sdt.h> 5176166Smarkm 52297227SgnnSDT_PROBE_DEFINE5_XLATE(sdt, , , m__init, 53297188Sgnn "struct mbuf *", "mbufinfo_t *", 54297188Sgnn "uint32_t", "uint32_t", 55297188Sgnn "uint16_t", "uint16_t", 56297188Sgnn "uint32_t", "uint32_t", 57297188Sgnn "uint32_t", "uint32_t"); 58297188Sgnn 59297227SgnnSDT_PROBE_DEFINE3_XLATE(sdt, , , m__gethdr, 60297188Sgnn "uint32_t", "uint32_t", 61297188Sgnn "uint16_t", "uint16_t", 62297188Sgnn "struct mbuf *", "mbufinfo_t *"); 63297188Sgnn 64297227SgnnSDT_PROBE_DEFINE3_XLATE(sdt, , , m__get, 65297188Sgnn "uint32_t", "uint32_t", 66297188Sgnn "uint16_t", "uint16_t", 67297188Sgnn "struct mbuf *", "mbufinfo_t *"); 68297188Sgnn 69297227SgnnSDT_PROBE_DEFINE4_XLATE(sdt, , , m__getcl, 70297188Sgnn "uint32_t", "uint32_t", 71297188Sgnn "uint16_t", "uint16_t", 72297188Sgnn "uint32_t", "uint32_t", 73297188Sgnn "struct mbuf *", "mbufinfo_t *"); 74297188Sgnn 75364163SaeSDT_PROBE_DEFINE5_XLATE(sdt, , , m__getjcl, 76364163Sae "uint32_t", "uint32_t", 77364163Sae "uint16_t", "uint16_t", 78364163Sae "uint32_t", "uint32_t", 79364163Sae "uint32_t", "uint32_t", 80364163Sae "struct mbuf *", "mbufinfo_t *"); 81364163Sae 82297227SgnnSDT_PROBE_DEFINE3_XLATE(sdt, , , m__clget, 83297188Sgnn "struct mbuf *", "mbufinfo_t *", 84297188Sgnn "uint32_t", "uint32_t", 85297188Sgnn "uint32_t", "uint32_t"); 86297188Sgnn 87297227SgnnSDT_PROBE_DEFINE4_XLATE(sdt, , , m__cljget, 88297188Sgnn "struct mbuf *", "mbufinfo_t *", 89297188Sgnn "uint32_t", "uint32_t", 90297188Sgnn "uint32_t", "uint32_t", 91297188Sgnn "void*", "void*"); 92297188Sgnn 93297227SgnnSDT_PROBE_DEFINE(sdt, , , m__cljset); 94297188Sgnn 95297227SgnnSDT_PROBE_DEFINE1_XLATE(sdt, , , m__free, 96297188Sgnn "struct mbuf *", "mbufinfo_t *"); 97297188Sgnn 98297227SgnnSDT_PROBE_DEFINE1_XLATE(sdt, , , m__freem, 99297188Sgnn "struct mbuf *", "mbufinfo_t *"); 100297188Sgnn 101295547Sglebius#include <security/mac/mac_framework.h> 102295547Sglebius 1039759Sbdeint max_linkhdr; 1049759Sbdeint max_protohdr; 1059759Sbdeint max_hdr; 1069759Sbdeint max_datalen; 107116455Ssilby#ifdef MBUF_STRESS_TEST 108112777Ssilbyint m_defragpackets; 109112777Ssilbyint m_defragbytes; 110112777Ssilbyint m_defraguseless; 111112777Ssilbyint m_defragfailure; 112113490Ssilbyint m_defragrandomfailures; 113113490Ssilby#endif 1141541Srgrimes 11566475Sbmilekic/* 11666475Sbmilekic * sysctl(8) exported objects 11766475Sbmilekic */ 118155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD, 119155820Sandre &max_linkhdr, 0, "Size of largest link layer header"); 120155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD, 121155820Sandre &max_protohdr, 0, "Size of largest protocol layer header"); 122155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD, 123155820Sandre &max_hdr, 0, "Size of largest link plus protocol header"); 124155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD, 125155820Sandre &max_datalen, 0, "Minimum space left in mbuf after max_hdr"); 126116455Ssilby#ifdef MBUF_STRESS_TEST 127112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, 128112777Ssilby &m_defragpackets, 0, ""); 129112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, 130112777Ssilby &m_defragbytes, 0, ""); 131112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, 132112777Ssilby &m_defraguseless, 0, ""); 133112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, 134112777Ssilby &m_defragfailure, 0, ""); 135113490SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, 136113490Ssilby &m_defragrandomfailures, 0, ""); 137113490Ssilby#endif 13875112Sbmilekic 1391541Srgrimes/* 140254973Sandre * Ensure the correct size of various mbuf parameters. It could be off due 141254973Sandre * to compiler-induced padding and alignment artifacts. 142254973Sandre */ 143254973SandreCTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN); 144254973SandreCTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN); 145254973Sandre 146254973Sandre/* 147277203Srwatson * mbuf data storage should be 64-bit aligned regardless of architectural 148277203Srwatson * pointer size; check this is the case with and without a packet header. 149277203Srwatson */ 150277203SrwatsonCTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0); 151277203SrwatsonCTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0); 152277203Srwatson 153277203Srwatson/* 154277203Srwatson * While the specific values here don't matter too much (i.e., +/- a few 155277203Srwatson * words), we do want to ensure that changes to these values are carefully 156277203Srwatson * reasoned about and properly documented. This is especially the case as 157277203Srwatson * network-protocol and device-driver modules encode these layouts, and must 158277203Srwatson * be recompiled if the structures change. Check these values at compile time 159277203Srwatson * against the ones documented in comments in mbuf.h. 160277203Srwatson * 161277203Srwatson * NB: Possibly they should be documented there via #define's and not just 162277203Srwatson * comments. 163277203Srwatson */ 164277203Srwatson#if defined(__LP64__) 165277203SrwatsonCTASSERT(offsetof(struct mbuf, m_dat) == 32); 166277203SrwatsonCTASSERT(sizeof(struct pkthdr) == 56); 167278920SglebiusCTASSERT(sizeof(struct m_ext) == 48); 168277203Srwatson#else 169277203SrwatsonCTASSERT(offsetof(struct mbuf, m_dat) == 24); 170277203SrwatsonCTASSERT(sizeof(struct pkthdr) == 48); 171278920SglebiusCTASSERT(sizeof(struct m_ext) == 28); 172277203Srwatson#endif 173277203Srwatson 174277203Srwatson/* 175278914Sglebius * Assert that the queue(3) macros produce code of the same size as an old 176278914Sglebius * plain pointer does. 177278914Sglebius */ 178278914Sglebius#ifdef INVARIANTS 179313122Smarkjstatic struct mbuf __used m_assertbuf; 180278914SglebiusCTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next)); 181278914SglebiusCTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next)); 182278914SglebiusCTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt)); 183278914SglebiusCTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt)); 184278914Sglebius#endif 185278914Sglebius 186278914Sglebius/* 187218909Sbrucec * Attach the cluster from *m to *n, set up m_ext in *n 188151976Sandre * and bump the refcount of the cluster. 189151976Sandre */ 190289276Shirenvoid 191296242Sglebiusmb_dupcl(struct mbuf *n, struct mbuf *m) 192151976Sandre{ 193296242Sglebius volatile u_int *refcnt; 194151976Sandre 195268535Sglebius KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); 196268535Sglebius KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n)); 197268535Sglebius 198296242Sglebius n->m_ext = m->m_ext; 199296242Sglebius n->m_flags |= M_EXT; 200296242Sglebius n->m_flags |= m->m_flags & M_RDONLY; 201296242Sglebius 202296242Sglebius /* See if this is the mbuf that holds the embedded refcount. */ 203296242Sglebius if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 204296242Sglebius refcnt = n->m_ext.ext_cnt = &m->m_ext.ext_count; 205296242Sglebius n->m_ext.ext_flags &= ~EXT_FLAG_EMBREF; 206296242Sglebius } else { 207268535Sglebius KASSERT(m->m_ext.ext_cnt != NULL, 208268535Sglebius ("%s: no refcounting pointer on %p", __func__, m)); 209296242Sglebius refcnt = m->m_ext.ext_cnt; 210268535Sglebius } 211268535Sglebius 212296242Sglebius if (*refcnt == 1) 213296242Sglebius *refcnt += 1; 214296242Sglebius else 215296242Sglebius atomic_add_int(refcnt, 1); 216151976Sandre} 217151976Sandre 218284961Snpvoid 219284961Snpm_demote_pkthdr(struct mbuf *m) 220284961Snp{ 221284961Snp 222284961Snp M_ASSERTPKTHDR(m); 223284961Snp 224284961Snp m_tag_delete_chain(m, NULL); 225284961Snp m->m_flags &= ~M_PKTHDR; 226284961Snp bzero(&m->m_pkthdr, sizeof(struct pkthdr)); 227284961Snp} 228284961Snp 229151976Sandre/* 230149598Sandre * Clean up mbuf (chain) from any tags and packet headers. 231149647Sandre * If "all" is set then the first mbuf in the chain will be 232149647Sandre * cleaned too. 233149598Sandre */ 234149598Sandrevoid 235275329Sglebiusm_demote(struct mbuf *m0, int all, int flags) 236149598Sandre{ 237149598Sandre struct mbuf *m; 238149598Sandre 239149598Sandre for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) { 240271122Sglebius KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p", 241271122Sglebius __func__, m, m0)); 242284961Snp if (m->m_flags & M_PKTHDR) 243284961Snp m_demote_pkthdr(m); 244275329Sglebius m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags); 245149598Sandre } 246149598Sandre} 247149598Sandre 248149598Sandre/* 249149648Sandre * Sanity checks on mbuf (chain) for use in KASSERT() and general 250149648Sandre * debugging. 251149648Sandre * Returns 0 or panics when bad and 1 on all tests passed. 252149648Sandre * Sanitize, 0 to run M_SANITY_ACTION, 1 to garble things so they 253149648Sandre * blow up later. 254149599Sandre */ 255149599Sandreint 256149599Sandrem_sanity(struct mbuf *m0, int sanitize) 257149599Sandre{ 258149599Sandre struct mbuf *m; 259149599Sandre caddr_t a, b; 260149599Sandre int pktlen = 0; 261149599Sandre 262168734Skmacy#ifdef INVARIANTS 263168734Skmacy#define M_SANITY_ACTION(s) panic("mbuf %p: " s, m) 264266876Sglebius#else 265168734Skmacy#define M_SANITY_ACTION(s) printf("mbuf %p: " s, m) 266168734Skmacy#endif 267149599Sandre 268149648Sandre for (m = m0; m != NULL; m = m->m_next) { 269149599Sandre /* 270149599Sandre * Basic pointer checks. If any of these fails then some 271149599Sandre * unrelated kernel memory before or after us is trashed. 272149599Sandre * No way to recover from that. 273149599Sandre */ 274276818Srwatson a = M_START(m); 275276818Srwatson b = a + M_SIZE(m); 276149599Sandre if ((caddr_t)m->m_data < a) 277149599Sandre M_SANITY_ACTION("m_data outside mbuf data range left"); 278149599Sandre if ((caddr_t)m->m_data > b) 279149599Sandre M_SANITY_ACTION("m_data outside mbuf data range right"); 280149599Sandre if ((caddr_t)m->m_data + m->m_len > b) 281149599Sandre M_SANITY_ACTION("m_data + m_len exeeds mbuf space"); 282149599Sandre 283149599Sandre /* m->m_nextpkt may only be set on first mbuf in chain. */ 284149648Sandre if (m != m0 && m->m_nextpkt != NULL) { 285149599Sandre if (sanitize) { 286149599Sandre m_freem(m->m_nextpkt); 287149599Sandre m->m_nextpkt = (struct mbuf *)0xDEADC0DE; 288149599Sandre } else 289149599Sandre M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf"); 290149599Sandre } 291149599Sandre 292149599Sandre /* packet length (not mbuf length!) calculation */ 293149599Sandre if (m0->m_flags & M_PKTHDR) 294149599Sandre pktlen += m->m_len; 295149599Sandre 296149599Sandre /* m_tags may only be attached to first mbuf in chain. */ 297149599Sandre if (m != m0 && m->m_flags & M_PKTHDR && 298149599Sandre !SLIST_EMPTY(&m->m_pkthdr.tags)) { 299149599Sandre if (sanitize) { 300149599Sandre m_tag_delete_chain(m, NULL); 301149599Sandre /* put in 0xDEADC0DE perhaps? */ 302149648Sandre } else 303149599Sandre M_SANITY_ACTION("m_tags on in-chain mbuf"); 304149599Sandre } 305149599Sandre 306149599Sandre /* M_PKTHDR may only be set on first mbuf in chain */ 307149599Sandre if (m != m0 && m->m_flags & M_PKTHDR) { 308149599Sandre if (sanitize) { 309149599Sandre bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); 310149599Sandre m->m_flags &= ~M_PKTHDR; 311149599Sandre /* put in 0xDEADCODE and leave hdr flag in */ 312149599Sandre } else 313149599Sandre M_SANITY_ACTION("M_PKTHDR on in-chain mbuf"); 314149599Sandre } 315149599Sandre } 316149648Sandre m = m0; 317149648Sandre if (pktlen && pktlen != m->m_pkthdr.len) { 318149599Sandre if (sanitize) 319149648Sandre m->m_pkthdr.len = 0; 320149599Sandre else 321149599Sandre M_SANITY_ACTION("m_pkthdr.len != mbuf chain length"); 322149599Sandre } 323149648Sandre return 1; 324149648Sandre 325149599Sandre#undef M_SANITY_ACTION 326149599Sandre} 327149599Sandre 328295547Sglebius/* 329295547Sglebius * Non-inlined part of m_init(). 330295547Sglebius */ 331295547Sglebiusint 332295547Sglebiusm_pkthdr_init(struct mbuf *m, int how) 333295547Sglebius{ 334295547Sglebius#ifdef MAC 335295547Sglebius int error; 336295547Sglebius#endif 337295547Sglebius m->m_data = m->m_pktdat; 338295547Sglebius bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); 339295547Sglebius#ifdef MAC 340295547Sglebius /* If the label init fails, fail the alloc */ 341295547Sglebius error = mac_mbuf_init(m, how); 342295547Sglebius if (error) 343295547Sglebius return (error); 344295547Sglebius#endif 345149599Sandre 346295547Sglebius return (0); 347295547Sglebius} 348295547Sglebius 349149599Sandre/* 350108466Ssam * "Move" mbuf pkthdr from "from" to "to". 351100960Srwatson * "from" must have M_PKTHDR set, and "to" must be empty. 352100960Srwatson */ 353100960Srwatsonvoid 354108466Ssamm_move_pkthdr(struct mbuf *to, struct mbuf *from) 355100960Srwatson{ 356100960Srwatson 357100960Srwatson#if 0 358108466Ssam /* see below for why these are not enabled */ 359113255Sdes M_ASSERTPKTHDR(to); 360113487Srwatson /* Note: with MAC, this may not be a good assertion. */ 361108466Ssam KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), 362108466Ssam ("m_move_pkthdr: to has tags")); 363100960Srwatson#endif 364101007Srwatson#ifdef MAC 365113487Srwatson /* 366113487Srwatson * XXXMAC: It could be this should also occur for non-MAC? 367113487Srwatson */ 368101007Srwatson if (to->m_flags & M_PKTHDR) 369113487Srwatson m_tag_delete_chain(to, NULL); 370101007Srwatson#endif 371143302Ssam to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); 372143302Ssam if ((to->m_flags & M_EXT) == 0) 373143302Ssam to->m_data = to->m_pktdat; 374108466Ssam to->m_pkthdr = from->m_pkthdr; /* especially tags */ 375108466Ssam SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ 376108466Ssam from->m_flags &= ~M_PKTHDR; 377108466Ssam} 378108466Ssam 379108466Ssam/* 380108466Ssam * Duplicate "from"'s mbuf pkthdr in "to". 381108466Ssam * "from" must have M_PKTHDR set, and "to" must be empty. 382108466Ssam * In particular, this does a deep copy of the packet tags. 383108466Ssam */ 384108466Ssamint 385286450Smelifarom_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how) 386108466Ssam{ 387108466Ssam 388108466Ssam#if 0 389108466Ssam /* 390108466Ssam * The mbuf allocator only initializes the pkthdr 391248372Sglebius * when the mbuf is allocated with m_gethdr(). Many users 392248372Sglebius * (e.g. m_copy*, m_prepend) use m_get() and then 393108466Ssam * smash the pkthdr as needed causing these 394108466Ssam * assertions to trip. For now just disable them. 395108466Ssam */ 396113255Sdes M_ASSERTPKTHDR(to); 397113487Srwatson /* Note: with MAC, this may not be a good assertion. */ 398108466Ssam KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); 399108466Ssam#endif 400132488Salfred MBUF_CHECKSLEEP(how); 401108466Ssam#ifdef MAC 402108466Ssam if (to->m_flags & M_PKTHDR) 403113487Srwatson m_tag_delete_chain(to, NULL); 404108466Ssam#endif 405112733Ssilby to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); 406112733Ssilby if ((to->m_flags & M_EXT) == 0) 407112733Ssilby to->m_data = to->m_pktdat; 408100960Srwatson to->m_pkthdr = from->m_pkthdr; 409108466Ssam SLIST_INIT(&to->m_pkthdr.tags); 410260716Sglebius return (m_tag_copy_chain(to, from, how)); 411100960Srwatson} 412100960Srwatson 413100960Srwatson/* 4141541Srgrimes * Lesser-used path for M_PREPEND: 4151541Srgrimes * allocate new mbuf to prepend to chain, 4161541Srgrimes * copy junk along. 4171541Srgrimes */ 4181541Srgrimesstruct mbuf * 41972356Sbmilekicm_prepend(struct mbuf *m, int len, int how) 4201541Srgrimes{ 4211541Srgrimes struct mbuf *mn; 4221541Srgrimes 423117770Ssilby if (m->m_flags & M_PKTHDR) 424248372Sglebius mn = m_gethdr(how, m->m_type); 425117770Ssilby else 426248372Sglebius mn = m_get(how, m->m_type); 42772356Sbmilekic if (mn == NULL) { 4281541Srgrimes m_freem(m); 42972356Sbmilekic return (NULL); 4301541Srgrimes } 431113487Srwatson if (m->m_flags & M_PKTHDR) 432248372Sglebius m_move_pkthdr(mn, m); 4331541Srgrimes mn->m_next = m; 4341541Srgrimes m = mn; 435276692Srwatson if (len < M_SIZE(m)) 436276692Srwatson M_ALIGN(m, len); 4371541Srgrimes m->m_len = len; 4381541Srgrimes return (m); 4391541Srgrimes} 4401541Srgrimes 4411541Srgrimes/* 4421541Srgrimes * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 4431541Srgrimes * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 444243882Sglebius * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller. 44554002Sarchie * Note that the copy is read-only, because clusters are not copied, 44654002Sarchie * only their reference counts are incremented. 4471541Srgrimes */ 4481541Srgrimesstruct mbuf * 449296242Sglebiusm_copym(struct mbuf *m, int off0, int len, int wait) 4501541Srgrimes{ 45172356Sbmilekic struct mbuf *n, **np; 45272356Sbmilekic int off = off0; 4531541Srgrimes struct mbuf *top; 4541541Srgrimes int copyhdr = 0; 4551541Srgrimes 45652201Salfred KASSERT(off >= 0, ("m_copym, negative off %d", off)); 45752201Salfred KASSERT(len >= 0, ("m_copym, negative len %d", len)); 458132488Salfred MBUF_CHECKSLEEP(wait); 4591541Srgrimes if (off == 0 && m->m_flags & M_PKTHDR) 4601541Srgrimes copyhdr = 1; 4611541Srgrimes while (off > 0) { 46252201Salfred KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); 4631541Srgrimes if (off < m->m_len) 4641541Srgrimes break; 4651541Srgrimes off -= m->m_len; 4661541Srgrimes m = m->m_next; 4671541Srgrimes } 4681541Srgrimes np = ⊤ 469298069Spfg top = NULL; 4701541Srgrimes while (len > 0) { 47172356Sbmilekic if (m == NULL) { 472266876Sglebius KASSERT(len == M_COPYALL, 47352201Salfred ("m_copym, length > size of mbuf chain")); 4741541Srgrimes break; 4751541Srgrimes } 476117770Ssilby if (copyhdr) 477248372Sglebius n = m_gethdr(wait, m->m_type); 478117770Ssilby else 479248372Sglebius n = m_get(wait, m->m_type); 4801541Srgrimes *np = n; 48172356Sbmilekic if (n == NULL) 4821541Srgrimes goto nospace; 4831541Srgrimes if (copyhdr) { 484108466Ssam if (!m_dup_pkthdr(n, m, wait)) 485108466Ssam goto nospace; 4861541Srgrimes if (len == M_COPYALL) 4871541Srgrimes n->m_pkthdr.len -= off0; 4881541Srgrimes else 4891541Srgrimes n->m_pkthdr.len = len; 4901541Srgrimes copyhdr = 0; 4911541Srgrimes } 4921541Srgrimes n->m_len = min(len, m->m_len - off); 4931541Srgrimes if (m->m_flags & M_EXT) { 4941541Srgrimes n->m_data = m->m_data + off; 495151976Sandre mb_dupcl(n, m); 4961541Srgrimes } else 4971541Srgrimes bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), 498103569Sbmilekic (u_int)n->m_len); 4991541Srgrimes if (len != M_COPYALL) 5001541Srgrimes len -= n->m_len; 5011541Srgrimes off = 0; 5021541Srgrimes m = m->m_next; 5031541Srgrimes np = &n->m_next; 5041541Srgrimes } 50578592Sbmilekic 5061541Srgrimes return (top); 5071541Srgrimesnospace: 5081541Srgrimes m_freem(top); 50972356Sbmilekic return (NULL); 5101541Srgrimes} 5111541Srgrimes 5121541Srgrimes/* 51315689Swollman * Copy an entire packet, including header (which must be present). 51415689Swollman * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 51554002Sarchie * Note that the copy is read-only, because clusters are not copied, 51654002Sarchie * only their reference counts are incremented. 51772750Sluigi * Preserve alignment of the first mbuf so if the creator has left 51872750Sluigi * some room at the beginning (e.g. for inserting protocol headers) 51972750Sluigi * the copies still have the room available. 52015689Swollman */ 52115689Swollmanstruct mbuf * 52272356Sbmilekicm_copypacket(struct mbuf *m, int how) 52315689Swollman{ 52415689Swollman struct mbuf *top, *n, *o; 52515689Swollman 526132488Salfred MBUF_CHECKSLEEP(how); 527248372Sglebius n = m_get(how, m->m_type); 52815689Swollman top = n; 52972356Sbmilekic if (n == NULL) 53015689Swollman goto nospace; 53115689Swollman 532108466Ssam if (!m_dup_pkthdr(n, m, how)) 533108466Ssam goto nospace; 53415689Swollman n->m_len = m->m_len; 53515689Swollman if (m->m_flags & M_EXT) { 53615689Swollman n->m_data = m->m_data; 537151976Sandre mb_dupcl(n, m); 53815689Swollman } else { 53972750Sluigi n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); 54015689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 54115689Swollman } 54215689Swollman 54315689Swollman m = m->m_next; 54415689Swollman while (m) { 545248372Sglebius o = m_get(how, m->m_type); 54672356Sbmilekic if (o == NULL) 54715689Swollman goto nospace; 54815689Swollman 54915689Swollman n->m_next = o; 55015689Swollman n = n->m_next; 55115689Swollman 55215689Swollman n->m_len = m->m_len; 55315689Swollman if (m->m_flags & M_EXT) { 55415689Swollman n->m_data = m->m_data; 555151976Sandre mb_dupcl(n, m); 55615689Swollman } else { 55715689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 55815689Swollman } 55915689Swollman 56015689Swollman m = m->m_next; 56115689Swollman } 56215689Swollman return top; 56315689Swollmannospace: 56415689Swollman m_freem(top); 56572356Sbmilekic return (NULL); 56615689Swollman} 56715689Swollman 56815689Swollman/* 5691541Srgrimes * Copy data from an mbuf chain starting "off" bytes from the beginning, 5701541Srgrimes * continuing for "len" bytes, into the indicated buffer. 5711541Srgrimes */ 5721549Srgrimesvoid 57381907Sjulianm_copydata(const struct mbuf *m, int off, int len, caddr_t cp) 5741541Srgrimes{ 575103569Sbmilekic u_int count; 5761541Srgrimes 57752201Salfred KASSERT(off >= 0, ("m_copydata, negative off %d", off)); 57852201Salfred KASSERT(len >= 0, ("m_copydata, negative len %d", len)); 5791541Srgrimes while (off > 0) { 58052201Salfred KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); 5811541Srgrimes if (off < m->m_len) 5821541Srgrimes break; 5831541Srgrimes off -= m->m_len; 5841541Srgrimes m = m->m_next; 5851541Srgrimes } 5861541Srgrimes while (len > 0) { 58752201Salfred KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); 5881541Srgrimes count = min(m->m_len - off, len); 5891541Srgrimes bcopy(mtod(m, caddr_t) + off, cp, count); 5901541Srgrimes len -= count; 5911541Srgrimes cp += count; 5921541Srgrimes off = 0; 5931541Srgrimes m = m->m_next; 5941541Srgrimes } 5951541Srgrimes} 5961541Srgrimes 5971541Srgrimes/* 59854002Sarchie * Copy a packet header mbuf chain into a completely new chain, including 59954002Sarchie * copying any mbuf clusters. Use this instead of m_copypacket() when 60054002Sarchie * you need a writable copy of an mbuf chain. 60154002Sarchie */ 60254002Sarchiestruct mbuf * 603286450Smelifarom_dup(const struct mbuf *m, int how) 60454002Sarchie{ 60554002Sarchie struct mbuf **p, *top = NULL; 60654002Sarchie int remain, moff, nsize; 60754002Sarchie 608132488Salfred MBUF_CHECKSLEEP(how); 60954002Sarchie /* Sanity check */ 61054002Sarchie if (m == NULL) 61172356Sbmilekic return (NULL); 612113255Sdes M_ASSERTPKTHDR(m); 61354002Sarchie 61454002Sarchie /* While there's more data, get a new mbuf, tack it on, and fill it */ 61554002Sarchie remain = m->m_pkthdr.len; 61654002Sarchie moff = 0; 61754002Sarchie p = ⊤ 61854002Sarchie while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ 61954002Sarchie struct mbuf *n; 62054002Sarchie 62154002Sarchie /* Get the next new mbuf */ 622129906Sbmilekic if (remain >= MINCLSIZE) { 623129906Sbmilekic n = m_getcl(how, m->m_type, 0); 624129906Sbmilekic nsize = MCLBYTES; 625129906Sbmilekic } else { 626129906Sbmilekic n = m_get(how, m->m_type); 627129906Sbmilekic nsize = MLEN; 628129906Sbmilekic } 62954002Sarchie if (n == NULL) 63054002Sarchie goto nospace; 631129906Sbmilekic 632129906Sbmilekic if (top == NULL) { /* First one, must be PKTHDR */ 633129906Sbmilekic if (!m_dup_pkthdr(n, m, how)) { 634129906Sbmilekic m_free(n); 635108466Ssam goto nospace; 636129906Sbmilekic } 637153428Semaste if ((n->m_flags & M_EXT) == 0) 638153428Semaste nsize = MHLEN; 639282594Sae n->m_flags &= ~M_RDONLY; 64054002Sarchie } 64154002Sarchie n->m_len = 0; 64254002Sarchie 64354002Sarchie /* Link it into the new chain */ 64454002Sarchie *p = n; 64554002Sarchie p = &n->m_next; 64654002Sarchie 64754002Sarchie /* Copy data from original mbuf(s) into new mbuf */ 64854002Sarchie while (n->m_len < nsize && m != NULL) { 64954002Sarchie int chunk = min(nsize - n->m_len, m->m_len - moff); 65054002Sarchie 65154002Sarchie bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 65254002Sarchie moff += chunk; 65354002Sarchie n->m_len += chunk; 65454002Sarchie remain -= chunk; 65554002Sarchie if (moff == m->m_len) { 65654002Sarchie m = m->m_next; 65754002Sarchie moff = 0; 65854002Sarchie } 65954002Sarchie } 66054002Sarchie 66154002Sarchie /* Check correct total mbuf length */ 66254002Sarchie KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), 66387594Sobrien ("%s: bogus m_pkthdr.len", __func__)); 66454002Sarchie } 66554002Sarchie return (top); 66654002Sarchie 66754002Sarchienospace: 66854002Sarchie m_freem(top); 66972356Sbmilekic return (NULL); 67054002Sarchie} 67154002Sarchie 67254002Sarchie/* 6731541Srgrimes * Concatenate mbuf chain n to m. 6741541Srgrimes * Both chains must be of the same type (e.g. MT_DATA). 6751541Srgrimes * Any m_pkthdr is not updated. 6761541Srgrimes */ 6771549Srgrimesvoid 67872356Sbmilekicm_cat(struct mbuf *m, struct mbuf *n) 6791541Srgrimes{ 6801541Srgrimes while (m->m_next) 6811541Srgrimes m = m->m_next; 6821541Srgrimes while (n) { 683242256Sandre if (!M_WRITABLE(m) || 684242256Sandre M_TRAILINGSPACE(m) < n->m_len) { 6851541Srgrimes /* just join the two chains */ 6861541Srgrimes m->m_next = n; 6871541Srgrimes return; 6881541Srgrimes } 6891541Srgrimes /* splat the data from one into the other */ 6901541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 6911541Srgrimes (u_int)n->m_len); 6921541Srgrimes m->m_len += n->m_len; 6931541Srgrimes n = m_free(n); 6941541Srgrimes } 6951541Srgrimes} 6961541Srgrimes 697271088Sglebius/* 698271088Sglebius * Concatenate two pkthdr mbuf chains. 699271088Sglebius */ 7001549Srgrimesvoid 701271088Sglebiusm_catpkt(struct mbuf *m, struct mbuf *n) 702271088Sglebius{ 703271088Sglebius 704271088Sglebius M_ASSERTPKTHDR(m); 705271088Sglebius M_ASSERTPKTHDR(n); 706271088Sglebius 707271088Sglebius m->m_pkthdr.len += n->m_pkthdr.len; 708275329Sglebius m_demote(n, 1, 0); 709271088Sglebius 710271088Sglebius m_cat(m, n); 711271088Sglebius} 712271088Sglebius 713271088Sglebiusvoid 71472356Sbmilekicm_adj(struct mbuf *mp, int req_len) 7151541Srgrimes{ 71672356Sbmilekic int len = req_len; 71772356Sbmilekic struct mbuf *m; 71872356Sbmilekic int count; 7191541Srgrimes 7201541Srgrimes if ((m = mp) == NULL) 7211541Srgrimes return; 7221541Srgrimes if (len >= 0) { 7231541Srgrimes /* 7241541Srgrimes * Trim from head. 7251541Srgrimes */ 7261541Srgrimes while (m != NULL && len > 0) { 7271541Srgrimes if (m->m_len <= len) { 7281541Srgrimes len -= m->m_len; 7291541Srgrimes m->m_len = 0; 7301541Srgrimes m = m->m_next; 7311541Srgrimes } else { 7321541Srgrimes m->m_len -= len; 7331541Srgrimes m->m_data += len; 7341541Srgrimes len = 0; 7351541Srgrimes } 7361541Srgrimes } 7371541Srgrimes if (mp->m_flags & M_PKTHDR) 738207475Szec mp->m_pkthdr.len -= (req_len - len); 7391541Srgrimes } else { 7401541Srgrimes /* 7411541Srgrimes * Trim from tail. Scan the mbuf chain, 7421541Srgrimes * calculating its length and finding the last mbuf. 7431541Srgrimes * If the adjustment only affects this mbuf, then just 7441541Srgrimes * adjust and return. Otherwise, rescan and truncate 7451541Srgrimes * after the remaining size. 7461541Srgrimes */ 7471541Srgrimes len = -len; 7481541Srgrimes count = 0; 7491541Srgrimes for (;;) { 7501541Srgrimes count += m->m_len; 7511541Srgrimes if (m->m_next == (struct mbuf *)0) 7521541Srgrimes break; 7531541Srgrimes m = m->m_next; 7541541Srgrimes } 7551541Srgrimes if (m->m_len >= len) { 7561541Srgrimes m->m_len -= len; 7571541Srgrimes if (mp->m_flags & M_PKTHDR) 7581541Srgrimes mp->m_pkthdr.len -= len; 7591541Srgrimes return; 7601541Srgrimes } 7611541Srgrimes count -= len; 7621541Srgrimes if (count < 0) 7631541Srgrimes count = 0; 7641541Srgrimes /* 7651541Srgrimes * Correct length for chain is "count". 7661541Srgrimes * Find the mbuf with last data, adjust its length, 7671541Srgrimes * and toss data from remaining mbufs on chain. 7681541Srgrimes */ 7691541Srgrimes m = mp; 7701541Srgrimes if (m->m_flags & M_PKTHDR) 7711541Srgrimes m->m_pkthdr.len = count; 7721541Srgrimes for (; m; m = m->m_next) { 7731541Srgrimes if (m->m_len >= count) { 7741541Srgrimes m->m_len = count; 775142350Ssam if (m->m_next != NULL) { 776142350Ssam m_freem(m->m_next); 777142350Ssam m->m_next = NULL; 778142350Ssam } 7791541Srgrimes break; 7801541Srgrimes } 7811541Srgrimes count -= m->m_len; 7821541Srgrimes } 7831541Srgrimes } 7841541Srgrimes} 7851541Srgrimes 7861541Srgrimes/* 7871541Srgrimes * Rearange an mbuf chain so that len bytes are contiguous 788242304Skevlo * and in the data area of an mbuf (so that mtod will work 789242304Skevlo * for a structure of size len). Returns the resulting 7901541Srgrimes * mbuf chain on success, frees it and returns null on failure. 7911541Srgrimes * If there is room, it will add up to max_protohdr-len extra bytes to the 7921541Srgrimes * contiguous region in an attempt to avoid being called next time. 7931541Srgrimes */ 7941541Srgrimesstruct mbuf * 79572356Sbmilekicm_pullup(struct mbuf *n, int len) 7961541Srgrimes{ 79772356Sbmilekic struct mbuf *m; 79872356Sbmilekic int count; 7991541Srgrimes int space; 8001541Srgrimes 8011541Srgrimes /* 8021541Srgrimes * If first mbuf has no cluster, and has room for len bytes 8031541Srgrimes * without shifting current data, pullup into it, 8041541Srgrimes * otherwise allocate a new mbuf to prepend to the chain. 8051541Srgrimes */ 8061541Srgrimes if ((n->m_flags & M_EXT) == 0 && 8071541Srgrimes n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 8081541Srgrimes if (n->m_len >= len) 8091541Srgrimes return (n); 8101541Srgrimes m = n; 8111541Srgrimes n = n->m_next; 8121541Srgrimes len -= m->m_len; 8131541Srgrimes } else { 8141541Srgrimes if (len > MHLEN) 8151541Srgrimes goto bad; 816248372Sglebius m = m_get(M_NOWAIT, n->m_type); 81772356Sbmilekic if (m == NULL) 8181541Srgrimes goto bad; 819108466Ssam if (n->m_flags & M_PKTHDR) 820248372Sglebius m_move_pkthdr(m, n); 8211541Srgrimes } 8221541Srgrimes space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 8231541Srgrimes do { 8241541Srgrimes count = min(min(max(len, max_protohdr), space), n->m_len); 8251541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 826103569Sbmilekic (u_int)count); 8271541Srgrimes len -= count; 8281541Srgrimes m->m_len += count; 8291541Srgrimes n->m_len -= count; 8301541Srgrimes space -= count; 8311541Srgrimes if (n->m_len) 8321541Srgrimes n->m_data += count; 8331541Srgrimes else 8341541Srgrimes n = m_free(n); 8351541Srgrimes } while (len > 0 && n); 8361541Srgrimes if (len > 0) { 8371541Srgrimes (void) m_free(m); 8381541Srgrimes goto bad; 8391541Srgrimes } 8401541Srgrimes m->m_next = n; 8411541Srgrimes return (m); 8421541Srgrimesbad: 8431541Srgrimes m_freem(n); 84472356Sbmilekic return (NULL); 8451541Srgrimes} 8461541Srgrimes 8471541Srgrimes/* 848143761Sjmg * Like m_pullup(), except a new mbuf is always allocated, and we allow 849143761Sjmg * the amount of empty space before the data in the new mbuf to be specified 850143761Sjmg * (in the event that the caller expects to prepend later). 851143761Sjmg */ 852143761Sjmgstruct mbuf * 853143761Sjmgm_copyup(struct mbuf *n, int len, int dstoff) 854143761Sjmg{ 855143761Sjmg struct mbuf *m; 856143761Sjmg int count, space; 857143761Sjmg 858143761Sjmg if (len > (MHLEN - dstoff)) 859143761Sjmg goto bad; 860248372Sglebius m = m_get(M_NOWAIT, n->m_type); 861143761Sjmg if (m == NULL) 862143761Sjmg goto bad; 863143761Sjmg if (n->m_flags & M_PKTHDR) 864248372Sglebius m_move_pkthdr(m, n); 865143761Sjmg m->m_data += dstoff; 866143761Sjmg space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 867143761Sjmg do { 868143761Sjmg count = min(min(max(len, max_protohdr), space), n->m_len); 869143761Sjmg memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), 870143761Sjmg (unsigned)count); 871143761Sjmg len -= count; 872143761Sjmg m->m_len += count; 873143761Sjmg n->m_len -= count; 874143761Sjmg space -= count; 875143761Sjmg if (n->m_len) 876143761Sjmg n->m_data += count; 877143761Sjmg else 878143761Sjmg n = m_free(n); 879143761Sjmg } while (len > 0 && n); 880143761Sjmg if (len > 0) { 881143761Sjmg (void) m_free(m); 882143761Sjmg goto bad; 883143761Sjmg } 884143761Sjmg m->m_next = n; 885143761Sjmg return (m); 886143761Sjmg bad: 887143761Sjmg m_freem(n); 888143761Sjmg return (NULL); 889143761Sjmg} 890143761Sjmg 891143761Sjmg/* 8921541Srgrimes * Partition an mbuf chain in two pieces, returning the tail -- 8931541Srgrimes * all but the first len0 bytes. In case of failure, it returns NULL and 8941541Srgrimes * attempts to restore the chain to its original state. 89597681Sarchie * 89697681Sarchie * Note that the resulting mbufs might be read-only, because the new 89797681Sarchie * mbuf can end up sharing an mbuf cluster with the original mbuf if 89897681Sarchie * the "breaking point" happens to lie within a cluster mbuf. Use the 89997681Sarchie * M_WRITABLE() macro to check for this case. 9001541Srgrimes */ 9011541Srgrimesstruct mbuf * 90272356Sbmilekicm_split(struct mbuf *m0, int len0, int wait) 9031541Srgrimes{ 90472356Sbmilekic struct mbuf *m, *n; 905103569Sbmilekic u_int len = len0, remain; 9061541Srgrimes 907132488Salfred MBUF_CHECKSLEEP(wait); 9081541Srgrimes for (m = m0; m && len > m->m_len; m = m->m_next) 9091541Srgrimes len -= m->m_len; 91072356Sbmilekic if (m == NULL) 91172356Sbmilekic return (NULL); 9121541Srgrimes remain = m->m_len - len; 913248887Sglebius if (m0->m_flags & M_PKTHDR && remain == 0) { 914248372Sglebius n = m_gethdr(wait, m0->m_type); 915258128Sglebius if (n == NULL) 916248887Sglebius return (NULL); 917248887Sglebius n->m_next = m->m_next; 918248887Sglebius m->m_next = NULL; 919248887Sglebius n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 920248887Sglebius n->m_pkthdr.len = m0->m_pkthdr.len - len0; 921248887Sglebius m0->m_pkthdr.len = len0; 922248887Sglebius return (n); 923248887Sglebius } else if (m0->m_flags & M_PKTHDR) { 924248887Sglebius n = m_gethdr(wait, m0->m_type); 92572356Sbmilekic if (n == NULL) 92672356Sbmilekic return (NULL); 9271541Srgrimes n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 9281541Srgrimes n->m_pkthdr.len = m0->m_pkthdr.len - len0; 9291541Srgrimes m0->m_pkthdr.len = len0; 9301541Srgrimes if (m->m_flags & M_EXT) 9311541Srgrimes goto extpacket; 9321541Srgrimes if (remain > MHLEN) { 9331541Srgrimes /* m can't be the lead packet */ 934276692Srwatson M_ALIGN(n, 0); 9351541Srgrimes n->m_next = m_split(m, len, wait); 93672356Sbmilekic if (n->m_next == NULL) { 9371541Srgrimes (void) m_free(n); 93872356Sbmilekic return (NULL); 93994471Shsu } else { 94094471Shsu n->m_len = 0; 9411541Srgrimes return (n); 94294471Shsu } 9431541Srgrimes } else 944276692Srwatson M_ALIGN(n, remain); 9451541Srgrimes } else if (remain == 0) { 9461541Srgrimes n = m->m_next; 94772356Sbmilekic m->m_next = NULL; 9481541Srgrimes return (n); 9491541Srgrimes } else { 950248372Sglebius n = m_get(wait, m->m_type); 95172356Sbmilekic if (n == NULL) 95272356Sbmilekic return (NULL); 9531541Srgrimes M_ALIGN(n, remain); 9541541Srgrimes } 9551541Srgrimesextpacket: 9561541Srgrimes if (m->m_flags & M_EXT) { 9571541Srgrimes n->m_data = m->m_data + len; 958151976Sandre mb_dupcl(n, m); 9591541Srgrimes } else { 9601541Srgrimes bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 9611541Srgrimes } 9621541Srgrimes n->m_len = remain; 9631541Srgrimes m->m_len = len; 9641541Srgrimes n->m_next = m->m_next; 96572356Sbmilekic m->m_next = NULL; 9661541Srgrimes return (n); 9671541Srgrimes} 9681541Srgrimes/* 9691541Srgrimes * Routine to copy from device local memory into mbufs. 97078508Sbmilekic * Note that `off' argument is offset into first mbuf of target chain from 97178508Sbmilekic * which to begin copying the data to. 9721541Srgrimes */ 9731541Srgrimesstruct mbuf * 97478508Sbmilekicm_devget(char *buf, int totlen, int off, struct ifnet *ifp, 975169624Srwatson void (*copy)(char *from, caddr_t to, u_int len)) 9761541Srgrimes{ 97772356Sbmilekic struct mbuf *m; 978129906Sbmilekic struct mbuf *top = NULL, **mp = ⊤ 97978508Sbmilekic int len; 9801541Srgrimes 98178508Sbmilekic if (off < 0 || off > MHLEN) 98278508Sbmilekic return (NULL); 98378508Sbmilekic 984129906Sbmilekic while (totlen > 0) { 985129906Sbmilekic if (top == NULL) { /* First one, must be PKTHDR */ 986129906Sbmilekic if (totlen + off >= MINCLSIZE) { 987243882Sglebius m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 988129906Sbmilekic len = MCLBYTES; 989129906Sbmilekic } else { 990243882Sglebius m = m_gethdr(M_NOWAIT, MT_DATA); 991129906Sbmilekic len = MHLEN; 9921541Srgrimes 993129906Sbmilekic /* Place initial small packet/header at end of mbuf */ 994305691Skevlo if (m && totlen + off + max_linkhdr <= MHLEN) { 995129906Sbmilekic m->m_data += max_linkhdr; 996129906Sbmilekic len -= max_linkhdr; 997129906Sbmilekic } 998129906Sbmilekic } 999129906Sbmilekic if (m == NULL) 1000129906Sbmilekic return NULL; 1001129906Sbmilekic m->m_pkthdr.rcvif = ifp; 1002129906Sbmilekic m->m_pkthdr.len = totlen; 1003129906Sbmilekic } else { 1004129906Sbmilekic if (totlen + off >= MINCLSIZE) { 1005243882Sglebius m = m_getcl(M_NOWAIT, MT_DATA, 0); 1006129906Sbmilekic len = MCLBYTES; 1007129906Sbmilekic } else { 1008243882Sglebius m = m_get(M_NOWAIT, MT_DATA); 1009129906Sbmilekic len = MLEN; 1010129906Sbmilekic } 101172356Sbmilekic if (m == NULL) { 10121541Srgrimes m_freem(top); 1013129906Sbmilekic return NULL; 10141541Srgrimes } 10151541Srgrimes } 101678508Sbmilekic if (off) { 101778508Sbmilekic m->m_data += off; 101878508Sbmilekic len -= off; 101978508Sbmilekic off = 0; 102078508Sbmilekic } 102178508Sbmilekic m->m_len = len = min(totlen, len); 10221541Srgrimes if (copy) 1023103569Sbmilekic copy(buf, mtod(m, caddr_t), (u_int)len); 10241541Srgrimes else 1025103569Sbmilekic bcopy(buf, mtod(m, caddr_t), (u_int)len); 102678508Sbmilekic buf += len; 10271541Srgrimes *mp = m; 10281541Srgrimes mp = &m->m_next; 10291541Srgrimes totlen -= len; 10301541Srgrimes } 10311541Srgrimes return (top); 10321541Srgrimes} 10333352Sphk 10343352Sphk/* 10353352Sphk * Copy data from a buffer back into the indicated mbuf chain, 10363352Sphk * starting "off" bytes from the beginning, extending the mbuf 10373352Sphk * chain if necessary. 10383352Sphk */ 10393352Sphkvoid 1040128402Sluigim_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp) 10413352Sphk{ 104272356Sbmilekic int mlen; 104372356Sbmilekic struct mbuf *m = m0, *n; 10443352Sphk int totlen = 0; 10453352Sphk 104672356Sbmilekic if (m0 == NULL) 10473352Sphk return; 10483352Sphk while (off > (mlen = m->m_len)) { 10493352Sphk off -= mlen; 10503352Sphk totlen += mlen; 105172356Sbmilekic if (m->m_next == NULL) { 1052243882Sglebius n = m_get(M_NOWAIT, m->m_type); 105372356Sbmilekic if (n == NULL) 10543352Sphk goto out; 1055129906Sbmilekic bzero(mtod(n, caddr_t), MLEN); 10563352Sphk n->m_len = min(MLEN, len + off); 10573352Sphk m->m_next = n; 10583352Sphk } 10593352Sphk m = m->m_next; 10603352Sphk } 10613352Sphk while (len > 0) { 1062187409Smav if (m->m_next == NULL && (len > m->m_len - off)) { 1063187409Smav m->m_len += min(len - (m->m_len - off), 1064187409Smav M_TRAILINGSPACE(m)); 1065187409Smav } 10663352Sphk mlen = min (m->m_len - off, len); 1067103569Sbmilekic bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); 10683352Sphk cp += mlen; 10693352Sphk len -= mlen; 10703352Sphk mlen += off; 10713352Sphk off = 0; 10723352Sphk totlen += mlen; 10733352Sphk if (len == 0) 10743352Sphk break; 107572356Sbmilekic if (m->m_next == NULL) { 1076243882Sglebius n = m_get(M_NOWAIT, m->m_type); 107772356Sbmilekic if (n == NULL) 10783352Sphk break; 10793352Sphk n->m_len = min(MLEN, len); 10803352Sphk m->m_next = n; 10813352Sphk } 10823352Sphk m = m->m_next; 10833352Sphk } 10843352Sphkout: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 10853352Sphk m->m_pkthdr.len = totlen; 10863352Sphk} 108752756Sphk 1088123557Sbms/* 1089138541Ssam * Append the specified data to the indicated mbuf chain, 1090138541Ssam * Extend the mbuf chain if the new data does not fit in 1091138541Ssam * existing space. 1092138541Ssam * 1093138541Ssam * Return 1 if able to complete the job; otherwise 0. 1094138541Ssam */ 1095138541Ssamint 1096138541Ssamm_append(struct mbuf *m0, int len, c_caddr_t cp) 1097138541Ssam{ 1098138541Ssam struct mbuf *m, *n; 1099138541Ssam int remainder, space; 1100138541Ssam 1101138541Ssam for (m = m0; m->m_next != NULL; m = m->m_next) 1102138541Ssam ; 1103138541Ssam remainder = len; 1104138541Ssam space = M_TRAILINGSPACE(m); 1105138541Ssam if (space > 0) { 1106138541Ssam /* 1107138541Ssam * Copy into available space. 1108138541Ssam */ 1109138541Ssam if (space > remainder) 1110138541Ssam space = remainder; 1111138541Ssam bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 1112138541Ssam m->m_len += space; 1113138541Ssam cp += space, remainder -= space; 1114138541Ssam } 1115138541Ssam while (remainder > 0) { 1116138541Ssam /* 1117138541Ssam * Allocate a new mbuf; could check space 1118138541Ssam * and allocate a cluster instead. 1119138541Ssam */ 1120243882Sglebius n = m_get(M_NOWAIT, m->m_type); 1121138541Ssam if (n == NULL) 1122138541Ssam break; 1123138541Ssam n->m_len = min(MLEN, remainder); 1124138894Ssam bcopy(cp, mtod(n, caddr_t), n->m_len); 1125138894Ssam cp += n->m_len, remainder -= n->m_len; 1126138541Ssam m->m_next = n; 1127138541Ssam m = n; 1128138541Ssam } 1129138541Ssam if (m0->m_flags & M_PKTHDR) 1130138541Ssam m0->m_pkthdr.len += len - remainder; 1131138541Ssam return (remainder == 0); 1132138541Ssam} 1133138541Ssam 1134138541Ssam/* 1135123557Sbms * Apply function f to the data in an mbuf chain starting "off" bytes from 1136123557Sbms * the beginning, continuing for "len" bytes. 1137123557Sbms */ 1138123557Sbmsint 1139123557Sbmsm_apply(struct mbuf *m, int off, int len, 1140123564Sbms int (*f)(void *, void *, u_int), void *arg) 1141123557Sbms{ 1142123564Sbms u_int count; 1143123557Sbms int rval; 1144123557Sbms 1145123557Sbms KASSERT(off >= 0, ("m_apply, negative off %d", off)); 1146123557Sbms KASSERT(len >= 0, ("m_apply, negative len %d", len)); 1147123557Sbms while (off > 0) { 1148123557Sbms KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); 1149123557Sbms if (off < m->m_len) 1150123557Sbms break; 1151123557Sbms off -= m->m_len; 1152123557Sbms m = m->m_next; 1153123557Sbms } 1154123557Sbms while (len > 0) { 1155123557Sbms KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); 1156123557Sbms count = min(m->m_len - off, len); 1157123557Sbms rval = (*f)(arg, mtod(m, caddr_t) + off, count); 1158123557Sbms if (rval) 1159123557Sbms return (rval); 1160123557Sbms len -= count; 1161123557Sbms off = 0; 1162123557Sbms m = m->m_next; 1163123557Sbms } 1164123557Sbms return (0); 1165123557Sbms} 1166123557Sbms 1167123557Sbms/* 1168123557Sbms * Return a pointer to mbuf/offset of location in mbuf chain. 1169123557Sbms */ 1170123557Sbmsstruct mbuf * 1171123557Sbmsm_getptr(struct mbuf *m, int loc, int *off) 1172123557Sbms{ 1173123557Sbms 1174123557Sbms while (loc >= 0) { 1175123564Sbms /* Normal end of search. */ 1176123557Sbms if (m->m_len > loc) { 1177123557Sbms *off = loc; 1178123557Sbms return (m); 1179123557Sbms } else { 1180123557Sbms loc -= m->m_len; 1181123557Sbms if (m->m_next == NULL) { 1182123557Sbms if (loc == 0) { 1183123564Sbms /* Point at the end of valid data. */ 1184123557Sbms *off = m->m_len; 1185123557Sbms return (m); 1186123564Sbms } 1187123564Sbms return (NULL); 1188123564Sbms } 1189123564Sbms m = m->m_next; 1190123557Sbms } 1191123557Sbms } 1192123557Sbms return (NULL); 1193123557Sbms} 1194123557Sbms 119552756Sphkvoid 1196135904Sjmgm_print(const struct mbuf *m, int maxlen) 119752756Sphk{ 119852756Sphk int len; 1199135904Sjmg int pdata; 120054906Seivind const struct mbuf *m2; 120152756Sphk 1202230587Sken if (m == NULL) { 1203230587Sken printf("mbuf: %p\n", m); 1204230587Sken return; 1205230587Sken } 1206230587Sken 1207135904Sjmg if (m->m_flags & M_PKTHDR) 1208135904Sjmg len = m->m_pkthdr.len; 1209135904Sjmg else 1210135904Sjmg len = -1; 121152756Sphk m2 = m; 1212135904Sjmg while (m2 != NULL && (len == -1 || len)) { 1213135904Sjmg pdata = m2->m_len; 1214135904Sjmg if (maxlen != -1 && pdata > maxlen) 1215135904Sjmg pdata = maxlen; 1216135904Sjmg printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len, 1217135904Sjmg m2->m_next, m2->m_flags, "\20\20freelist\17skipfw" 1218135904Sjmg "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly" 1219135904Sjmg "\3eor\2pkthdr\1ext", pdata ? "" : "\n"); 1220135904Sjmg if (pdata) 1221156700Sjmg printf(", %*D\n", pdata, (u_char *)m2->m_data, "-"); 1222135904Sjmg if (len != -1) 1223135904Sjmg len -= m2->m_len; 122452756Sphk m2 = m2->m_next; 122552756Sphk } 1226135904Sjmg if (len > 0) 1227135904Sjmg printf("%d bytes unaccounted for.\n", len); 122852756Sphk return; 122952756Sphk} 1230103540Sphk 1231103569Sbmilekicu_int 1232103540Sphkm_fixhdr(struct mbuf *m0) 1233103540Sphk{ 1234103569Sbmilekic u_int len; 1235103540Sphk 1236103544Sphk len = m_length(m0, NULL); 1237103544Sphk m0->m_pkthdr.len = len; 1238103544Sphk return (len); 1239103544Sphk} 1240103544Sphk 1241103569Sbmilekicu_int 1242103544Sphkm_length(struct mbuf *m0, struct mbuf **last) 1243103544Sphk{ 1244103544Sphk struct mbuf *m; 1245103569Sbmilekic u_int len; 1246103544Sphk 1247103544Sphk len = 0; 1248103544Sphk for (m = m0; m != NULL; m = m->m_next) { 1249103540Sphk len += m->m_len; 1250103544Sphk if (m->m_next == NULL) 1251103544Sphk break; 1252103540Sphk } 1253103544Sphk if (last != NULL) 1254103544Sphk *last = m; 1255103544Sphk return (len); 1256103540Sphk} 1257112777Ssilby 1258112777Ssilby/* 1259112777Ssilby * Defragment a mbuf chain, returning the shortest possible 1260112777Ssilby * chain of mbufs and clusters. If allocation fails and 1261112777Ssilby * this cannot be completed, NULL will be returned, but 1262112777Ssilby * the passed in chain will be unchanged. Upon success, 1263112777Ssilby * the original chain will be freed, and the new chain 1264112777Ssilby * will be returned. 1265112777Ssilby * 1266112777Ssilby * If a non-packet header is passed in, the original 1267112777Ssilby * mbuf (chain?) will be returned unharmed. 1268112777Ssilby */ 1269112777Ssilbystruct mbuf * 1270112777Ssilbym_defrag(struct mbuf *m0, int how) 1271112777Ssilby{ 1272125472Ssilby struct mbuf *m_new = NULL, *m_final = NULL; 1273125472Ssilby int progress = 0, length; 1274112777Ssilby 1275132488Salfred MBUF_CHECKSLEEP(how); 1276112777Ssilby if (!(m0->m_flags & M_PKTHDR)) 1277112777Ssilby return (m0); 1278112777Ssilby 1279117770Ssilby m_fixhdr(m0); /* Needed sanity check */ 1280117770Ssilby 1281113490Ssilby#ifdef MBUF_STRESS_TEST 1282113490Ssilby if (m_defragrandomfailures) { 1283113490Ssilby int temp = arc4random() & 0xff; 1284113490Ssilby if (temp == 0xba) 1285113490Ssilby goto nospace; 1286113490Ssilby } 1287113490Ssilby#endif 1288266876Sglebius 1289112777Ssilby if (m0->m_pkthdr.len > MHLEN) 1290112777Ssilby m_final = m_getcl(how, MT_DATA, M_PKTHDR); 1291112777Ssilby else 1292112777Ssilby m_final = m_gethdr(how, MT_DATA); 1293112777Ssilby 1294112777Ssilby if (m_final == NULL) 1295112777Ssilby goto nospace; 1296112777Ssilby 1297123740Speter if (m_dup_pkthdr(m_final, m0, how) == 0) 1298112777Ssilby goto nospace; 1299112777Ssilby 1300112777Ssilby m_new = m_final; 1301112777Ssilby 1302112777Ssilby while (progress < m0->m_pkthdr.len) { 1303112777Ssilby length = m0->m_pkthdr.len - progress; 1304112777Ssilby if (length > MCLBYTES) 1305112777Ssilby length = MCLBYTES; 1306112777Ssilby 1307112777Ssilby if (m_new == NULL) { 1308112777Ssilby if (length > MLEN) 1309112777Ssilby m_new = m_getcl(how, MT_DATA, 0); 1310112777Ssilby else 1311112777Ssilby m_new = m_get(how, MT_DATA); 1312112777Ssilby if (m_new == NULL) 1313112777Ssilby goto nospace; 1314112777Ssilby } 1315112777Ssilby 1316112777Ssilby m_copydata(m0, progress, length, mtod(m_new, caddr_t)); 1317112777Ssilby progress += length; 1318112777Ssilby m_new->m_len = length; 1319112777Ssilby if (m_new != m_final) 1320112777Ssilby m_cat(m_final, m_new); 1321112777Ssilby m_new = NULL; 1322112777Ssilby } 1323116455Ssilby#ifdef MBUF_STRESS_TEST 1324112777Ssilby if (m0->m_next == NULL) 1325112777Ssilby m_defraguseless++; 1326116455Ssilby#endif 1327112777Ssilby m_freem(m0); 1328112777Ssilby m0 = m_final; 1329116455Ssilby#ifdef MBUF_STRESS_TEST 1330112777Ssilby m_defragpackets++; 1331112777Ssilby m_defragbytes += m0->m_pkthdr.len; 1332116455Ssilby#endif 1333112777Ssilby return (m0); 1334112777Ssilbynospace: 1335116455Ssilby#ifdef MBUF_STRESS_TEST 1336112777Ssilby m_defragfailure++; 1337116455Ssilby#endif 1338112777Ssilby if (m_final) 1339112777Ssilby m_freem(m_final); 1340112777Ssilby return (NULL); 1341112777Ssilby} 1342119644Ssilby 1343175414Ssam/* 1344175414Ssam * Defragment an mbuf chain, returning at most maxfrags separate 1345175414Ssam * mbufs+clusters. If this is not possible NULL is returned and 1346175414Ssam * the original mbuf chain is left in it's present (potentially 1347175414Ssam * modified) state. We use two techniques: collapsing consecutive 1348175414Ssam * mbufs and replacing consecutive mbufs by a cluster. 1349175414Ssam * 1350175414Ssam * NB: this should really be named m_defrag but that name is taken 1351175414Ssam */ 1352175414Ssamstruct mbuf * 1353175414Ssamm_collapse(struct mbuf *m0, int how, int maxfrags) 1354175414Ssam{ 1355175414Ssam struct mbuf *m, *n, *n2, **prev; 1356175414Ssam u_int curfrags; 1357175414Ssam 1358175414Ssam /* 1359175414Ssam * Calculate the current number of frags. 1360175414Ssam */ 1361175414Ssam curfrags = 0; 1362175414Ssam for (m = m0; m != NULL; m = m->m_next) 1363175414Ssam curfrags++; 1364175414Ssam /* 1365175414Ssam * First, try to collapse mbufs. Note that we always collapse 1366175414Ssam * towards the front so we don't need to deal with moving the 1367175414Ssam * pkthdr. This may be suboptimal if the first mbuf has much 1368175414Ssam * less data than the following. 1369175414Ssam */ 1370175414Ssam m = m0; 1371175414Ssamagain: 1372175414Ssam for (;;) { 1373175414Ssam n = m->m_next; 1374175414Ssam if (n == NULL) 1375175414Ssam break; 1376242256Sandre if (M_WRITABLE(m) && 1377175414Ssam n->m_len < M_TRAILINGSPACE(m)) { 1378175414Ssam bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, 1379175414Ssam n->m_len); 1380175414Ssam m->m_len += n->m_len; 1381175414Ssam m->m_next = n->m_next; 1382175414Ssam m_free(n); 1383175414Ssam if (--curfrags <= maxfrags) 1384175414Ssam return m0; 1385175414Ssam } else 1386175414Ssam m = n; 1387175414Ssam } 1388175414Ssam KASSERT(maxfrags > 1, 1389175414Ssam ("maxfrags %u, but normal collapse failed", maxfrags)); 1390175414Ssam /* 1391175414Ssam * Collapse consecutive mbufs to a cluster. 1392175414Ssam */ 1393175414Ssam prev = &m0->m_next; /* NB: not the first mbuf */ 1394175414Ssam while ((n = *prev) != NULL) { 1395175414Ssam if ((n2 = n->m_next) != NULL && 1396175414Ssam n->m_len + n2->m_len < MCLBYTES) { 1397175414Ssam m = m_getcl(how, MT_DATA, 0); 1398175414Ssam if (m == NULL) 1399175414Ssam goto bad; 1400175414Ssam bcopy(mtod(n, void *), mtod(m, void *), n->m_len); 1401175414Ssam bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len, 1402175414Ssam n2->m_len); 1403175414Ssam m->m_len = n->m_len + n2->m_len; 1404175414Ssam m->m_next = n2->m_next; 1405175414Ssam *prev = m; 1406175414Ssam m_free(n); 1407175414Ssam m_free(n2); 1408175414Ssam if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */ 1409175414Ssam return m0; 1410175414Ssam /* 1411175414Ssam * Still not there, try the normal collapse 1412175414Ssam * again before we allocate another cluster. 1413175414Ssam */ 1414175414Ssam goto again; 1415175414Ssam } 1416175414Ssam prev = &n->m_next; 1417175414Ssam } 1418175414Ssam /* 1419175414Ssam * No place where we can collapse to a cluster; punt. 1420175414Ssam * This can occur if, for example, you request 2 frags 1421175414Ssam * but the packet requires that both be clusters (we 1422175414Ssam * never reallocate the first mbuf to avoid moving the 1423175414Ssam * packet header). 1424175414Ssam */ 1425175414Ssambad: 1426175414Ssam return NULL; 1427175414Ssam} 1428175414Ssam 1429119644Ssilby#ifdef MBUF_STRESS_TEST 1430119644Ssilby 1431119644Ssilby/* 1432119644Ssilby * Fragment an mbuf chain. There's no reason you'd ever want to do 1433119644Ssilby * this in normal usage, but it's great for stress testing various 1434119644Ssilby * mbuf consumers. 1435119644Ssilby * 1436119644Ssilby * If fragmentation is not possible, the original chain will be 1437119644Ssilby * returned. 1438119644Ssilby * 1439119644Ssilby * Possible length values: 1440119644Ssilby * 0 no fragmentation will occur 1441119644Ssilby * > 0 each fragment will be of the specified length 1442119644Ssilby * -1 each fragment will be the same random value in length 1443119644Ssilby * -2 each fragment's length will be entirely random 1444119644Ssilby * (Random values range from 1 to 256) 1445119644Ssilby */ 1446119644Ssilbystruct mbuf * 1447119644Ssilbym_fragment(struct mbuf *m0, int how, int length) 1448119644Ssilby{ 1449331847Savos struct mbuf *m_first, *m_last; 1450331847Savos int divisor = 255, progress = 0, fraglen; 1451119644Ssilby 1452119644Ssilby if (!(m0->m_flags & M_PKTHDR)) 1453119644Ssilby return (m0); 1454266876Sglebius 1455331847Savos if (length == 0 || length < -2) 1456119644Ssilby return (m0); 1457331847Savos if (length > MCLBYTES) 1458331847Savos length = MCLBYTES; 1459331847Savos if (length < 0 && divisor > MCLBYTES) 1460331847Savos divisor = MCLBYTES; 1461331847Savos if (length == -1) 1462331847Savos length = 1 + (arc4random() % divisor); 1463331847Savos if (length > 0) 1464331847Savos fraglen = length; 1465119644Ssilby 1466119644Ssilby m_fixhdr(m0); /* Needed sanity check */ 1467119644Ssilby 1468331847Savos m_first = m_getcl(how, MT_DATA, M_PKTHDR); 1469331847Savos if (m_first == NULL) 1470119644Ssilby goto nospace; 1471119644Ssilby 1472331847Savos if (m_dup_pkthdr(m_first, m0, how) == 0) 1473119644Ssilby goto nospace; 1474119644Ssilby 1475331847Savos m_last = m_first; 1476119644Ssilby 1477119644Ssilby while (progress < m0->m_pkthdr.len) { 1478331847Savos if (length == -2) 1479331847Savos fraglen = 1 + (arc4random() % divisor); 1480119644Ssilby if (fraglen > m0->m_pkthdr.len - progress) 1481119644Ssilby fraglen = m0->m_pkthdr.len - progress; 1482119644Ssilby 1483331847Savos if (progress != 0) { 1484331847Savos struct mbuf *m_new = m_getcl(how, MT_DATA, 0); 1485119644Ssilby if (m_new == NULL) 1486119644Ssilby goto nospace; 1487331847Savos 1488331847Savos m_last->m_next = m_new; 1489331847Savos m_last = m_new; 1490119644Ssilby } 1491119644Ssilby 1492331847Savos m_copydata(m0, progress, fraglen, mtod(m_last, caddr_t)); 1493119644Ssilby progress += fraglen; 1494331847Savos m_last->m_len = fraglen; 1495119644Ssilby } 1496119644Ssilby m_freem(m0); 1497331847Savos m0 = m_first; 1498119644Ssilby return (m0); 1499119644Ssilbynospace: 1500331847Savos if (m_first) 1501331847Savos m_freem(m_first); 1502119644Ssilby /* Return the original chain on failure */ 1503119644Ssilby return (m0); 1504119644Ssilby} 1505119644Ssilby 1506119644Ssilby#endif 1507125296Ssilby 1508163915Sandre/* 1509163915Sandre * Copy the contents of uio into a properly sized mbuf chain. 1510163915Sandre */ 1511125296Ssilbystruct mbuf * 1512163915Sandrem_uiotombuf(struct uio *uio, int how, int len, int align, int flags) 1513125296Ssilby{ 1514163915Sandre struct mbuf *m, *mb; 1515231949Skib int error, length; 1516231949Skib ssize_t total; 1517163915Sandre int progress = 0; 1518125296Ssilby 1519163915Sandre /* 1520163915Sandre * len can be zero or an arbitrary large value bound by 1521163915Sandre * the total data supplied by the uio. 1522163915Sandre */ 1523125296Ssilby if (len > 0) 1524125296Ssilby total = min(uio->uio_resid, len); 1525125296Ssilby else 1526125296Ssilby total = uio->uio_resid; 1527163915Sandre 1528163915Sandre /* 1529163915Sandre * The smallest unit returned by m_getm2() is a single mbuf 1530182777Sthompsa * with pkthdr. We can't align past it. 1531163915Sandre */ 1532145883Semax if (align >= MHLEN) 1533163915Sandre return (NULL); 1534163915Sandre 1535166171Sandre /* 1536166171Sandre * Give us the full allocation or nothing. 1537166171Sandre * If len is zero return the smallest empty mbuf. 1538166171Sandre */ 1539166171Sandre m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags); 1540163915Sandre if (m == NULL) 1541163915Sandre return (NULL); 1542163915Sandre m->m_data += align; 1543163915Sandre 1544163915Sandre /* Fill all mbufs with uio data and update header information. */ 1545163915Sandre for (mb = m; mb != NULL; mb = mb->m_next) { 1546163915Sandre length = min(M_TRAILINGSPACE(mb), total - progress); 1547163915Sandre 1548163915Sandre error = uiomove(mtod(mb, void *), length, uio); 1549163915Sandre if (error) { 1550163915Sandre m_freem(m); 1551163915Sandre return (NULL); 1552125296Ssilby } 1553163915Sandre 1554163915Sandre mb->m_len = length; 1555125296Ssilby progress += length; 1556163915Sandre if (flags & M_PKTHDR) 1557163915Sandre m->m_pkthdr.len += length; 1558125296Ssilby } 1559163915Sandre KASSERT(progress == total, ("%s: progress != total", __func__)); 1560163915Sandre 1561163915Sandre return (m); 1562125296Ssilby} 1563148552Ssam 1564148552Ssam/* 1565194667Sandre * Copy an mbuf chain into a uio limited by len if set. 1566194667Sandre */ 1567194667Sandreint 1568194667Sandrem_mbuftouio(struct uio *uio, struct mbuf *m, int len) 1569194667Sandre{ 1570194667Sandre int error, length, total; 1571194667Sandre int progress = 0; 1572194667Sandre 1573194667Sandre if (len > 0) 1574194667Sandre total = min(uio->uio_resid, len); 1575194667Sandre else 1576194667Sandre total = uio->uio_resid; 1577194667Sandre 1578194667Sandre /* Fill the uio with data from the mbufs. */ 1579194667Sandre for (; m != NULL; m = m->m_next) { 1580194667Sandre length = min(m->m_len, total - progress); 1581194667Sandre 1582194667Sandre error = uiomove(mtod(m, void *), length, uio); 1583194667Sandre if (error) 1584194667Sandre return (error); 1585194667Sandre 1586194667Sandre progress += length; 1587194667Sandre } 1588194667Sandre 1589194667Sandre return (0); 1590194667Sandre} 1591194667Sandre 1592194667Sandre/* 1593156756Ssam * Create a writable copy of the mbuf chain. While doing this 1594156756Ssam * we compact the chain with a goal of producing a chain with 1595156756Ssam * at most two mbufs. The second mbuf in this chain is likely 1596156756Ssam * to be a cluster. The primary purpose of this work is to create 1597156756Ssam * a writable packet for encryption, compression, etc. The 1598156756Ssam * secondary goal is to linearize the data so the data can be 1599156756Ssam * passed to crypto hardware in the most efficient manner possible. 1600156756Ssam */ 1601156756Ssamstruct mbuf * 1602156756Ssamm_unshare(struct mbuf *m0, int how) 1603156756Ssam{ 1604156756Ssam struct mbuf *m, *mprev; 1605156756Ssam struct mbuf *n, *mfirst, *mlast; 1606156756Ssam int len, off; 1607156756Ssam 1608156756Ssam mprev = NULL; 1609156756Ssam for (m = m0; m != NULL; m = mprev->m_next) { 1610156756Ssam /* 1611156756Ssam * Regular mbufs are ignored unless there's a cluster 1612156756Ssam * in front of it that we can use to coalesce. We do 1613156756Ssam * the latter mainly so later clusters can be coalesced 1614156756Ssam * also w/o having to handle them specially (i.e. convert 1615156756Ssam * mbuf+cluster -> cluster). This optimization is heavily 1616156756Ssam * influenced by the assumption that we're running over 1617156756Ssam * Ethernet where MCLBYTES is large enough that the max 1618156756Ssam * packet size will permit lots of coalescing into a 1619156756Ssam * single cluster. This in turn permits efficient 1620156756Ssam * crypto operations, especially when using hardware. 1621156756Ssam */ 1622156756Ssam if ((m->m_flags & M_EXT) == 0) { 1623156756Ssam if (mprev && (mprev->m_flags & M_EXT) && 1624156756Ssam m->m_len <= M_TRAILINGSPACE(mprev)) { 1625156756Ssam /* XXX: this ignores mbuf types */ 1626156756Ssam memcpy(mtod(mprev, caddr_t) + mprev->m_len, 1627266876Sglebius mtod(m, caddr_t), m->m_len); 1628156756Ssam mprev->m_len += m->m_len; 1629156756Ssam mprev->m_next = m->m_next; /* unlink from chain */ 1630156756Ssam m_free(m); /* reclaim mbuf */ 1631156756Ssam#if 0 1632156756Ssam newipsecstat.ips_mbcoalesced++; 1633156756Ssam#endif 1634156756Ssam } else { 1635156756Ssam mprev = m; 1636156756Ssam } 1637156756Ssam continue; 1638156756Ssam } 1639156756Ssam /* 1640156756Ssam * Writable mbufs are left alone (for now). 1641156756Ssam */ 1642156756Ssam if (M_WRITABLE(m)) { 1643156756Ssam mprev = m; 1644156756Ssam continue; 1645156756Ssam } 1646156756Ssam 1647156756Ssam /* 1648156756Ssam * Not writable, replace with a copy or coalesce with 1649156756Ssam * the previous mbuf if possible (since we have to copy 1650156756Ssam * it anyway, we try to reduce the number of mbufs and 1651156756Ssam * clusters so that future work is easier). 1652156756Ssam */ 1653156756Ssam KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags)); 1654156756Ssam /* NB: we only coalesce into a cluster or larger */ 1655156756Ssam if (mprev != NULL && (mprev->m_flags & M_EXT) && 1656156756Ssam m->m_len <= M_TRAILINGSPACE(mprev)) { 1657156756Ssam /* XXX: this ignores mbuf types */ 1658156756Ssam memcpy(mtod(mprev, caddr_t) + mprev->m_len, 1659266876Sglebius mtod(m, caddr_t), m->m_len); 1660156756Ssam mprev->m_len += m->m_len; 1661156756Ssam mprev->m_next = m->m_next; /* unlink from chain */ 1662156756Ssam m_free(m); /* reclaim mbuf */ 1663156756Ssam#if 0 1664156756Ssam newipsecstat.ips_clcoalesced++; 1665156756Ssam#endif 1666156756Ssam continue; 1667156756Ssam } 1668156756Ssam 1669156756Ssam /* 1670248371Sglebius * Allocate new space to hold the copy and copy the data. 1671248371Sglebius * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by 1672248371Sglebius * splitting them into clusters. We could just malloc a 1673248371Sglebius * buffer and make it external but too many device drivers 1674248371Sglebius * don't know how to break up the non-contiguous memory when 1675248371Sglebius * doing DMA. 1676156756Ssam */ 1677297298Snp n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS); 1678248371Sglebius if (n == NULL) { 1679248371Sglebius m_freem(m0); 1680248371Sglebius return (NULL); 1681156756Ssam } 1682288990Sglebius if (m->m_flags & M_PKTHDR) { 1683288990Sglebius KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR", 1684288990Sglebius __func__, m0, m)); 1685288990Sglebius m_move_pkthdr(n, m); 1686288990Sglebius } 1687156756Ssam len = m->m_len; 1688156756Ssam off = 0; 1689156756Ssam mfirst = n; 1690156756Ssam mlast = NULL; 1691156756Ssam for (;;) { 1692156756Ssam int cc = min(len, MCLBYTES); 1693156756Ssam memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); 1694156756Ssam n->m_len = cc; 1695156756Ssam if (mlast != NULL) 1696156756Ssam mlast->m_next = n; 1697266876Sglebius mlast = n; 1698156756Ssam#if 0 1699156756Ssam newipsecstat.ips_clcopied++; 1700156756Ssam#endif 1701156756Ssam 1702156756Ssam len -= cc; 1703156756Ssam if (len <= 0) 1704156756Ssam break; 1705156756Ssam off += cc; 1706156756Ssam 1707297298Snp n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS); 1708156756Ssam if (n == NULL) { 1709156756Ssam m_freem(mfirst); 1710156756Ssam m_freem(m0); 1711156756Ssam return (NULL); 1712156756Ssam } 1713156756Ssam } 1714266876Sglebius n->m_next = m->m_next; 1715156756Ssam if (mprev == NULL) 1716156756Ssam m0 = mfirst; /* new head of chain */ 1717156756Ssam else 1718156756Ssam mprev->m_next = mfirst; /* replace old mbuf */ 1719156756Ssam m_free(m); /* release old mbuf */ 1720156756Ssam mprev = mfirst; 1721156756Ssam } 1722156756Ssam return (m0); 1723156756Ssam} 1724178674Sjulian 1725178674Sjulian#ifdef MBUF_PROFILING 1726178674Sjulian 1727178674Sjulian#define MP_BUCKETS 32 /* don't just change this as things may overflow.*/ 1728178674Sjulianstruct mbufprofile { 1729178700Sjulian uintmax_t wasted[MP_BUCKETS]; 1730178700Sjulian uintmax_t used[MP_BUCKETS]; 1731178700Sjulian uintmax_t segments[MP_BUCKETS]; 1732178674Sjulian} mbprof; 1733178674Sjulian 1734178674Sjulian#define MP_MAXDIGITS 21 /* strlen("16,000,000,000,000,000,000") == 21 */ 1735178674Sjulian#define MP_NUMLINES 6 1736178674Sjulian#define MP_NUMSPERLINE 16 1737178674Sjulian#define MP_EXTRABYTES 64 /* > strlen("used:\nwasted:\nsegments:\n") */ 1738178674Sjulian/* work out max space needed and add a bit of spare space too */ 1739178674Sjulian#define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE) 1740178674Sjulian#define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES) 1741178674Sjulian 1742178674Sjulianchar mbprofbuf[MP_BUFSIZE]; 1743178674Sjulian 1744178674Sjulianvoid 1745178674Sjulianm_profile(struct mbuf *m) 1746178674Sjulian{ 1747178674Sjulian int segments = 0; 1748178674Sjulian int used = 0; 1749178674Sjulian int wasted = 0; 1750266876Sglebius 1751178674Sjulian while (m) { 1752178674Sjulian segments++; 1753178674Sjulian used += m->m_len; 1754178674Sjulian if (m->m_flags & M_EXT) { 1755178674Sjulian wasted += MHLEN - sizeof(m->m_ext) + 1756178674Sjulian m->m_ext.ext_size - m->m_len; 1757178674Sjulian } else { 1758178674Sjulian if (m->m_flags & M_PKTHDR) 1759178674Sjulian wasted += MHLEN - m->m_len; 1760178674Sjulian else 1761178674Sjulian wasted += MLEN - m->m_len; 1762178674Sjulian } 1763178674Sjulian m = m->m_next; 1764178674Sjulian } 1765178674Sjulian /* be paranoid.. it helps */ 1766178674Sjulian if (segments > MP_BUCKETS - 1) 1767178674Sjulian segments = MP_BUCKETS - 1; 1768178674Sjulian if (used > 100000) 1769178674Sjulian used = 100000; 1770178674Sjulian if (wasted > 100000) 1771178674Sjulian wasted = 100000; 1772178674Sjulian /* store in the appropriate bucket */ 1773178674Sjulian /* don't bother locking. if it's slightly off, so what? */ 1774178674Sjulian mbprof.segments[segments]++; 1775178674Sjulian mbprof.used[fls(used)]++; 1776178674Sjulian mbprof.wasted[fls(wasted)]++; 1777178674Sjulian} 1778178674Sjulian 1779178674Sjulianstatic void 1780178674Sjulianmbprof_textify(void) 1781178674Sjulian{ 1782178674Sjulian int offset; 1783178674Sjulian char *c; 1784209390Sed uint64_t *p; 1785178674Sjulian 1786178674Sjulian p = &mbprof.wasted[0]; 1787178674Sjulian c = mbprofbuf; 1788266876Sglebius offset = snprintf(c, MP_MAXLINE + 10, 1789178674Sjulian "wasted:\n" 1790178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1791178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1792178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1793178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1794178674Sjulian#ifdef BIG_ARRAY 1795178674Sjulian p = &mbprof.wasted[16]; 1796178674Sjulian c += offset; 1797266876Sglebius offset = snprintf(c, MP_MAXLINE, 1798178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1799178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1800178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1801178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1802178674Sjulian#endif 1803178674Sjulian p = &mbprof.used[0]; 1804178674Sjulian c += offset; 1805266876Sglebius offset = snprintf(c, MP_MAXLINE + 10, 1806178674Sjulian "used:\n" 1807178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1808178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1809178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1810178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1811178674Sjulian#ifdef BIG_ARRAY 1812178674Sjulian p = &mbprof.used[16]; 1813178674Sjulian c += offset; 1814266876Sglebius offset = snprintf(c, MP_MAXLINE, 1815178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1816178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1817178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1818178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1819178674Sjulian#endif 1820178674Sjulian p = &mbprof.segments[0]; 1821178674Sjulian c += offset; 1822266876Sglebius offset = snprintf(c, MP_MAXLINE + 10, 1823178674Sjulian "segments:\n" 1824178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1825178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1826178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1827178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1828178674Sjulian#ifdef BIG_ARRAY 1829178674Sjulian p = &mbprof.segments[16]; 1830178674Sjulian c += offset; 1831266876Sglebius offset = snprintf(c, MP_MAXLINE, 1832178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1833178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %jju", 1834178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1835178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1836178674Sjulian#endif 1837178674Sjulian} 1838178674Sjulian 1839178674Sjulianstatic int 1840178674Sjulianmbprof_handler(SYSCTL_HANDLER_ARGS) 1841178674Sjulian{ 1842178674Sjulian int error; 1843178674Sjulian 1844178674Sjulian mbprof_textify(); 1845178674Sjulian error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1); 1846178674Sjulian return (error); 1847178674Sjulian} 1848178674Sjulian 1849178674Sjulianstatic int 1850178674Sjulianmbprof_clr_handler(SYSCTL_HANDLER_ARGS) 1851178674Sjulian{ 1852178674Sjulian int clear, error; 1853266876Sglebius 1854178674Sjulian clear = 0; 1855178674Sjulian error = sysctl_handle_int(oidp, &clear, 0, req); 1856178674Sjulian if (error || !req->newptr) 1857178674Sjulian return (error); 1858266876Sglebius 1859178674Sjulian if (clear) { 1860178674Sjulian bzero(&mbprof, sizeof(mbprof)); 1861178674Sjulian } 1862266876Sglebius 1863178674Sjulian return (error); 1864178674Sjulian} 1865178674Sjulian 1866178674Sjulian 1867178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD, 1868178674Sjulian NULL, 0, mbprof_handler, "A", "mbuf profiling statistics"); 1869178674Sjulian 1870178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW, 1871178674Sjulian NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics"); 1872178674Sjulian#endif 1873178674Sjulian 1874