uipc_mbuf.c revision 297227
1139804Simp/*- 21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 4. Neither the name of the University nor the names of its contributors 141541Srgrimes * may be used to endorse or promote products derived from this software 151541Srgrimes * without specific prior written permission. 161541Srgrimes * 171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271541Srgrimes * SUCH DAMAGE. 281541Srgrimes * 291541Srgrimes * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 301541Srgrimes */ 311541Srgrimes 32116182Sobrien#include <sys/cdefs.h> 33116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 297227 2016-03-24 08:26:06Z gnn $"); 34116182Sobrien 3577572Sobrien#include "opt_param.h" 36113490Ssilby#include "opt_mbuf_stress_test.h" 37178674Sjulian#include "opt_mbuf_profiling.h" 38101007Srwatson 391541Srgrimes#include <sys/param.h> 401541Srgrimes#include <sys/systm.h> 4176166Smarkm#include <sys/kernel.h> 42125296Ssilby#include <sys/limits.h> 4376166Smarkm#include <sys/lock.h> 4432036Sbde#include <sys/malloc.h> 451541Srgrimes#include <sys/mbuf.h> 4623081Swollman#include <sys/sysctl.h> 471541Srgrimes#include <sys/domain.h> 481541Srgrimes#include <sys/protosw.h> 49125296Ssilby#include <sys/uio.h> 50297188Sgnn#include <sys/sdt.h> 5176166Smarkm 52297227SgnnSDT_PROBE_DEFINE5_XLATE(sdt, , , m__init, 53297188Sgnn "struct mbuf *", "mbufinfo_t *", 54297188Sgnn "uint32_t", "uint32_t", 55297188Sgnn "uint16_t", "uint16_t", 56297188Sgnn "uint32_t", "uint32_t", 57297188Sgnn "uint32_t", "uint32_t"); 58297188Sgnn 59297227SgnnSDT_PROBE_DEFINE3_XLATE(sdt, , , m__gethdr, 60297188Sgnn "uint32_t", "uint32_t", 61297188Sgnn "uint16_t", "uint16_t", 62297188Sgnn "struct mbuf *", "mbufinfo_t *"); 63297188Sgnn 64297227SgnnSDT_PROBE_DEFINE3_XLATE(sdt, , , m__get, 65297188Sgnn "uint32_t", "uint32_t", 66297188Sgnn "uint16_t", "uint16_t", 67297188Sgnn "struct mbuf *", "mbufinfo_t *"); 68297188Sgnn 69297227SgnnSDT_PROBE_DEFINE4_XLATE(sdt, , , m__getcl, 70297188Sgnn "uint32_t", "uint32_t", 71297188Sgnn "uint16_t", "uint16_t", 72297188Sgnn "uint32_t", "uint32_t", 73297188Sgnn "struct mbuf *", "mbufinfo_t *"); 74297188Sgnn 75297227SgnnSDT_PROBE_DEFINE3_XLATE(sdt, , , m__clget, 76297188Sgnn "struct mbuf *", "mbufinfo_t *", 77297188Sgnn "uint32_t", "uint32_t", 78297188Sgnn "uint32_t", "uint32_t"); 79297188Sgnn 80297227SgnnSDT_PROBE_DEFINE4_XLATE(sdt, , , m__cljget, 81297188Sgnn "struct mbuf *", "mbufinfo_t *", 82297188Sgnn "uint32_t", "uint32_t", 83297188Sgnn "uint32_t", "uint32_t", 84297188Sgnn "void*", "void*"); 85297188Sgnn 86297227SgnnSDT_PROBE_DEFINE(sdt, , , m__cljset); 87297188Sgnn 88297227SgnnSDT_PROBE_DEFINE1_XLATE(sdt, , , m__free, 89297188Sgnn "struct mbuf *", "mbufinfo_t *"); 90297188Sgnn 91297227SgnnSDT_PROBE_DEFINE1_XLATE(sdt, , , m__freem, 92297188Sgnn "struct mbuf *", "mbufinfo_t *"); 93297188Sgnn 94295547Sglebius#include <security/mac/mac_framework.h> 95295547Sglebius 969759Sbdeint max_linkhdr; 979759Sbdeint max_protohdr; 989759Sbdeint max_hdr; 999759Sbdeint max_datalen; 100116455Ssilby#ifdef MBUF_STRESS_TEST 101112777Ssilbyint m_defragpackets; 102112777Ssilbyint m_defragbytes; 103112777Ssilbyint m_defraguseless; 104112777Ssilbyint m_defragfailure; 105113490Ssilbyint m_defragrandomfailures; 106113490Ssilby#endif 1071541Srgrimes 10866475Sbmilekic/* 10966475Sbmilekic * sysctl(8) exported objects 11066475Sbmilekic */ 111155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD, 112155820Sandre &max_linkhdr, 0, "Size of largest link layer header"); 113155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD, 114155820Sandre &max_protohdr, 0, "Size of largest protocol layer header"); 115155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD, 116155820Sandre &max_hdr, 0, "Size of largest link plus protocol header"); 117155820SandreSYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD, 118155820Sandre &max_datalen, 0, "Minimum space left in mbuf after max_hdr"); 119116455Ssilby#ifdef MBUF_STRESS_TEST 120112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, 121112777Ssilby &m_defragpackets, 0, ""); 122112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, 123112777Ssilby &m_defragbytes, 0, ""); 124112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, 125112777Ssilby &m_defraguseless, 0, ""); 126112777SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, 127112777Ssilby &m_defragfailure, 0, ""); 128113490SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, 129113490Ssilby &m_defragrandomfailures, 0, ""); 130113490Ssilby#endif 13175112Sbmilekic 1321541Srgrimes/* 133254973Sandre * Ensure the correct size of various mbuf parameters. It could be off due 134254973Sandre * to compiler-induced padding and alignment artifacts. 135254973Sandre */ 136254973SandreCTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN); 137254973SandreCTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN); 138254973Sandre 139254973Sandre/* 140277203Srwatson * mbuf data storage should be 64-bit aligned regardless of architectural 141277203Srwatson * pointer size; check this is the case with and without a packet header. 142277203Srwatson */ 143277203SrwatsonCTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0); 144277203SrwatsonCTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0); 145277203Srwatson 146277203Srwatson/* 147277203Srwatson * While the specific values here don't matter too much (i.e., +/- a few 148277203Srwatson * words), we do want to ensure that changes to these values are carefully 149277203Srwatson * reasoned about and properly documented. This is especially the case as 150277203Srwatson * network-protocol and device-driver modules encode these layouts, and must 151277203Srwatson * be recompiled if the structures change. Check these values at compile time 152277203Srwatson * against the ones documented in comments in mbuf.h. 153277203Srwatson * 154277203Srwatson * NB: Possibly they should be documented there via #define's and not just 155277203Srwatson * comments. 156277203Srwatson */ 157277203Srwatson#if defined(__LP64__) 158277203SrwatsonCTASSERT(offsetof(struct mbuf, m_dat) == 32); 159277203SrwatsonCTASSERT(sizeof(struct pkthdr) == 56); 160278920SglebiusCTASSERT(sizeof(struct m_ext) == 48); 161277203Srwatson#else 162277203SrwatsonCTASSERT(offsetof(struct mbuf, m_dat) == 24); 163277203SrwatsonCTASSERT(sizeof(struct pkthdr) == 48); 164278920SglebiusCTASSERT(sizeof(struct m_ext) == 28); 165277203Srwatson#endif 166277203Srwatson 167277203Srwatson/* 168278914Sglebius * Assert that the queue(3) macros produce code of the same size as an old 169278914Sglebius * plain pointer does. 170278914Sglebius */ 171278914Sglebius#ifdef INVARIANTS 172278914Sglebiusstatic struct mbuf m_assertbuf; 173278914SglebiusCTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next)); 174278914SglebiusCTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next)); 175278914SglebiusCTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt)); 176278914SglebiusCTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt)); 177278914Sglebius#endif 178278914Sglebius 179278914Sglebius/* 180218909Sbrucec * Attach the cluster from *m to *n, set up m_ext in *n 181151976Sandre * and bump the refcount of the cluster. 182151976Sandre */ 183289276Shirenvoid 184296242Sglebiusmb_dupcl(struct mbuf *n, struct mbuf *m) 185151976Sandre{ 186296242Sglebius volatile u_int *refcnt; 187151976Sandre 188268535Sglebius KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); 189268535Sglebius KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n)); 190268535Sglebius 191296242Sglebius n->m_ext = m->m_ext; 192296242Sglebius n->m_flags |= M_EXT; 193296242Sglebius n->m_flags |= m->m_flags & M_RDONLY; 194296242Sglebius 195296242Sglebius /* See if this is the mbuf that holds the embedded refcount. */ 196296242Sglebius if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 197296242Sglebius refcnt = n->m_ext.ext_cnt = &m->m_ext.ext_count; 198296242Sglebius n->m_ext.ext_flags &= ~EXT_FLAG_EMBREF; 199296242Sglebius } else { 200268535Sglebius KASSERT(m->m_ext.ext_cnt != NULL, 201268535Sglebius ("%s: no refcounting pointer on %p", __func__, m)); 202296242Sglebius refcnt = m->m_ext.ext_cnt; 203268535Sglebius } 204268535Sglebius 205296242Sglebius if (*refcnt == 1) 206296242Sglebius *refcnt += 1; 207296242Sglebius else 208296242Sglebius atomic_add_int(refcnt, 1); 209151976Sandre} 210151976Sandre 211284961Snpvoid 212284961Snpm_demote_pkthdr(struct mbuf *m) 213284961Snp{ 214284961Snp 215284961Snp M_ASSERTPKTHDR(m); 216284961Snp 217284961Snp m_tag_delete_chain(m, NULL); 218284961Snp m->m_flags &= ~M_PKTHDR; 219284961Snp bzero(&m->m_pkthdr, sizeof(struct pkthdr)); 220284961Snp} 221284961Snp 222151976Sandre/* 223149598Sandre * Clean up mbuf (chain) from any tags and packet headers. 224149647Sandre * If "all" is set then the first mbuf in the chain will be 225149647Sandre * cleaned too. 226149598Sandre */ 227149598Sandrevoid 228275329Sglebiusm_demote(struct mbuf *m0, int all, int flags) 229149598Sandre{ 230149598Sandre struct mbuf *m; 231149598Sandre 232149598Sandre for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) { 233271122Sglebius KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p", 234271122Sglebius __func__, m, m0)); 235284961Snp if (m->m_flags & M_PKTHDR) 236284961Snp m_demote_pkthdr(m); 237275329Sglebius m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags); 238149598Sandre } 239149598Sandre} 240149598Sandre 241149598Sandre/* 242149648Sandre * Sanity checks on mbuf (chain) for use in KASSERT() and general 243149648Sandre * debugging. 244149648Sandre * Returns 0 or panics when bad and 1 on all tests passed. 245149648Sandre * Sanitize, 0 to run M_SANITY_ACTION, 1 to garble things so they 246149648Sandre * blow up later. 247149599Sandre */ 248149599Sandreint 249149599Sandrem_sanity(struct mbuf *m0, int sanitize) 250149599Sandre{ 251149599Sandre struct mbuf *m; 252149599Sandre caddr_t a, b; 253149599Sandre int pktlen = 0; 254149599Sandre 255168734Skmacy#ifdef INVARIANTS 256168734Skmacy#define M_SANITY_ACTION(s) panic("mbuf %p: " s, m) 257266876Sglebius#else 258168734Skmacy#define M_SANITY_ACTION(s) printf("mbuf %p: " s, m) 259168734Skmacy#endif 260149599Sandre 261149648Sandre for (m = m0; m != NULL; m = m->m_next) { 262149599Sandre /* 263149599Sandre * Basic pointer checks. If any of these fails then some 264149599Sandre * unrelated kernel memory before or after us is trashed. 265149599Sandre * No way to recover from that. 266149599Sandre */ 267276818Srwatson a = M_START(m); 268276818Srwatson b = a + M_SIZE(m); 269149599Sandre if ((caddr_t)m->m_data < a) 270149599Sandre M_SANITY_ACTION("m_data outside mbuf data range left"); 271149599Sandre if ((caddr_t)m->m_data > b) 272149599Sandre M_SANITY_ACTION("m_data outside mbuf data range right"); 273149599Sandre if ((caddr_t)m->m_data + m->m_len > b) 274149599Sandre M_SANITY_ACTION("m_data + m_len exeeds mbuf space"); 275149599Sandre 276149599Sandre /* m->m_nextpkt may only be set on first mbuf in chain. */ 277149648Sandre if (m != m0 && m->m_nextpkt != NULL) { 278149599Sandre if (sanitize) { 279149599Sandre m_freem(m->m_nextpkt); 280149599Sandre m->m_nextpkt = (struct mbuf *)0xDEADC0DE; 281149599Sandre } else 282149599Sandre M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf"); 283149599Sandre } 284149599Sandre 285149599Sandre /* packet length (not mbuf length!) calculation */ 286149599Sandre if (m0->m_flags & M_PKTHDR) 287149599Sandre pktlen += m->m_len; 288149599Sandre 289149599Sandre /* m_tags may only be attached to first mbuf in chain. */ 290149599Sandre if (m != m0 && m->m_flags & M_PKTHDR && 291149599Sandre !SLIST_EMPTY(&m->m_pkthdr.tags)) { 292149599Sandre if (sanitize) { 293149599Sandre m_tag_delete_chain(m, NULL); 294149599Sandre /* put in 0xDEADC0DE perhaps? */ 295149648Sandre } else 296149599Sandre M_SANITY_ACTION("m_tags on in-chain mbuf"); 297149599Sandre } 298149599Sandre 299149599Sandre /* M_PKTHDR may only be set on first mbuf in chain */ 300149599Sandre if (m != m0 && m->m_flags & M_PKTHDR) { 301149599Sandre if (sanitize) { 302149599Sandre bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); 303149599Sandre m->m_flags &= ~M_PKTHDR; 304149599Sandre /* put in 0xDEADCODE and leave hdr flag in */ 305149599Sandre } else 306149599Sandre M_SANITY_ACTION("M_PKTHDR on in-chain mbuf"); 307149599Sandre } 308149599Sandre } 309149648Sandre m = m0; 310149648Sandre if (pktlen && pktlen != m->m_pkthdr.len) { 311149599Sandre if (sanitize) 312149648Sandre m->m_pkthdr.len = 0; 313149599Sandre else 314149599Sandre M_SANITY_ACTION("m_pkthdr.len != mbuf chain length"); 315149599Sandre } 316149648Sandre return 1; 317149648Sandre 318149599Sandre#undef M_SANITY_ACTION 319149599Sandre} 320149599Sandre 321295547Sglebius/* 322295547Sglebius * Non-inlined part of m_init(). 323295547Sglebius */ 324295547Sglebiusint 325295547Sglebiusm_pkthdr_init(struct mbuf *m, int how) 326295547Sglebius{ 327295547Sglebius#ifdef MAC 328295547Sglebius int error; 329295547Sglebius#endif 330295547Sglebius m->m_data = m->m_pktdat; 331295547Sglebius bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); 332295547Sglebius#ifdef MAC 333295547Sglebius /* If the label init fails, fail the alloc */ 334295547Sglebius error = mac_mbuf_init(m, how); 335295547Sglebius if (error) 336295547Sglebius return (error); 337295547Sglebius#endif 338149599Sandre 339295547Sglebius return (0); 340295547Sglebius} 341295547Sglebius 342149599Sandre/* 343108466Ssam * "Move" mbuf pkthdr from "from" to "to". 344100960Srwatson * "from" must have M_PKTHDR set, and "to" must be empty. 345100960Srwatson */ 346100960Srwatsonvoid 347108466Ssamm_move_pkthdr(struct mbuf *to, struct mbuf *from) 348100960Srwatson{ 349100960Srwatson 350100960Srwatson#if 0 351108466Ssam /* see below for why these are not enabled */ 352113255Sdes M_ASSERTPKTHDR(to); 353113487Srwatson /* Note: with MAC, this may not be a good assertion. */ 354108466Ssam KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), 355108466Ssam ("m_move_pkthdr: to has tags")); 356100960Srwatson#endif 357101007Srwatson#ifdef MAC 358113487Srwatson /* 359113487Srwatson * XXXMAC: It could be this should also occur for non-MAC? 360113487Srwatson */ 361101007Srwatson if (to->m_flags & M_PKTHDR) 362113487Srwatson m_tag_delete_chain(to, NULL); 363101007Srwatson#endif 364143302Ssam to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); 365143302Ssam if ((to->m_flags & M_EXT) == 0) 366143302Ssam to->m_data = to->m_pktdat; 367108466Ssam to->m_pkthdr = from->m_pkthdr; /* especially tags */ 368108466Ssam SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ 369108466Ssam from->m_flags &= ~M_PKTHDR; 370108466Ssam} 371108466Ssam 372108466Ssam/* 373108466Ssam * Duplicate "from"'s mbuf pkthdr in "to". 374108466Ssam * "from" must have M_PKTHDR set, and "to" must be empty. 375108466Ssam * In particular, this does a deep copy of the packet tags. 376108466Ssam */ 377108466Ssamint 378286450Smelifarom_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how) 379108466Ssam{ 380108466Ssam 381108466Ssam#if 0 382108466Ssam /* 383108466Ssam * The mbuf allocator only initializes the pkthdr 384248372Sglebius * when the mbuf is allocated with m_gethdr(). Many users 385248372Sglebius * (e.g. m_copy*, m_prepend) use m_get() and then 386108466Ssam * smash the pkthdr as needed causing these 387108466Ssam * assertions to trip. For now just disable them. 388108466Ssam */ 389113255Sdes M_ASSERTPKTHDR(to); 390113487Srwatson /* Note: with MAC, this may not be a good assertion. */ 391108466Ssam KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); 392108466Ssam#endif 393132488Salfred MBUF_CHECKSLEEP(how); 394108466Ssam#ifdef MAC 395108466Ssam if (to->m_flags & M_PKTHDR) 396113487Srwatson m_tag_delete_chain(to, NULL); 397108466Ssam#endif 398112733Ssilby to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); 399112733Ssilby if ((to->m_flags & M_EXT) == 0) 400112733Ssilby to->m_data = to->m_pktdat; 401100960Srwatson to->m_pkthdr = from->m_pkthdr; 402108466Ssam SLIST_INIT(&to->m_pkthdr.tags); 403260716Sglebius return (m_tag_copy_chain(to, from, how)); 404100960Srwatson} 405100960Srwatson 406100960Srwatson/* 4071541Srgrimes * Lesser-used path for M_PREPEND: 4081541Srgrimes * allocate new mbuf to prepend to chain, 4091541Srgrimes * copy junk along. 4101541Srgrimes */ 4111541Srgrimesstruct mbuf * 41272356Sbmilekicm_prepend(struct mbuf *m, int len, int how) 4131541Srgrimes{ 4141541Srgrimes struct mbuf *mn; 4151541Srgrimes 416117770Ssilby if (m->m_flags & M_PKTHDR) 417248372Sglebius mn = m_gethdr(how, m->m_type); 418117770Ssilby else 419248372Sglebius mn = m_get(how, m->m_type); 42072356Sbmilekic if (mn == NULL) { 4211541Srgrimes m_freem(m); 42272356Sbmilekic return (NULL); 4231541Srgrimes } 424113487Srwatson if (m->m_flags & M_PKTHDR) 425248372Sglebius m_move_pkthdr(mn, m); 4261541Srgrimes mn->m_next = m; 4271541Srgrimes m = mn; 428276692Srwatson if (len < M_SIZE(m)) 429276692Srwatson M_ALIGN(m, len); 4301541Srgrimes m->m_len = len; 4311541Srgrimes return (m); 4321541Srgrimes} 4331541Srgrimes 4341541Srgrimes/* 4351541Srgrimes * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 4361541Srgrimes * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 437243882Sglebius * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller. 43854002Sarchie * Note that the copy is read-only, because clusters are not copied, 43954002Sarchie * only their reference counts are incremented. 4401541Srgrimes */ 4411541Srgrimesstruct mbuf * 442296242Sglebiusm_copym(struct mbuf *m, int off0, int len, int wait) 4431541Srgrimes{ 44472356Sbmilekic struct mbuf *n, **np; 44572356Sbmilekic int off = off0; 4461541Srgrimes struct mbuf *top; 4471541Srgrimes int copyhdr = 0; 4481541Srgrimes 44952201Salfred KASSERT(off >= 0, ("m_copym, negative off %d", off)); 45052201Salfred KASSERT(len >= 0, ("m_copym, negative len %d", len)); 451132488Salfred MBUF_CHECKSLEEP(wait); 4521541Srgrimes if (off == 0 && m->m_flags & M_PKTHDR) 4531541Srgrimes copyhdr = 1; 4541541Srgrimes while (off > 0) { 45552201Salfred KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); 4561541Srgrimes if (off < m->m_len) 4571541Srgrimes break; 4581541Srgrimes off -= m->m_len; 4591541Srgrimes m = m->m_next; 4601541Srgrimes } 4611541Srgrimes np = ⊤ 4621541Srgrimes top = 0; 4631541Srgrimes while (len > 0) { 46472356Sbmilekic if (m == NULL) { 465266876Sglebius KASSERT(len == M_COPYALL, 46652201Salfred ("m_copym, length > size of mbuf chain")); 4671541Srgrimes break; 4681541Srgrimes } 469117770Ssilby if (copyhdr) 470248372Sglebius n = m_gethdr(wait, m->m_type); 471117770Ssilby else 472248372Sglebius n = m_get(wait, m->m_type); 4731541Srgrimes *np = n; 47472356Sbmilekic if (n == NULL) 4751541Srgrimes goto nospace; 4761541Srgrimes if (copyhdr) { 477108466Ssam if (!m_dup_pkthdr(n, m, wait)) 478108466Ssam goto nospace; 4791541Srgrimes if (len == M_COPYALL) 4801541Srgrimes n->m_pkthdr.len -= off0; 4811541Srgrimes else 4821541Srgrimes n->m_pkthdr.len = len; 4831541Srgrimes copyhdr = 0; 4841541Srgrimes } 4851541Srgrimes n->m_len = min(len, m->m_len - off); 4861541Srgrimes if (m->m_flags & M_EXT) { 4871541Srgrimes n->m_data = m->m_data + off; 488151976Sandre mb_dupcl(n, m); 4891541Srgrimes } else 4901541Srgrimes bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), 491103569Sbmilekic (u_int)n->m_len); 4921541Srgrimes if (len != M_COPYALL) 4931541Srgrimes len -= n->m_len; 4941541Srgrimes off = 0; 4951541Srgrimes m = m->m_next; 4961541Srgrimes np = &n->m_next; 4971541Srgrimes } 49878592Sbmilekic 4991541Srgrimes return (top); 5001541Srgrimesnospace: 5011541Srgrimes m_freem(top); 50272356Sbmilekic return (NULL); 5031541Srgrimes} 5041541Srgrimes 5051541Srgrimes/* 50615689Swollman * Copy an entire packet, including header (which must be present). 50715689Swollman * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 50854002Sarchie * Note that the copy is read-only, because clusters are not copied, 50954002Sarchie * only their reference counts are incremented. 51072750Sluigi * Preserve alignment of the first mbuf so if the creator has left 51172750Sluigi * some room at the beginning (e.g. for inserting protocol headers) 51272750Sluigi * the copies still have the room available. 51315689Swollman */ 51415689Swollmanstruct mbuf * 51572356Sbmilekicm_copypacket(struct mbuf *m, int how) 51615689Swollman{ 51715689Swollman struct mbuf *top, *n, *o; 51815689Swollman 519132488Salfred MBUF_CHECKSLEEP(how); 520248372Sglebius n = m_get(how, m->m_type); 52115689Swollman top = n; 52272356Sbmilekic if (n == NULL) 52315689Swollman goto nospace; 52415689Swollman 525108466Ssam if (!m_dup_pkthdr(n, m, how)) 526108466Ssam goto nospace; 52715689Swollman n->m_len = m->m_len; 52815689Swollman if (m->m_flags & M_EXT) { 52915689Swollman n->m_data = m->m_data; 530151976Sandre mb_dupcl(n, m); 53115689Swollman } else { 53272750Sluigi n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); 53315689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 53415689Swollman } 53515689Swollman 53615689Swollman m = m->m_next; 53715689Swollman while (m) { 538248372Sglebius o = m_get(how, m->m_type); 53972356Sbmilekic if (o == NULL) 54015689Swollman goto nospace; 54115689Swollman 54215689Swollman n->m_next = o; 54315689Swollman n = n->m_next; 54415689Swollman 54515689Swollman n->m_len = m->m_len; 54615689Swollman if (m->m_flags & M_EXT) { 54715689Swollman n->m_data = m->m_data; 548151976Sandre mb_dupcl(n, m); 54915689Swollman } else { 55015689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 55115689Swollman } 55215689Swollman 55315689Swollman m = m->m_next; 55415689Swollman } 55515689Swollman return top; 55615689Swollmannospace: 55715689Swollman m_freem(top); 55872356Sbmilekic return (NULL); 55915689Swollman} 56015689Swollman 56115689Swollman/* 5621541Srgrimes * Copy data from an mbuf chain starting "off" bytes from the beginning, 5631541Srgrimes * continuing for "len" bytes, into the indicated buffer. 5641541Srgrimes */ 5651549Srgrimesvoid 56681907Sjulianm_copydata(const struct mbuf *m, int off, int len, caddr_t cp) 5671541Srgrimes{ 568103569Sbmilekic u_int count; 5691541Srgrimes 57052201Salfred KASSERT(off >= 0, ("m_copydata, negative off %d", off)); 57152201Salfred KASSERT(len >= 0, ("m_copydata, negative len %d", len)); 5721541Srgrimes while (off > 0) { 57352201Salfred KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); 5741541Srgrimes if (off < m->m_len) 5751541Srgrimes break; 5761541Srgrimes off -= m->m_len; 5771541Srgrimes m = m->m_next; 5781541Srgrimes } 5791541Srgrimes while (len > 0) { 58052201Salfred KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); 5811541Srgrimes count = min(m->m_len - off, len); 5821541Srgrimes bcopy(mtod(m, caddr_t) + off, cp, count); 5831541Srgrimes len -= count; 5841541Srgrimes cp += count; 5851541Srgrimes off = 0; 5861541Srgrimes m = m->m_next; 5871541Srgrimes } 5881541Srgrimes} 5891541Srgrimes 5901541Srgrimes/* 59154002Sarchie * Copy a packet header mbuf chain into a completely new chain, including 59254002Sarchie * copying any mbuf clusters. Use this instead of m_copypacket() when 59354002Sarchie * you need a writable copy of an mbuf chain. 59454002Sarchie */ 59554002Sarchiestruct mbuf * 596286450Smelifarom_dup(const struct mbuf *m, int how) 59754002Sarchie{ 59854002Sarchie struct mbuf **p, *top = NULL; 59954002Sarchie int remain, moff, nsize; 60054002Sarchie 601132488Salfred MBUF_CHECKSLEEP(how); 60254002Sarchie /* Sanity check */ 60354002Sarchie if (m == NULL) 60472356Sbmilekic return (NULL); 605113255Sdes M_ASSERTPKTHDR(m); 60654002Sarchie 60754002Sarchie /* While there's more data, get a new mbuf, tack it on, and fill it */ 60854002Sarchie remain = m->m_pkthdr.len; 60954002Sarchie moff = 0; 61054002Sarchie p = ⊤ 61154002Sarchie while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ 61254002Sarchie struct mbuf *n; 61354002Sarchie 61454002Sarchie /* Get the next new mbuf */ 615129906Sbmilekic if (remain >= MINCLSIZE) { 616129906Sbmilekic n = m_getcl(how, m->m_type, 0); 617129906Sbmilekic nsize = MCLBYTES; 618129906Sbmilekic } else { 619129906Sbmilekic n = m_get(how, m->m_type); 620129906Sbmilekic nsize = MLEN; 621129906Sbmilekic } 62254002Sarchie if (n == NULL) 62354002Sarchie goto nospace; 624129906Sbmilekic 625129906Sbmilekic if (top == NULL) { /* First one, must be PKTHDR */ 626129906Sbmilekic if (!m_dup_pkthdr(n, m, how)) { 627129906Sbmilekic m_free(n); 628108466Ssam goto nospace; 629129906Sbmilekic } 630153428Semaste if ((n->m_flags & M_EXT) == 0) 631153428Semaste nsize = MHLEN; 632282594Sae n->m_flags &= ~M_RDONLY; 63354002Sarchie } 63454002Sarchie n->m_len = 0; 63554002Sarchie 63654002Sarchie /* Link it into the new chain */ 63754002Sarchie *p = n; 63854002Sarchie p = &n->m_next; 63954002Sarchie 64054002Sarchie /* Copy data from original mbuf(s) into new mbuf */ 64154002Sarchie while (n->m_len < nsize && m != NULL) { 64254002Sarchie int chunk = min(nsize - n->m_len, m->m_len - moff); 64354002Sarchie 64454002Sarchie bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 64554002Sarchie moff += chunk; 64654002Sarchie n->m_len += chunk; 64754002Sarchie remain -= chunk; 64854002Sarchie if (moff == m->m_len) { 64954002Sarchie m = m->m_next; 65054002Sarchie moff = 0; 65154002Sarchie } 65254002Sarchie } 65354002Sarchie 65454002Sarchie /* Check correct total mbuf length */ 65554002Sarchie KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), 65687594Sobrien ("%s: bogus m_pkthdr.len", __func__)); 65754002Sarchie } 65854002Sarchie return (top); 65954002Sarchie 66054002Sarchienospace: 66154002Sarchie m_freem(top); 66272356Sbmilekic return (NULL); 66354002Sarchie} 66454002Sarchie 66554002Sarchie/* 6661541Srgrimes * Concatenate mbuf chain n to m. 6671541Srgrimes * Both chains must be of the same type (e.g. MT_DATA). 6681541Srgrimes * Any m_pkthdr is not updated. 6691541Srgrimes */ 6701549Srgrimesvoid 67172356Sbmilekicm_cat(struct mbuf *m, struct mbuf *n) 6721541Srgrimes{ 6731541Srgrimes while (m->m_next) 6741541Srgrimes m = m->m_next; 6751541Srgrimes while (n) { 676242256Sandre if (!M_WRITABLE(m) || 677242256Sandre M_TRAILINGSPACE(m) < n->m_len) { 6781541Srgrimes /* just join the two chains */ 6791541Srgrimes m->m_next = n; 6801541Srgrimes return; 6811541Srgrimes } 6821541Srgrimes /* splat the data from one into the other */ 6831541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 6841541Srgrimes (u_int)n->m_len); 6851541Srgrimes m->m_len += n->m_len; 6861541Srgrimes n = m_free(n); 6871541Srgrimes } 6881541Srgrimes} 6891541Srgrimes 690271088Sglebius/* 691271088Sglebius * Concatenate two pkthdr mbuf chains. 692271088Sglebius */ 6931549Srgrimesvoid 694271088Sglebiusm_catpkt(struct mbuf *m, struct mbuf *n) 695271088Sglebius{ 696271088Sglebius 697271088Sglebius M_ASSERTPKTHDR(m); 698271088Sglebius M_ASSERTPKTHDR(n); 699271088Sglebius 700271088Sglebius m->m_pkthdr.len += n->m_pkthdr.len; 701275329Sglebius m_demote(n, 1, 0); 702271088Sglebius 703271088Sglebius m_cat(m, n); 704271088Sglebius} 705271088Sglebius 706271088Sglebiusvoid 70772356Sbmilekicm_adj(struct mbuf *mp, int req_len) 7081541Srgrimes{ 70972356Sbmilekic int len = req_len; 71072356Sbmilekic struct mbuf *m; 71172356Sbmilekic int count; 7121541Srgrimes 7131541Srgrimes if ((m = mp) == NULL) 7141541Srgrimes return; 7151541Srgrimes if (len >= 0) { 7161541Srgrimes /* 7171541Srgrimes * Trim from head. 7181541Srgrimes */ 7191541Srgrimes while (m != NULL && len > 0) { 7201541Srgrimes if (m->m_len <= len) { 7211541Srgrimes len -= m->m_len; 7221541Srgrimes m->m_len = 0; 7231541Srgrimes m = m->m_next; 7241541Srgrimes } else { 7251541Srgrimes m->m_len -= len; 7261541Srgrimes m->m_data += len; 7271541Srgrimes len = 0; 7281541Srgrimes } 7291541Srgrimes } 7301541Srgrimes if (mp->m_flags & M_PKTHDR) 731207475Szec mp->m_pkthdr.len -= (req_len - len); 7321541Srgrimes } else { 7331541Srgrimes /* 7341541Srgrimes * Trim from tail. Scan the mbuf chain, 7351541Srgrimes * calculating its length and finding the last mbuf. 7361541Srgrimes * If the adjustment only affects this mbuf, then just 7371541Srgrimes * adjust and return. Otherwise, rescan and truncate 7381541Srgrimes * after the remaining size. 7391541Srgrimes */ 7401541Srgrimes len = -len; 7411541Srgrimes count = 0; 7421541Srgrimes for (;;) { 7431541Srgrimes count += m->m_len; 7441541Srgrimes if (m->m_next == (struct mbuf *)0) 7451541Srgrimes break; 7461541Srgrimes m = m->m_next; 7471541Srgrimes } 7481541Srgrimes if (m->m_len >= len) { 7491541Srgrimes m->m_len -= len; 7501541Srgrimes if (mp->m_flags & M_PKTHDR) 7511541Srgrimes mp->m_pkthdr.len -= len; 7521541Srgrimes return; 7531541Srgrimes } 7541541Srgrimes count -= len; 7551541Srgrimes if (count < 0) 7561541Srgrimes count = 0; 7571541Srgrimes /* 7581541Srgrimes * Correct length for chain is "count". 7591541Srgrimes * Find the mbuf with last data, adjust its length, 7601541Srgrimes * and toss data from remaining mbufs on chain. 7611541Srgrimes */ 7621541Srgrimes m = mp; 7631541Srgrimes if (m->m_flags & M_PKTHDR) 7641541Srgrimes m->m_pkthdr.len = count; 7651541Srgrimes for (; m; m = m->m_next) { 7661541Srgrimes if (m->m_len >= count) { 7671541Srgrimes m->m_len = count; 768142350Ssam if (m->m_next != NULL) { 769142350Ssam m_freem(m->m_next); 770142350Ssam m->m_next = NULL; 771142350Ssam } 7721541Srgrimes break; 7731541Srgrimes } 7741541Srgrimes count -= m->m_len; 7751541Srgrimes } 7761541Srgrimes } 7771541Srgrimes} 7781541Srgrimes 7791541Srgrimes/* 7801541Srgrimes * Rearange an mbuf chain so that len bytes are contiguous 781242304Skevlo * and in the data area of an mbuf (so that mtod will work 782242304Skevlo * for a structure of size len). Returns the resulting 7831541Srgrimes * mbuf chain on success, frees it and returns null on failure. 7841541Srgrimes * If there is room, it will add up to max_protohdr-len extra bytes to the 7851541Srgrimes * contiguous region in an attempt to avoid being called next time. 7861541Srgrimes */ 7871541Srgrimesstruct mbuf * 78872356Sbmilekicm_pullup(struct mbuf *n, int len) 7891541Srgrimes{ 79072356Sbmilekic struct mbuf *m; 79172356Sbmilekic int count; 7921541Srgrimes int space; 7931541Srgrimes 7941541Srgrimes /* 7951541Srgrimes * If first mbuf has no cluster, and has room for len bytes 7961541Srgrimes * without shifting current data, pullup into it, 7971541Srgrimes * otherwise allocate a new mbuf to prepend to the chain. 7981541Srgrimes */ 7991541Srgrimes if ((n->m_flags & M_EXT) == 0 && 8001541Srgrimes n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 8011541Srgrimes if (n->m_len >= len) 8021541Srgrimes return (n); 8031541Srgrimes m = n; 8041541Srgrimes n = n->m_next; 8051541Srgrimes len -= m->m_len; 8061541Srgrimes } else { 8071541Srgrimes if (len > MHLEN) 8081541Srgrimes goto bad; 809248372Sglebius m = m_get(M_NOWAIT, n->m_type); 81072356Sbmilekic if (m == NULL) 8111541Srgrimes goto bad; 812108466Ssam if (n->m_flags & M_PKTHDR) 813248372Sglebius m_move_pkthdr(m, n); 8141541Srgrimes } 8151541Srgrimes space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 8161541Srgrimes do { 8171541Srgrimes count = min(min(max(len, max_protohdr), space), n->m_len); 8181541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 819103569Sbmilekic (u_int)count); 8201541Srgrimes len -= count; 8211541Srgrimes m->m_len += count; 8221541Srgrimes n->m_len -= count; 8231541Srgrimes space -= count; 8241541Srgrimes if (n->m_len) 8251541Srgrimes n->m_data += count; 8261541Srgrimes else 8271541Srgrimes n = m_free(n); 8281541Srgrimes } while (len > 0 && n); 8291541Srgrimes if (len > 0) { 8301541Srgrimes (void) m_free(m); 8311541Srgrimes goto bad; 8321541Srgrimes } 8331541Srgrimes m->m_next = n; 8341541Srgrimes return (m); 8351541Srgrimesbad: 8361541Srgrimes m_freem(n); 83772356Sbmilekic return (NULL); 8381541Srgrimes} 8391541Srgrimes 8401541Srgrimes/* 841143761Sjmg * Like m_pullup(), except a new mbuf is always allocated, and we allow 842143761Sjmg * the amount of empty space before the data in the new mbuf to be specified 843143761Sjmg * (in the event that the caller expects to prepend later). 844143761Sjmg */ 845143761Sjmgstruct mbuf * 846143761Sjmgm_copyup(struct mbuf *n, int len, int dstoff) 847143761Sjmg{ 848143761Sjmg struct mbuf *m; 849143761Sjmg int count, space; 850143761Sjmg 851143761Sjmg if (len > (MHLEN - dstoff)) 852143761Sjmg goto bad; 853248372Sglebius m = m_get(M_NOWAIT, n->m_type); 854143761Sjmg if (m == NULL) 855143761Sjmg goto bad; 856143761Sjmg if (n->m_flags & M_PKTHDR) 857248372Sglebius m_move_pkthdr(m, n); 858143761Sjmg m->m_data += dstoff; 859143761Sjmg space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 860143761Sjmg do { 861143761Sjmg count = min(min(max(len, max_protohdr), space), n->m_len); 862143761Sjmg memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), 863143761Sjmg (unsigned)count); 864143761Sjmg len -= count; 865143761Sjmg m->m_len += count; 866143761Sjmg n->m_len -= count; 867143761Sjmg space -= count; 868143761Sjmg if (n->m_len) 869143761Sjmg n->m_data += count; 870143761Sjmg else 871143761Sjmg n = m_free(n); 872143761Sjmg } while (len > 0 && n); 873143761Sjmg if (len > 0) { 874143761Sjmg (void) m_free(m); 875143761Sjmg goto bad; 876143761Sjmg } 877143761Sjmg m->m_next = n; 878143761Sjmg return (m); 879143761Sjmg bad: 880143761Sjmg m_freem(n); 881143761Sjmg return (NULL); 882143761Sjmg} 883143761Sjmg 884143761Sjmg/* 8851541Srgrimes * Partition an mbuf chain in two pieces, returning the tail -- 8861541Srgrimes * all but the first len0 bytes. In case of failure, it returns NULL and 8871541Srgrimes * attempts to restore the chain to its original state. 88897681Sarchie * 88997681Sarchie * Note that the resulting mbufs might be read-only, because the new 89097681Sarchie * mbuf can end up sharing an mbuf cluster with the original mbuf if 89197681Sarchie * the "breaking point" happens to lie within a cluster mbuf. Use the 89297681Sarchie * M_WRITABLE() macro to check for this case. 8931541Srgrimes */ 8941541Srgrimesstruct mbuf * 89572356Sbmilekicm_split(struct mbuf *m0, int len0, int wait) 8961541Srgrimes{ 89772356Sbmilekic struct mbuf *m, *n; 898103569Sbmilekic u_int len = len0, remain; 8991541Srgrimes 900132488Salfred MBUF_CHECKSLEEP(wait); 9011541Srgrimes for (m = m0; m && len > m->m_len; m = m->m_next) 9021541Srgrimes len -= m->m_len; 90372356Sbmilekic if (m == NULL) 90472356Sbmilekic return (NULL); 9051541Srgrimes remain = m->m_len - len; 906248887Sglebius if (m0->m_flags & M_PKTHDR && remain == 0) { 907248372Sglebius n = m_gethdr(wait, m0->m_type); 908258128Sglebius if (n == NULL) 909248887Sglebius return (NULL); 910248887Sglebius n->m_next = m->m_next; 911248887Sglebius m->m_next = NULL; 912248887Sglebius n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 913248887Sglebius n->m_pkthdr.len = m0->m_pkthdr.len - len0; 914248887Sglebius m0->m_pkthdr.len = len0; 915248887Sglebius return (n); 916248887Sglebius } else if (m0->m_flags & M_PKTHDR) { 917248887Sglebius n = m_gethdr(wait, m0->m_type); 91872356Sbmilekic if (n == NULL) 91972356Sbmilekic return (NULL); 9201541Srgrimes n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 9211541Srgrimes n->m_pkthdr.len = m0->m_pkthdr.len - len0; 9221541Srgrimes m0->m_pkthdr.len = len0; 9231541Srgrimes if (m->m_flags & M_EXT) 9241541Srgrimes goto extpacket; 9251541Srgrimes if (remain > MHLEN) { 9261541Srgrimes /* m can't be the lead packet */ 927276692Srwatson M_ALIGN(n, 0); 9281541Srgrimes n->m_next = m_split(m, len, wait); 92972356Sbmilekic if (n->m_next == NULL) { 9301541Srgrimes (void) m_free(n); 93172356Sbmilekic return (NULL); 93294471Shsu } else { 93394471Shsu n->m_len = 0; 9341541Srgrimes return (n); 93594471Shsu } 9361541Srgrimes } else 937276692Srwatson M_ALIGN(n, remain); 9381541Srgrimes } else if (remain == 0) { 9391541Srgrimes n = m->m_next; 94072356Sbmilekic m->m_next = NULL; 9411541Srgrimes return (n); 9421541Srgrimes } else { 943248372Sglebius n = m_get(wait, m->m_type); 94472356Sbmilekic if (n == NULL) 94572356Sbmilekic return (NULL); 9461541Srgrimes M_ALIGN(n, remain); 9471541Srgrimes } 9481541Srgrimesextpacket: 9491541Srgrimes if (m->m_flags & M_EXT) { 9501541Srgrimes n->m_data = m->m_data + len; 951151976Sandre mb_dupcl(n, m); 9521541Srgrimes } else { 9531541Srgrimes bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 9541541Srgrimes } 9551541Srgrimes n->m_len = remain; 9561541Srgrimes m->m_len = len; 9571541Srgrimes n->m_next = m->m_next; 95872356Sbmilekic m->m_next = NULL; 9591541Srgrimes return (n); 9601541Srgrimes} 9611541Srgrimes/* 9621541Srgrimes * Routine to copy from device local memory into mbufs. 96378508Sbmilekic * Note that `off' argument is offset into first mbuf of target chain from 96478508Sbmilekic * which to begin copying the data to. 9651541Srgrimes */ 9661541Srgrimesstruct mbuf * 96778508Sbmilekicm_devget(char *buf, int totlen, int off, struct ifnet *ifp, 968169624Srwatson void (*copy)(char *from, caddr_t to, u_int len)) 9691541Srgrimes{ 97072356Sbmilekic struct mbuf *m; 971129906Sbmilekic struct mbuf *top = NULL, **mp = ⊤ 97278508Sbmilekic int len; 9731541Srgrimes 97478508Sbmilekic if (off < 0 || off > MHLEN) 97578508Sbmilekic return (NULL); 97678508Sbmilekic 977129906Sbmilekic while (totlen > 0) { 978129906Sbmilekic if (top == NULL) { /* First one, must be PKTHDR */ 979129906Sbmilekic if (totlen + off >= MINCLSIZE) { 980243882Sglebius m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 981129906Sbmilekic len = MCLBYTES; 982129906Sbmilekic } else { 983243882Sglebius m = m_gethdr(M_NOWAIT, MT_DATA); 984129906Sbmilekic len = MHLEN; 9851541Srgrimes 986129906Sbmilekic /* Place initial small packet/header at end of mbuf */ 987129906Sbmilekic if (m && totlen + off + max_linkhdr <= MLEN) { 988129906Sbmilekic m->m_data += max_linkhdr; 989129906Sbmilekic len -= max_linkhdr; 990129906Sbmilekic } 991129906Sbmilekic } 992129906Sbmilekic if (m == NULL) 993129906Sbmilekic return NULL; 994129906Sbmilekic m->m_pkthdr.rcvif = ifp; 995129906Sbmilekic m->m_pkthdr.len = totlen; 996129906Sbmilekic } else { 997129906Sbmilekic if (totlen + off >= MINCLSIZE) { 998243882Sglebius m = m_getcl(M_NOWAIT, MT_DATA, 0); 999129906Sbmilekic len = MCLBYTES; 1000129906Sbmilekic } else { 1001243882Sglebius m = m_get(M_NOWAIT, MT_DATA); 1002129906Sbmilekic len = MLEN; 1003129906Sbmilekic } 100472356Sbmilekic if (m == NULL) { 10051541Srgrimes m_freem(top); 1006129906Sbmilekic return NULL; 10071541Srgrimes } 10081541Srgrimes } 100978508Sbmilekic if (off) { 101078508Sbmilekic m->m_data += off; 101178508Sbmilekic len -= off; 101278508Sbmilekic off = 0; 101378508Sbmilekic } 101478508Sbmilekic m->m_len = len = min(totlen, len); 10151541Srgrimes if (copy) 1016103569Sbmilekic copy(buf, mtod(m, caddr_t), (u_int)len); 10171541Srgrimes else 1018103569Sbmilekic bcopy(buf, mtod(m, caddr_t), (u_int)len); 101978508Sbmilekic buf += len; 10201541Srgrimes *mp = m; 10211541Srgrimes mp = &m->m_next; 10221541Srgrimes totlen -= len; 10231541Srgrimes } 10241541Srgrimes return (top); 10251541Srgrimes} 10263352Sphk 10273352Sphk/* 10283352Sphk * Copy data from a buffer back into the indicated mbuf chain, 10293352Sphk * starting "off" bytes from the beginning, extending the mbuf 10303352Sphk * chain if necessary. 10313352Sphk */ 10323352Sphkvoid 1033128402Sluigim_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp) 10343352Sphk{ 103572356Sbmilekic int mlen; 103672356Sbmilekic struct mbuf *m = m0, *n; 10373352Sphk int totlen = 0; 10383352Sphk 103972356Sbmilekic if (m0 == NULL) 10403352Sphk return; 10413352Sphk while (off > (mlen = m->m_len)) { 10423352Sphk off -= mlen; 10433352Sphk totlen += mlen; 104472356Sbmilekic if (m->m_next == NULL) { 1045243882Sglebius n = m_get(M_NOWAIT, m->m_type); 104672356Sbmilekic if (n == NULL) 10473352Sphk goto out; 1048129906Sbmilekic bzero(mtod(n, caddr_t), MLEN); 10493352Sphk n->m_len = min(MLEN, len + off); 10503352Sphk m->m_next = n; 10513352Sphk } 10523352Sphk m = m->m_next; 10533352Sphk } 10543352Sphk while (len > 0) { 1055187409Smav if (m->m_next == NULL && (len > m->m_len - off)) { 1056187409Smav m->m_len += min(len - (m->m_len - off), 1057187409Smav M_TRAILINGSPACE(m)); 1058187409Smav } 10593352Sphk mlen = min (m->m_len - off, len); 1060103569Sbmilekic bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); 10613352Sphk cp += mlen; 10623352Sphk len -= mlen; 10633352Sphk mlen += off; 10643352Sphk off = 0; 10653352Sphk totlen += mlen; 10663352Sphk if (len == 0) 10673352Sphk break; 106872356Sbmilekic if (m->m_next == NULL) { 1069243882Sglebius n = m_get(M_NOWAIT, m->m_type); 107072356Sbmilekic if (n == NULL) 10713352Sphk break; 10723352Sphk n->m_len = min(MLEN, len); 10733352Sphk m->m_next = n; 10743352Sphk } 10753352Sphk m = m->m_next; 10763352Sphk } 10773352Sphkout: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 10783352Sphk m->m_pkthdr.len = totlen; 10793352Sphk} 108052756Sphk 1081123557Sbms/* 1082138541Ssam * Append the specified data to the indicated mbuf chain, 1083138541Ssam * Extend the mbuf chain if the new data does not fit in 1084138541Ssam * existing space. 1085138541Ssam * 1086138541Ssam * Return 1 if able to complete the job; otherwise 0. 1087138541Ssam */ 1088138541Ssamint 1089138541Ssamm_append(struct mbuf *m0, int len, c_caddr_t cp) 1090138541Ssam{ 1091138541Ssam struct mbuf *m, *n; 1092138541Ssam int remainder, space; 1093138541Ssam 1094138541Ssam for (m = m0; m->m_next != NULL; m = m->m_next) 1095138541Ssam ; 1096138541Ssam remainder = len; 1097138541Ssam space = M_TRAILINGSPACE(m); 1098138541Ssam if (space > 0) { 1099138541Ssam /* 1100138541Ssam * Copy into available space. 1101138541Ssam */ 1102138541Ssam if (space > remainder) 1103138541Ssam space = remainder; 1104138541Ssam bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 1105138541Ssam m->m_len += space; 1106138541Ssam cp += space, remainder -= space; 1107138541Ssam } 1108138541Ssam while (remainder > 0) { 1109138541Ssam /* 1110138541Ssam * Allocate a new mbuf; could check space 1111138541Ssam * and allocate a cluster instead. 1112138541Ssam */ 1113243882Sglebius n = m_get(M_NOWAIT, m->m_type); 1114138541Ssam if (n == NULL) 1115138541Ssam break; 1116138541Ssam n->m_len = min(MLEN, remainder); 1117138894Ssam bcopy(cp, mtod(n, caddr_t), n->m_len); 1118138894Ssam cp += n->m_len, remainder -= n->m_len; 1119138541Ssam m->m_next = n; 1120138541Ssam m = n; 1121138541Ssam } 1122138541Ssam if (m0->m_flags & M_PKTHDR) 1123138541Ssam m0->m_pkthdr.len += len - remainder; 1124138541Ssam return (remainder == 0); 1125138541Ssam} 1126138541Ssam 1127138541Ssam/* 1128123557Sbms * Apply function f to the data in an mbuf chain starting "off" bytes from 1129123557Sbms * the beginning, continuing for "len" bytes. 1130123557Sbms */ 1131123557Sbmsint 1132123557Sbmsm_apply(struct mbuf *m, int off, int len, 1133123564Sbms int (*f)(void *, void *, u_int), void *arg) 1134123557Sbms{ 1135123564Sbms u_int count; 1136123557Sbms int rval; 1137123557Sbms 1138123557Sbms KASSERT(off >= 0, ("m_apply, negative off %d", off)); 1139123557Sbms KASSERT(len >= 0, ("m_apply, negative len %d", len)); 1140123557Sbms while (off > 0) { 1141123557Sbms KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); 1142123557Sbms if (off < m->m_len) 1143123557Sbms break; 1144123557Sbms off -= m->m_len; 1145123557Sbms m = m->m_next; 1146123557Sbms } 1147123557Sbms while (len > 0) { 1148123557Sbms KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); 1149123557Sbms count = min(m->m_len - off, len); 1150123557Sbms rval = (*f)(arg, mtod(m, caddr_t) + off, count); 1151123557Sbms if (rval) 1152123557Sbms return (rval); 1153123557Sbms len -= count; 1154123557Sbms off = 0; 1155123557Sbms m = m->m_next; 1156123557Sbms } 1157123557Sbms return (0); 1158123557Sbms} 1159123557Sbms 1160123557Sbms/* 1161123557Sbms * Return a pointer to mbuf/offset of location in mbuf chain. 1162123557Sbms */ 1163123557Sbmsstruct mbuf * 1164123557Sbmsm_getptr(struct mbuf *m, int loc, int *off) 1165123557Sbms{ 1166123557Sbms 1167123557Sbms while (loc >= 0) { 1168123564Sbms /* Normal end of search. */ 1169123557Sbms if (m->m_len > loc) { 1170123557Sbms *off = loc; 1171123557Sbms return (m); 1172123557Sbms } else { 1173123557Sbms loc -= m->m_len; 1174123557Sbms if (m->m_next == NULL) { 1175123557Sbms if (loc == 0) { 1176123564Sbms /* Point at the end of valid data. */ 1177123557Sbms *off = m->m_len; 1178123557Sbms return (m); 1179123564Sbms } 1180123564Sbms return (NULL); 1181123564Sbms } 1182123564Sbms m = m->m_next; 1183123557Sbms } 1184123557Sbms } 1185123557Sbms return (NULL); 1186123557Sbms} 1187123557Sbms 118852756Sphkvoid 1189135904Sjmgm_print(const struct mbuf *m, int maxlen) 119052756Sphk{ 119152756Sphk int len; 1192135904Sjmg int pdata; 119354906Seivind const struct mbuf *m2; 119452756Sphk 1195230587Sken if (m == NULL) { 1196230587Sken printf("mbuf: %p\n", m); 1197230587Sken return; 1198230587Sken } 1199230587Sken 1200135904Sjmg if (m->m_flags & M_PKTHDR) 1201135904Sjmg len = m->m_pkthdr.len; 1202135904Sjmg else 1203135904Sjmg len = -1; 120452756Sphk m2 = m; 1205135904Sjmg while (m2 != NULL && (len == -1 || len)) { 1206135904Sjmg pdata = m2->m_len; 1207135904Sjmg if (maxlen != -1 && pdata > maxlen) 1208135904Sjmg pdata = maxlen; 1209135904Sjmg printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len, 1210135904Sjmg m2->m_next, m2->m_flags, "\20\20freelist\17skipfw" 1211135904Sjmg "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly" 1212135904Sjmg "\3eor\2pkthdr\1ext", pdata ? "" : "\n"); 1213135904Sjmg if (pdata) 1214156700Sjmg printf(", %*D\n", pdata, (u_char *)m2->m_data, "-"); 1215135904Sjmg if (len != -1) 1216135904Sjmg len -= m2->m_len; 121752756Sphk m2 = m2->m_next; 121852756Sphk } 1219135904Sjmg if (len > 0) 1220135904Sjmg printf("%d bytes unaccounted for.\n", len); 122152756Sphk return; 122252756Sphk} 1223103540Sphk 1224103569Sbmilekicu_int 1225103540Sphkm_fixhdr(struct mbuf *m0) 1226103540Sphk{ 1227103569Sbmilekic u_int len; 1228103540Sphk 1229103544Sphk len = m_length(m0, NULL); 1230103544Sphk m0->m_pkthdr.len = len; 1231103544Sphk return (len); 1232103544Sphk} 1233103544Sphk 1234103569Sbmilekicu_int 1235103544Sphkm_length(struct mbuf *m0, struct mbuf **last) 1236103544Sphk{ 1237103544Sphk struct mbuf *m; 1238103569Sbmilekic u_int len; 1239103544Sphk 1240103544Sphk len = 0; 1241103544Sphk for (m = m0; m != NULL; m = m->m_next) { 1242103540Sphk len += m->m_len; 1243103544Sphk if (m->m_next == NULL) 1244103544Sphk break; 1245103540Sphk } 1246103544Sphk if (last != NULL) 1247103544Sphk *last = m; 1248103544Sphk return (len); 1249103540Sphk} 1250112777Ssilby 1251112777Ssilby/* 1252112777Ssilby * Defragment a mbuf chain, returning the shortest possible 1253112777Ssilby * chain of mbufs and clusters. If allocation fails and 1254112777Ssilby * this cannot be completed, NULL will be returned, but 1255112777Ssilby * the passed in chain will be unchanged. Upon success, 1256112777Ssilby * the original chain will be freed, and the new chain 1257112777Ssilby * will be returned. 1258112777Ssilby * 1259112777Ssilby * If a non-packet header is passed in, the original 1260112777Ssilby * mbuf (chain?) will be returned unharmed. 1261112777Ssilby */ 1262112777Ssilbystruct mbuf * 1263112777Ssilbym_defrag(struct mbuf *m0, int how) 1264112777Ssilby{ 1265125472Ssilby struct mbuf *m_new = NULL, *m_final = NULL; 1266125472Ssilby int progress = 0, length; 1267112777Ssilby 1268132488Salfred MBUF_CHECKSLEEP(how); 1269112777Ssilby if (!(m0->m_flags & M_PKTHDR)) 1270112777Ssilby return (m0); 1271112777Ssilby 1272117770Ssilby m_fixhdr(m0); /* Needed sanity check */ 1273117770Ssilby 1274113490Ssilby#ifdef MBUF_STRESS_TEST 1275113490Ssilby if (m_defragrandomfailures) { 1276113490Ssilby int temp = arc4random() & 0xff; 1277113490Ssilby if (temp == 0xba) 1278113490Ssilby goto nospace; 1279113490Ssilby } 1280113490Ssilby#endif 1281266876Sglebius 1282112777Ssilby if (m0->m_pkthdr.len > MHLEN) 1283112777Ssilby m_final = m_getcl(how, MT_DATA, M_PKTHDR); 1284112777Ssilby else 1285112777Ssilby m_final = m_gethdr(how, MT_DATA); 1286112777Ssilby 1287112777Ssilby if (m_final == NULL) 1288112777Ssilby goto nospace; 1289112777Ssilby 1290123740Speter if (m_dup_pkthdr(m_final, m0, how) == 0) 1291112777Ssilby goto nospace; 1292112777Ssilby 1293112777Ssilby m_new = m_final; 1294112777Ssilby 1295112777Ssilby while (progress < m0->m_pkthdr.len) { 1296112777Ssilby length = m0->m_pkthdr.len - progress; 1297112777Ssilby if (length > MCLBYTES) 1298112777Ssilby length = MCLBYTES; 1299112777Ssilby 1300112777Ssilby if (m_new == NULL) { 1301112777Ssilby if (length > MLEN) 1302112777Ssilby m_new = m_getcl(how, MT_DATA, 0); 1303112777Ssilby else 1304112777Ssilby m_new = m_get(how, MT_DATA); 1305112777Ssilby if (m_new == NULL) 1306112777Ssilby goto nospace; 1307112777Ssilby } 1308112777Ssilby 1309112777Ssilby m_copydata(m0, progress, length, mtod(m_new, caddr_t)); 1310112777Ssilby progress += length; 1311112777Ssilby m_new->m_len = length; 1312112777Ssilby if (m_new != m_final) 1313112777Ssilby m_cat(m_final, m_new); 1314112777Ssilby m_new = NULL; 1315112777Ssilby } 1316116455Ssilby#ifdef MBUF_STRESS_TEST 1317112777Ssilby if (m0->m_next == NULL) 1318112777Ssilby m_defraguseless++; 1319116455Ssilby#endif 1320112777Ssilby m_freem(m0); 1321112777Ssilby m0 = m_final; 1322116455Ssilby#ifdef MBUF_STRESS_TEST 1323112777Ssilby m_defragpackets++; 1324112777Ssilby m_defragbytes += m0->m_pkthdr.len; 1325116455Ssilby#endif 1326112777Ssilby return (m0); 1327112777Ssilbynospace: 1328116455Ssilby#ifdef MBUF_STRESS_TEST 1329112777Ssilby m_defragfailure++; 1330116455Ssilby#endif 1331112777Ssilby if (m_final) 1332112777Ssilby m_freem(m_final); 1333112777Ssilby return (NULL); 1334112777Ssilby} 1335119644Ssilby 1336175414Ssam/* 1337175414Ssam * Defragment an mbuf chain, returning at most maxfrags separate 1338175414Ssam * mbufs+clusters. If this is not possible NULL is returned and 1339175414Ssam * the original mbuf chain is left in it's present (potentially 1340175414Ssam * modified) state. We use two techniques: collapsing consecutive 1341175414Ssam * mbufs and replacing consecutive mbufs by a cluster. 1342175414Ssam * 1343175414Ssam * NB: this should really be named m_defrag but that name is taken 1344175414Ssam */ 1345175414Ssamstruct mbuf * 1346175414Ssamm_collapse(struct mbuf *m0, int how, int maxfrags) 1347175414Ssam{ 1348175414Ssam struct mbuf *m, *n, *n2, **prev; 1349175414Ssam u_int curfrags; 1350175414Ssam 1351175414Ssam /* 1352175414Ssam * Calculate the current number of frags. 1353175414Ssam */ 1354175414Ssam curfrags = 0; 1355175414Ssam for (m = m0; m != NULL; m = m->m_next) 1356175414Ssam curfrags++; 1357175414Ssam /* 1358175414Ssam * First, try to collapse mbufs. Note that we always collapse 1359175414Ssam * towards the front so we don't need to deal with moving the 1360175414Ssam * pkthdr. This may be suboptimal if the first mbuf has much 1361175414Ssam * less data than the following. 1362175414Ssam */ 1363175414Ssam m = m0; 1364175414Ssamagain: 1365175414Ssam for (;;) { 1366175414Ssam n = m->m_next; 1367175414Ssam if (n == NULL) 1368175414Ssam break; 1369242256Sandre if (M_WRITABLE(m) && 1370175414Ssam n->m_len < M_TRAILINGSPACE(m)) { 1371175414Ssam bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, 1372175414Ssam n->m_len); 1373175414Ssam m->m_len += n->m_len; 1374175414Ssam m->m_next = n->m_next; 1375175414Ssam m_free(n); 1376175414Ssam if (--curfrags <= maxfrags) 1377175414Ssam return m0; 1378175414Ssam } else 1379175414Ssam m = n; 1380175414Ssam } 1381175414Ssam KASSERT(maxfrags > 1, 1382175414Ssam ("maxfrags %u, but normal collapse failed", maxfrags)); 1383175414Ssam /* 1384175414Ssam * Collapse consecutive mbufs to a cluster. 1385175414Ssam */ 1386175414Ssam prev = &m0->m_next; /* NB: not the first mbuf */ 1387175414Ssam while ((n = *prev) != NULL) { 1388175414Ssam if ((n2 = n->m_next) != NULL && 1389175414Ssam n->m_len + n2->m_len < MCLBYTES) { 1390175414Ssam m = m_getcl(how, MT_DATA, 0); 1391175414Ssam if (m == NULL) 1392175414Ssam goto bad; 1393175414Ssam bcopy(mtod(n, void *), mtod(m, void *), n->m_len); 1394175414Ssam bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len, 1395175414Ssam n2->m_len); 1396175414Ssam m->m_len = n->m_len + n2->m_len; 1397175414Ssam m->m_next = n2->m_next; 1398175414Ssam *prev = m; 1399175414Ssam m_free(n); 1400175414Ssam m_free(n2); 1401175414Ssam if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */ 1402175414Ssam return m0; 1403175414Ssam /* 1404175414Ssam * Still not there, try the normal collapse 1405175414Ssam * again before we allocate another cluster. 1406175414Ssam */ 1407175414Ssam goto again; 1408175414Ssam } 1409175414Ssam prev = &n->m_next; 1410175414Ssam } 1411175414Ssam /* 1412175414Ssam * No place where we can collapse to a cluster; punt. 1413175414Ssam * This can occur if, for example, you request 2 frags 1414175414Ssam * but the packet requires that both be clusters (we 1415175414Ssam * never reallocate the first mbuf to avoid moving the 1416175414Ssam * packet header). 1417175414Ssam */ 1418175414Ssambad: 1419175414Ssam return NULL; 1420175414Ssam} 1421175414Ssam 1422119644Ssilby#ifdef MBUF_STRESS_TEST 1423119644Ssilby 1424119644Ssilby/* 1425119644Ssilby * Fragment an mbuf chain. There's no reason you'd ever want to do 1426119644Ssilby * this in normal usage, but it's great for stress testing various 1427119644Ssilby * mbuf consumers. 1428119644Ssilby * 1429119644Ssilby * If fragmentation is not possible, the original chain will be 1430119644Ssilby * returned. 1431119644Ssilby * 1432119644Ssilby * Possible length values: 1433119644Ssilby * 0 no fragmentation will occur 1434119644Ssilby * > 0 each fragment will be of the specified length 1435119644Ssilby * -1 each fragment will be the same random value in length 1436119644Ssilby * -2 each fragment's length will be entirely random 1437119644Ssilby * (Random values range from 1 to 256) 1438119644Ssilby */ 1439119644Ssilbystruct mbuf * 1440119644Ssilbym_fragment(struct mbuf *m0, int how, int length) 1441119644Ssilby{ 1442125472Ssilby struct mbuf *m_new = NULL, *m_final = NULL; 1443125472Ssilby int progress = 0; 1444119644Ssilby 1445119644Ssilby if (!(m0->m_flags & M_PKTHDR)) 1446119644Ssilby return (m0); 1447266876Sglebius 1448119644Ssilby if ((length == 0) || (length < -2)) 1449119644Ssilby return (m0); 1450119644Ssilby 1451119644Ssilby m_fixhdr(m0); /* Needed sanity check */ 1452119644Ssilby 1453119644Ssilby m_final = m_getcl(how, MT_DATA, M_PKTHDR); 1454119644Ssilby 1455119644Ssilby if (m_final == NULL) 1456119644Ssilby goto nospace; 1457119644Ssilby 1458123823Ssilby if (m_dup_pkthdr(m_final, m0, how) == 0) 1459119644Ssilby goto nospace; 1460119644Ssilby 1461119644Ssilby m_new = m_final; 1462119644Ssilby 1463119644Ssilby if (length == -1) 1464119644Ssilby length = 1 + (arc4random() & 255); 1465119644Ssilby 1466119644Ssilby while (progress < m0->m_pkthdr.len) { 1467119644Ssilby int fraglen; 1468119644Ssilby 1469119644Ssilby if (length > 0) 1470119644Ssilby fraglen = length; 1471119644Ssilby else 1472119644Ssilby fraglen = 1 + (arc4random() & 255); 1473119644Ssilby if (fraglen > m0->m_pkthdr.len - progress) 1474119644Ssilby fraglen = m0->m_pkthdr.len - progress; 1475119644Ssilby 1476119644Ssilby if (fraglen > MCLBYTES) 1477119644Ssilby fraglen = MCLBYTES; 1478119644Ssilby 1479119644Ssilby if (m_new == NULL) { 1480119644Ssilby m_new = m_getcl(how, MT_DATA, 0); 1481119644Ssilby if (m_new == NULL) 1482119644Ssilby goto nospace; 1483119644Ssilby } 1484119644Ssilby 1485119644Ssilby m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t)); 1486119644Ssilby progress += fraglen; 1487119644Ssilby m_new->m_len = fraglen; 1488119644Ssilby if (m_new != m_final) 1489119644Ssilby m_cat(m_final, m_new); 1490119644Ssilby m_new = NULL; 1491119644Ssilby } 1492119644Ssilby m_freem(m0); 1493119644Ssilby m0 = m_final; 1494119644Ssilby return (m0); 1495119644Ssilbynospace: 1496119644Ssilby if (m_final) 1497119644Ssilby m_freem(m_final); 1498119644Ssilby /* Return the original chain on failure */ 1499119644Ssilby return (m0); 1500119644Ssilby} 1501119644Ssilby 1502119644Ssilby#endif 1503125296Ssilby 1504163915Sandre/* 1505163915Sandre * Copy the contents of uio into a properly sized mbuf chain. 1506163915Sandre */ 1507125296Ssilbystruct mbuf * 1508163915Sandrem_uiotombuf(struct uio *uio, int how, int len, int align, int flags) 1509125296Ssilby{ 1510163915Sandre struct mbuf *m, *mb; 1511231949Skib int error, length; 1512231949Skib ssize_t total; 1513163915Sandre int progress = 0; 1514125296Ssilby 1515163915Sandre /* 1516163915Sandre * len can be zero or an arbitrary large value bound by 1517163915Sandre * the total data supplied by the uio. 1518163915Sandre */ 1519125296Ssilby if (len > 0) 1520125296Ssilby total = min(uio->uio_resid, len); 1521125296Ssilby else 1522125296Ssilby total = uio->uio_resid; 1523163915Sandre 1524163915Sandre /* 1525163915Sandre * The smallest unit returned by m_getm2() is a single mbuf 1526182777Sthompsa * with pkthdr. We can't align past it. 1527163915Sandre */ 1528145883Semax if (align >= MHLEN) 1529163915Sandre return (NULL); 1530163915Sandre 1531166171Sandre /* 1532166171Sandre * Give us the full allocation or nothing. 1533166171Sandre * If len is zero return the smallest empty mbuf. 1534166171Sandre */ 1535166171Sandre m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags); 1536163915Sandre if (m == NULL) 1537163915Sandre return (NULL); 1538163915Sandre m->m_data += align; 1539163915Sandre 1540163915Sandre /* Fill all mbufs with uio data and update header information. */ 1541163915Sandre for (mb = m; mb != NULL; mb = mb->m_next) { 1542163915Sandre length = min(M_TRAILINGSPACE(mb), total - progress); 1543163915Sandre 1544163915Sandre error = uiomove(mtod(mb, void *), length, uio); 1545163915Sandre if (error) { 1546163915Sandre m_freem(m); 1547163915Sandre return (NULL); 1548125296Ssilby } 1549163915Sandre 1550163915Sandre mb->m_len = length; 1551125296Ssilby progress += length; 1552163915Sandre if (flags & M_PKTHDR) 1553163915Sandre m->m_pkthdr.len += length; 1554125296Ssilby } 1555163915Sandre KASSERT(progress == total, ("%s: progress != total", __func__)); 1556163915Sandre 1557163915Sandre return (m); 1558125296Ssilby} 1559148552Ssam 1560148552Ssam/* 1561194667Sandre * Copy an mbuf chain into a uio limited by len if set. 1562194667Sandre */ 1563194667Sandreint 1564194667Sandrem_mbuftouio(struct uio *uio, struct mbuf *m, int len) 1565194667Sandre{ 1566194667Sandre int error, length, total; 1567194667Sandre int progress = 0; 1568194667Sandre 1569194667Sandre if (len > 0) 1570194667Sandre total = min(uio->uio_resid, len); 1571194667Sandre else 1572194667Sandre total = uio->uio_resid; 1573194667Sandre 1574194667Sandre /* Fill the uio with data from the mbufs. */ 1575194667Sandre for (; m != NULL; m = m->m_next) { 1576194667Sandre length = min(m->m_len, total - progress); 1577194667Sandre 1578194667Sandre error = uiomove(mtod(m, void *), length, uio); 1579194667Sandre if (error) 1580194667Sandre return (error); 1581194667Sandre 1582194667Sandre progress += length; 1583194667Sandre } 1584194667Sandre 1585194667Sandre return (0); 1586194667Sandre} 1587194667Sandre 1588194667Sandre/* 1589156756Ssam * Create a writable copy of the mbuf chain. While doing this 1590156756Ssam * we compact the chain with a goal of producing a chain with 1591156756Ssam * at most two mbufs. The second mbuf in this chain is likely 1592156756Ssam * to be a cluster. The primary purpose of this work is to create 1593156756Ssam * a writable packet for encryption, compression, etc. The 1594156756Ssam * secondary goal is to linearize the data so the data can be 1595156756Ssam * passed to crypto hardware in the most efficient manner possible. 1596156756Ssam */ 1597156756Ssamstruct mbuf * 1598156756Ssamm_unshare(struct mbuf *m0, int how) 1599156756Ssam{ 1600156756Ssam struct mbuf *m, *mprev; 1601156756Ssam struct mbuf *n, *mfirst, *mlast; 1602156756Ssam int len, off; 1603156756Ssam 1604156756Ssam mprev = NULL; 1605156756Ssam for (m = m0; m != NULL; m = mprev->m_next) { 1606156756Ssam /* 1607156756Ssam * Regular mbufs are ignored unless there's a cluster 1608156756Ssam * in front of it that we can use to coalesce. We do 1609156756Ssam * the latter mainly so later clusters can be coalesced 1610156756Ssam * also w/o having to handle them specially (i.e. convert 1611156756Ssam * mbuf+cluster -> cluster). This optimization is heavily 1612156756Ssam * influenced by the assumption that we're running over 1613156756Ssam * Ethernet where MCLBYTES is large enough that the max 1614156756Ssam * packet size will permit lots of coalescing into a 1615156756Ssam * single cluster. This in turn permits efficient 1616156756Ssam * crypto operations, especially when using hardware. 1617156756Ssam */ 1618156756Ssam if ((m->m_flags & M_EXT) == 0) { 1619156756Ssam if (mprev && (mprev->m_flags & M_EXT) && 1620156756Ssam m->m_len <= M_TRAILINGSPACE(mprev)) { 1621156756Ssam /* XXX: this ignores mbuf types */ 1622156756Ssam memcpy(mtod(mprev, caddr_t) + mprev->m_len, 1623266876Sglebius mtod(m, caddr_t), m->m_len); 1624156756Ssam mprev->m_len += m->m_len; 1625156756Ssam mprev->m_next = m->m_next; /* unlink from chain */ 1626156756Ssam m_free(m); /* reclaim mbuf */ 1627156756Ssam#if 0 1628156756Ssam newipsecstat.ips_mbcoalesced++; 1629156756Ssam#endif 1630156756Ssam } else { 1631156756Ssam mprev = m; 1632156756Ssam } 1633156756Ssam continue; 1634156756Ssam } 1635156756Ssam /* 1636156756Ssam * Writable mbufs are left alone (for now). 1637156756Ssam */ 1638156756Ssam if (M_WRITABLE(m)) { 1639156756Ssam mprev = m; 1640156756Ssam continue; 1641156756Ssam } 1642156756Ssam 1643156756Ssam /* 1644156756Ssam * Not writable, replace with a copy or coalesce with 1645156756Ssam * the previous mbuf if possible (since we have to copy 1646156756Ssam * it anyway, we try to reduce the number of mbufs and 1647156756Ssam * clusters so that future work is easier). 1648156756Ssam */ 1649156756Ssam KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags)); 1650156756Ssam /* NB: we only coalesce into a cluster or larger */ 1651156756Ssam if (mprev != NULL && (mprev->m_flags & M_EXT) && 1652156756Ssam m->m_len <= M_TRAILINGSPACE(mprev)) { 1653156756Ssam /* XXX: this ignores mbuf types */ 1654156756Ssam memcpy(mtod(mprev, caddr_t) + mprev->m_len, 1655266876Sglebius mtod(m, caddr_t), m->m_len); 1656156756Ssam mprev->m_len += m->m_len; 1657156756Ssam mprev->m_next = m->m_next; /* unlink from chain */ 1658156756Ssam m_free(m); /* reclaim mbuf */ 1659156756Ssam#if 0 1660156756Ssam newipsecstat.ips_clcoalesced++; 1661156756Ssam#endif 1662156756Ssam continue; 1663156756Ssam } 1664156756Ssam 1665156756Ssam /* 1666248371Sglebius * Allocate new space to hold the copy and copy the data. 1667248371Sglebius * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by 1668248371Sglebius * splitting them into clusters. We could just malloc a 1669248371Sglebius * buffer and make it external but too many device drivers 1670248371Sglebius * don't know how to break up the non-contiguous memory when 1671248371Sglebius * doing DMA. 1672156756Ssam */ 1673248371Sglebius n = m_getcl(how, m->m_type, m->m_flags); 1674248371Sglebius if (n == NULL) { 1675248371Sglebius m_freem(m0); 1676248371Sglebius return (NULL); 1677156756Ssam } 1678288990Sglebius if (m->m_flags & M_PKTHDR) { 1679288990Sglebius KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR", 1680288990Sglebius __func__, m0, m)); 1681288990Sglebius m_move_pkthdr(n, m); 1682288990Sglebius } 1683156756Ssam len = m->m_len; 1684156756Ssam off = 0; 1685156756Ssam mfirst = n; 1686156756Ssam mlast = NULL; 1687156756Ssam for (;;) { 1688156756Ssam int cc = min(len, MCLBYTES); 1689156756Ssam memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); 1690156756Ssam n->m_len = cc; 1691156756Ssam if (mlast != NULL) 1692156756Ssam mlast->m_next = n; 1693266876Sglebius mlast = n; 1694156756Ssam#if 0 1695156756Ssam newipsecstat.ips_clcopied++; 1696156756Ssam#endif 1697156756Ssam 1698156756Ssam len -= cc; 1699156756Ssam if (len <= 0) 1700156756Ssam break; 1701156756Ssam off += cc; 1702156756Ssam 1703156756Ssam n = m_getcl(how, m->m_type, m->m_flags); 1704156756Ssam if (n == NULL) { 1705156756Ssam m_freem(mfirst); 1706156756Ssam m_freem(m0); 1707156756Ssam return (NULL); 1708156756Ssam } 1709156756Ssam } 1710266876Sglebius n->m_next = m->m_next; 1711156756Ssam if (mprev == NULL) 1712156756Ssam m0 = mfirst; /* new head of chain */ 1713156756Ssam else 1714156756Ssam mprev->m_next = mfirst; /* replace old mbuf */ 1715156756Ssam m_free(m); /* release old mbuf */ 1716156756Ssam mprev = mfirst; 1717156756Ssam } 1718156756Ssam return (m0); 1719156756Ssam} 1720178674Sjulian 1721178674Sjulian#ifdef MBUF_PROFILING 1722178674Sjulian 1723178674Sjulian#define MP_BUCKETS 32 /* don't just change this as things may overflow.*/ 1724178674Sjulianstruct mbufprofile { 1725178700Sjulian uintmax_t wasted[MP_BUCKETS]; 1726178700Sjulian uintmax_t used[MP_BUCKETS]; 1727178700Sjulian uintmax_t segments[MP_BUCKETS]; 1728178674Sjulian} mbprof; 1729178674Sjulian 1730178674Sjulian#define MP_MAXDIGITS 21 /* strlen("16,000,000,000,000,000,000") == 21 */ 1731178674Sjulian#define MP_NUMLINES 6 1732178674Sjulian#define MP_NUMSPERLINE 16 1733178674Sjulian#define MP_EXTRABYTES 64 /* > strlen("used:\nwasted:\nsegments:\n") */ 1734178674Sjulian/* work out max space needed and add a bit of spare space too */ 1735178674Sjulian#define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE) 1736178674Sjulian#define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES) 1737178674Sjulian 1738178674Sjulianchar mbprofbuf[MP_BUFSIZE]; 1739178674Sjulian 1740178674Sjulianvoid 1741178674Sjulianm_profile(struct mbuf *m) 1742178674Sjulian{ 1743178674Sjulian int segments = 0; 1744178674Sjulian int used = 0; 1745178674Sjulian int wasted = 0; 1746266876Sglebius 1747178674Sjulian while (m) { 1748178674Sjulian segments++; 1749178674Sjulian used += m->m_len; 1750178674Sjulian if (m->m_flags & M_EXT) { 1751178674Sjulian wasted += MHLEN - sizeof(m->m_ext) + 1752178674Sjulian m->m_ext.ext_size - m->m_len; 1753178674Sjulian } else { 1754178674Sjulian if (m->m_flags & M_PKTHDR) 1755178674Sjulian wasted += MHLEN - m->m_len; 1756178674Sjulian else 1757178674Sjulian wasted += MLEN - m->m_len; 1758178674Sjulian } 1759178674Sjulian m = m->m_next; 1760178674Sjulian } 1761178674Sjulian /* be paranoid.. it helps */ 1762178674Sjulian if (segments > MP_BUCKETS - 1) 1763178674Sjulian segments = MP_BUCKETS - 1; 1764178674Sjulian if (used > 100000) 1765178674Sjulian used = 100000; 1766178674Sjulian if (wasted > 100000) 1767178674Sjulian wasted = 100000; 1768178674Sjulian /* store in the appropriate bucket */ 1769178674Sjulian /* don't bother locking. if it's slightly off, so what? */ 1770178674Sjulian mbprof.segments[segments]++; 1771178674Sjulian mbprof.used[fls(used)]++; 1772178674Sjulian mbprof.wasted[fls(wasted)]++; 1773178674Sjulian} 1774178674Sjulian 1775178674Sjulianstatic void 1776178674Sjulianmbprof_textify(void) 1777178674Sjulian{ 1778178674Sjulian int offset; 1779178674Sjulian char *c; 1780209390Sed uint64_t *p; 1781178674Sjulian 1782178674Sjulian p = &mbprof.wasted[0]; 1783178674Sjulian c = mbprofbuf; 1784266876Sglebius offset = snprintf(c, MP_MAXLINE + 10, 1785178674Sjulian "wasted:\n" 1786178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1787178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1788178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1789178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1790178674Sjulian#ifdef BIG_ARRAY 1791178674Sjulian p = &mbprof.wasted[16]; 1792178674Sjulian c += offset; 1793266876Sglebius offset = snprintf(c, MP_MAXLINE, 1794178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1795178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1796178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1797178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1798178674Sjulian#endif 1799178674Sjulian p = &mbprof.used[0]; 1800178674Sjulian c += offset; 1801266876Sglebius offset = snprintf(c, MP_MAXLINE + 10, 1802178674Sjulian "used:\n" 1803178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1804178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1805178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1806178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1807178674Sjulian#ifdef BIG_ARRAY 1808178674Sjulian p = &mbprof.used[16]; 1809178674Sjulian c += offset; 1810266876Sglebius offset = snprintf(c, MP_MAXLINE, 1811178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1812178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1813178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1814178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1815178674Sjulian#endif 1816178674Sjulian p = &mbprof.segments[0]; 1817178674Sjulian c += offset; 1818266876Sglebius offset = snprintf(c, MP_MAXLINE + 10, 1819178674Sjulian "segments:\n" 1820178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1821178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju\n", 1822178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1823178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1824178674Sjulian#ifdef BIG_ARRAY 1825178674Sjulian p = &mbprof.segments[16]; 1826178674Sjulian c += offset; 1827266876Sglebius offset = snprintf(c, MP_MAXLINE, 1828178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %ju " 1829178700Sjulian "%ju %ju %ju %ju %ju %ju %ju %jju", 1830178674Sjulian p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 1831178674Sjulian p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 1832178674Sjulian#endif 1833178674Sjulian} 1834178674Sjulian 1835178674Sjulianstatic int 1836178674Sjulianmbprof_handler(SYSCTL_HANDLER_ARGS) 1837178674Sjulian{ 1838178674Sjulian int error; 1839178674Sjulian 1840178674Sjulian mbprof_textify(); 1841178674Sjulian error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1); 1842178674Sjulian return (error); 1843178674Sjulian} 1844178674Sjulian 1845178674Sjulianstatic int 1846178674Sjulianmbprof_clr_handler(SYSCTL_HANDLER_ARGS) 1847178674Sjulian{ 1848178674Sjulian int clear, error; 1849266876Sglebius 1850178674Sjulian clear = 0; 1851178674Sjulian error = sysctl_handle_int(oidp, &clear, 0, req); 1852178674Sjulian if (error || !req->newptr) 1853178674Sjulian return (error); 1854266876Sglebius 1855178674Sjulian if (clear) { 1856178674Sjulian bzero(&mbprof, sizeof(mbprof)); 1857178674Sjulian } 1858266876Sglebius 1859178674Sjulian return (error); 1860178674Sjulian} 1861178674Sjulian 1862178674Sjulian 1863178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD, 1864178674Sjulian NULL, 0, mbprof_handler, "A", "mbuf profiling statistics"); 1865178674Sjulian 1866178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW, 1867178674Sjulian NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics"); 1868178674Sjulian#endif 1869178674Sjulian 1870