1/* $NetBSD: mbuf.h,v 1.240 2024/05/12 10:34:56 rillig Exp $ */ 2 3/* 4 * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center and Matt Thomas of 3am Software Foundry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * Copyright (c) 1982, 1986, 1988, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
 *
 *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
 */

#ifndef _SYS_MBUF_H_
#define _SYS_MBUF_H_

#ifdef _KERNEL_OPT
#include "opt_mbuftrace.h"
#endif

#ifndef M_WAITOK
#include <sys/malloc.h>
#endif
#include <sys/pool.h>
#include <sys/queue.h>
#if defined(_KERNEL)
#include <sys/percpu_types.h>
#include <sys/socket.h>	/* for AF_UNSPEC */
#include <sys/psref.h>
#endif /* defined(_KERNEL) */

/* For offsetof() */
#if defined(_KERNEL) || defined(_STANDALONE)
#include <sys/systm.h>
#else
#include <stddef.h>
#endif

#include <uvm/uvm_param.h>	/* for MIN_PAGE_SIZE */

#include <net/if.h>

/*
 * Mbufs are of a single size, MSIZE (machine/param.h), which
 * includes overhead. An mbuf may add a single "mbuf cluster" of size
 * MCLBYTES (also in machine/param.h), which has no additional overhead
 * and is used instead of the internal data area; this is done when
 * at least MINCLSIZE of data must be stored.
 */

/*
 * Packet tag structure: a tag is a small typed blob linked off an
 * mbuf's pkthdr; the payload (m_tag_len bytes) follows this header.
 */
struct m_tag {
	SLIST_ENTRY(m_tag)	m_tag_link;	/* List of packet tags */
	uint16_t		m_tag_id;	/* Tag ID */
	uint16_t		m_tag_len;	/* Length of data */
};

/* mbuf ownership structure (MBUFTRACE allocation accounting) */
struct mowner {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* all registered owners */
	struct percpu *mo_counters;	/* per-CPU mowner_counter */
};

#define MOWNER_INIT(x, y)	{ .mo_name = x, .mo_descr = y }

/* Indices into a mowner's per-CPU counter array. */
enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS,		/* # of small mbuf claimed */
	MOWNER_COUNTER_RELEASES,	/* # of small mbuf released */
	MOWNER_COUNTER_CLUSTER_CLAIMS,	/* # of cluster mbuf claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES,/* # of cluster mbuf released */
	MOWNER_COUNTER_EXT_CLAIMS,	/* # of M_EXT mbuf claimed */
	MOWNER_COUNTER_EXT_RELEASES,	/* # of M_EXT mbuf released */

	MOWNER_COUNTER_NCOUNTERS,
};

#if defined(_KERNEL)
struct mowner_counter {
	u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif

/* userland-exported version of struct mowner */
struct mowner_user {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* unused padding; for compatibility */
	u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
};

/*
 * Macros for type conversion
 * mtod(m,t) -	convert mbuf pointer to data pointer of correct type
 */
#define	mtod(m, t)	((t)((m)->m_data))

/* header at beginning of each mbuf */
struct m_hdr {
	struct mbuf *mh_next;		/* next buffer in chain */
	struct mbuf *mh_nextpkt;	/* next chain in queue/record */
	char *mh_data;			/* location of data */
	struct mowner *mh_owner;	/* mbuf owner */
	int mh_len;			/* amount of data in this mbuf */
	int mh_flags;			/* flags; see below */
	paddr_t mh_paddr;		/* physical address of mbuf */
	short mh_type;			/* type of data in this mbuf */
};

/*
 * record/packet header in first mbuf of chain; valid if M_PKTHDR set
 *
 * A note about csum_data:
 *
 * o For the out-bound direction, the low 16 bits indicates the offset after
 *   the L4 header where the final L4 checksum value is to be stored and the
 *   high 16 bits is the length of the L3 header (the start of the data to
 *   be checksummed).
 *
 * o For the in-bound direction, it is only valid if the M_CSUM_DATA flag is
 *   set. In this case, an L4 checksum has been calculated by hardware and
 *   is stored in csum_data, but it is up to software to perform final
 *   verification.
 *
 * Note for in-bound TCP/UDP checksums: we expect the csum_data to NOT
 * be bit-wise inverted (the final step in the calculation of an IP
 * checksum) -- this is so we can accumulate the checksum for fragmented
 * packets during reassembly.
 *
 * Size ILP32: 40
 *       LP64: 56
 */
struct pkthdr {
	union {
		void *ctx;		/* for M_GETCTX/M_SETCTX */
		if_index_t index;	/* rcv interface index */
	} _rcvif;
#define rcvif_index		_rcvif.index
	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
	int len;			/* total packet length */
	int csum_flags;			/* checksum flags */
	uint32_t csum_data;		/* checksum data */
	u_int segsz;			/* segment size */
	uint16_t ether_vtag;		/* ethernet 802.1p+q vlan tag */
	uint16_t pkthdr_flags;		/* flags for pkthdr, see below */
#define PKTHDR_FLAG_IPSEC_SKIP_PFIL	0x0001 /* skip pfil_run_hooks() after ipsec decrypt */

	/*
	 * Following three fields are open-coded struct altq_pktattr
	 * to rearrange struct pkthdr fields flexibly.
	 */
	int pattr_af;		/* ALTQ: address family */
	void *pattr_class;	/* ALTQ: sched class set by classifier */
	void *pattr_hdr;	/* ALTQ: saved header position in mbuf */
};

/* Checksumming flags (csum_flags). */
#define M_CSUM_TCPv4		0x00000001	/* TCP header/payload */
#define M_CSUM_UDPv4		0x00000002	/* UDP header/payload */
#define M_CSUM_TCP_UDP_BAD	0x00000004	/* TCP/UDP checksum bad */
#define M_CSUM_DATA		0x00000008	/* consult csum_data */
#define M_CSUM_TCPv6		0x00000010	/* IPv6 TCP header/payload */
#define M_CSUM_UDPv6		0x00000020	/* IPv6 UDP header/payload */
#define M_CSUM_IPv4		0x00000040	/* IPv4 header */
#define M_CSUM_IPv4_BAD		0x00000080	/* IPv4 header checksum bad */
#define M_CSUM_TSOv4		0x00000100	/* TCPv4 segmentation offload */
#define M_CSUM_TSOv6		0x00000200	/* TCPv6 segmentation offload */

/* Checksum-assist quirks: keep separate from jump-table bits. */
#define M_CSUM_BLANK		0x40000000	/* csum is missing */
#define M_CSUM_NO_PSEUDOHDR	0x80000000	/* Rx csum_data does not include
						 * the UDP/TCP pseudo-hdr, and
						 * is not yet 1s-complemented.
						 */

#define M_CSUM_BITS \
    "\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
    "\11TSOv4\12TSOv6\37BLANK\40NO_PSEUDOHDR"

/*
 * Macros for manipulating csum_data on outgoing packets. These are
 * used to pass information down from the L4/L3 to the L2.
 *
 *	_IPHL:   Length of the IPv{4/6} header, plus the options; in other
 *		 words the offset of the UDP/TCP header in the packet.
 *	_OFFSET: Offset of the checksum field in the UDP/TCP header.
 *
 * NB: _SET expands its "x" argument twice; beware of side effects.
 */
#define M_CSUM_DATA_IPv4_IPHL(x)	((x) >> 16)
#define M_CSUM_DATA_IPv4_OFFSET(x)	((x) & 0xffff)
#define M_CSUM_DATA_IPv6_IPHL(x)	((x) >> 16)
#define M_CSUM_DATA_IPv6_OFFSET(x)	((x) & 0xffff)
#define M_CSUM_DATA_IPv6_SET(x, v)	(x) = ((x) & 0xffff) | ((v) << 16)

/*
 * Max # of pages we can attach to m_ext. This is carefully chosen
 * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
 */
#ifdef MIN_PAGE_SIZE
#define M_EXT_MAXPAGES		((65536 / MIN_PAGE_SIZE) + 1)
#endif

/*
 * Description of external storage mapped into mbuf, valid if M_EXT set.
 */
struct _m_ext_storage {
	unsigned int ext_refcnt;	/* # of mbufs referencing this ext */
	char *ext_buf;			/* start of buffer */
	void (*ext_free)		/* free routine if not the usual */
		(struct mbuf *, void *, size_t, void *);
	void *ext_arg;			/* argument for ext_free */
	size_t ext_size;		/* size of buffer, for ext_free */

	union {
		/* M_EXT_CLUSTER: physical address */
		paddr_t extun_paddr;
#ifdef M_EXT_MAXPAGES
		/* M_EXT_PAGES: pages */
		struct vm_page *extun_pgs[M_EXT_MAXPAGES];
#endif
	} ext_un;
#define ext_paddr	ext_un.extun_paddr
#define ext_pgs		ext_un.extun_pgs
};

struct _m_ext {
	struct mbuf *ext_ref;		/* "owning" mbuf holding the storage */
	struct _m_ext_storage ext_storage;
};

#define M_PADDR_INVALID		POOL_PADDR_INVALID

/*
 * Definition of "struct mbuf".
 * Don't change this without understanding how MHLEN/MLEN are defined.
 */
#define MBUF_DEFINE(name, mhlen, mlen)					\
	struct name {							\
		struct m_hdr m_hdr;					\
		union {							\
			struct {					\
				struct pkthdr MH_pkthdr;		\
				union {					\
					struct _m_ext MH_ext;		\
					char MH_databuf[(mhlen)];	\
				} MH_dat;				\
			} MH;						\
			char M_databuf[(mlen)];				\
		} M_dat;						\
	}
/* Convenience aliases into m_hdr / M_dat. */
#define m_next		m_hdr.mh_next
#define m_len		m_hdr.mh_len
#define m_data		m_hdr.mh_data
#define m_owner		m_hdr.mh_owner
#define m_type		m_hdr.mh_type
#define m_flags		m_hdr.mh_flags
#define m_nextpkt	m_hdr.mh_nextpkt
#define m_paddr		m_hdr.mh_paddr
#define m_pkthdr	M_dat.MH.MH_pkthdr
#define m_ext_storage	M_dat.MH.MH_dat.MH_ext.ext_storage
#define m_ext_ref	M_dat.MH.MH_dat.MH_ext.ext_ref
#define m_ext		m_ext_ref->m_ext_storage
#define m_pktdat	M_dat.MH.MH_dat.MH_databuf
#define m_dat		M_dat.M_databuf

/*
 * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
 * into account inter-structure padding.
 */
MBUF_DEFINE(_mbuf_dummy, 1, 1);

/* normal data len */
#define MLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_dat)))
/* data len w/pkthdr */
#define MHLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat)))

#define MINCLSIZE	(MHLEN+MLEN+1)	/* smallest amount to put in cluster */

/*
 * The *real* struct mbuf
 */
MBUF_DEFINE(mbuf, MHLEN, MLEN);

/* mbuf flags */
#define M_EXT		0x00000001	/* has associated external storage */
#define M_PKTHDR	0x00000002	/* start of record */
#define M_EOR		0x00000004	/* end of record */
#define M_PROTO1	0x00000008	/* protocol-specific */

/* mbuf pkthdr flags, also in m_flags */
#define M_AUTHIPHDR	0x00000010	/* authenticated (IPsec) */
#define M_DECRYPTED	0x00000020	/* decrypted (IPsec) */
#define M_LOOP		0x00000040	/* received on loopback */
#define M_BCAST		0x00000100	/* send/received as L2 broadcast */
#define M_MCAST		0x00000200	/* send/received as L2 multicast */
#define M_CANFASTFWD	0x00000400	/* packet can be fast-forwarded */
#define M_ANYCAST6	0x00000800	/* received as IPv6 anycast */

#define M_LINK0		0x00001000	/* link layer specific flag */
#define M_LINK1		0x00002000	/* link layer specific flag */
#define M_LINK2		0x00004000	/* link layer specific flag */
#define M_LINK3		0x00008000	/* link layer specific flag */
#define M_LINK4		0x00010000	/* link layer specific flag */
#define M_LINK5		0x00020000	/* link layer specific flag */
#define M_LINK6		0x00040000	/* link layer specific flag */
#define M_LINK7		0x00080000	/* link layer specific flag */

#define M_VLANTAG	0x00100000	/* ether_vtag is valid */

/* additional flags for M_EXT mbufs */
#define M_EXT_FLAGS	0xff000000
#define M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
#define M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
#define M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
#define M_EXT_RW	0x08000000	/* ext storage is writable */

/* for source-level compatibility */
#define M_NOTIFICATION	M_PROTO1

#define M_FLAGS_BITS \
    "\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10NONE" \
    "\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
    "\21LINK4\22LINK5\23LINK6\24LINK7" \
    "\25VLANTAG" \
    "\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"

/* flags copied when copying m_pkthdr */
#define M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD| \
    M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR|M_DECRYPTED|M_LOOP| \
    M_VLANTAG)

/* flag copied when shallow-copying external storage */
#define M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)

/* mbuf types */
#define MT_FREE		0	/* should be on free list */
#define MT_DATA		1	/* dynamic (data) allocation */
#define MT_HEADER	2	/* packet header */
#define MT_SONAME	3	/* socket name */
#define MT_SOOPTS	4	/* socket options */
#define MT_FTABLE	5	/* fragment reassembly
				   header */
#define MT_CONTROL	6	/* extra-data protocol message */
#define MT_OOBDATA	7	/* expedited data */

/* Printable names for the MT_* constants, indexed by type. */
#ifdef MBUFTYPES
const char * const mbuftypes[] = {
	"mbfree",
	"mbdata",
	"mbheader",
	"mbsoname",
	"mbsopts",
	"mbftable",
	"mbcontrol",
	"mboobdata",
};
#else
extern const char * const mbuftypes[];
#endif

/* flags to m_get/MGET */
#define	M_DONTWAIT	M_NOWAIT
#define	M_WAIT		M_WAITOK

#ifdef MBUFTRACE
/* Mbuf allocation tracing. */
void mowner_init_owner(struct mowner *, const char *, const char *);
void mowner_init(struct mbuf *, int);
void mowner_ref(struct mbuf *, int);
void m_claim(struct mbuf *, struct mowner *);
void mowner_revoke(struct mbuf *, bool, int);
void mowner_attach(struct mowner *);
void mowner_detach(struct mowner *);
void m_claimm(struct mbuf *, struct mowner *);
#else
/* Tracing disabled: all hooks compile away to nothing. */
#define mowner_init_owner(mo, n, d)	__nothing
#define mowner_init(m, type)		__nothing
#define mowner_ref(m, flags)		__nothing
#define mowner_revoke(m, all, flags)	__nothing
#define m_claim(m, mowner)		__nothing
#define mowner_attach(mo)		__nothing
#define mowner_detach(mo)		__nothing
#define m_claimm(m, mo)			__nothing
#endif

#define	MCLAIM(m, mo)		m_claim((m), (mo))
#define	MOWNER_ATTACH(mo)	mowner_attach(mo)
#define	MOWNER_DETACH(mo)	mowner_detach(mo)

/*
 * mbuf allocation/deallocation macros:
 *
 *	MGET(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain internal data.
 *
 *	MGETHDR(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain a packet header
 * and internal data.
 *
 * If 'how' is M_WAIT, these macros (and the corresponding functions)
 * are guaranteed to return successfully.
 */
#define MGET(m, how, type)	m = m_get((how), (type))
#define MGETHDR(m, how, type)	m = m_gethdr((how), (type))

#if defined(_KERNEL)

/* Make "m" the sole owner of its (about-to-be-attached) ext storage. */
#define MCLINITREFERENCE(m)					\
do {								\
	KASSERT(((m)->m_flags & M_EXT) == 0);			\
	(m)->m_ext_ref = (m);					\
	(m)->m_ext.ext_refcnt = 1;				\
} while (0)

/*
 * Macros for mbuf external storage.
 *
 * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
 * the flag M_EXT is set upon success.
 *
 * MEXTMALLOC allocates external storage and adds it to
 * a normal mbuf; the flag M_EXT is set upon success.
 *
 * MEXTADD adds pre-allocated external storage to
 * a normal mbuf; the flag M_EXT is set upon success.
 *
 * NB: these are macros; arguments such as "size" may be expanded
 * more than once.
 */

#define MCLGET(m, how)	m_clget((m), (how))

#define MEXTMALLOC(m, size, how)				\
do {								\
	(m)->m_ext_storage.ext_buf = malloc((size), 0, (how));	\
	if ((m)->m_ext_storage.ext_buf != NULL) {		\
		MCLINITREFERENCE(m);				\
		(m)->m_data = (m)->m_ext.ext_buf;		\
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
		    M_EXT|M_EXT_RW;				\
		(m)->m_ext.ext_size = (size);			\
		(m)->m_ext.ext_free = NULL;			\
		(m)->m_ext.ext_arg = NULL;			\
		mowner_ref((m), M_EXT);				\
	}							\
} while (0)

#define MEXTADD(m, buf, size, type, free, arg)			\
do {								\
	MCLINITREFERENCE(m);					\
	(m)->m_data = (m)->m_ext.ext_buf = (char *)(buf);	\
	(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT; \
	(m)->m_ext.ext_size = (size);				\
	(m)->m_ext.ext_free = (free);				\
	(m)->m_ext.ext_arg = (arg);				\
	mowner_ref((m), M_EXT);					\
} while (0)

/* Start of the data buffer backing "m", whatever its storage kind. */
#define M_BUFADDR(m)						\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :		\
	    ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)

/* Total size of that data buffer. */
#define M_BUFSIZE(m)						\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :		\
	    ((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN)

#define MRESETDATA(m)	(m)->m_data = M_BUFADDR(m)

/*
 * Compute the offset of the beginning of the data buffer of a non-ext
 * mbuf.
 */
#define M_BUFOFFSET(m)						\
	(((m)->m_flags & M_PKTHDR) ?				\
	    offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))

/*
 * Determine if an mbuf's data area is read-only. This is true
 * if external storage is read-only mapped, or not marked as R/W,
 * or referenced by more than one mbuf.
 */
#define M_READONLY(m)						\
	(((m)->m_flags & M_EXT) != 0 &&				\
	    (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW || \
	    (m)->m_ext.ext_refcnt > 1))

#define M_UNWRITABLE(__m, __len)				\
	((__m)->m_len < (__len) || M_READONLY((__m)))

/*
 * Determine if an mbuf's data area is read-only at the MMU.
 */
#define M_ROMAP(m)						\
	(((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))

/*
 * Compute the amount of space available before the current start of
 * data in an mbuf.
 */
#define M_LEADINGSPACE(m)					\
	(M_READONLY((m)) ? 0 : ((m)->m_data - M_BUFADDR(m)))

/*
 * Compute the amount of space available
 * after the end of data in an mbuf.
 */
#define _M_TRAILINGSPACE(m)					\
	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
	    ((m)->m_data + (m)->m_len) :			\
	    &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))

#define M_TRAILINGSPACE(m)					\
	(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))

/*
 * Arrange to prepend space of size plen to mbuf m.
 * If a new mbuf must be allocated, how specifies whether to wait.
 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
 * is freed and m is set to NULL.
 */
#define M_PREPEND(m, plen, how)					\
do {								\
	if (M_LEADINGSPACE(m) >= (plen)) {			\
		(m)->m_data -= (plen);				\
		(m)->m_len += (plen);				\
	} else							\
		(m) = m_prepend((m), (plen), (how));		\
	if ((m) && (m)->m_flags & M_PKTHDR)			\
		(m)->m_pkthdr.len += (plen);			\
} while (0)

/* change mbuf to new type */
#define MCHTYPE(m, t)						\
do {								\
	KASSERT((t) != MT_FREE);				\
	mbstat_type_add((m)->m_type, -1);			\
	mbstat_type_add(t, 1);					\
	(m)->m_type = t;					\
} while (0)

#ifdef DIAGNOSTIC
#define M_VERIFY_PACKET(m)	m_verify_packet(m)
#else
#define M_VERIFY_PACKET(m)	__nothing
#endif

/* The "copy all" special length. */
#define M_COPYALL	-1

/*
 * Allow drivers and/or protocols to store private context information.
 * Shares storage with rcvif_index (see struct pkthdr), so it is only
 * usable while the mbuf is not carrying a receive interface.
 */
#define M_GETCTX(m, t)		((t)(m)->m_pkthdr._rcvif.ctx)
#define M_SETCTX(m, c)		((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
#define M_CLEARCTX(m)		M_SETCTX((m), NULL)

/*
 * M_REGION_GET ensures that the "len"-sized region of type "typ" starting
 * from "off" within "m" is located in a single mbuf, contiguously.
 *
 * The pointer to the region will be returned to pointer variable "val".
 * On failure "val" and "m" are both set to NULL (the chain is freed by
 * m_pulldown).
 */
#define M_REGION_GET(val, typ, m, off, len)			\
do {								\
	struct mbuf *_t;					\
	int _tmp;						\
	if ((m)->m_len >= (off) + (len))			\
		(val) = (typ)(mtod((m), char *) + (off));	\
	else {							\
		_t = m_pulldown((m), (off), (len), &_tmp);	\
		if (_t) {					\
			if (_t->m_len < _tmp + (len))		\
				panic("m_pulldown malfunction");	\
			(val) = (typ)(mtod(_t, char *) + _tmp);	\
		} else {					\
			(val) = (typ)NULL;			\
			(m) = NULL;				\
		}						\
	}							\
} while (0)

#endif /* defined(_KERNEL) */

/*
 * Simple mbuf queueing system
 *
 * this is basically a SIMPLEQ adapted to mbuf use (ie using
 * m_nextpkt instead of field.sqe_next).
 *
 * m_next is ignored, so queueing chains of mbufs is possible
 */
#define MBUFQ_HEAD(name)					\
struct name {							\
	struct mbuf *mq_first;					\
	struct mbuf **mq_last;					\
}

#define MBUFQ_INIT(q)		do {				\
	(q)->mq_first = NULL;					\
	(q)->mq_last = &(q)->mq_first;				\
} while (0)

#define MBUFQ_ENQUEUE(q, m)	do {				\
	(m)->m_nextpkt = NULL;					\
	*(q)->mq_last = (m);					\
	(q)->mq_last = &(m)->m_nextpkt;				\
} while (0)

#define MBUFQ_PREPEND(q, m)	do {				\
	if (((m)->m_nextpkt = (q)->mq_first) == NULL)		\
		(q)->mq_last = &(m)->m_nextpkt;			\
	(q)->mq_first = (m);					\
} while (0)

#define MBUFQ_DEQUEUE(q, m)	do {				\
	if (((m) = (q)->mq_first) != NULL) {			\
		if (((q)->mq_first = (m)->m_nextpkt) == NULL)	\
			(q)->mq_last = &(q)->mq_first;		\
		else						\
			(m)->m_nextpkt = NULL;			\
	}							\
} while (0)

#define MBUFQ_DRAIN(q)		do {				\
	struct mbuf *__m0;					\
	while ((__m0 = (q)->mq_first) != NULL) {		\
		(q)->mq_first = __m0->m_nextpkt;		\
		m_freem(__m0);					\
	}							\
	(q)->mq_last = &(q)->mq_first;				\
} while (0)

#define MBUFQ_FIRST(q)		((q)->mq_first)
#define MBUFQ_NEXT(m)		((m)->m_nextpkt)
#define MBUFQ_LAST(q)		(*(q)->mq_last)

/*
 * Mbuf statistics.
 * For statistics related to mbuf and cluster allocations, see also the
 * pool headers (mb_cache and mcl_cache).
 */
struct mbstat {
	u_long	_m_spare;	/* formerly m_mbufs */
	u_long	_m_spare1;	/* formerly m_clusters */
	u_long	_m_spare2;	/* spare field */
	u_long	_m_spare3;	/* formerly m_clfree - free clusters */
	u_long	m_drops;	/* times failed to find space */
	u_long	m_wait;		/* times waited for space */
	u_long	m_drain;	/* times drained protocols for space */
	u_short	m_mtypes[256];	/* type specific mbuf allocations */
};

struct mbstat_cpu {
	u_int	m_mtypes[256];	/* type specific mbuf allocations */
};

/*
 * Mbuf sysctl variables.
 */
#define MBUF_MSIZE		1	/* int: mbuf base size */
#define MBUF_MCLBYTES		2	/* int: mbuf cluster size */
#define MBUF_NMBCLUSTERS	3	/* int: limit on the # of clusters */
#define MBUF_MBLOWAT		4	/* int: mbuf low water mark */
#define MBUF_MCLLOWAT		5	/* int: mbuf cluster low water mark */
#define MBUF_STATS		6	/* struct: mbstat */
#define MBUF_MOWNERS		7	/* struct: m_owner[] */
#define MBUF_NMBCLUSTERS_LIMIT	8	/* int: limit of nmbclusters */

#ifdef _KERNEL
extern struct mbstat mbstat;
extern int nmbclusters;		/* limit on the # of clusters */
extern int mblowat;		/* mbuf low water mark */
extern int mcllowat;		/* mbuf cluster low water mark */
extern int max_linkhdr;		/* largest link-level header */
extern int max_protohdr;	/* largest protocol header */
extern int max_hdr;		/* largest link+protocol header */
extern int max_datalen;		/* MHLEN - max_hdr */
extern const int msize;		/* mbuf base size */
extern const int mclbytes;	/* mbuf cluster size */
extern pool_cache_t mb_cache;
#ifdef MBUFTRACE
LIST_HEAD(mownerhead, mowner);
extern struct mownerhead mowners;
extern struct mowner unknown_mowners[];
extern struct mowner revoked_mowner;
#endif

MALLOC_DECLARE(M_MBUF);
MALLOC_DECLARE(M_SONAME);

struct mbuf *m_copym(struct mbuf *, int, int, int);
struct mbuf *m_copypacket(struct mbuf *, int);
struct mbuf *m_devget(char *, int, int, struct ifnet *);
struct mbuf *m_dup(struct mbuf *, int, int, int);
struct mbuf *m_get(int, int);
struct mbuf *m_gethdr(int, int);
struct mbuf *m_get_n(int, int, size_t, size_t);
struct mbuf *m_gethdr_n(int, int, size_t, size_t);
struct mbuf *m_prepend(struct mbuf *, int, int);
struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
struct mbuf *m_pullup(struct mbuf *, int);
struct mbuf *m_copyup(struct mbuf *, int, int);
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_getptr(struct mbuf *, int, int *);
void m_adj(struct mbuf *, int);
struct mbuf *m_defrag(struct mbuf *, int);
int m_apply(struct mbuf *, int, int,
		int (*)(void *, void *, unsigned int), void *);
void m_cat(struct mbuf *, struct mbuf *);
void m_clget(struct mbuf *, int);
void m_copyback(struct mbuf *, int, int, const void *);
struct mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
int m_makewritable(struct mbuf **, int, int, int);
struct mbuf *m_getcl(int, int, int);
void m_copydata(struct mbuf *, int, int, void *);
void m_verify_packet(struct mbuf *);
struct mbuf *m_free(struct mbuf *);
void m_freem(struct mbuf *);
void mbinit(void);
void m_remove_pkthdr(struct mbuf *);
void m_copy_pkthdr(struct mbuf *, struct mbuf *);
void m_move_pkthdr(struct mbuf *, struct mbuf *);
void m_align(struct mbuf *, int);

bool m_ensure_contig(struct mbuf **, int);
struct mbuf *m_add(struct mbuf *, struct mbuf *);

/* Inline routines. */
static __inline u_int m_length(const struct mbuf *) __unused;

/* Statistics */
void mbstat_type_add(int, int);

/* Packet tag routines */
struct m_tag *m_tag_get(int, int, int);
void m_tag_free(struct m_tag *);
void m_tag_prepend(struct mbuf *, struct m_tag *);
void m_tag_unlink(struct mbuf *, struct m_tag *);
void m_tag_delete(struct mbuf *, struct m_tag *);
void m_tag_delete_chain(struct mbuf *);
struct m_tag *m_tag_find(const struct mbuf *, int);
struct m_tag *m_tag_copy(struct m_tag *);
int m_tag_copy_chain(struct mbuf *, struct mbuf *);

/* Packet tag types */
#define PACKET_TAG_NONE			0  /* Nothing */
#define PACKET_TAG_SO			4  /* sending socket pointer */
#define PACKET_TAG_NPF			10 /* packet filter */
#define PACKET_TAG_PF			11 /* packet filter */
#define PACKET_TAG_ALTQ_QID		12 /* ALTQ queue id */
#define PACKET_TAG_IPSEC_OUT_DONE	18
#define PACKET_TAG_IPSEC_NAT_T_PORTS	25 /* two uint16_t */
#define PACKET_TAG_INET6		26 /* IPv6
					      info */
#define PACKET_TAG_TUNNEL_INFO		28 /* tunnel identification and
					    * protocol callback, for loop
					    * detection/recovery
					    */
#define PACKET_TAG_MPLS			29 /* Indicate it's for MPLS */
#define PACKET_TAG_SRCROUTE		30 /* IPv4 source routing */
#define PACKET_TAG_ETHERNET_SRC		31 /* Ethernet source address */

/*
 * Return the number of bytes in the mbuf chain, m.
 * Uses the cached pkthdr.len when m is a packet header; otherwise
 * walks the chain summing m_len.
 */
static __inline u_int
m_length(const struct mbuf *m)
{
	const struct mbuf *m0;
	u_int pktlen;

	if ((m->m_flags & M_PKTHDR) != 0)
		return m->m_pkthdr.len;

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;
	return pktlen;
}

/* Record the receive interface of m by index. */
static __inline void
m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = ifp->if_index;
}

/* Clear the receive interface of m. */
static __inline void
m_reset_rcvif(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	/* A caller may expect whole _rcvif union is zeroed */
	/* m->m_pkthdr.rcvif_index = 0; */
	m->m_pkthdr._rcvif.ctx = NULL;
}

/* Copy the receive interface index from n to m. */
static __inline void
m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
{
	KASSERT(m->m_flags & M_PKTHDR);
	KASSERT(n->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
}

#define M_GET_ALIGNED_HDR(m, type, linkhdr) \
    m_get_aligned_hdr((m), __alignof(type) - 1, sizeof(type), (linkhdr))

/*
 * Ensure that the first hlen bytes of *m are contiguous and aligned to
 * (mask + 1); may replace *m.  Returns non-zero (and sets *m to NULL)
 * on allocation failure.
 */
static __inline int
m_get_aligned_hdr(struct mbuf **m, int mask, size_t hlen, bool linkhdr)
{
#ifndef __NO_STRICT_ALIGNMENT
	if (((uintptr_t)mtod(*m, void *) & mask) != 0)
		*m = m_copyup(*m, hlen,
		    linkhdr ? (max_linkhdr + mask) & ~mask : 0);
	else
#endif
	if (__predict_false((size_t)(*m)->m_len < hlen))
		*m = m_pullup(*m, hlen);

	return *m == NULL;
}

void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
    __printflike(1, 2));

/* from uipc_mbufdebug.c */
void m_examine(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));

/* parsers for m_examine() */
void m_examine_ether(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_pppoe(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ppp(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_arp(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip6(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp6(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_tcp(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_udp(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));
void m_examine_hex(const struct mbuf *, int, const char *,
		void (*)(const char *, ...) __printflike(1, 2));

/*
 * Get rcvif of a mbuf.
 *
 * The caller must call m_put_rcvif after using rcvif if the returned rcvif
 * isn't NULL. If the returned rcvif is NULL, the caller doesn't need to call
 * m_put_rcvif (although calling it is safe).
 *
 * The caller must not block or sleep while using rcvif. The API ensures a
 * returned rcvif isn't freed until m_put_rcvif is called.
 */
static __inline struct ifnet *
m_get_rcvif(const struct mbuf *m, int *s)
{
	struct ifnet *ifp;

	KASSERT(m->m_flags & M_PKTHDR);
	*s = pserialize_read_enter();
	ifp = if_byindex(m->m_pkthdr.rcvif_index);
	/* On lookup failure leave no read section open for the caller. */
	if (__predict_false(ifp == NULL))
		pserialize_read_exit(*s);

	return ifp;
}

static __inline void
m_put_rcvif(struct ifnet *ifp, int *s)
{

	if (ifp == NULL)
		return;
	pserialize_read_exit(*s);
}

/*
 * Get rcvif of a mbuf.
 *
 * The caller must call m_put_rcvif_psref after using rcvif. The API ensures
 * the acquired rcvif isn't freed until m_put_rcvif_psref is called.
 */
static __inline struct ifnet *
m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
{
	KASSERT(m->m_flags & M_PKTHDR);
	return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
}

static __inline void
m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
{

	if (ifp == NULL)
		return;
	if_put(ifp, psref);
}

/*
 * Get rcvif of a mbuf.
 *
 * This is NOT an MP-safe API and shouldn't be used where MP-safety is
 * required.
 */
static __inline struct ifnet *
m_get_rcvif_NOMPSAFE(const struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	return if_byindex(m->m_pkthdr.rcvif_index);
}

#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */