1/*	$NetBSD: mbuf.h,v 1.240 2024/05/12 10:34:56 rillig Exp $	*/
2
3/*
4 * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center and Matt Thomas of 3am Software Foundry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
62 */
63
64#ifndef _SYS_MBUF_H_
65#define _SYS_MBUF_H_
66
67#ifdef _KERNEL_OPT
68#include "opt_mbuftrace.h"
69#endif
70
71#ifndef M_WAITOK
72#include <sys/malloc.h>
73#endif
74#include <sys/pool.h>
75#include <sys/queue.h>
76#if defined(_KERNEL)
77#include <sys/percpu_types.h>
78#include <sys/socket.h>	/* for AF_UNSPEC */
79#include <sys/psref.h>
80#endif /* defined(_KERNEL) */
81
82/* For offsetof() */
83#if defined(_KERNEL) || defined(_STANDALONE)
84#include <sys/systm.h>
85#else
86#include <stddef.h>
87#endif
88
89#include <uvm/uvm_param.h>	/* for MIN_PAGE_SIZE */
90
91#include <net/if.h>
92
93/*
94 * Mbufs are of a single size, MSIZE (machine/param.h), which
95 * includes overhead.  An mbuf may add a single "mbuf cluster" of size
96 * MCLBYTES (also in machine/param.h), which has no additional overhead
97 * and is used instead of the internal data area; this is done when
98 * at least MINCLSIZE of data must be stored.
99 */
100
/*
 * Packet tag: metadata attached to a packet-header mbuf via the
 * pkthdr.tags list.  The tag's payload (m_tag_len bytes) is stored
 * with the tag — presumably immediately after this header, as laid
 * out by m_tag_get(); confirm against uipc_mbuf2.c.
 */
struct m_tag {
	SLIST_ENTRY(m_tag)	m_tag_link;	/* linkage on pkthdr.tags list */
	uint16_t		m_tag_id;	/* tag ID (PACKET_TAG_*) */
	uint16_t		m_tag_len;	/* length of payload data */
};
107
/*
 * mbuf ownership structure (MBUFTRACE): identifies who allocated an
 * mbuf so claims/releases can be accounted per owner.
 */
struct mowner {
	char mo_name[16];		/* owner name (e.g. "fxp0") */
	char mo_descr[16];		/* owner description (e.g. "input") */
	LIST_ENTRY(mowner) mo_link;	/* linkage on the global mowners list */
	struct percpu *mo_counters;	/* per-CPU counters (struct mowner_counter) */
};

/* Static initializer for a struct mowner with name x and description y. */
#define MOWNER_INIT(x, y) { .mo_name = x, .mo_descr = y }
117
/* Indices into mowner_counter.mc_counter[] / mowner_user.mo_counter[]. */
enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS,		/* # of small mbuf claimed */
	MOWNER_COUNTER_RELEASES,	/* # of small mbuf released */
	MOWNER_COUNTER_CLUSTER_CLAIMS,	/* # of cluster mbuf claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES,/* # of cluster mbuf released */
	MOWNER_COUNTER_EXT_CLAIMS,	/* # of M_EXT mbuf claimed */
	MOWNER_COUNTER_EXT_RELEASES,	/* # of M_EXT mbuf released */

	MOWNER_COUNTER_NCOUNTERS,	/* number of counters; keep last */
};

#if defined(_KERNEL)
/* One set of ownership counters; referenced per-CPU via mowner.mo_counters. */
struct mowner_counter {
	u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif

/* userland-exported version of struct mowner */
struct mowner_user {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* unused padding; for compatibility */
	u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
};
142
143/*
144 * Macros for type conversion
145 * mtod(m,t) -	convert mbuf pointer to data pointer of correct type
146 */
147#define mtod(m, t)	((t)((m)->m_data))
148
149/* header at beginning of each mbuf */
150struct m_hdr {
151	struct	mbuf *mh_next;		/* next buffer in chain */
152	struct	mbuf *mh_nextpkt;	/* next chain in queue/record */
153	char	*mh_data;		/* location of data */
154	struct	mowner *mh_owner;	/* mbuf owner */
155	int	mh_len;			/* amount of data in this mbuf */
156	int	mh_flags;		/* flags; see below */
157	paddr_t	mh_paddr;		/* physical address of mbuf */
158	short	mh_type;		/* type of data in this mbuf */
159};
160
161/*
162 * record/packet header in first mbuf of chain; valid if M_PKTHDR set
163 *
164 * A note about csum_data:
165 *
166 *  o For the out-bound direction, the low 16 bits indicates the offset after
167 *    the L4 header where the final L4 checksum value is to be stored and the
168 *    high 16 bits is the length of the L3 header (the start of the data to
169 *    be checksummed).
170 *
171 *  o For the in-bound direction, it is only valid if the M_CSUM_DATA flag is
172 *    set. In this case, an L4 checksum has been calculated by hardware and
173 *    is stored in csum_data, but it is up to software to perform final
174 *    verification.
175 *
176 * Note for in-bound TCP/UDP checksums: we expect the csum_data to NOT
177 * be bit-wise inverted (the final step in the calculation of an IP
178 * checksum) -- this is so we can accumulate the checksum for fragmented
179 * packets during reassembly.
180 *
181 * Size ILP32: 40
182 *       LP64: 56
183 */
184struct pkthdr {
185	union {
186		void		*ctx;		/* for M_GETCTX/M_SETCTX */
187		if_index_t	index;		/* rcv interface index */
188	} _rcvif;
189#define rcvif_index		_rcvif.index
190	SLIST_HEAD(packet_tags, m_tag) tags;	/* list of packet tags */
191	int		len;			/* total packet length */
192	int		csum_flags;		/* checksum flags */
193	uint32_t	csum_data;		/* checksum data */
194	u_int		segsz;			/* segment size */
195	uint16_t	ether_vtag;		/* ethernet 802.1p+q vlan tag */
196	uint16_t	pkthdr_flags;		/* flags for pkthdr, see blow */
197#define PKTHDR_FLAG_IPSEC_SKIP_PFIL	0x0001	/* skip pfil_run_hooks() after ipsec decrypt */
198
199	/*
200	 * Following three fields are open-coded struct altq_pktattr
201	 * to rearrange struct pkthdr fields flexibly.
202	 */
203	int	pattr_af;		/* ALTQ: address family */
204	void	*pattr_class;		/* ALTQ: sched class set by classifier */
205	void	*pattr_hdr;		/* ALTQ: saved header position in mbuf */
206};
207
/* Checksumming flags (csum_flags). */
#define M_CSUM_TCPv4		0x00000001	/* TCP header/payload */
#define M_CSUM_UDPv4		0x00000002	/* UDP header/payload */
#define M_CSUM_TCP_UDP_BAD	0x00000004	/* TCP/UDP checksum bad */
#define M_CSUM_DATA		0x00000008	/* consult csum_data */
#define M_CSUM_TCPv6		0x00000010	/* IPv6 TCP header/payload */
#define M_CSUM_UDPv6		0x00000020	/* IPv6 UDP header/payload */
#define M_CSUM_IPv4		0x00000040	/* IPv4 header */
#define M_CSUM_IPv4_BAD		0x00000080	/* IPv4 header checksum bad */
#define M_CSUM_TSOv4		0x00000100	/* TCPv4 segmentation offload */
#define M_CSUM_TSOv6		0x00000200	/* TCPv6 segmentation offload */

/* Checksum-assist quirks: keep separate from jump-table bits. */
#define M_CSUM_BLANK		0x40000000	/* csum is missing */
#define M_CSUM_NO_PSEUDOHDR	0x80000000	/* Rx csum_data does not include
						 * the UDP/TCP pseudo-hdr, and
						 * is not yet 1s-complemented.
						 */

/* Bit-name format string for decoding csum_flags (snprintb(3) style). */
#define M_CSUM_BITS \
    "\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
    "\11TSOv4\12TSOv6\37BLANK\40NO_PSEUDOHDR"

/*
 * Macros for manipulating csum_data on outgoing packets. These are
 * used to pass information down from the L4/L3 to the L2.
 *
 *   _IPHL:   Length of the IPv{4/6} header, plus the options; in other
 *            words the offset of the UDP/TCP header in the packet.
 *   _OFFSET: Offset of the checksum field in the UDP/TCP header.
 */
#define M_CSUM_DATA_IPv4_IPHL(x)	((x) >> 16)
#define M_CSUM_DATA_IPv4_OFFSET(x)	((x) & 0xffff)
#define M_CSUM_DATA_IPv6_IPHL(x)	((x) >> 16)
#define M_CSUM_DATA_IPv6_OFFSET(x)	((x) & 0xffff)
/* NOTE: expands to a bare assignment; use only as a full statement. */
#define M_CSUM_DATA_IPv6_SET(x, v)	(x) = ((x) & 0xffff) | ((v) << 16)
244
245/*
246 * Max # of pages we can attach to m_ext.  This is carefully chosen
247 * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
248 */
249#ifdef MIN_PAGE_SIZE
250#define M_EXT_MAXPAGES		((65536 / MIN_PAGE_SIZE) + 1)
251#endif
252
253/*
254 * Description of external storage mapped into mbuf, valid if M_EXT set.
255 */
256struct _m_ext_storage {
257	unsigned int ext_refcnt;
258	char *ext_buf;			/* start of buffer */
259	void (*ext_free)		/* free routine if not the usual */
260		(struct mbuf *, void *, size_t, void *);
261	void *ext_arg;			/* argument for ext_free */
262	size_t ext_size;		/* size of buffer, for ext_free */
263
264	union {
265		/* M_EXT_CLUSTER: physical address */
266		paddr_t extun_paddr;
267#ifdef M_EXT_MAXPAGES
268		/* M_EXT_PAGES: pages */
269		struct vm_page *extun_pgs[M_EXT_MAXPAGES];
270#endif
271	} ext_un;
272#define ext_paddr	ext_un.extun_paddr
273#define ext_pgs		ext_un.extun_pgs
274};
275
276struct _m_ext {
277	struct mbuf *ext_ref;
278	struct _m_ext_storage ext_storage;
279};
280
281#define M_PADDR_INVALID		POOL_PADDR_INVALID
282
283/*
284 * Definition of "struct mbuf".
285 * Don't change this without understanding how MHLEN/MLEN are defined.
286 */
287#define MBUF_DEFINE(name, mhlen, mlen)					\
288	struct name {							\
289		struct m_hdr m_hdr;					\
290		union {							\
291			struct {					\
292				struct pkthdr MH_pkthdr;		\
293				union {					\
294					struct _m_ext MH_ext;		\
295					char MH_databuf[(mhlen)];	\
296				} MH_dat;				\
297			} MH;						\
298			char M_databuf[(mlen)];				\
299		} M_dat;						\
300	}
301#define m_next		m_hdr.mh_next
302#define m_len		m_hdr.mh_len
303#define m_data		m_hdr.mh_data
304#define m_owner		m_hdr.mh_owner
305#define m_type		m_hdr.mh_type
306#define m_flags		m_hdr.mh_flags
307#define m_nextpkt	m_hdr.mh_nextpkt
308#define m_paddr		m_hdr.mh_paddr
309#define m_pkthdr	M_dat.MH.MH_pkthdr
310#define m_ext_storage	M_dat.MH.MH_dat.MH_ext.ext_storage
311#define m_ext_ref	M_dat.MH.MH_dat.MH_ext.ext_ref
312#define m_ext		m_ext_ref->m_ext_storage
313#define m_pktdat	M_dat.MH.MH_dat.MH_databuf
314#define m_dat		M_dat.M_databuf
315
316/*
317 * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
318 * into account inter-structure padding.
319 */
320MBUF_DEFINE(_mbuf_dummy, 1, 1);
321
322/* normal data len */
323#define MLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_dat)))
324/* data len w/pkthdr */
325#define MHLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat)))
326
327#define MINCLSIZE	(MHLEN+MLEN+1)	/* smallest amount to put in cluster */
328
329/*
330 * The *real* struct mbuf
331 */
332MBUF_DEFINE(mbuf, MHLEN, MLEN);
333
/* mbuf flags */
#define M_EXT		0x00000001	/* has associated external storage */
#define M_PKTHDR	0x00000002	/* start of record */
#define M_EOR		0x00000004	/* end of record */
#define M_PROTO1	0x00000008	/* protocol-specific */

/* mbuf pkthdr flags, also in m_flags */
#define M_AUTHIPHDR	0x00000010	/* authenticated (IPsec) */
#define M_DECRYPTED	0x00000020	/* decrypted (IPsec) */
#define M_LOOP		0x00000040	/* received on loopback */
/* NOTE: 0x00000080 is unused (named "NONE" in M_FLAGS_BITS below). */
#define M_BCAST		0x00000100	/* send/received as L2 broadcast */
#define M_MCAST		0x00000200	/* send/received as L2 multicast */
#define M_CANFASTFWD	0x00000400	/* packet can be fast-forwarded */
#define M_ANYCAST6	0x00000800	/* received as IPv6 anycast */

#define M_LINK0		0x00001000	/* link layer specific flag */
#define M_LINK1		0x00002000	/* link layer specific flag */
#define M_LINK2		0x00004000	/* link layer specific flag */
#define M_LINK3		0x00008000	/* link layer specific flag */
#define M_LINK4		0x00010000	/* link layer specific flag */
#define M_LINK5		0x00020000	/* link layer specific flag */
#define M_LINK6		0x00040000	/* link layer specific flag */
#define M_LINK7		0x00080000	/* link layer specific flag */

#define M_VLANTAG	0x00100000	/* ether_vtag is valid */

/* additional flags for M_EXT mbufs */
#define M_EXT_FLAGS	0xff000000
#define M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
#define M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
#define M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
#define M_EXT_RW	0x08000000	/* ext storage is writable */

/* for source-level compatibility */
#define M_NOTIFICATION	M_PROTO1

/* Bit-name format string for decoding m_flags (snprintb(3) style). */
#define M_FLAGS_BITS \
    "\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10NONE" \
    "\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
    "\21LINK4\22LINK5\23LINK6\24LINK7" \
    "\25VLANTAG" \
    "\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"

/* flags copied when copying m_pkthdr */
#define M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD| \
    M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR|M_DECRYPTED|M_LOOP| \
    M_VLANTAG)

/* flag copied when shallow-copying external storage */
#define M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)
384
/* mbuf types (values of m_type) */
#define MT_FREE		0	/* should be on free list */
#define MT_DATA		1	/* dynamic (data) allocation */
#define MT_HEADER	2	/* packet header */
#define MT_SONAME	3	/* socket name */
#define MT_SOOPTS	4	/* socket options */
#define MT_FTABLE	5	/* fragment reassembly header */
#define MT_CONTROL	6	/* extra-data protocol message */
#define MT_OOBDATA	7	/* expedited data  */

/*
 * Human-readable names for the MT_* values, indexed by type.  Exactly
 * one translation unit defines MBUFTYPES before including this header
 * to instantiate the table; everyone else gets the extern declaration.
 */
#ifdef MBUFTYPES
const char * const mbuftypes[] = {
	"mbfree",
	"mbdata",
	"mbheader",
	"mbsoname",
	"mbsopts",
	"mbftable",
	"mbcontrol",
	"mboobdata",
};
#else
extern const char * const mbuftypes[];
#endif

/* flags to m_get/MGET: whether the allocation may sleep */
#define M_DONTWAIT	M_NOWAIT
#define M_WAIT		M_WAITOK
413
#ifdef MBUFTRACE
/*
 * Mbuf allocation tracing (option MBUFTRACE): attribute mbufs to
 * owners (struct mowner) and account claims/releases per owner.
 */
void mowner_init_owner(struct mowner *, const char *, const char *);
void mowner_init(struct mbuf *, int);
void mowner_ref(struct mbuf *, int);
void m_claim(struct mbuf *, struct mowner *);
void mowner_revoke(struct mbuf *, bool, int);
void mowner_attach(struct mowner *);
void mowner_detach(struct mowner *);
void m_claimm(struct mbuf *, struct mowner *);
#else
/* Without MBUFTRACE the tracing hooks compile away to nothing. */
#define mowner_init_owner(mo, n, d)	__nothing
#define mowner_init(m, type)		__nothing
#define mowner_ref(m, flags)		__nothing
#define mowner_revoke(m, all, flags)	__nothing
#define m_claim(m, mowner)		__nothing
#define mowner_attach(mo)		__nothing
#define mowner_detach(mo)		__nothing
#define m_claimm(m, mo)			__nothing
#endif

/* Historical upper-case wrappers for the tracing hooks. */
#define MCLAIM(m, mo)		m_claim((m), (mo))
#define MOWNER_ATTACH(mo)	mowner_attach(mo)
#define MOWNER_DETACH(mo)	mowner_detach(mo)

/*
 * mbuf allocation/deallocation macros:
 *
 *	MGET(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain internal data.
 *
 *	MGETHDR(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain a packet header
 * and internal data.
 *
 * If 'how' is M_WAIT, these macros (and the corresponding functions)
 * are guaranteed to return successfully.
 */
#define MGET(m, how, type)	m = m_get((how), (type))
#define MGETHDR(m, how, type)	m = m_gethdr((how), (type))

#if defined(_KERNEL)

/*
 * Make mbuf m the sole reference to its (about to be attached)
 * external storage.  The mbuf must not already have M_EXT set.
 */
#define MCLINITREFERENCE(m)						\
do {									\
	KASSERT(((m)->m_flags & M_EXT) == 0);				\
	(m)->m_ext_ref = (m);						\
	(m)->m_ext.ext_refcnt = 1;					\
} while (0)
463
464/*
465 * Macros for mbuf external storage.
466 *
467 * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
468 * the flag M_EXT is set upon success.
469 *
470 * MEXTMALLOC allocates external storage and adds it to
471 * a normal mbuf; the flag M_EXT is set upon success.
472 *
473 * MEXTADD adds pre-allocated external storage to
474 * a normal mbuf; the flag M_EXT is set upon success.
475 */
476
477#define MCLGET(m, how)	m_clget((m), (how))
478
479#define MEXTMALLOC(m, size, how)					\
480do {									\
481	(m)->m_ext_storage.ext_buf = malloc((size), 0, (how));		\
482	if ((m)->m_ext_storage.ext_buf != NULL) {			\
483		MCLINITREFERENCE(m);					\
484		(m)->m_data = (m)->m_ext.ext_buf;			\
485		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) |	\
486				M_EXT|M_EXT_RW;				\
487		(m)->m_ext.ext_size = (size);				\
488		(m)->m_ext.ext_free = NULL;				\
489		(m)->m_ext.ext_arg = NULL;				\
490		mowner_ref((m), M_EXT);					\
491	}								\
492} while (0)
493
494#define MEXTADD(m, buf, size, type, free, arg)				\
495do {									\
496	MCLINITREFERENCE(m);						\
497	(m)->m_data = (m)->m_ext.ext_buf = (char *)(buf);		\
498	(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT;	\
499	(m)->m_ext.ext_size = (size);					\
500	(m)->m_ext.ext_free = (free);					\
501	(m)->m_ext.ext_arg = (arg);					\
502	mowner_ref((m), M_EXT);						\
503} while (0)
504
505#define M_BUFADDR(m)							\
506	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
507	    ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)
508
509#define M_BUFSIZE(m)							\
510	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :			\
511	    ((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN)
512
513#define MRESETDATA(m)	(m)->m_data = M_BUFADDR(m)
514
515/*
516 * Compute the offset of the beginning of the data buffer of a non-ext
517 * mbuf.
518 */
519#define M_BUFOFFSET(m)							\
520	(((m)->m_flags & M_PKTHDR) ?					\
521	 offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))
522
523/*
524 * Determine if an mbuf's data area is read-only.  This is true
525 * if external storage is read-only mapped, or not marked as R/W,
526 * or referenced by more than one mbuf.
527 */
528#define M_READONLY(m)							\
529	(((m)->m_flags & M_EXT) != 0 &&					\
530	  (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW ||	\
531	  (m)->m_ext.ext_refcnt > 1))
532
533#define M_UNWRITABLE(__m, __len)					\
534	((__m)->m_len < (__len) || M_READONLY((__m)))
535
536/*
537 * Determine if an mbuf's data area is read-only at the MMU.
538 */
539#define M_ROMAP(m)							\
540	(((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))
541
542/*
543 * Compute the amount of space available before the current start of
544 * data in an mbuf.
545 */
546#define M_LEADINGSPACE(m)						\
547	(M_READONLY((m)) ? 0 : ((m)->m_data - M_BUFADDR(m)))
548
549/*
550 * Compute the amount of space available
551 * after the end of data in an mbuf.
552 */
553#define _M_TRAILINGSPACE(m)						\
554	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
555	 ((m)->m_data + (m)->m_len) :					\
556	 &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
557
558#define M_TRAILINGSPACE(m)						\
559	(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))
560
561/*
562 * Arrange to prepend space of size plen to mbuf m.
563 * If a new mbuf must be allocated, how specifies whether to wait.
564 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
565 * is freed and m is set to NULL.
566 */
567#define M_PREPEND(m, plen, how)						\
568do {									\
569	if (M_LEADINGSPACE(m) >= (plen)) {				\
570		(m)->m_data -= (plen);					\
571		(m)->m_len += (plen);					\
572	} else								\
573		(m) = m_prepend((m), (plen), (how));			\
574	if ((m) && (m)->m_flags & M_PKTHDR)				\
575		(m)->m_pkthdr.len += (plen);				\
576} while (0)
577
578/* change mbuf to new type */
579#define MCHTYPE(m, t)							\
580do {									\
581	KASSERT((t) != MT_FREE);					\
582	mbstat_type_add((m)->m_type, -1);				\
583	mbstat_type_add(t, 1);						\
584	(m)->m_type = t;						\
585} while (0)
586
587#ifdef DIAGNOSTIC
588#define M_VERIFY_PACKET(m)	m_verify_packet(m)
589#else
590#define M_VERIFY_PACKET(m)	__nothing
591#endif
592
593/* The "copy all" special length. */
594#define M_COPYALL	-1
595
596/*
597 * Allow drivers and/or protocols to store private context information.
598 */
599#define M_GETCTX(m, t)		((t)(m)->m_pkthdr._rcvif.ctx)
600#define M_SETCTX(m, c)		((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
601#define M_CLEARCTX(m)		M_SETCTX((m), NULL)
602
603/*
604 * M_REGION_GET ensures that the "len"-sized region of type "typ" starting
605 * from "off" within "m" is located in a single mbuf, contiguously.
606 *
607 * The pointer to the region will be returned to pointer variable "val".
608 */
609#define M_REGION_GET(val, typ, m, off, len) \
610do {									\
611	struct mbuf *_t;						\
612	int _tmp;							\
613	if ((m)->m_len >= (off) + (len))				\
614		(val) = (typ)(mtod((m), char *) + (off));		\
615	else {								\
616		_t = m_pulldown((m), (off), (len), &_tmp);		\
617		if (_t) {						\
618			if (_t->m_len < _tmp + (len))			\
619				panic("m_pulldown malfunction");	\
620			(val) = (typ)(mtod(_t, char *) + _tmp);	\
621		} else {						\
622			(val) = (typ)NULL;				\
623			(m) = NULL;					\
624		}							\
625	}								\
626} while (0)
627
628#endif /* defined(_KERNEL) */
629
630/*
631 * Simple mbuf queueing system
632 *
633 * this is basically a SIMPLEQ adapted to mbuf use (ie using
634 * m_nextpkt instead of field.sqe_next).
635 *
636 * m_next is ignored, so queueing chains of mbufs is possible
637 */
638#define MBUFQ_HEAD(name)					\
639struct name {							\
640	struct mbuf *mq_first;					\
641	struct mbuf **mq_last;					\
642}
643
644#define MBUFQ_INIT(q)		do {				\
645	(q)->mq_first = NULL;					\
646	(q)->mq_last = &(q)->mq_first;				\
647} while (0)
648
649#define MBUFQ_ENQUEUE(q, m)	do {				\
650	(m)->m_nextpkt = NULL;					\
651	*(q)->mq_last = (m);					\
652	(q)->mq_last = &(m)->m_nextpkt;				\
653} while (0)
654
655#define MBUFQ_PREPEND(q, m)	do {				\
656	if (((m)->m_nextpkt = (q)->mq_first) == NULL)		\
657		(q)->mq_last = &(m)->m_nextpkt;			\
658	(q)->mq_first = (m);					\
659} while (0)
660
661#define MBUFQ_DEQUEUE(q, m)	do {				\
662	if (((m) = (q)->mq_first) != NULL) {			\
663		if (((q)->mq_first = (m)->m_nextpkt) == NULL)	\
664			(q)->mq_last = &(q)->mq_first;		\
665		else						\
666			(m)->m_nextpkt = NULL;			\
667	}							\
668} while (0)
669
670#define MBUFQ_DRAIN(q)		do {				\
671	struct mbuf *__m0;					\
672	while ((__m0 = (q)->mq_first) != NULL) {		\
673		(q)->mq_first = __m0->m_nextpkt;		\
674		m_freem(__m0);					\
675	}							\
676	(q)->mq_last = &(q)->mq_first;				\
677} while (0)
678
679#define MBUFQ_FIRST(q)		((q)->mq_first)
680#define MBUFQ_NEXT(m)		((m)->m_nextpkt)
681#define MBUFQ_LAST(q)		(*(q)->mq_last)
682
683/*
684 * Mbuf statistics.
685 * For statistics related to mbuf and cluster allocations, see also the
686 * pool headers (mb_cache and mcl_cache).
687 */
688struct mbstat {
689	u_long	_m_spare;	/* formerly m_mbufs */
690	u_long	_m_spare1;	/* formerly m_clusters */
691	u_long	_m_spare2;	/* spare field */
692	u_long	_m_spare3;	/* formely m_clfree - free clusters */
693	u_long	m_drops;	/* times failed to find space */
694	u_long	m_wait;		/* times waited for space */
695	u_long	m_drain;	/* times drained protocols for space */
696	u_short	m_mtypes[256];	/* type specific mbuf allocations */
697};
698
699struct mbstat_cpu {
700	u_int	m_mtypes[256];	/* type specific mbuf allocations */
701};
702
703/*
704 * Mbuf sysctl variables.
705 */
706#define MBUF_MSIZE		1	/* int: mbuf base size */
707#define MBUF_MCLBYTES		2	/* int: mbuf cluster size */
708#define MBUF_NMBCLUSTERS	3	/* int: limit on the # of clusters */
709#define MBUF_MBLOWAT		4	/* int: mbuf low water mark */
710#define MBUF_MCLLOWAT		5	/* int: mbuf cluster low water mark */
711#define MBUF_STATS		6	/* struct: mbstat */
712#define MBUF_MOWNERS		7	/* struct: m_owner[] */
713#define MBUF_NMBCLUSTERS_LIMIT	8	/* int: limit of nmbclusters */
714
715#ifdef _KERNEL
716extern struct mbstat mbstat;
717extern int nmbclusters;		/* limit on the # of clusters */
718extern int mblowat;		/* mbuf low water mark */
719extern int mcllowat;		/* mbuf cluster low water mark */
720extern int max_linkhdr;		/* largest link-level header */
721extern int max_protohdr;		/* largest protocol header */
722extern int max_hdr;		/* largest link+protocol header */
723extern int max_datalen;		/* MHLEN - max_hdr */
724extern const int msize;			/* mbuf base size */
725extern const int mclbytes;		/* mbuf cluster size */
726extern pool_cache_t mb_cache;
727#ifdef MBUFTRACE
728LIST_HEAD(mownerhead, mowner);
729extern struct mownerhead mowners;
730extern struct mowner unknown_mowners[];
731extern struct mowner revoked_mowner;
732#endif
733
734MALLOC_DECLARE(M_MBUF);
735MALLOC_DECLARE(M_SONAME);
736
737struct	mbuf *m_copym(struct mbuf *, int, int, int);
738struct	mbuf *m_copypacket(struct mbuf *, int);
739struct	mbuf *m_devget(char *, int, int, struct ifnet *);
740struct	mbuf *m_dup(struct mbuf *, int, int, int);
741struct	mbuf *m_get(int, int);
742struct	mbuf *m_gethdr(int, int);
743struct	mbuf *m_get_n(int, int, size_t, size_t);
744struct	mbuf *m_gethdr_n(int, int, size_t, size_t);
745struct	mbuf *m_prepend(struct mbuf *,int, int);
746struct	mbuf *m_pulldown(struct mbuf *, int, int, int *);
747struct	mbuf *m_pullup(struct mbuf *, int);
748struct	mbuf *m_copyup(struct mbuf *, int, int);
749struct	mbuf *m_split(struct mbuf *,int, int);
750struct	mbuf *m_getptr(struct mbuf *, int, int *);
751void	m_adj(struct mbuf *, int);
752struct	mbuf *m_defrag(struct mbuf *, int);
753int	m_apply(struct mbuf *, int, int,
754    int (*)(void *, void *, unsigned int), void *);
755void	m_cat(struct mbuf *,struct mbuf *);
756void	m_clget(struct mbuf *, int);
757void	m_copyback(struct mbuf *, int, int, const void *);
758struct	mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
759int	m_makewritable(struct mbuf **, int, int, int);
760struct	mbuf *m_getcl(int, int, int);
761void	m_copydata(struct mbuf *, int, int, void *);
762void	m_verify_packet(struct mbuf *);
763struct	mbuf *m_free(struct mbuf *);
764void	m_freem(struct mbuf *);
765void	mbinit(void);
766void	m_remove_pkthdr(struct mbuf *);
767void	m_copy_pkthdr(struct mbuf *, struct mbuf *);
768void	m_move_pkthdr(struct mbuf *, struct mbuf *);
769void	m_align(struct mbuf *, int);
770
771bool	m_ensure_contig(struct mbuf **, int);
772struct mbuf *m_add(struct mbuf *, struct mbuf *);
773
774/* Inline routines. */
775static __inline u_int m_length(const struct mbuf *) __unused;
776
777/* Statistics */
778void mbstat_type_add(int, int);
779
780/* Packet tag routines */
781struct	m_tag *m_tag_get(int, int, int);
782void	m_tag_free(struct m_tag *);
783void	m_tag_prepend(struct mbuf *, struct m_tag *);
784void	m_tag_unlink(struct mbuf *, struct m_tag *);
785void	m_tag_delete(struct mbuf *, struct m_tag *);
786void	m_tag_delete_chain(struct mbuf *);
787struct	m_tag *m_tag_find(const struct mbuf *, int);
788struct	m_tag *m_tag_copy(struct m_tag *);
789int	m_tag_copy_chain(struct mbuf *, struct mbuf *);
790
791/* Packet tag types */
792#define PACKET_TAG_NONE			0  /* Nothing */
793#define PACKET_TAG_SO			4  /* sending socket pointer */
794#define PACKET_TAG_NPF			10 /* packet filter */
795#define PACKET_TAG_PF			11 /* packet filter */
796#define PACKET_TAG_ALTQ_QID		12 /* ALTQ queue id */
797#define PACKET_TAG_IPSEC_OUT_DONE	18
798#define PACKET_TAG_IPSEC_NAT_T_PORTS	25 /* two uint16_t */
799#define PACKET_TAG_INET6		26 /* IPv6 info */
800#define PACKET_TAG_TUNNEL_INFO		28 /* tunnel identification and
801					    * protocol callback, for loop
802					    * detection/recovery
803					    */
804#define PACKET_TAG_MPLS			29 /* Indicate it's for MPLS */
805#define PACKET_TAG_SRCROUTE		30 /* IPv4 source routing */
806#define PACKET_TAG_ETHERNET_SRC		31 /* Ethernet source address */
807
808/*
809 * Return the number of bytes in the mbuf chain, m.
810 */
811static __inline u_int
812m_length(const struct mbuf *m)
813{
814	const struct mbuf *m0;
815	u_int pktlen;
816
817	if ((m->m_flags & M_PKTHDR) != 0)
818		return m->m_pkthdr.len;
819
820	pktlen = 0;
821	for (m0 = m; m0 != NULL; m0 = m0->m_next)
822		pktlen += m0->m_len;
823	return pktlen;
824}
825
/*
 * Record the receive interface of packet-header mbuf m by interface
 * index (not pointer), so the interface may be detached safely.
 */
static __inline void
m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = ifp->if_index;
}

/* Clear the receive interface / private context of packet-header mbuf m. */
static __inline void
m_reset_rcvif(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	/* A caller may expect whole _rcvif union is zeroed */
	/* m->m_pkthdr.rcvif_index = 0; */
	m->m_pkthdr._rcvif.ctx = NULL;
}

/* Copy the receive interface index from packet-header mbuf n to m. */
static __inline void
m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
{
	KASSERT(m->m_flags & M_PKTHDR);
	KASSERT(n->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
}
849
/* Ensure an aligned, contiguous header of the given type; see below. */
#define M_GET_ALIGNED_HDR(m, type, linkhdr) \
    m_get_aligned_hdr((m), __alignof(type) - 1, sizeof(type), (linkhdr))

/*
 * Make the first hlen bytes of *m contiguous and aligned to (mask+1).
 * On strict-alignment machines a misaligned chain is realigned via
 * m_copyup (optionally leaving room for a link-layer header); an
 * aligned-but-short chain goes through m_pullup.  *m may be replaced.
 * Returns non-zero (and *m == NULL) if the chain could not be fixed
 * up; the failing m_copyup/m_pullup disposes of the original chain.
 */
static __inline int
m_get_aligned_hdr(struct mbuf **m, int mask, size_t hlen, bool linkhdr)
{
#ifndef __NO_STRICT_ALIGNMENT
	if (((uintptr_t)mtod(*m, void *) & mask) != 0)
		*m = m_copyup(*m, hlen,
		      linkhdr ? (max_linkhdr + mask) & ~mask : 0);
	else
#endif
	if (__predict_false((size_t)(*m)->m_len < hlen))
		*m = m_pullup(*m, hlen);

	return *m == NULL;
}
867
868void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
869    __printflike(1, 2));
870
871/* from uipc_mbufdebug.c */
872void	m_examine(const struct mbuf *, int, const char *,
873    void (*)(const char *, ...) __printflike(1, 2));
874
875/* parsers for m_examine() */
876void m_examine_ether(const struct mbuf *, int, const char *,
877    void (*)(const char *, ...) __printflike(1, 2));
878void m_examine_pppoe(const struct mbuf *, int, const char *,
879    void (*)(const char *, ...) __printflike(1, 2));
880void m_examine_ppp(const struct mbuf *, int, const char *,
881    void (*)(const char *, ...) __printflike(1, 2));
882void m_examine_arp(const struct mbuf *, int, const char *,
883    void (*)(const char *, ...) __printflike(1, 2));
884void m_examine_ip(const struct mbuf *, int, const char *,
885    void (*)(const char *, ...) __printflike(1, 2));
886void m_examine_icmp(const struct mbuf *, int, const char *,
887    void (*)(const char *, ...) __printflike(1, 2));
888void m_examine_ip6(const struct mbuf *, int, const char *,
889    void (*)(const char *, ...) __printflike(1, 2));
890void m_examine_icmp6(const struct mbuf *, int, const char *,
891    void (*)(const char *, ...) __printflike(1, 2));
892void m_examine_tcp(const struct mbuf *, int, const char *,
893    void (*)(const char *, ...) __printflike(1, 2));
894void m_examine_udp(const struct mbuf *, int, const char *,
895    void (*)(const char *, ...) __printflike(1, 2));
896void m_examine_hex(const struct mbuf *, int, const char *,
897    void (*)(const char *, ...) __printflike(1, 2));
898
899/*
900 * Get rcvif of a mbuf.
901 *
902 * The caller must call m_put_rcvif after using rcvif if the returned rcvif
903 * isn't NULL. If the returned rcvif is NULL, the caller doesn't need to call
904 * m_put_rcvif (although calling it is safe).
905 *
906 * The caller must not block or sleep while using rcvif. The API ensures a
907 * returned rcvif isn't freed until m_put_rcvif is called.
908 */
909static __inline struct ifnet *
910m_get_rcvif(const struct mbuf *m, int *s)
911{
912	struct ifnet *ifp;
913
914	KASSERT(m->m_flags & M_PKTHDR);
915	*s = pserialize_read_enter();
916	ifp = if_byindex(m->m_pkthdr.rcvif_index);
917	if (__predict_false(ifp == NULL))
918		pserialize_read_exit(*s);
919
920	return ifp;
921}
922
923static __inline void
924m_put_rcvif(struct ifnet *ifp, int *s)
925{
926
927	if (ifp == NULL)
928		return;
929	pserialize_read_exit(*s);
930}
931
932/*
933 * Get rcvif of a mbuf.
934 *
935 * The caller must call m_put_rcvif_psref after using rcvif. The API ensures
936 * a got rcvif isn't be freed until m_put_rcvif_psref is called.
937 */
938static __inline struct ifnet *
939m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
940{
941	KASSERT(m->m_flags & M_PKTHDR);
942	return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
943}
944
945static __inline void
946m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
947{
948
949	if (ifp == NULL)
950		return;
951	if_put(ifp, psref);
952}
953
954/*
955 * Get rcvif of a mbuf.
956 *
957 * This is NOT an MP-safe API and shouldn't be used at where we want MP-safe.
958 */
959static __inline struct ifnet *
960m_get_rcvif_NOMPSAFE(const struct mbuf *m)
961{
962	KASSERT(m->m_flags & M_PKTHDR);
963	return if_byindex(m->m_pkthdr.rcvif_index);
964}
965
966#endif /* _KERNEL */
967#endif /* !_SYS_MBUF_H_ */
968