Deleted Added
full compact
mbuf.9 (129469) mbuf.9 (131530)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\" notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\" notice, this list of conditions and the following disclaimer in the
11.\" documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\" notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\" notice, this list of conditions and the following disclaimer in the
11.\" documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD: head/share/man/man9/mbuf.9 129469 2004-05-20 09:52:48Z ru $
25.\" $FreeBSD: head/share/man/man9/mbuf.9 131530 2004-07-03 18:29:24Z ru $
26.\"
27.Dd May 20, 2004
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_pullup "struct mbuf *mbuf" "int len"
97.Ft struct mbuf *
98.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
99.Ft struct mbuf *
100.Fn m_copypacket "struct mbuf *mbuf" "int how"
101.Ft struct mbuf *
102.Fn m_dup "struct mbuf *mbuf" "int how"
103.Ft void
104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
105.Ft void
106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft struct mbuf *
108.Fo m_devget
109.Fa "char *buf"
110.Fa "int len"
111.Fa "int offset"
112.Fa "struct ifnet *ifp"
113.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
114.Fc
115.Ft void
116.Fn m_cat "struct mbuf *m" "struct mbuf *n"
117.Ft u_int
118.Fn m_fixhdr "struct mbuf *mbuf"
119.Ft int
120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
121.Ft void
122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
123.Ft u_int
124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
125.Ft struct mbuf *
126.Fn m_split "struct mbuf *mbuf" "int len" "int how"
127.Ft int
128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
129.Ft struct mbuf *
130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
131.Ft struct mbuf *
132.Fn m_defrag "struct mbuf *m0" "int how"
133.\"
134.Sh DESCRIPTION
135An
136.Vt mbuf
137is a basic unit of memory management in the kernel IPC subsystem.
138Network packets and socket buffers are stored in
139.Vt mbufs .
140A network packet may span multiple
141.Vt mbufs
142arranged into a
143.Vt mbuf chain
144(linked list),
145which allows adding or trimming
146network headers with little overhead.
147.Pp
148While a developer should not bother with
149.Vt mbuf
150internals without serious
151reason in order to avoid incompatibilities with future changes, it
152is useful to understand the general structure of an
153.Vt mbuf .
154.Pp
155An
156.Vt mbuf
157consists of a variable-sized header and a small internal
158buffer for data.
159The total size of an
160.Vt mbuf ,
161.Dv MSIZE ,
162is a constant defined in
163.In sys/param.h .
164The
165.Vt mbuf
166header includes:
167.Pp
168.Bl -tag -width "m_nextpkt" -offset indent
169.It Va m_next
170.Pq Vt struct mbuf *
171A pointer to the next
172.Vt mbuf
173in the
174.Vt mbuf chain .
175.It Va m_nextpkt
176.Pq Vt struct mbuf *
177A pointer to the next
178.Vt mbuf chain
179in the queue.
180.It Va m_data
181.Pq Vt caddr_t
182A pointer to data attached to this
183.Vt mbuf .
184.It Va m_len
185.Pq Vt int
186The length of the data.
187.It Va m_type
188.Pq Vt short
189The type of the data.
190.It Va m_flags
191.Pq Vt int
192The
193.Vt mbuf
194flags.
195.El
196.Pp
197The
198.Vt mbuf
199flag bits are defined as follows:
200.Bd -literal
201/* mbuf flags */
202#define M_EXT 0x0001 /* has associated external storage */
203#define M_PKTHDR 0x0002 /* start of record */
204#define M_EOR 0x0004 /* end of record */
205#define M_RDONLY 0x0008 /* associated data marked read-only */
206#define M_PROTO1 0x0010 /* protocol-specific */
207#define M_PROTO2 0x0020 /* protocol-specific */
208#define M_PROTO3 0x0040 /* protocol-specific */
209#define M_PROTO4 0x0080 /* protocol-specific */
210#define M_PROTO5 0x0100 /* protocol-specific */
211#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */
212#define M_FREELIST 0x8000 /* mbuf is on the free list */
213
214/* mbuf pkthdr flags (also stored in m_flags) */
215#define M_BCAST 0x0200 /* send/received as link-level broadcast */
216#define M_MCAST 0x0400 /* send/received as link-level multicast */
217#define M_FRAG 0x0800 /* packet is fragment of larger packet */
218#define M_FIRSTFRAG 0x1000 /* packet is first fragment */
219#define M_LASTFRAG 0x2000 /* packet is last fragment */
220.Ed
221.Pp
222The available
223.Vt mbuf
224types are defined as follows:
225.Bd -literal
226/* mbuf types */
227#define MT_DATA 1 /* dynamic (data) allocation */
228#define MT_HEADER 2 /* packet header */
229#define MT_SONAME 8 /* socket name */
230#define MT_FTABLE 11 /* fragment reassembly header */
231#define MT_TAG 13 /* volatile metadata associated to pkts */
232#define MT_CONTROL 14 /* extra-data protocol message */
233#define MT_OOBDATA 15 /* expedited data */
234.Ed
235.Pp
236If the
237.Dv M_PKTHDR
238flag is set, a
239.Vt struct pkthdr Va m_pkthdr
240is added to the
241.Vt mbuf
242header.
243It contains a pointer to the interface
244the packet has been received from
245.Pq Vt struct ifnet Va *rcvif ,
246and the total packet length
247.Pq Vt int Va len .
248Optionally, it may also contain an attached list of packet tags
249.Pq Vt "struct m_tag" .
250See
251.Xr mbuf_tags 9
252for details.
253Fields used in offloading checksum calculation to the hardware are kept in
254.Va m_pkthdr
255as well.
256See
257.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
258for details.
259.Pp
260If small enough, data is stored in the internal data buffer of an
261.Vt mbuf .
262If the data is sufficiently large, another
263.Vt mbuf
264may be added to the
265.Vt mbuf chain ,
266or external storage may be associated with the
267.Vt mbuf .
268.Dv MHLEN
269bytes of data can fit into an
270.Vt mbuf
271with the
272.Dv M_PKTHDR
273flag set,
274.Dv MLEN
275bytes can otherwise.
276.Pp
277If external storage is being associated with an
278.Vt mbuf ,
279the
280.Va m_ext
281header is added at the cost of losing the internal data buffer.
282It includes a pointer to external storage, the size of the storage,
283a pointer to a function used for freeing the storage,
284a pointer to an optional argument that can be passed to the function,
285and a pointer to a reference counter.
286An
287.Vt mbuf
288using external storage has the
289.Dv M_EXT
290flag set.
291.Pp
292The system supplies a macro for allocating the desired external storage
293buffer,
294.Dv MEXTADD .
295.Pp
296The allocation and management of the reference counter is handled by the
297subsystem.
298The developer can check whether the reference count for the
299external storage of a given
300.Vt mbuf
301is greater than 1 with the
302.Dv MEXT_IS_REF
303macro.
304Similarly, the developer can directly add and remove references,
305if absolutely necessary, with the use of the
306.Dv MEXT_ADD_REF
307and
308.Dv MEXT_REM_REF
309macros.
310.Pp
311The system also supplies a default type of external storage buffer called an
312.Vt mbuf cluster .
313.Vt Mbuf clusters
314can be allocated and configured with the use of the
315.Dv MCLGET
316macro.
317Each
318.Vt mbuf cluster
319is
320.Dv MCLBYTES
321in size, where MCLBYTES is a machine-dependent constant.
322The system defines an advisory macro
323.Dv MINCLSIZE ,
324which is the smallest amount of data to put into an
325.Vt mbuf cluster .
326It's equal to the sum of
327.Dv MLEN
328and
329.Dv MHLEN .
330It is typically preferable to store data into the data region of an
331.Vt mbuf ,
332if size permits, as opposed to allocating a separate
333.Vt mbuf cluster
334to hold the same data.
335.\"
336.Ss Macros and Functions
337There are numerous predefined macros and functions that provide the
338developer with common utilities.
339.\"
340.Bl -ohang -offset indent
341.It Fn mtod mbuf type
342Convert an
343.Fa mbuf
344pointer to a data pointer.
345The macro expands to the data pointer cast to the pointer of the specified
346.Fa type .
347.Sy Note :
348It is advisable to ensure that there is enough contiguous data in
349.Fa mbuf .
350See
351.Fn m_pullup
352for details.
353.It Fn MGET mbuf how type
354Allocate an
355.Vt mbuf
356and initialize it to contain internal data.
357.Fa mbuf
358will point to the allocated
359.Vt mbuf
360on success, or be set to
361.Dv NULL
362on failure.
363The
364.Fa how
365argument is to be set to
366.Dv M_TRYWAIT
367or
368.Dv M_DONTWAIT .
369It specifies whether the caller is willing to block if necessary.
370If
371.Fa how
372is set to
373.Dv M_TRYWAIT ,
374a failed allocation will result in the caller being put
375to sleep for a designated
376kern.ipc.mbuf_wait
377.Xr ( sysctl 8
378tunable)
379number of ticks.
380A number of other functions and macros related to
381.Vt mbufs
382have the same argument because they may
383at some point need to allocate new
384.Vt mbufs .
385.Pp
386Programmers should be careful not to confuse the
387.Vt mbuf
388allocation flag
389.Dv M_DONTWAIT
390with the
391.Xr malloc 9
392allocation flag,
393.Dv M_NOWAIT .
394They are not the same.
395.It Fn MGETHDR mbuf how type
396Allocate an
397.Vt mbuf
398and initialize it to contain a packet header
399and internal data.
400See
401.Fn MGET
402for details.
403.It Fn MCLGET mbuf how
404Allocate and attach an
405.Vt mbuf cluster
406to
407.Fa mbuf .
408If the macro fails, the
409.Dv M_EXT
410flag won't be set in
411.Fa mbuf .
412.It Fn M_ALIGN mbuf len
413Set the pointer
414.Fa mbuf->m_data
415to place an object of the size
416.Fa len
417at the end of the internal data area of
418.Fa mbuf ,
419long word aligned.
420Applicable only if
421.Fa mbuf
422is newly allocated with
423.Fn MGET
424or
425.Fn m_get .
426.It Fn MH_ALIGN mbuf len
427Serves the same purpose as
428.Fn M_ALIGN
429does, but only for
430.Fa mbuf
431newly allocated with
432.Fn MGETHDR
433or
434.Fn m_gethdr ,
435or initialized by
436.Fn m_dup_pkthdr
437or
438.Fn m_move_pkthdr .
439.It Fn M_LEADINGSPACE mbuf
440Returns the number of bytes available before the beginning
441of data in
442.Fa mbuf .
443.It Fn M_TRAILINGSPACE mbuf
444Returns the number of bytes available after the end of data in
445.Fa mbuf .
446.It Fn M_PREPEND mbuf len how
447This macro operates on an
448.Vt mbuf chain .
449It is an optimized wrapper for
450.Fn m_prepend
451that can make use of possible empty space before data
26.\"
27.Dd May 20, 2004
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_pullup "struct mbuf *mbuf" "int len"
97.Ft struct mbuf *
98.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
99.Ft struct mbuf *
100.Fn m_copypacket "struct mbuf *mbuf" "int how"
101.Ft struct mbuf *
102.Fn m_dup "struct mbuf *mbuf" "int how"
103.Ft void
104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
105.Ft void
106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft struct mbuf *
108.Fo m_devget
109.Fa "char *buf"
110.Fa "int len"
111.Fa "int offset"
112.Fa "struct ifnet *ifp"
113.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
114.Fc
115.Ft void
116.Fn m_cat "struct mbuf *m" "struct mbuf *n"
117.Ft u_int
118.Fn m_fixhdr "struct mbuf *mbuf"
119.Ft int
120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
121.Ft void
122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
123.Ft u_int
124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
125.Ft struct mbuf *
126.Fn m_split "struct mbuf *mbuf" "int len" "int how"
127.Ft int
128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
129.Ft struct mbuf *
130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
131.Ft struct mbuf *
132.Fn m_defrag "struct mbuf *m0" "int how"
133.\"
134.Sh DESCRIPTION
135An
136.Vt mbuf
137is a basic unit of memory management in the kernel IPC subsystem.
138Network packets and socket buffers are stored in
139.Vt mbufs .
140A network packet may span multiple
141.Vt mbufs
142arranged into a
143.Vt mbuf chain
144(linked list),
145which allows adding or trimming
146network headers with little overhead.
147.Pp
148While a developer should not bother with
149.Vt mbuf
150internals without serious
151reason in order to avoid incompatibilities with future changes, it
152is useful to understand the general structure of an
153.Vt mbuf .
154.Pp
155An
156.Vt mbuf
157consists of a variable-sized header and a small internal
158buffer for data.
159The total size of an
160.Vt mbuf ,
161.Dv MSIZE ,
162is a constant defined in
163.In sys/param.h .
164The
165.Vt mbuf
166header includes:
167.Pp
168.Bl -tag -width "m_nextpkt" -offset indent
169.It Va m_next
170.Pq Vt struct mbuf *
171A pointer to the next
172.Vt mbuf
173in the
174.Vt mbuf chain .
175.It Va m_nextpkt
176.Pq Vt struct mbuf *
177A pointer to the next
178.Vt mbuf chain
179in the queue.
180.It Va m_data
181.Pq Vt caddr_t
182A pointer to data attached to this
183.Vt mbuf .
184.It Va m_len
185.Pq Vt int
186The length of the data.
187.It Va m_type
188.Pq Vt short
189The type of the data.
190.It Va m_flags
191.Pq Vt int
192The
193.Vt mbuf
194flags.
195.El
196.Pp
197The
198.Vt mbuf
199flag bits are defined as follows:
200.Bd -literal
201/* mbuf flags */
202#define M_EXT 0x0001 /* has associated external storage */
203#define M_PKTHDR 0x0002 /* start of record */
204#define M_EOR 0x0004 /* end of record */
205#define M_RDONLY 0x0008 /* associated data marked read-only */
206#define M_PROTO1 0x0010 /* protocol-specific */
207#define M_PROTO2 0x0020 /* protocol-specific */
208#define M_PROTO3 0x0040 /* protocol-specific */
209#define M_PROTO4 0x0080 /* protocol-specific */
210#define M_PROTO5 0x0100 /* protocol-specific */
211#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */
212#define M_FREELIST 0x8000 /* mbuf is on the free list */
213
214/* mbuf pkthdr flags (also stored in m_flags) */
215#define M_BCAST 0x0200 /* send/received as link-level broadcast */
216#define M_MCAST 0x0400 /* send/received as link-level multicast */
217#define M_FRAG 0x0800 /* packet is fragment of larger packet */
218#define M_FIRSTFRAG 0x1000 /* packet is first fragment */
219#define M_LASTFRAG 0x2000 /* packet is last fragment */
220.Ed
221.Pp
222The available
223.Vt mbuf
224types are defined as follows:
225.Bd -literal
226/* mbuf types */
227#define MT_DATA 1 /* dynamic (data) allocation */
228#define MT_HEADER 2 /* packet header */
229#define MT_SONAME 8 /* socket name */
230#define MT_FTABLE 11 /* fragment reassembly header */
231#define MT_TAG 13 /* volatile metadata associated to pkts */
232#define MT_CONTROL 14 /* extra-data protocol message */
233#define MT_OOBDATA 15 /* expedited data */
234.Ed
235.Pp
236If the
237.Dv M_PKTHDR
238flag is set, a
239.Vt struct pkthdr Va m_pkthdr
240is added to the
241.Vt mbuf
242header.
243It contains a pointer to the interface
244the packet has been received from
245.Pq Vt struct ifnet Va *rcvif ,
246and the total packet length
247.Pq Vt int Va len .
248Optionally, it may also contain an attached list of packet tags
249.Pq Vt "struct m_tag" .
250See
251.Xr mbuf_tags 9
252for details.
253Fields used in offloading checksum calculation to the hardware are kept in
254.Va m_pkthdr
255as well.
256See
257.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
258for details.
259.Pp
260If small enough, data is stored in the internal data buffer of an
261.Vt mbuf .
262If the data is sufficiently large, another
263.Vt mbuf
264may be added to the
265.Vt mbuf chain ,
266or external storage may be associated with the
267.Vt mbuf .
268.Dv MHLEN
269bytes of data can fit into an
270.Vt mbuf
271with the
272.Dv M_PKTHDR
273flag set,
274.Dv MLEN
275bytes can otherwise.
276.Pp
277If external storage is being associated with an
278.Vt mbuf ,
279the
280.Va m_ext
281header is added at the cost of losing the internal data buffer.
282It includes a pointer to external storage, the size of the storage,
283a pointer to a function used for freeing the storage,
284a pointer to an optional argument that can be passed to the function,
285and a pointer to a reference counter.
286An
287.Vt mbuf
288using external storage has the
289.Dv M_EXT
290flag set.
291.Pp
292The system supplies a macro for allocating the desired external storage
293buffer,
294.Dv MEXTADD .
295.Pp
296The allocation and management of the reference counter is handled by the
297subsystem.
298The developer can check whether the reference count for the
299external storage of a given
300.Vt mbuf
301is greater than 1 with the
302.Dv MEXT_IS_REF
303macro.
304Similarly, the developer can directly add and remove references,
305if absolutely necessary, with the use of the
306.Dv MEXT_ADD_REF
307and
308.Dv MEXT_REM_REF
309macros.
310.Pp
311The system also supplies a default type of external storage buffer called an
312.Vt mbuf cluster .
313.Vt Mbuf clusters
314can be allocated and configured with the use of the
315.Dv MCLGET
316macro.
317Each
318.Vt mbuf cluster
319is
320.Dv MCLBYTES
321in size, where MCLBYTES is a machine-dependent constant.
322The system defines an advisory macro
323.Dv MINCLSIZE ,
324which is the smallest amount of data to put into an
325.Vt mbuf cluster .
326It's equal to the sum of
327.Dv MLEN
328and
329.Dv MHLEN .
330It is typically preferable to store data into the data region of an
331.Vt mbuf ,
332if size permits, as opposed to allocating a separate
333.Vt mbuf cluster
334to hold the same data.
335.\"
336.Ss Macros and Functions
337There are numerous predefined macros and functions that provide the
338developer with common utilities.
339.\"
340.Bl -ohang -offset indent
341.It Fn mtod mbuf type
342Convert an
343.Fa mbuf
344pointer to a data pointer.
345The macro expands to the data pointer cast to the pointer of the specified
346.Fa type .
347.Sy Note :
348It is advisable to ensure that there is enough contiguous data in
349.Fa mbuf .
350See
351.Fn m_pullup
352for details.
353.It Fn MGET mbuf how type
354Allocate an
355.Vt mbuf
356and initialize it to contain internal data.
357.Fa mbuf
358will point to the allocated
359.Vt mbuf
360on success, or be set to
361.Dv NULL
362on failure.
363The
364.Fa how
365argument is to be set to
366.Dv M_TRYWAIT
367or
368.Dv M_DONTWAIT .
369It specifies whether the caller is willing to block if necessary.
370If
371.Fa how
372is set to
373.Dv M_TRYWAIT ,
374a failed allocation will result in the caller being put
375to sleep for a designated
376kern.ipc.mbuf_wait
377.Xr ( sysctl 8
378tunable)
379number of ticks.
380A number of other functions and macros related to
381.Vt mbufs
382have the same argument because they may
383at some point need to allocate new
384.Vt mbufs .
385.Pp
386Programmers should be careful not to confuse the
387.Vt mbuf
388allocation flag
389.Dv M_DONTWAIT
390with the
391.Xr malloc 9
392allocation flag,
393.Dv M_NOWAIT .
394They are not the same.
395.It Fn MGETHDR mbuf how type
396Allocate an
397.Vt mbuf
398and initialize it to contain a packet header
399and internal data.
400See
401.Fn MGET
402for details.
403.It Fn MCLGET mbuf how
404Allocate and attach an
405.Vt mbuf cluster
406to
407.Fa mbuf .
408If the macro fails, the
409.Dv M_EXT
410flag won't be set in
411.Fa mbuf .
412.It Fn M_ALIGN mbuf len
413Set the pointer
414.Fa mbuf->m_data
415to place an object of the size
416.Fa len
417at the end of the internal data area of
418.Fa mbuf ,
419long word aligned.
420Applicable only if
421.Fa mbuf
422is newly allocated with
423.Fn MGET
424or
425.Fn m_get .
426.It Fn MH_ALIGN mbuf len
427Serves the same purpose as
428.Fn M_ALIGN
429does, but only for
430.Fa mbuf
431newly allocated with
432.Fn MGETHDR
433or
434.Fn m_gethdr ,
435or initialized by
436.Fn m_dup_pkthdr
437or
438.Fn m_move_pkthdr .
439.It Fn M_LEADINGSPACE mbuf
440Returns the number of bytes available before the beginning
441of data in
442.Fa mbuf .
443.It Fn M_TRAILINGSPACE mbuf
444Returns the number of bytes available after the end of data in
445.Fa mbuf .
446.It Fn M_PREPEND mbuf len how
447This macro operates on an
448.Vt mbuf chain .
449It is an optimized wrapper for
450.Fn m_prepend
451that can make use of possible empty space before data
452(e.g. left after trimming of a link-layer header).
452(e.g.\& left after trimming of a link-layer header).
453The new
454.Vt mbuf chain
455pointer or
456.Dv NULL
457is in
458.Fa mbuf
459after the call.
460.It Fn M_MOVE_PKTHDR to from
461Using this macro is equivalent to calling
462.Fn m_move_pkthdr to from .
463.It Fn M_WRITABLE mbuf
464This macro will evaluate true if
465.Fa mbuf
466is not marked
467.Dv M_RDONLY
468and if either
469.Fa mbuf
470does not contain external storage or,
471if it does,
472then if the reference count of the storage is not greater than 1.
473The
474.Dv M_RDONLY
475flag can be set in
476.Fa mbuf->m_flags .
477This can be achieved during setup of the external storage,
478by passing the
479.Dv M_RDONLY
480bit as a
481.Fa flags
482argument to the
483.Fn MEXTADD
484macro, or can be directly set in individual
485.Vt mbufs .
486.It Fn MCHTYPE mbuf type
487Change the type of
488.Fa mbuf
489to
490.Fa type .
491This is a relatively expensive operation and should be avoided.
492.El
493.Pp
494The functions are:
495.Bl -ohang -offset indent
496.It Fn m_get how type
497A function version of
498.Fn MGET
499for non-critical paths.
500.It Fn m_getm orig len how type
501Allocate
502.Fa len
503bytes worth of
504.Vt mbufs
505and
506.Vt mbuf clusters
507if necessary and append the resulting allocated
508.Vt mbuf chain
509to the
510.Vt mbuf chain
511.Fa orig ,
512if it is
513.No non- Ns Dv NULL .
514If the allocation fails at any point,
515free whatever was allocated and return
516.Dv NULL .
517If
518.Fa orig
519is
520.No non- Ns Dv NULL ,
521it will not be freed.
522It is possible to use
523.Fn m_getm
524to either append
525.Fa len
526bytes to an existing
527.Vt mbuf
528or
529.Vt mbuf chain
530(for example, one which may be sitting in a pre-allocated ring)
531or to simply perform an all-or-nothing
532.Vt mbuf
533and
534.Vt mbuf cluster
535allocation.
536.It Fn m_gethdr how type
537A function version of
538.Fn MGETHDR
539for non-critical paths.
540.It Fn m_getcl how type flags
541Fetch an
542.Vt mbuf
543with a
544.Vt mbuf cluster
545attached to it.
546If one of the allocations fails, the entire allocation fails.
547This routine is the preferred way of fetching both the
548.Vt mbuf
549and
550.Vt mbuf cluster
551together, as it avoids having to unlock/relock between allocations.
552Returns
553.Dv NULL
554on failure.
555.It Fn m_getclr how type
556Allocate an
557.Vt mbuf
558and zero out the data region.
559.It Fn m_free mbuf
560Frees
561.Vt mbuf .
562.El
563.Pp
564The functions below operate on
565.Vt mbuf chains .
566.Bl -ohang -offset indent
567.It Fn m_freem mbuf
568Free an entire
569.Vt mbuf chain ,
570including any external storage.
571.\"
572.It Fn m_adj mbuf len
573Trim
574.Fa len
575bytes from the head of an
576.Vt mbuf chain
577if
578.Fa len
579is positive, from the tail otherwise.
580.\"
581.It Fn m_prepend mbuf len how
582Allocate a new
583.Vt mbuf
584and prepend it to the
585.Vt mbuf chain ,
586handling
587.Dv M_PKTHDR
588properly.
589.Sy Note :
590It doesn't allocate any
591.Vt mbuf clusters ,
592so
593.Fa len
594must be less than
595.Dv MLEN
596or
597.Dv MHLEN ,
598depending on the
599.Dv M_PKTHDR
600flag setting.
601.\"
602.It Fn m_pullup mbuf len
603Arrange that the first
604.Fa len
605bytes of an
606.Vt mbuf chain
607are contiguous and lie in the data area of
608.Fa mbuf ,
609so they are accessible with
610.Fn mtod mbuf type .
611Return the new
612.Vt mbuf chain
613on success,
614.Dv NULL
615on failure
616(the
617.Vt mbuf chain
618is freed in this case).
619.Sy Note :
620It doesn't allocate any
621.Vt mbuf clusters ,
622so
623.Fa len
624must be less than
625.Dv MHLEN .
626.\"
627.It Fn m_copym mbuf offset len how
628Make a copy of an
629.Vt mbuf chain
630starting
631.Fa offset
632bytes from the beginning, continuing for
633.Fa len
634bytes.
635If
636.Fa len
637is
638.Dv M_COPYALL ,
639copy to the end of the
640.Vt mbuf chain .
641.Sy Note :
642The copy is read-only, because the
643.Vt mbuf clusters
644are not copied, only their reference counts are incremented.
645.\"
646.It Fn m_copypacket mbuf how
647Copy an entire packet including header, which must be present.
648This is an optimized version of the common case
649.Fn m_copym mbuf 0 M_COPYALL how .
650.Sy Note :
651The copy is read-only, because the
652.Vt mbuf clusters
653are not copied, only their reference counts are incremented.
654.\"
655.It Fn m_dup mbuf how
656Copy a packet header
657.Vt mbuf chain
658into a completely new
659.Vt mbuf chain ,
660including copying any
661.Vt mbuf clusters .
662Use this instead of
663.Fn m_copypacket
664when you need a writable copy of an
665.Vt mbuf chain .
666.\"
667.It Fn m_copydata mbuf offset len buf
668Copy data from an
669.Vt mbuf chain
670starting
671.Fa offset
672bytes from the beginning, continuing for
673.Fa len
674bytes, into the indicated buffer
675.Fa buf .
676.\"
677.It Fn m_copyback mbuf offset len buf
678Copy
679.Fa len
680bytes from the buffer
681.Fa buf
682back into the indicated
683.Vt mbuf chain ,
684starting at
685.Fa offset
686bytes from the beginning of the
687.Vt mbuf chain ,
688extending the
689.Vt mbuf chain
690if necessary.
691.Sy Note :
692It doesn't allocate any
693.Vt mbuf clusters ,
694just adds
695.Vt mbufs
696to the
697.Vt mbuf chain .
698It's safe to set
699.Fa offset
700beyond the current
701.Vt mbuf chain
702end: zeroed
703.Vt mbufs
704will be allocated to fill the space.
705.\"
706.It Fn m_length mbuf last
707Return the length of the
708.Vt mbuf chain ,
709and optionally a pointer to the last
710.Vt mbuf .
711.\"
712.It Fn m_dup_pkthdr to from how
713Upon the function's completion, the
714.Vt mbuf
715.Fa to
716will contain an identical copy of
717.Fa from->m_pkthdr
718and the per-packet attributes found in the
719.Vt mbuf chain
720.Fa from .
721The
722.Vt mbuf
723.Fa from
724must have the flag
725.Dv M_PKTHDR
726initially set, and
727.Fa to
728must be empty on entry.
729.\"
730.It Fn m_move_pkthdr to from
731Move
732.Va m_pkthdr
733and the per-packet attributes from the
734.Vt mbuf chain
735.Fa from
736to the
737.Vt mbuf
738.Fa to .
739The
740.Vt mbuf
741.Fa from
742must have the flag
743.Dv M_PKTHDR
744initially set, and
745.Fa to
746must be empty on entry.
747Upon the function's completion,
748.Fa from
749will have the flag
750.Dv M_PKTHDR
751and the per-packet attributes cleared.
752.\"
753.It Fn m_fixhdr mbuf
754Set the packet-header length to the length of the
755.Vt mbuf chain .
756.\"
757.It Fn m_devget buf len offset ifp copy
758Copy data from device-local memory pointed to by
759.Fa buf
760to an
761.Vt mbuf chain .
762The copy is done using a specified copy routine
763.Fa copy ,
764or
765.Fn bcopy
766if
767.Fa copy
768is
769.Dv NULL .
770.\"
771.It Fn m_cat m n
772Concatenate
773.Fa n
774to
775.Fa m .
776Both
777.Vt mbuf chains
778must be of the same type.
779.Fa n
780is still valid after the function returns.
781.Sy Note :
782It does not handle
783.Dv M_PKTHDR
784and friends.
785.\"
786.It Fn m_split mbuf len how
787Partition an
788.Vt mbuf chain
789in two pieces, returning the tail:
790all but the first
791.Fa len
792bytes.
793In case of failure, it returns
794.Dv NULL
795and attempts to restore the
796.Vt mbuf chain
797to its original state.
798.\"
799.It Fn m_apply mbuf off len f arg
800Apply a function to an
801.Vt mbuf chain ,
802at offset
803.Fa off ,
804for length
805.Fa len
806bytes.
807Typically used to avoid calls to
808.Fn m_pullup
809which would otherwise be unnecessary or undesirable.
810.Fa arg
811is a convenience argument which is passed to the callback function
812.Fa f .
813.Pp
814Each time
815.Fn f
816is called, it will be passed
817.Fa arg ,
818a pointer to the
819.Fa data
820in the current mbuf, and the length
821.Fa len
822of the data in this mbuf to which the function should be applied.
823.Pp
824The function should return zero to indicate success;
825otherwise, if an error is indicated, then
826.Fn m_apply
827will return the error and stop iterating through the
828.Vt mbuf chain .
829.\"
830.It Fn m_getptr mbuf loc off
831Return a pointer to the mbuf containing the data located at
832.Fa loc
833bytes from the beginning of the
834.Vt mbuf chain .
835The corresponding offset into the mbuf will be stored in
836.Fa *off .
837.It Fn m_defrag m0 how
838Defragment an mbuf chain, returning the shortest possible
839chain of mbufs and clusters.
840If allocation fails and this can not be completed,
841.Dv NULL
842will be returned and the original chain will be unchanged.
843Upon success, the original chain will be freed and the new
844chain will be returned.
845.Fa how
846should be either
847.Dv M_TRYWAIT
848or
849.Dv M_DONTWAIT ,
850depending on the caller's preference.
851.Pp
852This function is especially useful in network drivers, where
853certain long mbuf chains must be shortened before being added
854to TX descriptor lists.
855.El
856.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
857This section currently applies to TCP/IP only.
858In order to save the host CPU resources, computing checksums is
859offloaded to the network interface hardware if possible.
860The
861.Va m_pkthdr
862member of the leading
863.Vt mbuf
864of a packet contains two fields used for that purpose,
865.Vt int Va csum_flags
866and
867.Vt int Va csum_data .
868The meaning of those fields depends on the direction a packet flows in,
869and on whether the packet is fragmented.
870Henceforth,
871.Va csum_flags
872or
873.Va csum_data
874of a packet
875will denote the corresponding field of the
876.Va m_pkthdr
877member of the leading
878.Vt mbuf
879in the
880.Vt mbuf chain
881containing the packet.
882.Pp
883On output, checksum offloading is attempted after the outgoing
884interface has been determined for a packet.
885The interface-specific field
886.Va ifnet.if_data.ifi_hwassist
887(see
888.Xr ifnet 9 )
889is consulted for the capabilities of the interface to assist in
890computing checksums.
891The
892.Va csum_flags
893field of the packet header is set to indicate which actions the interface
894is supposed to perform on it.
895The actions unsupported by the network interface are done in
896software prior to passing the packet down to the interface driver;
897such actions will never be requested through
898.Va csum_flags .
899.Pp
900The flags demanding a particular action from an interface are as follows:
901.Bl -tag -width ".Dv CSUM_TCP" -offset indent
902.It Dv CSUM_IP
903The IP header checksum is to be computed and stored in the
904corresponding field of the packet.
905The hardware is expected to know the format of an IP header
906to determine the offset of the IP checksum field.
907.It Dv CSUM_TCP
908The TCP checksum is to be computed.
909(See below.)
910.It Dv CSUM_UDP
911The UDP checksum is to be computed.
912(See below.)
913.El
914.Pp
915Should a TCP or UDP checksum be offloaded to the hardware,
916the field
917.Va csum_data
918will contain the byte offset of the checksum field relative to the
919end of the IP header.
920In this case, the checksum field will be initially
921set by the TCP/IP module to the checksum of the pseudo header
922defined by the TCP and UDP specifications.
923.Pp
924For outbound packets which have been fragmented
925by the host CPU, the following will also be true,
926regardless of the checksum flag settings:
927.Bl -bullet -offset indent
928.It
929all fragments will have the flag
930.Dv M_FRAG
931set in their
932.Va m_flags
933field;
934.It
935the first and the last fragments in the chain will have
936.Dv M_FIRSTFRAG
937or
938.Dv M_LASTFRAG
939set in their
940.Va m_flags ,
941correspondingly;
942.It
943the first fragment in the chain will have the total number
944of fragments contained in its
945.Va csum_data
946field.
947.El
948.Pp
949The last rule for fragmented packets takes precedence over the one
950for a TCP or UDP checksum.
951Nevertheless, offloading a TCP or UDP checksum is possible for a
952fragmented packet if the flag
953.Dv CSUM_IP_FRAGS
954is set in the field
955.Va ifnet.if_data.ifi_hwassist
956associated with the network interface.
957However, in this case the interface is expected to figure out
958the location of the checksum field within the sequence of fragments
959by itself because
960.Va csum_data
961contains a fragment count instead of a checksum offset value.
962.Pp
963On input, an interface indicates the actions it has performed
964on a packet by setting one or more of the following flags in
965.Va csum_flags
966associated with the packet:
967.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
968.It Dv CSUM_IP_CHECKED
969The IP header checksum has been computed.
970.It Dv CSUM_IP_VALID
971The IP header has a valid checksum.
972This flag can appear only in combination with
973.Dv CSUM_IP_CHECKED .
974.It Dv CSUM_DATA_VALID
975The checksum of the data portion of the IP packet has been computed
976and stored in the field
977.Va csum_data
978in network byte order.
979.It Dv CSUM_PSEUDO_HDR
980Can be set only along with
981.Dv CSUM_DATA_VALID
982to indicate that the IP data checksum found in
983.Va csum_data
984allows for the pseudo header defined by the TCP and UDP specifications.
985Otherwise the checksum of the pseudo header must be calculated by
986the host CPU and added to
987.Va csum_data
988to obtain the final checksum to be used for TCP or UDP validation purposes.
989.El
990.Pp
991If a particular network interface just indicates success or
992failure of TCP or UDP checksum validation without returning
993the exact value of the checksum to the host CPU, its driver can mark
994.Dv CSUM_DATA_VALID
995and
996.Dv CSUM_PSEUDO_HDR
997in
998.Va csum_flags ,
999and set
1000.Va csum_data
1001to
1002.Li 0xFFFF
1003hexadecimal to indicate a valid checksum.
1004It is a peculiarity of the algorithm used that the Internet checksum
1005calculated over any valid packet will be
1006.Li 0xFFFF
1007as long as the original checksum field is included.
1008.Pp
1009For inbound packets which are IP fragments, all
1010.Va csum_data
1011fields will be summed during reassembly to obtain the final checksum
1012value passed to an upper layer in the
1013.Va csum_data
1014field of the reassembled packet.
1015The
1016.Va csum_flags
1017fields of all fragments will be consolidated using bitwise AND
1018to obtain the final value for
1019.Va csum_flags .
1020Thus, in order to successfully
1021offload checksum computation for fragmented data,
1022all fragments should have the same value of
1023.Va csum_flags .
1024.Sh STRESS TESTING
1025When running a kernel compiled with the option
1026.Dv MBUF_STRESS_TEST ,
1027the following
1028.Xr sysctl 8 Ns
1029-controlled options may be used to create
1030various failure/extreme cases for testing of network drivers
1031and other parts of the kernel that rely on
1032.Vt mbufs .
1033.Bl -tag -width indent
1034.It Va net.inet.ip.mbuf_frag_size
1035Causes
1036.Fn ip_output
1037to fragment outgoing
1038.Vt mbuf chains
1039into fragments of the specified size.
1040Setting this variable to 1 is an excellent way to
1041test the long
1042.Vt mbuf chain
1043handling ability of network drivers.
1044.It Va kern.ipc.m_defragrandomfailures
1045Causes the function
1046.Fn m_defrag
1047to randomly fail, returning
1048.Dv NULL .
1049Any piece of code which uses
1050.Fn m_defrag
1051should be tested with this feature.
1052.El
1053.Sh RETURN VALUES
1054See above.
1055.Sh SEE ALSO
1056.Xr ifnet 9 ,
1057.Xr mbuf_tags 9
1058.Sh HISTORY
1059.\" Please correct me if I'm wrong
1060.Vt Mbufs
1061appeared in an early version of
1062.Bx .
1063Besides being used for network packets, they were used
1064to store various dynamic structures, such as routing table
1065entries, interface addresses, protocol control blocks, etc.
1066.Sh AUTHORS
1067The original
1068.Nm
1069man page was written by Yar Tikhiy.
453The new
454.Vt mbuf chain
455pointer or
456.Dv NULL
457is in
458.Fa mbuf
459after the call.
460.It Fn M_MOVE_PKTHDR to from
461Using this macro is equivalent to calling
462.Fn m_move_pkthdr to from .
463.It Fn M_WRITABLE mbuf
464This macro will evaluate true if
465.Fa mbuf
466is not marked
467.Dv M_RDONLY
468and if either
469.Fa mbuf
470does not contain external storage or,
471if it does,
472then if the reference count of the storage is not greater than 1.
473The
474.Dv M_RDONLY
475flag can be set in
476.Fa mbuf->m_flags .
477This can be achieved during setup of the external storage,
478by passing the
479.Dv M_RDONLY
480bit as a
481.Fa flags
482argument to the
483.Fn MEXTADD
484macro, or can be directly set in individual
485.Vt mbufs .
486.It Fn MCHTYPE mbuf type
487Change the type of
488.Fa mbuf
489to
490.Fa type .
491This is a relatively expensive operation and should be avoided.
492.El
493.Pp
494The functions are:
495.Bl -ohang -offset indent
496.It Fn m_get how type
497A function version of
498.Fn MGET
499for non-critical paths.
500.It Fn m_getm orig len how type
501Allocate
502.Fa len
503bytes worth of
504.Vt mbufs
505and
506.Vt mbuf clusters
507if necessary and append the resulting allocated
508.Vt mbuf chain
509to the
510.Vt mbuf chain
511.Fa orig ,
512if it is
513.No non- Ns Dv NULL .
514If the allocation fails at any point,
515free whatever was allocated and return
516.Dv NULL .
517If
518.Fa orig
519is
520.No non- Ns Dv NULL ,
521it will not be freed.
522It is possible to use
523.Fn m_getm
524to either append
525.Fa len
526bytes to an existing
527.Vt mbuf
528or
529.Vt mbuf chain
530(for example, one which may be sitting in a pre-allocated ring)
531or to simply perform an all-or-nothing
532.Vt mbuf
533and
534.Vt mbuf cluster
535allocation.
536.It Fn m_gethdr how type
537A function version of
538.Fn MGETHDR
539for non-critical paths.
540.It Fn m_getcl how type flags
541Fetch an
542.Vt mbuf
543with a
544.Vt mbuf cluster
545attached to it.
546If one of the allocations fails, the entire allocation fails.
547This routine is the preferred way of fetching both the
548.Vt mbuf
549and
550.Vt mbuf cluster
551together, as it avoids having to unlock/relock between allocations.
552Returns
553.Dv NULL
554on failure.
555.It Fn m_getclr how type
556Allocate an
557.Vt mbuf
558and zero out the data region.
559.It Fn m_free mbuf
560Frees
561.Vt mbuf .
562.El
563.Pp
564The functions below operate on
565.Vt mbuf chains .
566.Bl -ohang -offset indent
567.It Fn m_freem mbuf
568Free an entire
569.Vt mbuf chain ,
570including any external storage.
571.\"
572.It Fn m_adj mbuf len
573Trim
574.Fa len
575bytes from the head of an
576.Vt mbuf chain
577if
578.Fa len
579is positive, from the tail otherwise.
580.\"
581.It Fn m_prepend mbuf len how
582Allocate a new
583.Vt mbuf
584and prepend it to the
585.Vt mbuf chain ,
586handling
587.Dv M_PKTHDR
588properly.
589.Sy Note :
590It doesn't allocate any
591.Vt mbuf clusters ,
592so
593.Fa len
594must be less than
595.Dv MLEN
596or
597.Dv MHLEN ,
598depending on the
599.Dv M_PKTHDR
600flag setting.
601.\"
602.It Fn m_pullup mbuf len
603Arrange that the first
604.Fa len
605bytes of an
606.Vt mbuf chain
607are contiguous and lie in the data area of
608.Fa mbuf ,
609so they are accessible with
610.Fn mtod mbuf type .
611Return the new
612.Vt mbuf chain
613on success,
614.Dv NULL
615on failure
616(the
617.Vt mbuf chain
618is freed in this case).
619.Sy Note :
620It doesn't allocate any
621.Vt mbuf clusters ,
622so
623.Fa len
624must be less than
625.Dv MHLEN .
626.\"
627.It Fn m_copym mbuf offset len how
628Make a copy of an
629.Vt mbuf chain
630starting
631.Fa offset
632bytes from the beginning, continuing for
633.Fa len
634bytes.
635If
636.Fa len
637is
638.Dv M_COPYALL ,
639copy to the end of the
640.Vt mbuf chain .
641.Sy Note :
642The copy is read-only, because the
643.Vt mbuf clusters
644are not copied, only their reference counts are incremented.
645.\"
646.It Fn m_copypacket mbuf how
647Copy an entire packet including header, which must be present.
648This is an optimized version of the common case
649.Fn m_copym mbuf 0 M_COPYALL how .
650.Sy Note :
651The copy is read-only, because the
652.Vt mbuf clusters
653are not copied, only their reference counts are incremented.
654.\"
655.It Fn m_dup mbuf how
656Copy a packet header
657.Vt mbuf chain
658into a completely new
659.Vt mbuf chain ,
660including copying any
661.Vt mbuf clusters .
662Use this instead of
663.Fn m_copypacket
664when you need a writable copy of an
665.Vt mbuf chain .
666.\"
667.It Fn m_copydata mbuf offset len buf
668Copy data from an
669.Vt mbuf chain
670starting
671.Fa offset
672bytes from the beginning, continuing for
673.Fa len
674bytes, into the indicated buffer
675.Fa buf .
676.\"
677.It Fn m_copyback mbuf offset len buf
678Copy
679.Fa len
680bytes from the buffer
681.Fa buf
682back into the indicated
683.Vt mbuf chain ,
684starting at
685.Fa offset
686bytes from the beginning of the
687.Vt mbuf chain ,
688extending the
689.Vt mbuf chain
690if necessary.
691.Sy Note :
692It doesn't allocate any
693.Vt mbuf clusters ,
694just adds
695.Vt mbufs
696to the
697.Vt mbuf chain .
698It's safe to set
699.Fa offset
700beyond the current
701.Vt mbuf chain
702end: zeroed
703.Vt mbufs
704will be allocated to fill the space.
705.\"
706.It Fn m_length mbuf last
707Return the length of the
708.Vt mbuf chain ,
709and optionally a pointer to the last
710.Vt mbuf .
711.\"
712.It Fn m_dup_pkthdr to from how
713Upon the function's completion, the
714.Vt mbuf
715.Fa to
716will contain an identical copy of
717.Fa from->m_pkthdr
718and the per-packet attributes found in the
719.Vt mbuf chain
720.Fa from .
721The
722.Vt mbuf
723.Fa from
724must have the flag
725.Dv M_PKTHDR
726initially set, and
727.Fa to
728must be empty on entry.
729.\"
730.It Fn m_move_pkthdr to from
731Move
732.Va m_pkthdr
733and the per-packet attributes from the
734.Vt mbuf chain
735.Fa from
736to the
737.Vt mbuf
738.Fa to .
739The
740.Vt mbuf
741.Fa from
742must have the flag
743.Dv M_PKTHDR
744initially set, and
745.Fa to
746must be empty on entry.
747Upon the function's completion,
748.Fa from
749will have the flag
750.Dv M_PKTHDR
751and the per-packet attributes cleared.
752.\"
753.It Fn m_fixhdr mbuf
754Set the packet-header length to the length of the
755.Vt mbuf chain .
756.\"
757.It Fn m_devget buf len offset ifp copy
758Copy data from device-local memory pointed to by
759.Fa buf
760to an
761.Vt mbuf chain .
762The copy is done using a specified copy routine
763.Fa copy ,
764or
765.Fn bcopy
766if
767.Fa copy
768is
769.Dv NULL .
770.\"
771.It Fn m_cat m n
772Concatenate
773.Fa n
774to
775.Fa m .
776Both
777.Vt mbuf chains
778must be of the same type.
779.Fa n
780is still valid after the function returns.
781.Sy Note :
782It does not handle
783.Dv M_PKTHDR
784and friends.
785.\"
786.It Fn m_split mbuf len how
787Partition an
788.Vt mbuf chain
789in two pieces, returning the tail:
790all but the first
791.Fa len
792bytes.
793In case of failure, it returns
794.Dv NULL
795and attempts to restore the
796.Vt mbuf chain
797to its original state.
798.\"
799.It Fn m_apply mbuf off len f arg
800Apply a function to an
801.Vt mbuf chain ,
802at offset
803.Fa off ,
804for length
805.Fa len
806bytes.
807Typically used to avoid calls to
808.Fn m_pullup
809which would otherwise be unnecessary or undesirable.
810.Fa arg
811is a convenience argument which is passed to the callback function
812.Fa f .
813.Pp
814Each time
815.Fn f
816is called, it will be passed
817.Fa arg ,
818a pointer to the
819.Fa data
820in the current mbuf, and the length
821.Fa len
822of the data in this mbuf to which the function should be applied.
823.Pp
824The function should return zero to indicate success;
825otherwise, if an error is indicated, then
826.Fn m_apply
827will return the error and stop iterating through the
828.Vt mbuf chain .
829.\"
830.It Fn m_getptr mbuf loc off
831Return a pointer to the mbuf containing the data located at
832.Fa loc
833bytes from the beginning of the
834.Vt mbuf chain .
835The corresponding offset into the mbuf will be stored in
836.Fa *off .
837.It Fn m_defrag m0 how
838Defragment an mbuf chain, returning the shortest possible
839chain of mbufs and clusters.
840If allocation fails and this can not be completed,
841.Dv NULL
842will be returned and the original chain will be unchanged.
843Upon success, the original chain will be freed and the new
844chain will be returned.
845.Fa how
846should be either
847.Dv M_TRYWAIT
848or
849.Dv M_DONTWAIT ,
850depending on the caller's preference.
851.Pp
852This function is especially useful in network drivers, where
853certain long mbuf chains must be shortened before being added
854to TX descriptor lists.
855.El
856.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
857This section currently applies to TCP/IP only.
858In order to save the host CPU resources, computing checksums is
859offloaded to the network interface hardware if possible.
860The
861.Va m_pkthdr
862member of the leading
863.Vt mbuf
864of a packet contains two fields used for that purpose,
865.Vt int Va csum_flags
866and
867.Vt int Va csum_data .
868The meaning of those fields depends on the direction a packet flows in,
869and on whether the packet is fragmented.
870Henceforth,
871.Va csum_flags
872or
873.Va csum_data
874of a packet
875will denote the corresponding field of the
876.Va m_pkthdr
877member of the leading
878.Vt mbuf
879in the
880.Vt mbuf chain
881containing the packet.
882.Pp
883On output, checksum offloading is attempted after the outgoing
884interface has been determined for a packet.
885The interface-specific field
886.Va ifnet.if_data.ifi_hwassist
887(see
888.Xr ifnet 9 )
889is consulted for the capabilities of the interface to assist in
890computing checksums.
891The
892.Va csum_flags
893field of the packet header is set to indicate which actions the interface
894is supposed to perform on it.
895The actions unsupported by the network interface are done in
896software prior to passing the packet down to the interface driver;
897such actions will never be requested through
898.Va csum_flags .
899.Pp
900The flags demanding a particular action from an interface are as follows:
901.Bl -tag -width ".Dv CSUM_TCP" -offset indent
902.It Dv CSUM_IP
903The IP header checksum is to be computed and stored in the
904corresponding field of the packet.
905The hardware is expected to know the format of an IP header
906to determine the offset of the IP checksum field.
907.It Dv CSUM_TCP
908The TCP checksum is to be computed.
909(See below.)
910.It Dv CSUM_UDP
911The UDP checksum is to be computed.
912(See below.)
913.El
914.Pp
915Should a TCP or UDP checksum be offloaded to the hardware,
916the field
917.Va csum_data
918will contain the byte offset of the checksum field relative to the
919end of the IP header.
920In this case, the checksum field will be initially
921set by the TCP/IP module to the checksum of the pseudo header
922defined by the TCP and UDP specifications.
923.Pp
924For outbound packets which have been fragmented
925by the host CPU, the following will also be true,
926regardless of the checksum flag settings:
927.Bl -bullet -offset indent
928.It
929all fragments will have the flag
930.Dv M_FRAG
931set in their
932.Va m_flags
933field;
934.It
935the first and the last fragments in the chain will have
936.Dv M_FIRSTFRAG
937or
938.Dv M_LASTFRAG
939set in their
940.Va m_flags ,
941correspondingly;
942.It
943the first fragment in the chain will have the total number
944of fragments contained in its
945.Va csum_data
946field.
947.El
948.Pp
949The last rule for fragmented packets takes precedence over the one
950for a TCP or UDP checksum.
951Nevertheless, offloading a TCP or UDP checksum is possible for a
952fragmented packet if the flag
953.Dv CSUM_IP_FRAGS
954is set in the field
955.Va ifnet.if_data.ifi_hwassist
956associated with the network interface.
957However, in this case the interface is expected to figure out
958the location of the checksum field within the sequence of fragments
959by itself because
960.Va csum_data
961contains a fragment count instead of a checksum offset value.
962.Pp
963On input, an interface indicates the actions it has performed
964on a packet by setting one or more of the following flags in
965.Va csum_flags
966associated with the packet:
967.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
968.It Dv CSUM_IP_CHECKED
969The IP header checksum has been computed.
970.It Dv CSUM_IP_VALID
971The IP header has a valid checksum.
972This flag can appear only in combination with
973.Dv CSUM_IP_CHECKED .
974.It Dv CSUM_DATA_VALID
975The checksum of the data portion of the IP packet has been computed
976and stored in the field
977.Va csum_data
978in network byte order.
979.It Dv CSUM_PSEUDO_HDR
980Can be set only along with
981.Dv CSUM_DATA_VALID
982to indicate that the IP data checksum found in
983.Va csum_data
984allows for the pseudo header defined by the TCP and UDP specifications.
985Otherwise the checksum of the pseudo header must be calculated by
986the host CPU and added to
987.Va csum_data
988to obtain the final checksum to be used for TCP or UDP validation purposes.
989.El
990.Pp
991If a particular network interface just indicates success or
992failure of TCP or UDP checksum validation without returning
993the exact value of the checksum to the host CPU, its driver can mark
994.Dv CSUM_DATA_VALID
995and
996.Dv CSUM_PSEUDO_HDR
997in
998.Va csum_flags ,
999and set
1000.Va csum_data
1001to
1002.Li 0xFFFF
1003hexadecimal to indicate a valid checksum.
1004It is a peculiarity of the algorithm used that the Internet checksum
1005calculated over any valid packet will be
1006.Li 0xFFFF
1007as long as the original checksum field is included.
1008.Pp
1009For inbound packets which are IP fragments, all
1010.Va csum_data
1011fields will be summed during reassembly to obtain the final checksum
1012value passed to an upper layer in the
1013.Va csum_data
1014field of the reassembled packet.
1015The
1016.Va csum_flags
1017fields of all fragments will be consolidated using bitwise AND
1018to obtain the final value for
1019.Va csum_flags .
1020Thus, in order to successfully
1021offload checksum computation for fragmented data,
1022all fragments should have the same value of
1023.Va csum_flags .
1024.Sh STRESS TESTING
1025When running a kernel compiled with the option
1026.Dv MBUF_STRESS_TEST ,
1027the following
1028.Xr sysctl 8 Ns
1029-controlled options may be used to create
1030various failure/extreme cases for testing of network drivers
1031and other parts of the kernel that rely on
1032.Vt mbufs .
1033.Bl -tag -width indent
1034.It Va net.inet.ip.mbuf_frag_size
1035Causes
1036.Fn ip_output
1037to fragment outgoing
1038.Vt mbuf chains
1039into fragments of the specified size.
1040Setting this variable to 1 is an excellent way to
1041test the long
1042.Vt mbuf chain
1043handling ability of network drivers.
1044.It Va kern.ipc.m_defragrandomfailures
1045Causes the function
1046.Fn m_defrag
1047to randomly fail, returning
1048.Dv NULL .
1049Any piece of code which uses
1050.Fn m_defrag
1051should be tested with this feature.
1052.El
1053.Sh RETURN VALUES
1054See above.
1055.Sh SEE ALSO
1056.Xr ifnet 9 ,
1057.Xr mbuf_tags 9
1058.Sh HISTORY
1059.\" Please correct me if I'm wrong
1060.Vt Mbufs
1061appeared in an early version of
1062.Bx .
1063Besides being used for network packets, they were used
1064to store various dynamic structures, such as routing table
1065entries, interface addresses, protocol control blocks, etc.
1066.Sh AUTHORS
1067The original
1068.Nm
1069man page was written by Yar Tikhiy.