Deleted Added
full compact
mbuf.9 (152586) mbuf.9 (156756)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\" notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\" notice, this list of conditions and the following disclaimer in the
11.\" documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\" notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\" notice, this list of conditions and the following disclaimer in the
11.\" documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD: head/share/man/man9/mbuf.9 152586 2005-11-18 17:04:49Z andre $
25.\" $FreeBSD: head/share/man/man9/mbuf.9 156756 2006-03-15 21:11:11Z sam $
26.\"
26.\"
27.Dd November 18, 2005
27.Dd March 15, 2006
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
55.\"
56.Ss Mbuf utility macros
57.Fn mtod "struct mbuf *mbuf" "type"
58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
60.Ft int
61.Fn M_LEADINGSPACE "struct mbuf *mbuf"
62.Ft int
63.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
67.Ft int
68.Fn M_WRITABLE "struct mbuf *mbuf"
69.\"
70.Ss Mbuf allocation functions
71.Ft struct mbuf *
72.Fn m_get "int how" "int type"
73.Ft struct mbuf *
74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
75.Ft struct mbuf *
76.Fn m_getcl "int how" "short type" "int flags"
77.Ft struct mbuf *
78.Fn m_getclr "int how" "int type"
79.Ft struct mbuf *
80.Fn m_gethdr "int how" "int type"
81.Ft struct mbuf *
82.Fn m_free "struct mbuf *mbuf"
83.Ft void
84.Fn m_freem "struct mbuf *mbuf"
85.\"
86.Ss Mbuf utility functions
87.Ft void
88.Fn m_adj "struct mbuf *mbuf" "int len"
89.Ft void
90.Fn m_align "struct mbuf *mbuf" "int len"
91.Ft int
92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
97.Ft struct mbuf *
98.Fn m_pullup "struct mbuf *mbuf" "int len"
99.Ft struct mbuf *
100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
101.Ft struct mbuf *
102.Fn m_copypacket "struct mbuf *mbuf" "int how"
103.Ft struct mbuf *
104.Fn m_dup "struct mbuf *mbuf" "int how"
105.Ft void
106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft void
108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft struct mbuf *
110.Fo m_devget
111.Fa "char *buf"
112.Fa "int len"
113.Fa "int offset"
114.Fa "struct ifnet *ifp"
115.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
116.Fc
117.Ft void
118.Fn m_cat "struct mbuf *m" "struct mbuf *n"
119.Ft u_int
120.Fn m_fixhdr "struct mbuf *mbuf"
121.Ft int
122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
123.Ft void
124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
125.Ft u_int
126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
127.Ft struct mbuf *
128.Fn m_split "struct mbuf *mbuf" "int len" "int how"
129.Ft int
130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
131.Ft struct mbuf *
132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
133.Ft struct mbuf *
134.Fn m_defrag "struct mbuf *m0" "int how"
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
55.\"
56.Ss Mbuf utility macros
57.Fn mtod "struct mbuf *mbuf" "type"
58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
60.Ft int
61.Fn M_LEADINGSPACE "struct mbuf *mbuf"
62.Ft int
63.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
67.Ft int
68.Fn M_WRITABLE "struct mbuf *mbuf"
69.\"
70.Ss Mbuf allocation functions
71.Ft struct mbuf *
72.Fn m_get "int how" "int type"
73.Ft struct mbuf *
74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
75.Ft struct mbuf *
76.Fn m_getcl "int how" "short type" "int flags"
77.Ft struct mbuf *
78.Fn m_getclr "int how" "int type"
79.Ft struct mbuf *
80.Fn m_gethdr "int how" "int type"
81.Ft struct mbuf *
82.Fn m_free "struct mbuf *mbuf"
83.Ft void
84.Fn m_freem "struct mbuf *mbuf"
85.\"
86.Ss Mbuf utility functions
87.Ft void
88.Fn m_adj "struct mbuf *mbuf" "int len"
89.Ft void
90.Fn m_align "struct mbuf *mbuf" "int len"
91.Ft int
92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
97.Ft struct mbuf *
98.Fn m_pullup "struct mbuf *mbuf" "int len"
99.Ft struct mbuf *
100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
101.Ft struct mbuf *
102.Fn m_copypacket "struct mbuf *mbuf" "int how"
103.Ft struct mbuf *
104.Fn m_dup "struct mbuf *mbuf" "int how"
105.Ft void
106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft void
108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft struct mbuf *
110.Fo m_devget
111.Fa "char *buf"
112.Fa "int len"
113.Fa "int offset"
114.Fa "struct ifnet *ifp"
115.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
116.Fc
117.Ft void
118.Fn m_cat "struct mbuf *m" "struct mbuf *n"
119.Ft u_int
120.Fn m_fixhdr "struct mbuf *mbuf"
121.Ft int
122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
123.Ft void
124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
125.Ft u_int
126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
127.Ft struct mbuf *
128.Fn m_split "struct mbuf *mbuf" "int len" "int how"
129.Ft int
130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
131.Ft struct mbuf *
132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
133.Ft struct mbuf *
134.Fn m_defrag "struct mbuf *m0" "int how"
135.Ft struct mbuf *
136.Fn m_unshare "struct mbuf *m0" "int how"
135.\"
136.Sh DESCRIPTION
137An
138.Vt mbuf
139is a basic unit of memory management in the kernel IPC subsystem.
140Network packets and socket buffers are stored in
141.Vt mbufs .
142A network packet may span multiple
143.Vt mbufs
144arranged into a
145.Vt mbuf chain
146(linked list),
147which allows adding or trimming
148network headers with little overhead.
149.Pp
150While a developer should not bother with
151.Vt mbuf
152internals without serious
153reason in order to avoid incompatibilities with future changes, it
154is useful to understand the general structure of an
155.Vt mbuf .
156.Pp
157An
158.Vt mbuf
159consists of a variable-sized header and a small internal
160buffer for data.
161The total size of an
162.Vt mbuf ,
163.Dv MSIZE ,
164is a constant defined in
165.In sys/param.h .
166The
167.Vt mbuf
168header includes:
169.Pp
170.Bl -tag -width "m_nextpkt" -offset indent
171.It Va m_next
172.Pq Vt struct mbuf *
173A pointer to the next
174.Vt mbuf
175in the
176.Vt mbuf chain .
177.It Va m_nextpkt
178.Pq Vt struct mbuf *
179A pointer to the next
180.Vt mbuf chain
181in the queue.
182.It Va m_data
183.Pq Vt caddr_t
184A pointer to data attached to this
185.Vt mbuf .
186.It Va m_len
187.Pq Vt int
188The length of the data.
189.It Va m_type
190.Pq Vt short
191The type of the data.
192.It Va m_flags
193.Pq Vt int
194The
195.Vt mbuf
196flags.
197.El
198.Pp
199The
200.Vt mbuf
201flag bits are defined as follows:
202.Bd -literal
203/* mbuf flags */
204#define M_EXT 0x0001 /* has associated external storage */
205#define M_PKTHDR 0x0002 /* start of record */
206#define M_EOR 0x0004 /* end of record */
207#define M_RDONLY 0x0008 /* associated data marked read-only */
208#define M_PROTO1 0x0010 /* protocol-specific */
209#define M_PROTO2 0x0020 /* protocol-specific */
210#define M_PROTO3 0x0040 /* protocol-specific */
211#define M_PROTO4 0x0080 /* protocol-specific */
212#define M_PROTO5 0x0100 /* protocol-specific */
213#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */
214#define M_FREELIST 0x8000 /* mbuf is on the free list */
215
216/* mbuf pkthdr flags (also stored in m_flags) */
217#define M_BCAST 0x0200 /* send/received as link-level broadcast */
218#define M_MCAST 0x0400 /* send/received as link-level multicast */
219#define M_FRAG 0x0800 /* packet is fragment of larger packet */
220#define M_FIRSTFRAG 0x1000 /* packet is first fragment */
221#define M_LASTFRAG 0x2000 /* packet is last fragment */
222.Ed
223.Pp
224The available
225.Vt mbuf
226types are defined as follows:
227.Bd -literal
228/* mbuf types */
229#define MT_DATA 1 /* dynamic (data) allocation */
230#define MT_HEADER 2 /* packet header */
231#define MT_SONAME 8 /* socket name */
232#define MT_FTABLE 11 /* fragment reassembly header */
233#define MT_CONTROL 14 /* extra-data protocol message */
234#define MT_OOBDATA 15 /* expedited data */
235.Ed
236.Pp
237If the
238.Dv M_PKTHDR
239flag is set, a
240.Vt struct pkthdr Va m_pkthdr
241is added to the
242.Vt mbuf
243header.
244It contains a pointer to the interface
245the packet has been received from
246.Pq Vt struct ifnet Va *rcvif ,
247and the total packet length
248.Pq Vt int Va len .
249Optionally, it may also contain an attached list of packet tags
250.Pq Vt "struct m_tag" .
251See
252.Xr mbuf_tags 9
253for details.
254Fields used in offloading checksum calculation to the hardware are kept in
255.Va m_pkthdr
256as well.
257See
258.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
259for details.
260.Pp
261If small enough, data is stored in the internal data buffer of an
262.Vt mbuf .
263If the data is sufficiently large, another
264.Vt mbuf
265may be added to the
266.Vt mbuf chain ,
267or external storage may be associated with the
268.Vt mbuf .
269.Dv MHLEN
270bytes of data can fit into an
271.Vt mbuf
272with the
273.Dv M_PKTHDR
274flag set,
275.Dv MLEN
276bytes can otherwise.
277.Pp
278If external storage is being associated with an
279.Vt mbuf ,
280the
281.Va m_ext
282header is added at the cost of losing the internal data buffer.
283It includes a pointer to external storage, the size of the storage,
284a pointer to a function used for freeing the storage,
285a pointer to an optional argument that can be passed to the function,
286and a pointer to a reference counter.
287An
288.Vt mbuf
289using external storage has the
290.Dv M_EXT
291flag set.
292.Pp
293The system supplies a macro for allocating the desired external storage
294buffer,
295.Dv MEXTADD .
296.Pp
297The allocation and management of the reference counter is handled by the
298subsystem.
299.Pp
300The system also supplies a default type of external storage buffer called an
301.Vt mbuf cluster .
302.Vt Mbuf clusters
303can be allocated and configured with the use of the
304.Dv MCLGET
305macro.
306Each
307.Vt mbuf cluster
308is
309.Dv MCLBYTES
310in size, where MCLBYTES is a machine-dependent constant.
311The system defines an advisory macro
312.Dv MINCLSIZE ,
313which is the smallest amount of data to put into an
314.Vt mbuf cluster .
315It is equal to the sum of
316.Dv MLEN
317and
318.Dv MHLEN .
319It is typically preferable to store data into the data region of an
320.Vt mbuf ,
321if size permits, as opposed to allocating a separate
322.Vt mbuf cluster
323to hold the same data.
324.\"
325.Ss Macros and Functions
326There are numerous predefined macros and functions that provide the
327developer with common utilities.
328.\"
329.Bl -ohang -offset indent
330.It Fn mtod mbuf type
331Convert an
332.Fa mbuf
333pointer to a data pointer.
334The macro expands to the data pointer cast to the pointer of the specified
335.Fa type .
336.Sy Note :
337It is advisable to ensure that there is enough contiguous data in
338.Fa mbuf .
339See
340.Fn m_pullup
341for details.
342.It Fn MGET mbuf how type
343Allocate an
344.Vt mbuf
345and initialize it to contain internal data.
346.Fa mbuf
347will point to the allocated
348.Vt mbuf
349on success, or be set to
350.Dv NULL
351on failure.
352The
353.Fa how
354argument is to be set to
355.Dv M_TRYWAIT
356or
357.Dv M_DONTWAIT .
358It specifies whether the caller is willing to block if necessary.
359If
360.Fa how
361is set to
362.Dv M_TRYWAIT ,
363a failed allocation will result in the caller being put
364to sleep for a designated
365kern.ipc.mbuf_wait
366.Xr ( sysctl 8
367tunable)
368number of ticks.
369A number of other functions and macros related to
370.Vt mbufs
371have the same argument because they may
372at some point need to allocate new
373.Vt mbufs .
374.Pp
375Programmers should be careful not to confuse the
376.Vt mbuf
377allocation flag
378.Dv M_DONTWAIT
379with the
380.Xr malloc 9
381allocation flag,
382.Dv M_NOWAIT .
383They are not the same.
384.It Fn MGETHDR mbuf how type
385Allocate an
386.Vt mbuf
387and initialize it to contain a packet header
388and internal data.
389See
390.Fn MGET
391for details.
392.It Fn MCLGET mbuf how
393Allocate and attach an
394.Vt mbuf cluster
395to
396.Fa mbuf .
397If the macro fails, the
398.Dv M_EXT
399flag will not be set in
400.Fa mbuf .
401.It Fn M_ALIGN mbuf len
402Set the pointer
403.Fa mbuf->m_data
404to place an object of the size
405.Fa len
406at the end of the internal data area of
407.Fa mbuf ,
408long word aligned.
409Applicable only if
410.Fa mbuf
411is newly allocated with
412.Fn MGET
413or
414.Fn m_get .
415.It Fn MH_ALIGN mbuf len
416Serves the same purpose as
417.Fn M_ALIGN
418does, but only for
419.Fa mbuf
420newly allocated with
421.Fn MGETHDR
422or
423.Fn m_gethdr ,
424or initialized by
425.Fn m_dup_pkthdr
426or
427.Fn m_move_pkthdr .
428.It Fn m_align mbuf len
429Serves the same purpose as
430.Fn M_ALIGN
431but handles any type of mbuf.
432.It Fn M_LEADINGSPACE mbuf
433Returns the number of bytes available before the beginning
434of data in
435.Fa mbuf .
436.It Fn M_TRAILINGSPACE mbuf
437Returns the number of bytes available after the end of data in
438.Fa mbuf .
439.It Fn M_PREPEND mbuf len how
440This macro operates on an
441.Vt mbuf chain .
442It is an optimized wrapper for
443.Fn m_prepend
444that can make use of possible empty space before data
445(e.g.\& left after trimming of a link-layer header).
446The new
447.Vt mbuf chain
448pointer or
449.Dv NULL
450is in
451.Fa mbuf
452after the call.
453.It Fn M_MOVE_PKTHDR to from
454Using this macro is equivalent to calling
455.Fn m_move_pkthdr to from .
456.It Fn M_WRITABLE mbuf
457This macro will evaluate true if
458.Fa mbuf
459is not marked
460.Dv M_RDONLY
461and if either
462.Fa mbuf
463does not contain external storage or,
464if it does,
465then if the reference count of the storage is not greater than 1.
466The
467.Dv M_RDONLY
468flag can be set in
469.Fa mbuf->m_flags .
470This can be achieved during setup of the external storage,
471by passing the
472.Dv M_RDONLY
473bit as a
474.Fa flags
475argument to the
476.Fn MEXTADD
477macro, or can be directly set in individual
478.Vt mbufs .
479.It Fn MCHTYPE mbuf type
480Change the type of
481.Fa mbuf
482to
483.Fa type .
484This is a relatively expensive operation and should be avoided.
485.El
486.Pp
487The functions are:
488.Bl -ohang -offset indent
489.It Fn m_get how type
490A function version of
491.Fn MGET
492for non-critical paths.
493.It Fn m_getm orig len how type
494Allocate
495.Fa len
496bytes worth of
497.Vt mbufs
498and
499.Vt mbuf clusters
500if necessary and append the resulting allocated
501.Vt mbuf chain
502to the
503.Vt mbuf chain
504.Fa orig ,
505if it is
506.No non- Ns Dv NULL .
507If the allocation fails at any point,
508free whatever was allocated and return
509.Dv NULL .
510If
511.Fa orig
512is
513.No non- Ns Dv NULL ,
514it will not be freed.
515It is possible to use
516.Fn m_getm
517to either append
518.Fa len
519bytes to an existing
520.Vt mbuf
521or
522.Vt mbuf chain
523(for example, one which may be sitting in a pre-allocated ring)
524or to simply perform an all-or-nothing
525.Vt mbuf
526and
527.Vt mbuf cluster
528allocation.
529.It Fn m_gethdr how type
530A function version of
531.Fn MGETHDR
532for non-critical paths.
533.It Fn m_getcl how type flags
534Fetch an
535.Vt mbuf
536with a
537.Vt mbuf cluster
538attached to it.
539If one of the allocations fails, the entire allocation fails.
540This routine is the preferred way of fetching both the
541.Vt mbuf
542and
543.Vt mbuf cluster
544together, as it avoids having to unlock/relock between allocations.
545Returns
546.Dv NULL
547on failure.
548.It Fn m_getclr how type
549Allocate an
550.Vt mbuf
551and zero out the data region.
552.It Fn m_free mbuf
553Frees
554.Vt mbuf .
555Returns
556.Va m_next
557of the freed
558.Vt mbuf .
559.El
560.Pp
561The functions below operate on
562.Vt mbuf chains .
563.Bl -ohang -offset indent
564.It Fn m_freem mbuf
565Free an entire
566.Vt mbuf chain ,
567including any external storage.
568.\"
569.It Fn m_adj mbuf len
570Trim
571.Fa len
572bytes from the head of an
573.Vt mbuf chain
574if
575.Fa len
576is positive, from the tail otherwise.
577.\"
578.It Fn m_append mbuf len cp
579Append
580.Fa len
581bytes of data
582.Fa cp
583to the
584.Vt mbuf chain .
585Extend the mbuf chain if the new data does not fit in
586existing space.
587.\"
588.It Fn m_prepend mbuf len how
589Allocate a new
590.Vt mbuf
591and prepend it to the
592.Vt mbuf chain ,
593handling
594.Dv M_PKTHDR
595properly.
596.Sy Note :
597It does not allocate any
598.Vt mbuf clusters ,
599so
600.Fa len
601must be less than
602.Dv MLEN
603or
604.Dv MHLEN ,
605depending on the
606.Dv M_PKTHDR
607flag setting.
608.\"
609.It Fn m_copyup mbuf len dstoff
610Similar to
611.Fn m_pullup
612but copies
613.Fa len
614bytes of data into a new mbuf at
615.Fa dstoff
616bytes into the mbuf.
617The
618.Fa dstoff
619argument aligns the data and leaves room for a link layer header.
620Returns the new
621.Vt mbuf chain
622on success,
623and frees the
624.Vt mbuf chain
625and returns
626.Dv NULL
627on failure.
628.Sy Note :
629The function does not allocate
630.Vt mbuf clusters ,
631so
632.Fa len + dstoff
633must be less than
634.Dv MHLEN .
635.\"
636.It Fn m_pullup mbuf len
637Arrange that the first
638.Fa len
639bytes of an
640.Vt mbuf chain
641are contiguous and lie in the data area of
642.Fa mbuf ,
643so they are accessible with
644.Fn mtod mbuf type .
645Return the new
646.Vt mbuf chain
647on success,
648.Dv NULL
649on failure
650(the
651.Vt mbuf chain
652is freed in this case).
653.Sy Note :
654It does not allocate any
655.Vt mbuf clusters ,
656so
657.Fa len
658must be less than
659.Dv MHLEN .
660.\"
661.It Fn m_copym mbuf offset len how
662Make a copy of an
663.Vt mbuf chain
664starting
665.Fa offset
666bytes from the beginning, continuing for
667.Fa len
668bytes.
669If
670.Fa len
671is
672.Dv M_COPYALL ,
673copy to the end of the
674.Vt mbuf chain .
675.Sy Note :
676The copy is read-only, because the
677.Vt mbuf clusters
678are not copied, only their reference counts are incremented.
679.\"
680.It Fn m_copypacket mbuf how
681Copy an entire packet including header, which must be present.
682This is an optimized version of the common case
683.Fn m_copym mbuf 0 M_COPYALL how .
684.Sy Note :
685The copy is read-only, because the
686.Vt mbuf clusters
687are not copied, only their reference counts are incremented.
688.\"
689.It Fn m_dup mbuf how
690Copy a packet header
691.Vt mbuf chain
692into a completely new
693.Vt mbuf chain ,
694including copying any
695.Vt mbuf clusters .
696Use this instead of
697.Fn m_copypacket
698when you need a writable copy of an
699.Vt mbuf chain .
700.\"
701.It Fn m_copydata mbuf offset len buf
702Copy data from an
703.Vt mbuf chain
704starting
705.Fa offset
706bytes from the beginning, continuing for
707.Fa len
708bytes, into the indicated buffer
709.Fa buf .
710.\"
711.It Fn m_copyback mbuf offset len buf
712Copy
713.Fa len
714bytes from the buffer
715.Fa buf
716back into the indicated
717.Vt mbuf chain ,
718starting at
719.Fa offset
720bytes from the beginning of the
721.Vt mbuf chain ,
722extending the
723.Vt mbuf chain
724if necessary.
725.Sy Note :
726It does not allocate any
727.Vt mbuf clusters ,
728just adds
729.Vt mbufs
730to the
731.Vt mbuf chain .
732It is safe to set
733.Fa offset
734beyond the current
735.Vt mbuf chain
736end: zeroed
737.Vt mbufs
738will be allocated to fill the space.
739.\"
740.It Fn m_length mbuf last
741Return the length of the
742.Vt mbuf chain ,
743and optionally a pointer to the last
744.Vt mbuf .
745.\"
746.It Fn m_dup_pkthdr to from how
747Upon the function's completion, the
748.Vt mbuf
749.Fa to
750will contain an identical copy of
751.Fa from->m_pkthdr
752and the per-packet attributes found in the
753.Vt mbuf chain
754.Fa from .
755The
756.Vt mbuf
757.Fa from
758must have the flag
759.Dv M_PKTHDR
760initially set, and
761.Fa to
762must be empty on entry.
763.\"
764.It Fn m_move_pkthdr to from
765Move
766.Va m_pkthdr
767and the per-packet attributes from the
768.Vt mbuf chain
769.Fa from
770to the
771.Vt mbuf
772.Fa to .
773The
774.Vt mbuf
775.Fa from
776must have the flag
777.Dv M_PKTHDR
778initially set, and
779.Fa to
780must be empty on entry.
781Upon the function's completion,
782.Fa from
783will have the flag
784.Dv M_PKTHDR
785and the per-packet attributes cleared.
786.\"
787.It Fn m_fixhdr mbuf
788Set the packet-header length to the length of the
789.Vt mbuf chain .
790.\"
791.It Fn m_devget buf len offset ifp copy
792Copy data from a device local memory pointed to by
793.Fa buf
794to an
795.Vt mbuf chain .
796The copy is done using a specified copy routine
797.Fa copy ,
798or
799.Fn bcopy
800if
801.Fa copy
802is
803.Dv NULL .
804.\"
805.It Fn m_cat m n
806Concatenate
807.Fa n
808to
809.Fa m .
810Both
811.Vt mbuf chains
812must be of the same type.
813.Fa N
814is still valid after the function returns.
815.Sy Note :
816It does not handle
817.Dv M_PKTHDR
818and friends.
819.\"
820.It Fn m_split mbuf len how
821Partition an
822.Vt mbuf chain
823in two pieces, returning the tail:
824all but the first
825.Fa len
826bytes.
827In case of failure, it returns
828.Dv NULL
829and attempts to restore the
830.Vt mbuf chain
831to its original state.
832.\"
833.It Fn m_apply mbuf off len f arg
834Apply a function to an
835.Vt mbuf chain ,
836at offset
837.Fa off ,
838for length
839.Fa len
840bytes.
841Typically used to avoid calls to
842.Fn m_pullup
843which would otherwise be unnecessary or undesirable.
844.Fa arg
845is a convenience argument which is passed to the callback function
846.Fa f .
847.Pp
848Each time
849.Fn f
850is called, it will be passed
851.Fa arg ,
852a pointer to the
853.Fa data
854in the current mbuf, and the length
855.Fa len
856of the data in this mbuf to which the function should be applied.
857.Pp
858The function should return zero to indicate success;
859otherwise, if an error is indicated, then
860.Fn m_apply
861will return the error and stop iterating through the
862.Vt mbuf chain .
863.\"
864.It Fn m_getptr mbuf loc off
865Return a pointer to the mbuf containing the data located at
866.Fa loc
867bytes from the beginning of the
868.Vt mbuf chain .
869The corresponding offset into the mbuf will be stored in
870.Fa *off .
871.It Fn m_defrag m0 how
872Defragment an mbuf chain, returning the shortest possible
873chain of mbufs and clusters.
874If allocation fails and this cannot be completed,
875.Dv NULL
876will be returned and the original chain will be unchanged.
877Upon success, the original chain will be freed and the new
878chain will be returned.
879.Fa how
880should be either
881.Dv M_TRYWAIT
882or
883.Dv M_DONTWAIT ,
884depending on the caller's preference.
885.Pp
886This function is especially useful in network drivers, where
887certain long mbuf chains must be shortened before being added
888to TX descriptor lists.
137.\"
138.Sh DESCRIPTION
139An
140.Vt mbuf
141is a basic unit of memory management in the kernel IPC subsystem.
142Network packets and socket buffers are stored in
143.Vt mbufs .
144A network packet may span multiple
145.Vt mbufs
146arranged into a
147.Vt mbuf chain
148(linked list),
149which allows adding or trimming
150network headers with little overhead.
151.Pp
152While a developer should not bother with
153.Vt mbuf
154internals without serious
155reason in order to avoid incompatibilities with future changes, it
156is useful to understand the general structure of an
157.Vt mbuf .
158.Pp
159An
160.Vt mbuf
161consists of a variable-sized header and a small internal
162buffer for data.
163The total size of an
164.Vt mbuf ,
165.Dv MSIZE ,
166is a constant defined in
167.In sys/param.h .
168The
169.Vt mbuf
170header includes:
171.Pp
172.Bl -tag -width "m_nextpkt" -offset indent
173.It Va m_next
174.Pq Vt struct mbuf *
175A pointer to the next
176.Vt mbuf
177in the
178.Vt mbuf chain .
179.It Va m_nextpkt
180.Pq Vt struct mbuf *
181A pointer to the next
182.Vt mbuf chain
183in the queue.
184.It Va m_data
185.Pq Vt caddr_t
186A pointer to data attached to this
187.Vt mbuf .
188.It Va m_len
189.Pq Vt int
190The length of the data.
191.It Va m_type
192.Pq Vt short
193The type of the data.
194.It Va m_flags
195.Pq Vt int
196The
197.Vt mbuf
198flags.
199.El
200.Pp
201The
202.Vt mbuf
203flag bits are defined as follows:
204.Bd -literal
205/* mbuf flags */
206#define M_EXT 0x0001 /* has associated external storage */
207#define M_PKTHDR 0x0002 /* start of record */
208#define M_EOR 0x0004 /* end of record */
209#define M_RDONLY 0x0008 /* associated data marked read-only */
210#define M_PROTO1 0x0010 /* protocol-specific */
211#define M_PROTO2 0x0020 /* protocol-specific */
212#define M_PROTO3 0x0040 /* protocol-specific */
213#define M_PROTO4 0x0080 /* protocol-specific */
214#define M_PROTO5 0x0100 /* protocol-specific */
215#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */
216#define M_FREELIST 0x8000 /* mbuf is on the free list */
217
218/* mbuf pkthdr flags (also stored in m_flags) */
219#define M_BCAST 0x0200 /* send/received as link-level broadcast */
220#define M_MCAST 0x0400 /* send/received as link-level multicast */
221#define M_FRAG 0x0800 /* packet is fragment of larger packet */
222#define M_FIRSTFRAG 0x1000 /* packet is first fragment */
223#define M_LASTFRAG 0x2000 /* packet is last fragment */
224.Ed
225.Pp
226The available
227.Vt mbuf
228types are defined as follows:
229.Bd -literal
230/* mbuf types */
231#define MT_DATA 1 /* dynamic (data) allocation */
232#define MT_HEADER 2 /* packet header */
233#define MT_SONAME 8 /* socket name */
234#define MT_FTABLE 11 /* fragment reassembly header */
235#define MT_CONTROL 14 /* extra-data protocol message */
236#define MT_OOBDATA 15 /* expedited data */
237.Ed
238.Pp
239If the
240.Dv M_PKTHDR
241flag is set, a
242.Vt struct pkthdr Va m_pkthdr
243is added to the
244.Vt mbuf
245header.
246It contains a pointer to the interface
247the packet has been received from
248.Pq Vt struct ifnet Va *rcvif ,
249and the total packet length
250.Pq Vt int Va len .
251Optionally, it may also contain an attached list of packet tags
252.Pq Vt "struct m_tag" .
253See
254.Xr mbuf_tags 9
255for details.
256Fields used in offloading checksum calculation to the hardware are kept in
257.Va m_pkthdr
258as well.
259See
260.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
261for details.
262.Pp
263If small enough, data is stored in the internal data buffer of an
264.Vt mbuf .
265If the data is sufficiently large, another
266.Vt mbuf
267may be added to the
268.Vt mbuf chain ,
269or external storage may be associated with the
270.Vt mbuf .
271.Dv MHLEN
272bytes of data can fit into an
273.Vt mbuf
274with the
275.Dv M_PKTHDR
276flag set,
277.Dv MLEN
278bytes can otherwise.
279.Pp
280If external storage is being associated with an
281.Vt mbuf ,
282the
283.Va m_ext
284header is added at the cost of losing the internal data buffer.
285It includes a pointer to external storage, the size of the storage,
286a pointer to a function used for freeing the storage,
287a pointer to an optional argument that can be passed to the function,
288and a pointer to a reference counter.
289An
290.Vt mbuf
291using external storage has the
292.Dv M_EXT
293flag set.
294.Pp
295The system supplies a macro for allocating the desired external storage
296buffer,
297.Dv MEXTADD .
298.Pp
299The allocation and management of the reference counter is handled by the
300subsystem.
301.Pp
302The system also supplies a default type of external storage buffer called an
303.Vt mbuf cluster .
304.Vt Mbuf clusters
305can be allocated and configured with the use of the
306.Dv MCLGET
307macro.
308Each
309.Vt mbuf cluster
310is
311.Dv MCLBYTES
312in size, where MCLBYTES is a machine-dependent constant.
313The system defines an advisory macro
314.Dv MINCLSIZE ,
315which is the smallest amount of data to put into an
316.Vt mbuf cluster .
317It is equal to the sum of
318.Dv MLEN
319and
320.Dv MHLEN .
321It is typically preferable to store data into the data region of an
322.Vt mbuf ,
323if size permits, as opposed to allocating a separate
324.Vt mbuf cluster
325to hold the same data.
326.\"
327.Ss Macros and Functions
328There are numerous predefined macros and functions that provide the
329developer with common utilities.
330.\"
331.Bl -ohang -offset indent
332.It Fn mtod mbuf type
333Convert an
334.Fa mbuf
335pointer to a data pointer.
336The macro expands to the data pointer cast to the pointer of the specified
337.Fa type .
338.Sy Note :
339It is advisable to ensure that there is enough contiguous data in
340.Fa mbuf .
341See
342.Fn m_pullup
343for details.
344.It Fn MGET mbuf how type
345Allocate an
346.Vt mbuf
347and initialize it to contain internal data.
348.Fa mbuf
349will point to the allocated
350.Vt mbuf
351on success, or be set to
352.Dv NULL
353on failure.
354The
355.Fa how
356argument is to be set to
357.Dv M_TRYWAIT
358or
359.Dv M_DONTWAIT .
360It specifies whether the caller is willing to block if necessary.
361If
362.Fa how
363is set to
364.Dv M_TRYWAIT ,
365a failed allocation will result in the caller being put
366to sleep for a designated
367.Va kern.ipc.mbuf_wait
368.Xr ( sysctl 8
369tunable)
370number of ticks.
371A number of other functions and macros related to
372.Vt mbufs
373have the same argument because they may
374at some point need to allocate new
375.Vt mbufs .
376.Pp
377Programmers should be careful not to confuse the
378.Vt mbuf
379allocation flag
380.Dv M_DONTWAIT
381with the
382.Xr malloc 9
383allocation flag,
384.Dv M_NOWAIT .
385They are not the same.
386.It Fn MGETHDR mbuf how type
387Allocate an
388.Vt mbuf
389and initialize it to contain a packet header
390and internal data.
391See
392.Fn MGET
393for details.
394.It Fn MCLGET mbuf how
395Allocate and attach an
396.Vt mbuf cluster
397to
398.Fa mbuf .
399If the macro fails, the
400.Dv M_EXT
401flag will not be set in
402.Fa mbuf .
403.It Fn M_ALIGN mbuf len
404Set the pointer
405.Fa mbuf->m_data
406to place an object of the size
407.Fa len
408at the end of the internal data area of
409.Fa mbuf ,
410long word aligned.
411Applicable only if
412.Fa mbuf
413is newly allocated with
414.Fn MGET
415or
416.Fn m_get .
417.It Fn MH_ALIGN mbuf len
418Serves the same purpose as
419.Fn M_ALIGN
420does, but only for
421.Fa mbuf
422newly allocated with
423.Fn MGETHDR
424or
425.Fn m_gethdr ,
426or initialized by
427.Fn m_dup_pkthdr
428or
429.Fn m_move_pkthdr .
430.It Fn m_align mbuf len
431Serves the same purpose as
432.Fn M_ALIGN
433but handles any type of mbuf.
434.It Fn M_LEADINGSPACE mbuf
435Returns the number of bytes available before the beginning
436of data in
437.Fa mbuf .
438.It Fn M_TRAILINGSPACE mbuf
439Returns the number of bytes available after the end of data in
440.Fa mbuf .
441.It Fn M_PREPEND mbuf len how
442This macro operates on an
443.Vt mbuf chain .
444It is an optimized wrapper for
445.Fn m_prepend
446that can make use of possible empty space before data
447(e.g.\& left after trimming of a link-layer header).
448The new
449.Vt mbuf chain
450pointer or
451.Dv NULL
452is in
453.Fa mbuf
454after the call.
455.It Fn M_MOVE_PKTHDR to from
456Using this macro is equivalent to calling
457.Fn m_move_pkthdr to from .
458.It Fn M_WRITABLE mbuf
459This macro will evaluate true if
460.Fa mbuf
461is not marked
462.Dv M_RDONLY
463and if either
464.Fa mbuf
465does not contain external storage or,
466if it does,
467then if the reference count of the storage is not greater than 1.
468The
469.Dv M_RDONLY
470flag can be set in
471.Fa mbuf->m_flags .
472This can be achieved during setup of the external storage,
473by passing the
474.Dv M_RDONLY
475bit as a
476.Fa flags
477argument to the
478.Fn MEXTADD
479macro, or can be directly set in individual
480.Vt mbufs .
481.It Fn MCHTYPE mbuf type
482Change the type of
483.Fa mbuf
484to
485.Fa type .
486This is a relatively expensive operation and should be avoided.
487.El
488.Pp
489The functions are:
490.Bl -ohang -offset indent
491.It Fn m_get how type
492A function version of
493.Fn MGET
494for non-critical paths.
495.It Fn m_getm orig len how type
496Allocate
497.Fa len
498bytes worth of
499.Vt mbufs
500and
501.Vt mbuf clusters
502if necessary and append the resulting allocated
503.Vt mbuf chain
504to the
505.Vt mbuf chain
506.Fa orig ,
507if it is
508.No non- Ns Dv NULL .
509If the allocation fails at any point,
510free whatever was allocated and return
511.Dv NULL .
512If
513.Fa orig
514is
515.No non- Ns Dv NULL ,
516it will not be freed.
517It is possible to use
518.Fn m_getm
519to either append
520.Fa len
521bytes to an existing
522.Vt mbuf
523or
524.Vt mbuf chain
525(for example, one which may be sitting in a pre-allocated ring)
526or to simply perform an all-or-nothing
527.Vt mbuf
528and
529.Vt mbuf cluster
530allocation.
531.It Fn m_gethdr how type
532A function version of
533.Fn MGETHDR
534for non-critical paths.
535.It Fn m_getcl how type flags
536Fetch an
537.Vt mbuf
538with a
539.Vt mbuf cluster
540attached to it.
541If one of the allocations fails, the entire allocation fails.
542This routine is the preferred way of fetching both the
543.Vt mbuf
544and
545.Vt mbuf cluster
546together, as it avoids having to unlock/relock between allocations.
547Returns
548.Dv NULL
549on failure.
550.It Fn m_getclr how type
551Allocate an
552.Vt mbuf
553and zero out the data region.
554.It Fn m_free mbuf
555Frees
556.Vt mbuf .
557Returns
558.Va m_next
559of the freed
560.Vt mbuf .
561.El
562.Pp
563The functions below operate on
564.Vt mbuf chains .
565.Bl -ohang -offset indent
566.It Fn m_freem mbuf
567Free an entire
568.Vt mbuf chain ,
569including any external storage.
570.\"
571.It Fn m_adj mbuf len
572Trim
573.Fa len
574bytes from the head of an
575.Vt mbuf chain
576if
577.Fa len
578is positive, from the tail otherwise.
579.\"
580.It Fn m_append mbuf len cp
581Append
582.Fa len
583bytes of data
584.Fa cp
585to the
586.Vt mbuf chain .
587Extend the mbuf chain if the new data does not fit in
588existing space.
589.\"
590.It Fn m_prepend mbuf len how
591Allocate a new
592.Vt mbuf
593and prepend it to the
594.Vt mbuf chain ,
595handling
596.Dv M_PKTHDR
597properly.
598.Sy Note :
599It does not allocate any
600.Vt mbuf clusters ,
601so
602.Fa len
603must be less than
604.Dv MLEN
605or
606.Dv MHLEN ,
607depending on the
608.Dv M_PKTHDR
609flag setting.
610.\"
611.It Fn m_copyup mbuf len dstoff
612Similar to
613.Fn m_pullup
614but copies
615.Fa len
616bytes of data into a new mbuf at
617.Fa dstoff
618bytes into the mbuf.
619The
620.Fa dstoff
621argument aligns the data and leaves room for a link layer header.
622Returns the new
623.Vt mbuf chain
624on success,
625and frees the
626.Vt mbuf chain
627and returns
628.Dv NULL
629on failure.
630.Sy Note :
631The function does not allocate
632.Vt mbuf clusters ,
633so
634.Fa len + dstoff
635must be less than
636.Dv MHLEN .
637.\"
638.It Fn m_pullup mbuf len
639Arrange that the first
640.Fa len
641bytes of an
642.Vt mbuf chain
643are contiguous and lie in the data area of
644.Fa mbuf ,
645so they are accessible with
646.Fn mtod mbuf type .
647Return the new
648.Vt mbuf chain
649on success,
650.Dv NULL
651on failure
652(the
653.Vt mbuf chain
654is freed in this case).
655.Sy Note :
656It does not allocate any
657.Vt mbuf clusters ,
658so
659.Fa len
660must be less than
661.Dv MHLEN .
662.\"
663.It Fn m_copym mbuf offset len how
664Make a copy of an
665.Vt mbuf chain
666starting
667.Fa offset
668bytes from the beginning, continuing for
669.Fa len
670bytes.
671If
672.Fa len
673is
674.Dv M_COPYALL ,
675copy to the end of the
676.Vt mbuf chain .
677.Sy Note :
678The copy is read-only, because the
679.Vt mbuf clusters
680are not copied, only their reference counts are incremented.
681.\"
682.It Fn m_copypacket mbuf how
683Copy an entire packet including header, which must be present.
684This is an optimized version of the common case
685.Fn m_copym mbuf 0 M_COPYALL how .
686.Sy Note :
687The copy is read-only, because the
688.Vt mbuf clusters
689are not copied, only their reference counts are incremented.
690.\"
691.It Fn m_dup mbuf how
692Copy a packet header
693.Vt mbuf chain
694into a completely new
695.Vt mbuf chain ,
696including copying any
697.Vt mbuf clusters .
698Use this instead of
699.Fn m_copypacket
700when you need a writable copy of an
701.Vt mbuf chain .
702.\"
703.It Fn m_copydata mbuf offset len buf
704Copy data from an
705.Vt mbuf chain
706starting
707.Fa offset
708bytes from the beginning, continuing for
709.Fa len
710bytes, into the indicated buffer
711.Fa buf .
712.\"
713.It Fn m_copyback mbuf offset len buf
714Copy
715.Fa len
716bytes from the buffer
717.Fa buf
718back into the indicated
719.Vt mbuf chain ,
720starting at
721.Fa offset
722bytes from the beginning of the
723.Vt mbuf chain ,
724extending the
725.Vt mbuf chain
726if necessary.
727.Sy Note :
728It does not allocate any
729.Vt mbuf clusters ,
730just adds
731.Vt mbufs
732to the
733.Vt mbuf chain .
734It is safe to set
735.Fa offset
736beyond the current
737.Vt mbuf chain
738end: zeroed
739.Vt mbufs
740will be allocated to fill the space.
741.\"
742.It Fn m_length mbuf last
743Return the length of the
744.Vt mbuf chain ,
745and optionally a pointer to the last
746.Vt mbuf .
747.\"
748.It Fn m_dup_pkthdr to from how
749Upon the function's completion, the
750.Vt mbuf
751.Fa to
752will contain an identical copy of
753.Fa from->m_pkthdr
754and the per-packet attributes found in the
755.Vt mbuf chain
756.Fa from .
757The
758.Vt mbuf
759.Fa from
760must have the flag
761.Dv M_PKTHDR
762initially set, and
763.Fa to
764must be empty on entry.
765.\"
766.It Fn m_move_pkthdr to from
767Move
768.Va m_pkthdr
769and the per-packet attributes from the
770.Vt mbuf chain
771.Fa from
772to the
773.Vt mbuf
774.Fa to .
775The
776.Vt mbuf
777.Fa from
778must have the flag
779.Dv M_PKTHDR
780initially set, and
781.Fa to
782must be empty on entry.
783Upon the function's completion,
784.Fa from
785will have the flag
786.Dv M_PKTHDR
787and the per-packet attributes cleared.
788.\"
789.It Fn m_fixhdr mbuf
790Set the packet-header length to the length of the
791.Vt mbuf chain .
792.\"
793.It Fn m_devget buf len offset ifp copy
794Copy data from device-local memory pointed to by
795.Fa buf
796to an
797.Vt mbuf chain .
798The copy is done using a specified copy routine
799.Fa copy ,
800or
801.Fn bcopy
802if
803.Fa copy
804is
805.Dv NULL .
806.\"
807.It Fn m_cat m n
808Concatenate
809.Fa n
810to
811.Fa m .
812Both
813.Vt mbuf chains
814must be of the same type.
815.Fa N
816is still valid after the function returned.
817.Sy Note :
818It does not handle
819.Dv M_PKTHDR
820and friends.
821.\"
822.It Fn m_split mbuf len how
823Partition an
824.Vt mbuf chain
825in two pieces, returning the tail:
826all but the first
827.Fa len
828bytes.
829In case of failure, it returns
830.Dv NULL
831and attempts to restore the
832.Vt mbuf chain
833to its original state.
834.\"
835.It Fn m_apply mbuf off len f arg
836Apply a function to an
837.Vt mbuf chain ,
838at offset
839.Fa off ,
840for length
841.Fa len
842bytes.
843Typically used to avoid calls to
844.Fn m_pullup
845which would otherwise be unnecessary or undesirable.
846.Fa arg
847is a convenience argument which is passed to the callback function
848.Fa f .
849.Pp
850Each time
851.Fn f
852is called, it will be passed
853.Fa arg ,
854a pointer to the
855.Fa data
856in the current mbuf, and the length
857.Fa len
858of the data in this mbuf to which the function should be applied.
859.Pp
860The function should return zero to indicate success;
861otherwise, if an error is indicated, then
862.Fn m_apply
863will return the error and stop iterating through the
864.Vt mbuf chain .
865.\"
866.It Fn m_getptr mbuf loc off
867Return a pointer to the mbuf containing the data located at
868.Fa loc
869bytes from the beginning of the
870.Vt mbuf chain .
871The corresponding offset into the mbuf will be stored in
872.Fa *off .
873.It Fn m_defrag m0 how
874Defragment an mbuf chain, returning the shortest possible
875chain of mbufs and clusters.
876If allocation fails and this cannot be completed,
877.Dv NULL
878will be returned and the original chain will be unchanged.
879Upon success, the original chain will be freed and the new
880chain will be returned.
881.Fa how
882should be either
883.Dv M_TRYWAIT
884or
885.Dv M_DONTWAIT ,
886depending on the caller's preference.
887.Pp
888This function is especially useful in network drivers, where
889certain long mbuf chains must be shortened before being added
890to TX descriptor lists.
891.It Fn m_unshare m0 how
892Create a version of the specified mbuf chain whose
893contents can be safely modified without affecting other users.
894If allocation fails and this operation cannot be completed,
895.Dv NULL
896will be returned.
897The original mbuf chain is always reclaimed and the reference
898count of any shared mbuf clusters is decremented.
899.Fa how
900should be either
901.Dv M_TRYWAIT
902or
903.Dv M_DONTWAIT ,
904depending on the caller's preference.
905As a side-effect of this process the returned
906mbuf chain may be compacted.
907.Pp
908This function is especially useful in the transmit path of
909network code, when data must be encrypted or otherwise
910altered prior to transmission.
889.El
890.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
891This section currently applies to TCP/IP only.
892In order to save the host CPU resources, computing checksums is
893offloaded to the network interface hardware if possible.
894The
895.Va m_pkthdr
896member of the leading
897.Vt mbuf
898of a packet contains two fields used for that purpose,
899.Vt int Va csum_flags
900and
901.Vt int Va csum_data .
902The meaning of those fields depends on the direction a packet flows in,
903and on whether the packet is fragmented.
904Henceforth,
905.Va csum_flags
906or
907.Va csum_data
908of a packet
909will denote the corresponding field of the
910.Va m_pkthdr
911member of the leading
912.Vt mbuf
913in the
914.Vt mbuf chain
915containing the packet.
916.Pp
917On output, checksum offloading is attempted after the outgoing
918interface has been determined for a packet.
919The interface-specific field
920.Va ifnet.if_data.ifi_hwassist
921(see
922.Xr ifnet 9 )
923is consulted for the capabilities of the interface to assist in
924computing checksums.
925The
926.Va csum_flags
927field of the packet header is set to indicate which actions the interface
928is supposed to perform on it.
929The actions unsupported by the network interface are done in the
930software prior to passing the packet down to the interface driver;
931such actions will never be requested through
932.Va csum_flags .
933.Pp
934The flags demanding a particular action from an interface are as follows:
935.Bl -tag -width ".Dv CSUM_TCP" -offset indent
936.It Dv CSUM_IP
937The IP header checksum is to be computed and stored in the
938corresponding field of the packet.
939The hardware is expected to know the format of an IP header
940to determine the offset of the IP checksum field.
941.It Dv CSUM_TCP
942The TCP checksum is to be computed.
943(See below.)
944.It Dv CSUM_UDP
945The UDP checksum is to be computed.
946(See below.)
947.El
948.Pp
949Should a TCP or UDP checksum be offloaded to the hardware,
950the field
951.Va csum_data
952will contain the byte offset of the checksum field relative to the
953end of the IP header.
954In this case, the checksum field will be initially
955set by the TCP/IP module to the checksum of the pseudo header
956defined by the TCP and UDP specifications.
957.Pp
958For outbound packets which have been fragmented
959by the host CPU, the following will also be true,
960regardless of the checksum flag settings:
961.Bl -bullet -offset indent
962.It
963all fragments will have the flag
964.Dv M_FRAG
965set in their
966.Va m_flags
967field;
968.It
969the first and the last fragments in the chain will have
970.Dv M_FIRSTFRAG
971or
972.Dv M_LASTFRAG
973set in their
974.Va m_flags ,
975correspondingly;
976.It
977the first fragment in the chain will have the total number
978of fragments contained in its
979.Va csum_data
980field.
981.El
982.Pp
983The last rule for fragmented packets takes precedence over the one
984for a TCP or UDP checksum.
985Nevertheless, offloading a TCP or UDP checksum is possible for a
986fragmented packet if the flag
987.Dv CSUM_IP_FRAGS
988is set in the field
989.Va ifnet.if_data.ifi_hwassist
990associated with the network interface.
991However, in this case the interface is expected to figure out
992the location of the checksum field within the sequence of fragments
993by itself because
994.Va csum_data
995contains a fragment count instead of a checksum offset value.
996.Pp
997On input, an interface indicates the actions it has performed
998on a packet by setting one or more of the following flags in
999.Va csum_flags
1000associated with the packet:
1001.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1002.It Dv CSUM_IP_CHECKED
1003The IP header checksum has been computed.
1004.It Dv CSUM_IP_VALID
1005The IP header has a valid checksum.
1006This flag can appear only in combination with
1007.Dv CSUM_IP_CHECKED .
1008.It Dv CSUM_DATA_VALID
1009The checksum of the data portion of the IP packet has been computed
1010and stored in the field
1011.Va csum_data
1012in network byte order.
1013.It Dv CSUM_PSEUDO_HDR
1014Can be set only along with
1015.Dv CSUM_DATA_VALID
1016to indicate that the IP data checksum found in
1017.Va csum_data
1018allows for the pseudo header defined by the TCP and UDP specifications.
1019Otherwise the checksum of the pseudo header must be calculated by
1020the host CPU and added to
1021.Va csum_data
1022to obtain the final checksum to be used for TCP or UDP validation purposes.
1023.El
1024.Pp
1025If a particular network interface just indicates success or
1026failure of TCP or UDP checksum validation without returning
1027the exact value of the checksum to the host CPU, its driver can mark
1028.Dv CSUM_DATA_VALID
1029and
1030.Dv CSUM_PSEUDO_HDR
1031in
1032.Va csum_flags ,
1033and set
1034.Va csum_data
1035to
1036.Li 0xFFFF
1037hexadecimal to indicate a valid checksum.
1038It is a peculiarity of the algorithm used that the Internet checksum
1039calculated over any valid packet will be
1040.Li 0xFFFF
1041as long as the original checksum field is included.
1042.Pp
1043For inbound packets which are IP fragments, all
1044.Va csum_data
1045fields will be summed during reassembly to obtain the final checksum
1046value passed to an upper layer in the
1047.Va csum_data
1048field of the reassembled packet.
1049The
1050.Va csum_flags
1051fields of all fragments will be consolidated using logical AND
1052to obtain the final value for
1053.Va csum_flags .
1054Thus, in order to successfully
1055offload checksum computation for fragmented data,
1056all fragments should have the same value of
1057.Va csum_flags .
1058.Sh STRESS TESTING
1059When running a kernel compiled with the option
1060.Dv MBUF_STRESS_TEST ,
1061the following
1062.Xr sysctl 8 Ns
1063-controlled options may be used to create
1064various failure/extreme cases for testing of network drivers
1065and other parts of the kernel that rely on
1066.Vt mbufs .
1067.Bl -tag -width indent
1068.It Va net.inet.ip.mbuf_frag_size
1069Causes
1070.Fn ip_output
1071to fragment outgoing
1072.Vt mbuf chains
1073into fragments of the specified size.
1074Setting this variable to 1 is an excellent way to
1075test the long
1076.Vt mbuf chain
1077handling ability of network drivers.
1078.It Va kern.ipc.m_defragrandomfailures
1079Causes the function
1080.Fn m_defrag
1081to randomly fail, returning
1082.Dv NULL .
1083Any piece of code which uses
1084.Fn m_defrag
1085should be tested with this feature.
1086.El
1087.Sh RETURN VALUES
1088See above.
1089.Sh SEE ALSO
1090.Xr ifnet 9 ,
1091.Xr mbuf_tags 9
1092.Sh HISTORY
1093.\" Please correct me if I'm wrong
1094.Vt Mbufs
1095appeared in an early version of
1096.Bx .
1097Besides being used for network packets, they were used
1098to store various dynamic structures, such as routing table
1099entries, interface addresses, protocol control blocks, etc.
1100.Sh AUTHORS
1101The original
1102.Nm
1103manual page was written by Yar Tikhiy.
911.El
912.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
913This section currently applies to TCP/IP only.
914In order to save the host CPU resources, computing checksums is
915offloaded to the network interface hardware if possible.
916The
917.Va m_pkthdr
918member of the leading
919.Vt mbuf
920of a packet contains two fields used for that purpose,
921.Vt int Va csum_flags
922and
923.Vt int Va csum_data .
924The meaning of those fields depends on the direction a packet flows in,
925and on whether the packet is fragmented.
926Henceforth,
927.Va csum_flags
928or
929.Va csum_data
930of a packet
931will denote the corresponding field of the
932.Va m_pkthdr
933member of the leading
934.Vt mbuf
935in the
936.Vt mbuf chain
937containing the packet.
938.Pp
939On output, checksum offloading is attempted after the outgoing
940interface has been determined for a packet.
941The interface-specific field
942.Va ifnet.if_data.ifi_hwassist
943(see
944.Xr ifnet 9 )
945is consulted for the capabilities of the interface to assist in
946computing checksums.
947The
948.Va csum_flags
949field of the packet header is set to indicate which actions the interface
950is supposed to perform on it.
951The actions unsupported by the network interface are done in the
952software prior to passing the packet down to the interface driver;
953such actions will never be requested through
954.Va csum_flags .
955.Pp
956The flags demanding a particular action from an interface are as follows:
957.Bl -tag -width ".Dv CSUM_TCP" -offset indent
958.It Dv CSUM_IP
959The IP header checksum is to be computed and stored in the
960corresponding field of the packet.
961The hardware is expected to know the format of an IP header
962to determine the offset of the IP checksum field.
963.It Dv CSUM_TCP
964The TCP checksum is to be computed.
965(See below.)
966.It Dv CSUM_UDP
967The UDP checksum is to be computed.
968(See below.)
969.El
970.Pp
971Should a TCP or UDP checksum be offloaded to the hardware,
972the field
973.Va csum_data
974will contain the byte offset of the checksum field relative to the
975end of the IP header.
976In this case, the checksum field will be initially
977set by the TCP/IP module to the checksum of the pseudo header
978defined by the TCP and UDP specifications.
979.Pp
980For outbound packets which have been fragmented
981by the host CPU, the following will also be true,
982regardless of the checksum flag settings:
983.Bl -bullet -offset indent
984.It
985all fragments will have the flag
986.Dv M_FRAG
987set in their
988.Va m_flags
989field;
990.It
991the first and the last fragments in the chain will have
992.Dv M_FIRSTFRAG
993or
994.Dv M_LASTFRAG
995set in their
996.Va m_flags ,
997correspondingly;
998.It
999the first fragment in the chain will have the total number
1000of fragments contained in its
1001.Va csum_data
1002field.
1003.El
1004.Pp
1005The last rule for fragmented packets takes precedence over the one
1006for a TCP or UDP checksum.
1007Nevertheless, offloading a TCP or UDP checksum is possible for a
1008fragmented packet if the flag
1009.Dv CSUM_IP_FRAGS
1010is set in the field
1011.Va ifnet.if_data.ifi_hwassist
1012associated with the network interface.
1013However, in this case the interface is expected to figure out
1014the location of the checksum field within the sequence of fragments
1015by itself because
1016.Va csum_data
1017contains a fragment count instead of a checksum offset value.
1018.Pp
1019On input, an interface indicates the actions it has performed
1020on a packet by setting one or more of the following flags in
1021.Va csum_flags
1022associated with the packet:
1023.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1024.It Dv CSUM_IP_CHECKED
1025The IP header checksum has been computed.
1026.It Dv CSUM_IP_VALID
1027The IP header has a valid checksum.
1028This flag can appear only in combination with
1029.Dv CSUM_IP_CHECKED .
1030.It Dv CSUM_DATA_VALID
1031The checksum of the data portion of the IP packet has been computed
1032and stored in the field
1033.Va csum_data
1034in network byte order.
1035.It Dv CSUM_PSEUDO_HDR
1036Can be set only along with
1037.Dv CSUM_DATA_VALID
1038to indicate that the IP data checksum found in
1039.Va csum_data
1040allows for the pseudo header defined by the TCP and UDP specifications.
1041Otherwise the checksum of the pseudo header must be calculated by
1042the host CPU and added to
1043.Va csum_data
1044to obtain the final checksum to be used for TCP or UDP validation purposes.
1045.El
1046.Pp
1047If a particular network interface just indicates success or
1048failure of TCP or UDP checksum validation without returning
1049the exact value of the checksum to the host CPU, its driver can mark
1050.Dv CSUM_DATA_VALID
1051and
1052.Dv CSUM_PSEUDO_HDR
1053in
1054.Va csum_flags ,
1055and set
1056.Va csum_data
1057to
1058.Li 0xFFFF
1059hexadecimal to indicate a valid checksum.
1060It is a peculiarity of the algorithm used that the Internet checksum
1061calculated over any valid packet will be
1062.Li 0xFFFF
1063as long as the original checksum field is included.
1064.Pp
1065For inbound packets which are IP fragments, all
1066.Va csum_data
1067fields will be summed during reassembly to obtain the final checksum
1068value passed to an upper layer in the
1069.Va csum_data
1070field of the reassembled packet.
1071The
1072.Va csum_flags
1073fields of all fragments will be consolidated using logical AND
1074to obtain the final value for
1075.Va csum_flags .
1076Thus, in order to successfully
1077offload checksum computation for fragmented data,
1078all fragments should have the same value of
1079.Va csum_flags .
1080.Sh STRESS TESTING
1081When running a kernel compiled with the option
1082.Dv MBUF_STRESS_TEST ,
1083the following
1084.Xr sysctl 8 Ns
1085-controlled options may be used to create
1086various failure/extreme cases for testing of network drivers
1087and other parts of the kernel that rely on
1088.Vt mbufs .
1089.Bl -tag -width indent
1090.It Va net.inet.ip.mbuf_frag_size
1091Causes
1092.Fn ip_output
1093to fragment outgoing
1094.Vt mbuf chains
1095into fragments of the specified size.
1096Setting this variable to 1 is an excellent way to
1097test the long
1098.Vt mbuf chain
1099handling ability of network drivers.
1100.It Va kern.ipc.m_defragrandomfailures
1101Causes the function
1102.Fn m_defrag
1103to randomly fail, returning
1104.Dv NULL .
1105Any piece of code which uses
1106.Fn m_defrag
1107should be tested with this feature.
1108.El
1109.Sh RETURN VALUES
1110See above.
1111.Sh SEE ALSO
1112.Xr ifnet 9 ,
1113.Xr mbuf_tags 9
1114.Sh HISTORY
1115.\" Please correct me if I'm wrong
1116.Vt Mbufs
1117appeared in an early version of
1118.Bx .
1119Besides being used for network packets, they were used
1120to store various dynamic structures, such as routing table
1121entries, interface addresses, protocol control blocks, etc.
1122.Sh AUTHORS
1123The original
1124.Nm
1125manual page was written by Yar Tikhiy.