Deleted Added
sdiff udiff text old ( 141846 ) new ( 141851 )
full compact
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\" notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\" notice, this list of conditions and the following disclaimer in the
11.\" documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD: head/share/man/man9/mbuf.9 141846 2005-02-13 22:25:33Z ru $
26.\"
27.Dd August 27, 2004
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft int
94.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
95.Ft struct mbuf *
96.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
97.Ft struct mbuf *
98.Fn m_pullup "struct mbuf *mbuf" "int len"
99.Ft struct mbuf *
100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
101.Ft struct mbuf *
102.Fn m_copypacket "struct mbuf *mbuf" "int how"
103.Ft struct mbuf *
104.Fn m_dup "struct mbuf *mbuf" "int how"
105.Ft void
106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft void
108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft struct mbuf *
110.Fo m_devget
111.Fa "char *buf"
112.Fa "int len"
113.Fa "int offset"
114.Fa "struct ifnet *ifp"
115.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
116.Fc
117.Ft void
118.Fn m_cat "struct mbuf *m" "struct mbuf *n"
119.Ft u_int
120.Fn m_fixhdr "struct mbuf *mbuf"
121.Ft int
122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
123.Ft void
124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
125.Ft u_int
126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
127.Ft struct mbuf *
128.Fn m_split "struct mbuf *mbuf" "int len" "int how"
129.Ft int
130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
131.Ft struct mbuf *
132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
133.Ft struct mbuf *
134.Fn m_defrag "struct mbuf *m0" "int how"
135.\"
136.Sh DESCRIPTION
137An
138.Vt mbuf
139is a basic unit of memory management in the kernel IPC subsystem.
140Network packets and socket buffers are stored in
141.Vt mbufs .
142A network packet may span multiple
143.Vt mbufs
144arranged into a
145.Vt mbuf chain
146(linked list),
147which allows adding or trimming
148network headers with little overhead.
149.Pp
150While a developer should not bother with
151.Vt mbuf
152internals without serious
153reason in order to avoid incompatibilities with future changes, it
154is useful to understand the general structure of an
155.Vt mbuf .
156.Pp
157An
158.Vt mbuf
159consists of a variable-sized header and a small internal
160buffer for data.
161The total size of an
162.Vt mbuf ,
163.Dv MSIZE ,
164is a constant defined in
165.In sys/param.h .
166The
167.Vt mbuf
168header includes:
169.Pp
170.Bl -tag -width "m_nextpkt" -offset indent
171.It Va m_next
172.Pq Vt struct mbuf *
173A pointer to the next
174.Vt mbuf
175in the
176.Vt mbuf chain .
177.It Va m_nextpkt
178.Pq Vt struct mbuf *
179A pointer to the next
180.Vt mbuf chain
181in the queue.
182.It Va m_data
183.Pq Vt caddr_t
184A pointer to data attached to this
185.Vt mbuf .
186.It Va m_len
187.Pq Vt int
188The length of the data.
189.It Va m_type
190.Pq Vt short
191The type of the data.
192.It Va m_flags
193.Pq Vt int
194The
195.Vt mbuf
196flags.
197.El
198.Pp
199The
200.Vt mbuf
201flag bits are defined as follows:
202.Bd -literal
203/* mbuf flags */
204#define M_EXT 0x0001 /* has associated external storage */
205#define M_PKTHDR 0x0002 /* start of record */
206#define M_EOR 0x0004 /* end of record */
207#define M_RDONLY 0x0008 /* associated data marked read-only */
208#define M_PROTO1 0x0010 /* protocol-specific */
209#define M_PROTO2 0x0020 /* protocol-specific */
210#define M_PROTO3 0x0040 /* protocol-specific */
211#define M_PROTO4 0x0080 /* protocol-specific */
212#define M_PROTO5 0x0100 /* protocol-specific */
213#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */
214#define M_FREELIST 0x8000 /* mbuf is on the free list */
215
216/* mbuf pkthdr flags (also stored in m_flags) */
217#define M_BCAST 0x0200 /* send/received as link-level broadcast */
218#define M_MCAST 0x0400 /* send/received as link-level multicast */
219#define M_FRAG 0x0800 /* packet is fragment of larger packet */
220#define M_FIRSTFRAG 0x1000 /* packet is first fragment */
221#define M_LASTFRAG 0x2000 /* packet is last fragment */
222.Ed
223.Pp
224The available
225.Vt mbuf
226types are defined as follows:
227.Bd -literal
228/* mbuf types */
229#define MT_DATA 1 /* dynamic (data) allocation */
230#define MT_HEADER 2 /* packet header */
231#define MT_SONAME 8 /* socket name */
232#define MT_FTABLE 11 /* fragment reassembly header */
233#define MT_CONTROL 14 /* extra-data protocol message */
234#define MT_OOBDATA 15 /* expedited data */
235.Ed
236.Pp
237If the
238.Dv M_PKTHDR
239flag is set, a
240.Vt struct pkthdr Va m_pkthdr
241is added to the
242.Vt mbuf
243header.
244It contains a pointer to the interface
245the packet has been received from
246.Pq Vt struct ifnet Va *rcvif ,
247and the total packet length
248.Pq Vt int Va len .
249Optionally, it may also contain an attached list of packet tags
250.Pq Vt "struct m_tag" .
251See
252.Xr mbuf_tags 9
253for details.
254Fields used in offloading checksum calculation to the hardware are kept in
255.Va m_pkthdr
256as well.
257See
258.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
259for details.
260.Pp
261If small enough, data is stored in the internal data buffer of an
262.Vt mbuf .
263If the data is sufficiently large, another
264.Vt mbuf
265may be added to the
266.Vt mbuf chain ,
267or external storage may be associated with the
268.Vt mbuf .
269.Dv MHLEN
270bytes of data can fit into an
271.Vt mbuf
272with the
273.Dv M_PKTHDR
274flag set,
275.Dv MLEN
276bytes can otherwise.
277.Pp
278If external storage is being associated with an
279.Vt mbuf ,
280the
281.Va m_ext
282header is added at the cost of losing the internal data buffer.
283It includes a pointer to external storage, the size of the storage,
284a pointer to a function used for freeing the storage,
285a pointer to an optional argument that can be passed to the function,
286and a pointer to a reference counter.
287An
288.Vt mbuf
289using external storage has the
290.Dv M_EXT
291flag set.
292.Pp
293The system supplies a macro for allocating the desired external storage
294buffer,
295.Dv MEXTADD .
296.Pp
297The allocation and management of the reference counter is handled by the
298subsystem.
299The developer can check whether the reference count for the
300external storage of a given
301.Vt mbuf
302is greater than 1 with the
303.Dv MEXT_IS_REF
304macro.
305Similarly, the developer can directly add and remove references,
306if absolutely necessary, with the use of the
307.Dv MEXT_ADD_REF
308and
309.Dv MEXT_REM_REF
310macros.
311.Pp
312The system also supplies a default type of external storage buffer called an
313.Vt mbuf cluster .
314.Vt Mbuf clusters
315can be allocated and configured with the use of the
316.Dv MCLGET
317macro.
318Each
319.Vt mbuf cluster
320is
321.Dv MCLBYTES
322in size, where MCLBYTES is a machine-dependent constant.
323The system defines an advisory macro
324.Dv MINCLSIZE ,
325which is the smallest amount of data to put into an
326.Vt mbuf cluster .
327It's equal to the sum of
328.Dv MLEN
329and
330.Dv MHLEN .
331It is typically preferable to store data into the data region of an
332.Vt mbuf ,
333if size permits, as opposed to allocating a separate
334.Vt mbuf cluster
335to hold the same data.
336.\"
337.Ss Macros and Functions
338There are numerous predefined macros and functions that provide the
339developer with common utilities.
340.\"
341.Bl -ohang -offset indent
342.It Fn mtod mbuf type
343Convert an
344.Fa mbuf
345pointer to a data pointer.
346The macro expands to the data pointer cast to the pointer of the specified
347.Fa type .
348.Sy Note :
349It is advisable to ensure that there is enough contiguous data in
350.Fa mbuf .
351See
352.Fn m_pullup
353for details.
354.It Fn MGET mbuf how type
355Allocate an
356.Vt mbuf
357and initialize it to contain internal data.
358.Fa mbuf
359will point to the allocated
360.Vt mbuf
361on success, or be set to
362.Dv NULL
363on failure.
364The
365.Fa how
366argument is to be set to
367.Dv M_TRYWAIT
368or
369.Dv M_DONTWAIT .
370It specifies whether the caller is willing to block if necessary.
371If
372.Fa how
373is set to
374.Dv M_TRYWAIT ,
375a failed allocation will result in the caller being put
376to sleep for a designated
377.Va kern.ipc.mbuf_wait
378.Xr ( sysctl 8
379tunable)
380number of ticks.
381A number of other functions and macros related to
382.Vt mbufs
383have the same argument because they may
384at some point need to allocate new
385.Vt mbufs .
386.Pp
387Programmers should be careful not to confuse the
388.Vt mbuf
389allocation flag
390.Dv M_DONTWAIT
391with the
392.Xr malloc 9
393allocation flag,
394.Dv M_NOWAIT .
395They are not the same.
396.It Fn MGETHDR mbuf how type
397Allocate an
398.Vt mbuf
399and initialize it to contain a packet header
400and internal data.
401See
402.Fn MGET
403for details.
404.It Fn MCLGET mbuf how
405Allocate and attach an
406.Vt mbuf cluster
407to
408.Fa mbuf .
409If the macro fails, the
410.Dv M_EXT
411flag will not be set in
412.Fa mbuf .
413.It Fn M_ALIGN mbuf len
414Set the pointer
415.Fa mbuf->m_data
416to place an object of the size
417.Fa len
418at the end of the internal data area of
419.Fa mbuf ,
420long word aligned.
421Applicable only if
422.Fa mbuf
423is newly allocated with
424.Fn MGET
425or
426.Fn m_get .
427.It Fn MH_ALIGN mbuf len
428Serves the same purpose as
429.Fn M_ALIGN
430does, but only for
431.Fa mbuf
432newly allocated with
433.Fn MGETHDR
434or
435.Fn m_gethdr ,
436or initialized by
437.Fn m_dup_pkthdr
438or
439.Fn m_move_pkthdr .
440.It Fn M_LEADINGSPACE mbuf
441Returns the number of bytes available before the beginning
442of data in
443.Fa mbuf .
444.It Fn M_TRAILINGSPACE mbuf
445Returns the number of bytes available after the end of data in
446.Fa mbuf .
447.It Fn M_PREPEND mbuf len how
448This macro operates on an
449.Vt mbuf chain .
450It is an optimized wrapper for
451.Fn m_prepend
452that can make use of possible empty space before data
453(e.g.\& left after trimming of a link-layer header).
454The new
455.Vt mbuf chain
456pointer or
457.Dv NULL
458is in
459.Fa mbuf
460after the call.
461.It Fn M_MOVE_PKTHDR to from
462Using this macro is equivalent to calling
463.Fn m_move_pkthdr to from .
464.It Fn M_WRITABLE mbuf
465This macro will evaluate true if
466.Fa mbuf
467is not marked
468.Dv M_RDONLY
469and if either
470.Fa mbuf
471does not contain external storage or,
472if it does,
473then if the reference count of the storage is not greater than 1.
474The
475.Dv M_RDONLY
476flag can be set in
477.Fa mbuf->m_flags .
478This can be achieved during setup of the external storage,
479by passing the
480.Dv M_RDONLY
481bit as a
482.Fa flags
483argument to the
484.Fn MEXTADD
485macro, or can be directly set in individual
486.Vt mbufs .
487.It Fn MCHTYPE mbuf type
488Change the type of
489.Fa mbuf
490to
491.Fa type .
492This is a relatively expensive operation and should be avoided.
493.El
494.Pp
495The functions are:
496.Bl -ohang -offset indent
497.It Fn m_get how type
498A function version of
499.Fn MGET
500for non-critical paths.
501.It Fn m_getm orig len how type
502Allocate
503.Fa len
504bytes worth of
505.Vt mbufs
506and
507.Vt mbuf clusters
508if necessary and append the resulting allocated
509.Vt mbuf chain
510to the
511.Vt mbuf chain
512.Fa orig ,
513if it is
514.No non- Ns Dv NULL .
515If the allocation fails at any point,
516free whatever was allocated and return
517.Dv NULL .
518If
519.Fa orig
520is
521.No non- Ns Dv NULL ,
522it will not be freed.
523It is possible to use
524.Fn m_getm
525to either append
526.Fa len
527bytes to an existing
528.Vt mbuf
529or
530.Vt mbuf chain
531(for example, one which may be sitting in a pre-allocated ring)
532or to simply perform an all-or-nothing
533.Vt mbuf
534and
535.Vt mbuf cluster
536allocation.
537.It Fn m_gethdr how type
538A function version of
539.Fn MGETHDR
540for non-critical paths.
541.It Fn m_getcl how type flags
542Fetch an
543.Vt mbuf
544with a
545.Vt mbuf cluster
546attached to it.
547If one of the allocations fails, the entire allocation fails.
548This routine is the preferred way of fetching both the
549.Vt mbuf
550and
551.Vt mbuf cluster
552together, as it avoids having to unlock/relock between allocations.
553Returns
554.Dv NULL
555on failure.
556.It Fn m_getclr how type
557Allocate an
558.Vt mbuf
559and zero out the data region.
560.It Fn m_free mbuf
561Frees
562.Vt mbuf .
563Returns
564.Va m_next
565of the freed
566.Vt mbuf .
567.El
568.Pp
569The functions below operate on
570.Vt mbuf chains .
571.Bl -ohang -offset indent
572.It Fn m_freem mbuf
573Free an entire
574.Vt mbuf chain ,
575including any external storage.
576.\"
577.It Fn m_adj mbuf len
578Trim
579.Fa len
580bytes from the head of an
581.Vt mbuf chain
582if
583.Fa len
584is positive, from the tail otherwise.
585.\"
586.It Fn m_append mbuf len cp
587Append
588.Fa len
589bytes of data
590.Fa cp
591to the
592.Vt mbuf chain .
593Extend the mbuf chain if the new data does not fit in
594existing space.
595.\"
596.It Fn m_prepend mbuf len how
597Allocate a new
598.Vt mbuf
599and prepend it to the
600.Vt mbuf chain ,
601handling
602.Dv M_PKTHDR
603properly.
604.Sy Note :
605It does not allocate any
606.Vt mbuf clusters ,
607so
608.Fa len
609must be less than
610.Dv MLEN
611or
612.Dv MHLEN ,
613depending on the
614.Dv M_PKTHDR
615flag setting.
616.\"
617.It Fn m_pullup mbuf len
618Arrange that the first
619.Fa len
620bytes of an
621.Vt mbuf chain
622are contiguous and lay in the data area of
623.Fa mbuf ,
624so they are accessible with
625.Fn mtod mbuf type .
626Return the new
627.Vt mbuf chain
628on success,
629.Dv NULL
630on failure
631(the
632.Vt mbuf chain
633is freed in this case).
634.Sy Note :
635It does not allocate any
636.Vt mbuf clusters ,
637so
638.Fa len
639must be less than
640.Dv MHLEN .
641.\"
642.It Fn m_copym mbuf offset len how
643Make a copy of an
644.Vt mbuf chain
645starting
646.Fa offset
647bytes from the beginning, continuing for
648.Fa len
649bytes.
650If
651.Fa len
652is
653.Dv M_COPYALL ,
654copy to the end of the
655.Vt mbuf chain .
656.Sy Note :
657The copy is read-only, because the
658.Vt mbuf clusters
659are not copied, only their reference counts are incremented.
660.\"
661.It Fn m_copypacket mbuf how
662Copy an entire packet including header, which must be present.
663This is an optimized version of the common case
664.Fn m_copym mbuf 0 M_COPYALL how .
665.Sy Note :
666The copy is read-only, because the
667.Vt mbuf clusters
668are not copied, only their reference counts are incremented.
669.\"
670.It Fn m_dup mbuf how
671Copy a packet header
672.Vt mbuf chain
673into a completely new
674.Vt mbuf chain ,
675including copying any
676.Vt mbuf clusters .
677Use this instead of
678.Fn m_copypacket
679when you need a writable copy of an
680.Vt mbuf chain .
681.\"
682.It Fn m_copydata mbuf offset len buf
683Copy data from an
684.Vt mbuf chain
685starting
686.Fa offset
687bytes from the beginning, continuing for
688.Fa len
689bytes, into the indicated buffer
690.Fa buf .
691.\"
692.It Fn m_copyback mbuf offset len buf
693Copy
694.Fa len
695bytes from the buffer
696.Fa buf
697back into the indicated
698.Vt mbuf chain ,
699starting at
700.Fa offset
701bytes from the beginning of the
702.Vt mbuf chain ,
703extending the
704.Vt mbuf chain
705if necessary.
706.Sy Note :
707It does not allocate any
708.Vt mbuf clusters ,
709just adds
710.Vt mbufs
711to the
712.Vt mbuf chain .
713It's safe to set
714.Fa offset
715beyond the current
716.Vt mbuf chain
717end: zeroed
718.Vt mbufs
719will be allocated to fill the space.
720.\"
721.It Fn m_length mbuf last
722Return the length of the
723.Vt mbuf chain ,
724and optionally a pointer to the last
725.Vt mbuf .
726.\"
727.It Fn m_dup_pkthdr to from how
728Upon the function's completion, the
729.Vt mbuf
730.Fa to
731will contain an identical copy of
732.Fa from->m_pkthdr
733and the per-packet attributes found in the
734.Vt mbuf chain
735.Fa from .
736The
737.Vt mbuf
738.Fa from
739must have the flag
740.Dv M_PKTHDR
741initially set, and
742.Fa to
743must be empty on entry.
744.\"
745.It Fn m_move_pkthdr to from
746Move
747.Va m_pkthdr
748and the per-packet attributes from the
749.Vt mbuf chain
750.Fa from
751to the
752.Vt mbuf
753.Fa to .
754The
755.Vt mbuf
756.Fa from
757must have the flag
758.Dv M_PKTHDR
759initially set, and
760.Fa to
761must be empty on entry.
762Upon the function's completion,
763.Fa from
764will have the flag
765.Dv M_PKTHDR
766and the per-packet attributes cleared.
767.\"
768.It Fn m_fixhdr mbuf
769Set the packet-header length to the length of the
770.Vt mbuf chain .
771.\"
772.It Fn m_devget buf len offset ifp copy
773Copy data from device local memory pointed to by
774.Fa buf
775to an
776.Vt mbuf chain .
777The copy is done using a specified copy routine
778.Fa copy ,
779or
780.Fn bcopy
781if
782.Fa copy
783is
784.Dv NULL .
785.\"
786.It Fn m_cat m n
787Concatenate
788.Fa n
789to
790.Fa m .
791Both
792.Vt mbuf chains
793must be of the same type.
794.Fa n
795is still valid after the function returns.
796.Sy Note :
797It does not handle
798.Dv M_PKTHDR
799and friends.
800.\"
801.It Fn m_split mbuf len how
802Partition an
803.Vt mbuf chain
804in two pieces, returning the tail:
805all but the first
806.Fa len
807bytes.
808In case of failure, it returns
809.Dv NULL
810and attempts to restore the
811.Vt mbuf chain
812to its original state.
813.\"
814.It Fn m_apply mbuf off len f arg
815Apply a function to an
816.Vt mbuf chain ,
817at offset
818.Fa off ,
819for length
820.Fa len
821bytes.
822Typically used to avoid calls to
823.Fn m_pullup
824which would otherwise be unnecessary or undesirable.
825.Fa arg
826is a convenience argument which is passed to the callback function
827.Fa f .
828.Pp
829Each time
830.Fn f
831is called, it will be passed
832.Fa arg ,
833a pointer to the
834.Fa data
835in the current mbuf, and the length
836.Fa len
837of the data in this mbuf to which the function should be applied.
838.Pp
839The function should return zero to indicate success;
840otherwise, if an error is indicated, then
841.Fn m_apply
842will return the error and stop iterating through the
843.Vt mbuf chain .
844.\"
845.It Fn m_getptr mbuf loc off
846Return a pointer to the mbuf containing the data located at
847.Fa loc
848bytes from the beginning of the
849.Vt mbuf chain .
850The corresponding offset into the mbuf will be stored in
851.Fa *off .
852.It Fn m_defrag m0 how
853Defragment an mbuf chain, returning the shortest possible
854chain of mbufs and clusters.
855If allocation fails and this cannot be completed,
856.Dv NULL
857will be returned and the original chain will be unchanged.
858Upon success, the original chain will be freed and the new
859chain will be returned.
860.Fa how
861should be either
862.Dv M_TRYWAIT
863or
864.Dv M_DONTWAIT ,
865depending on the caller's preference.
866.Pp
867This function is especially useful in network drivers, where
868certain long mbuf chains must be shortened before being added
869to TX descriptor lists.
870.El
871.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
872This section currently applies to TCP/IP only.
873In order to save the host CPU resources, computing checksums is
874offloaded to the network interface hardware if possible.
875The
876.Va m_pkthdr
877member of the leading
878.Vt mbuf
879of a packet contains two fields used for that purpose,
880.Vt int Va csum_flags
881and
882.Vt int Va csum_data .
883The meaning of those fields depends on the direction a packet flows in,
884and on whether the packet is fragmented.
885Henceforth,
886.Va csum_flags
887or
888.Va csum_data
889of a packet
890will denote the corresponding field of the
891.Va m_pkthdr
892member of the leading
893.Vt mbuf
894in the
895.Vt mbuf chain
896containing the packet.
897.Pp
898On output, checksum offloading is attempted after the outgoing
899interface has been determined for a packet.
900The interface-specific field
901.Va ifnet.if_data.ifi_hwassist
902(see
903.Xr ifnet 9 )
904is consulted for the capabilities of the interface to assist in
905computing checksums.
906The
907.Va csum_flags
908field of the packet header is set to indicate which actions the interface
909is supposed to perform on it.
910The actions unsupported by the network interface are done in the
911software prior to passing the packet down to the interface driver;
912such actions will never be requested through
913.Va csum_flags .
914.Pp
915The flags demanding a particular action from an interface are as follows:
916.Bl -tag -width ".Dv CSUM_TCP" -offset indent
917.It Dv CSUM_IP
918The IP header checksum is to be computed and stored in the
919corresponding field of the packet.
920The hardware is expected to know the format of an IP header
921to determine the offset of the IP checksum field.
922.It Dv CSUM_TCP
923The TCP checksum is to be computed.
924(See below.)
925.It Dv CSUM_UDP
926The UDP checksum is to be computed.
927(See below.)
928.El
929.Pp
930Should a TCP or UDP checksum be offloaded to the hardware,
931the field
932.Va csum_data
933will contain the byte offset of the checksum field relative to the
934end of the IP header.
935In this case, the checksum field will be initially
936set by the TCP/IP module to the checksum of the pseudo header
937defined by the TCP and UDP specifications.
938.Pp
939For outbound packets which have been fragmented
940by the host CPU, the following will also be true,
941regardless of the checksum flag settings:
942.Bl -bullet -offset indent
943.It
944all fragments will have the flag
945.Dv M_FRAG
946set in their
947.Va m_flags
948field;
949.It
950the first and the last fragments in the chain will have
951.Dv M_FIRSTFRAG
952or
953.Dv M_LASTFRAG
954set in their
955.Va m_flags ,
956correspondingly;
957.It
958the first fragment in the chain will have the total number
959of fragments contained in its
960.Va csum_data
961field.
962.El
963.Pp
964The last rule for fragmented packets takes precedence over the one
965for a TCP or UDP checksum.
966Nevertheless, offloading a TCP or UDP checksum is possible for a
967fragmented packet if the flag
968.Dv CSUM_IP_FRAGS
969is set in the field
970.Va ifnet.if_data.ifi_hwassist
971associated with the network interface.
972However, in this case the interface is expected to figure out
973the location of the checksum field within the sequence of fragments
974by itself because
975.Va csum_data
976contains a fragment count instead of a checksum offset value.
977.Pp
978On input, an interface indicates the actions it has performed
979on a packet by setting one or more of the following flags in
980.Va csum_flags
981associated with the packet:
982.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
983.It Dv CSUM_IP_CHECKED
984The IP header checksum has been computed.
985.It Dv CSUM_IP_VALID
986The IP header has a valid checksum.
987This flag can appear only in combination with
988.Dv CSUM_IP_CHECKED .
989.It Dv CSUM_DATA_VALID
990The checksum of the data portion of the IP packet has been computed
991and stored in the field
992.Va csum_data
993in network byte order.
994.It Dv CSUM_PSEUDO_HDR
995Can be set only along with
996.Dv CSUM_DATA_VALID
997to indicate that the IP data checksum found in
998.Va csum_data
999allows for the pseudo header defined by the TCP and UDP specifications.
1000Otherwise the checksum of the pseudo header must be calculated by
1001the host CPU and added to
1002.Va csum_data
1003to obtain the final checksum to be used for TCP or UDP validation purposes.
1004.El
1005.Pp
1006If a particular network interface just indicates success or
1007failure of TCP or UDP checksum validation without returning
1008the exact value of the checksum to the host CPU, its driver can mark
1009.Dv CSUM_DATA_VALID
1010and
1011.Dv CSUM_PSEUDO_HDR
1012in
1013.Va csum_flags ,
1014and set
1015.Va csum_data
1016to
1017.Li 0xFFFF
1018hexadecimal to indicate a valid checksum.
1019It is a peculiarity of the algorithm used that the Internet checksum
1020calculated over any valid packet will be
1021.Li 0xFFFF
1022as long as the original checksum field is included.
1023.Pp
1024For inbound packets which are IP fragments, all
1025.Va csum_data
1026fields will be summed during reassembly to obtain the final checksum
1027value passed to an upper layer in the
1028.Va csum_data
1029field of the reassembled packet.
1030The
1031.Va csum_flags
1032fields of all fragments will be consolidated using logical AND
1033to obtain the final value for
1034.Va csum_flags .
1035Thus, in order to successfully
1036offload checksum computation for fragmented data,
1037all fragments should have the same value of
1038.Va csum_flags .
1039.Sh STRESS TESTING
1040When running a kernel compiled with the option
1041.Dv MBUF_STRESS_TEST ,
1042the following
1043.Xr sysctl 8 Ns
1044-controlled options may be used to create
1045various failure/extreme cases for testing of network drivers
1046and other parts of the kernel that rely on
1047.Vt mbufs .
1048.Bl -tag -width indent
1049.It Va net.inet.ip.mbuf_frag_size
1050Causes
1051.Fn ip_output
1052to fragment outgoing
1053.Vt mbuf chains
1054into fragments of the specified size.
1055Setting this variable to 1 is an excellent way to
1056test the long
1057.Vt mbuf chain
1058handling ability of network drivers.
1059.It Va kern.ipc.m_defragrandomfailures
1060Causes the function
1061.Fn m_defrag
1062to randomly fail, returning
1063.Dv NULL .
1064Any piece of code which uses
1065.Fn m_defrag
1066should be tested with this feature.
1067.El
1068.Sh RETURN VALUES
1069See above.
1070.Sh SEE ALSO
1071.Xr ifnet 9 ,
1072.Xr mbuf_tags 9
1073.Sh HISTORY
1074.\" Please correct me if I'm wrong
1075.Vt Mbufs
1076appeared in an early version of
1077.Bx .
1078Besides being used for network packets, they were used
1079to store various dynamic structures, such as routing table
1080entries, interface addresses, protocol control blocks, etc.
1081.Sh AUTHORS
1082The original
1083.Nm
1084man page was written by Yar Tikhiy.