frag6.c revision 255792
12116Sjkh/*-
22116Sjkh * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
32116Sjkh * All rights reserved.
42116Sjkh *
52116Sjkh * Redistribution and use in source and binary forms, with or without
62116Sjkh * modification, are permitted provided that the following conditions
72116Sjkh * are met:
88870Srgrimes * 1. Redistributions of source code must retain the above copyright
92116Sjkh *    notice, this list of conditions and the following disclaimer.
102116Sjkh * 2. Redistributions in binary form must reproduce the above copyright
112116Sjkh *    notice, this list of conditions and the following disclaimer in the
122116Sjkh *    documentation and/or other materials provided with the distribution.
132116Sjkh * 3. Neither the name of the project nor the names of its contributors
148870Srgrimes *    may be used to endorse or promote products derived from this software
152116Sjkh *    without specific prior written permission.
162116Sjkh *
172116Sjkh * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
182116Sjkh * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
198870Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
202116Sjkh * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
212116Sjkh * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
222116Sjkh * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
232116Sjkh * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
242116Sjkh * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
252116Sjkh * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
262116Sjkh * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
272116Sjkh * SUCH DAMAGE.
282116Sjkh *
292116Sjkh *	$KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $
302116Sjkh */
312116Sjkh
322116Sjkh#include <sys/cdefs.h>
332116Sjkh__FBSDID("$FreeBSD: head/sys/netinet6/frag6.c 255792 2013-09-22 14:53:07Z bz $");
342116Sjkh
358870Srgrimes#include <sys/param.h>
362116Sjkh#include <sys/systm.h>
378870Srgrimes#include <sys/malloc.h>
382116Sjkh#include <sys/mbuf.h>
392116Sjkh#include <sys/domain.h>
402116Sjkh#include <sys/protosw.h>
412116Sjkh#include <sys/socket.h>
422116Sjkh#include <sys/errno.h>
432116Sjkh#include <sys/time.h>
442116Sjkh#include <sys/kernel.h>
452116Sjkh#include <sys/syslog.h>
462116Sjkh
472116Sjkh#include <net/if.h>
488870Srgrimes#include <net/route.h>
492116Sjkh#include <net/vnet.h>
502116Sjkh
512116Sjkh#include <netinet/in.h>
522116Sjkh#include <netinet/in_var.h>
532116Sjkh#include <netinet/ip6.h>
542116Sjkh#include <netinet6/ip6_var.h>
552116Sjkh#include <netinet/icmp6.h>
562116Sjkh#include <netinet/in_systm.h>	/* for ECN definitions */
572116Sjkh#include <netinet/ip.h>		/* for ECN definitions */
588870Srgrimes
592116Sjkh#include <security/mac/mac_framework.h>
602116Sjkh
612116Sjkh/*
 * Define this to get correct per-interface statistics.
 * Doing so requires an extra routing table lookup per fragment,
 * which may or may not be a performance hit.
652116Sjkh */
662116Sjkh#define IN6_IFSTAT_STRICT
672116Sjkh
/* File-local helpers for manipulating the fragment and packet queues. */
static void	frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6);
static void	frag6_deq(struct ip6asfrag *af6);
static void	frag6_insque(struct ip6q *new, struct ip6q *old);
static void	frag6_remque(struct ip6q *p6);
static void	frag6_freef(struct ip6q *q6);
73
74static struct mtx ip6qlock;
75/*
76 * These fields all protected by ip6qlock.
77 */
78static VNET_DEFINE(u_int, frag6_nfragpackets);
79static VNET_DEFINE(u_int, frag6_nfrags);
80static VNET_DEFINE(struct ip6q, ip6q);	/* ip6 reassemble queue */
81
82#define	V_frag6_nfragpackets		VNET(frag6_nfragpackets)
83#define	V_frag6_nfrags			VNET(frag6_nfrags)
84#define	V_ip6q				VNET(ip6q)
85
86#define	IP6Q_LOCK_INIT()	mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
87#define	IP6Q_LOCK()		mtx_lock(&ip6qlock)
88#define	IP6Q_TRYLOCK()		mtx_trylock(&ip6qlock)
89#define	IP6Q_LOCK_ASSERT()	mtx_assert(&ip6qlock, MA_OWNED)
90#define	IP6Q_UNLOCK()		mtx_unlock(&ip6qlock)
91
92static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
93
94/*
95 * Initialise reassembly queue and fragment identifier.
96 */
97static void
98frag6_change(void *tag)
99{
100
101	V_ip6_maxfragpackets = nmbclusters / 4;
102	V_ip6_maxfrags = nmbclusters / 4;
103}
104
105void
106frag6_init(void)
107{
108
109	V_ip6_maxfragpackets = nmbclusters / 4;
110	V_ip6_maxfrags = nmbclusters / 4;
111	V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q;
112
113	if (!IS_DEFAULT_VNET(curvnet))
114		return;
115
116	EVENTHANDLER_REGISTER(nmbclusters_change,
117	    frag6_change, NULL, EVENTHANDLER_PRI_ANY);
118
119	IP6Q_LOCK_INIT();
120}
121
122/*
123 * In RFC2460, fragment and reassembly rule do not agree with each other,
124 * in terms of next header field handling in fragment header.
125 * While the sender will use the same value for all of the fragmented packets,
126 * receiver is suggested not to check the consistency.
127 *
128 * fragment rule (p20):
129 *	(2) A Fragment header containing:
130 *	The Next Header value that identifies the first header of
131 *	the Fragmentable Part of the original packet.
132 *		-> next header field is same for all fragments
133 *
134 * reassembly rule (p21):
135 *	The Next Header field of the last header of the Unfragmentable
136 *	Part is obtained from the Next Header field of the first
137 *	fragment's Fragment header.
138 *		-> should grab it from the first fragment only
139 *
 * The following note also contradicts the fragment rule - no one is going to
 * send different fragments with different next header fields.
142 *
143 * additional note (p22):
144 *	The Next Header values in the Fragment headers of different
145 *	fragments of the same original packet may differ.  Only the value
146 *	from the Offset zero fragment packet is used for reassembly.
147 *		-> should grab it from the first fragment only
148 *
149 * There is no explicit reason given in the RFC.  Historical reason maybe?
150 */
151/*
152 * Fragment input
153 */
154int
155frag6_input(struct mbuf **mp, int *offp, int proto)
156{
157	struct mbuf *m = *mp, *t;
158	struct ip6_hdr *ip6;
159	struct ip6_frag *ip6f;
160	struct ip6q *q6;
161	struct ip6asfrag *af6, *ip6af, *af6dwn;
162#ifdef IN6_IFSTAT_STRICT
163	struct in6_ifaddr *ia;
164#endif
165	int offset = *offp, nxt, i, next;
166	int first_frag = 0;
167	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
168	struct ifnet *dstifp;
169	u_int8_t ecn, ecn0;
170#if 0
171	char ip6buf[INET6_ADDRSTRLEN];
172#endif
173
174	ip6 = mtod(m, struct ip6_hdr *);
175#ifndef PULLDOWN_TEST
176	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
177	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
178#else
179	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
180	if (ip6f == NULL)
181		return (IPPROTO_DONE);
182#endif
183
184	dstifp = NULL;
185#ifdef IN6_IFSTAT_STRICT
186	/* find the destination interface of the packet. */
187	if ((ia = ip6_getdstifaddr(m)) != NULL) {
188		dstifp = ia->ia_ifp;
189		ifa_free(&ia->ia_ifa);
190	}
191#else
192	/* we are violating the spec, this is not the destination interface */
193	if ((m->m_flags & M_PKTHDR) != 0)
194		dstifp = m->m_pkthdr.rcvif;
195#endif
196
197	/* jumbo payload can't contain a fragment header */
198	if (ip6->ip6_plen == 0) {
199		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
200		in6_ifstat_inc(dstifp, ifs6_reass_fail);
201		return IPPROTO_DONE;
202	}
203
204	/*
205	 * check whether fragment packet's fragment length is
206	 * multiple of 8 octets.
207	 * sizeof(struct ip6_frag) == 8
208	 * sizeof(struct ip6_hdr) = 40
209	 */
210	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
211	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
212		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
213		    offsetof(struct ip6_hdr, ip6_plen));
214		in6_ifstat_inc(dstifp, ifs6_reass_fail);
215		return IPPROTO_DONE;
216	}
217
218	IP6STAT_INC(ip6s_fragments);
219	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
220
221	/* offset now points to data portion */
222	offset += sizeof(struct ip6_frag);
223
224	/*
225	 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
226	 * upfront, unrelated to any reassembly.  Just skip the fragment header.
227	 */
228	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
229		/* XXX-BZ we want dedicated counters for this. */
230		IP6STAT_INC(ip6s_reassembled);
231		in6_ifstat_inc(dstifp, ifs6_reass_ok);
232		*offp = offset;
233		return (ip6f->ip6f_nxt);
234	}
235
236	IP6Q_LOCK();
237
238	/*
239	 * Enforce upper bound on number of fragments.
240	 * If maxfrag is 0, never accept fragments.
241	 * If maxfrag is -1, accept all fragments without limitation.
242	 */
243	if (V_ip6_maxfrags < 0)
244		;
245	else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags)
246		goto dropfrag;
247
248	for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next)
249		if (ip6f->ip6f_ident == q6->ip6q_ident &&
250		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
251		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
252#ifdef MAC
253		    && mac_ip6q_match(m, q6)
254#endif
255		    )
256			break;
257
258	if (q6 == &V_ip6q) {
259		/*
260		 * the first fragment to arrive, create a reassembly queue.
261		 */
262		first_frag = 1;
263
264		/*
265		 * Enforce upper bound on number of fragmented packets
266		 * for which we attempt reassembly;
267		 * If maxfragpackets is 0, never accept fragments.
268		 * If maxfragpackets is -1, accept all fragments without
269		 * limitation.
270		 */
271		if (V_ip6_maxfragpackets < 0)
272			;
273		else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets)
274			goto dropfrag;
275		V_frag6_nfragpackets++;
276		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
277		    M_NOWAIT);
278		if (q6 == NULL)
279			goto dropfrag;
280		bzero(q6, sizeof(*q6));
281#ifdef MAC
282		if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
283			free(q6, M_FTABLE);
284			goto dropfrag;
285		}
286		mac_ip6q_create(m, q6);
287#endif
288		frag6_insque(q6, &V_ip6q);
289
290		/* ip6q_nxt will be filled afterwards, from 1st fragment */
291		q6->ip6q_down	= q6->ip6q_up = (struct ip6asfrag *)q6;
292#ifdef notyet
293		q6->ip6q_nxtp	= (u_char *)nxtp;
294#endif
295		q6->ip6q_ident	= ip6f->ip6f_ident;
296		q6->ip6q_ttl	= IPV6_FRAGTTL;
297		q6->ip6q_src	= ip6->ip6_src;
298		q6->ip6q_dst	= ip6->ip6_dst;
299		q6->ip6q_ecn	=
300		    (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
301		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
302
303		q6->ip6q_nfrag = 0;
304	}
305
306	/*
307	 * If it's the 1st fragment, record the length of the
308	 * unfragmentable part and the next header of the fragment header.
309	 */
310	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
311	if (fragoff == 0) {
312		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
313		    sizeof(struct ip6_frag);
314		q6->ip6q_nxt = ip6f->ip6f_nxt;
315	}
316
317	/*
318	 * Check that the reassembled packet would not exceed 65535 bytes
319	 * in size.
320	 * If it would exceed, discard the fragment and return an ICMP error.
321	 */
322	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
323	if (q6->ip6q_unfrglen >= 0) {
324		/* The 1st fragment has already arrived. */
325		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
326			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
327			    offset - sizeof(struct ip6_frag) +
328			    offsetof(struct ip6_frag, ip6f_offlg));
329			IP6Q_UNLOCK();
330			return (IPPROTO_DONE);
331		}
332	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
333		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
334		    offset - sizeof(struct ip6_frag) +
335		    offsetof(struct ip6_frag, ip6f_offlg));
336		IP6Q_UNLOCK();
337		return (IPPROTO_DONE);
338	}
339	/*
340	 * If it's the first fragment, do the above check for each
341	 * fragment already stored in the reassembly queue.
342	 */
343	if (fragoff == 0) {
344		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
345		     af6 = af6dwn) {
346			af6dwn = af6->ip6af_down;
347
348			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
349			    IPV6_MAXPACKET) {
350				struct mbuf *merr = IP6_REASS_MBUF(af6);
351				struct ip6_hdr *ip6err;
352				int erroff = af6->ip6af_offset;
353
354				/* dequeue the fragment. */
355				frag6_deq(af6);
356				free(af6, M_FTABLE);
357
358				/* adjust pointer. */
359				ip6err = mtod(merr, struct ip6_hdr *);
360
361				/*
362				 * Restore source and destination addresses
363				 * in the erroneous IPv6 header.
364				 */
365				ip6err->ip6_src = q6->ip6q_src;
366				ip6err->ip6_dst = q6->ip6q_dst;
367
368				icmp6_error(merr, ICMP6_PARAM_PROB,
369				    ICMP6_PARAMPROB_HEADER,
370				    erroff - sizeof(struct ip6_frag) +
371				    offsetof(struct ip6_frag, ip6f_offlg));
372			}
373		}
374	}
375
376	ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
377	    M_NOWAIT);
378	if (ip6af == NULL)
379		goto dropfrag;
380	bzero(ip6af, sizeof(*ip6af));
381	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
382	ip6af->ip6af_off = fragoff;
383	ip6af->ip6af_frglen = frgpartlen;
384	ip6af->ip6af_offset = offset;
385	IP6_REASS_MBUF(ip6af) = m;
386
387	if (first_frag) {
388		af6 = (struct ip6asfrag *)q6;
389		goto insert;
390	}
391
392	/*
393	 * Handle ECN by comparing this segment with the first one;
394	 * if CE is set, do not lose CE.
395	 * drop if CE and not-ECT are mixed for the same packet.
396	 */
397	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
398	ecn0 = q6->ip6q_ecn;
399	if (ecn == IPTOS_ECN_CE) {
400		if (ecn0 == IPTOS_ECN_NOTECT) {
401			free(ip6af, M_FTABLE);
402			goto dropfrag;
403		}
404		if (ecn0 != IPTOS_ECN_CE)
405			q6->ip6q_ecn = IPTOS_ECN_CE;
406	}
407	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
408		free(ip6af, M_FTABLE);
409		goto dropfrag;
410	}
411
412	/*
413	 * Find a segment which begins after this one does.
414	 */
415	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
416	     af6 = af6->ip6af_down)
417		if (af6->ip6af_off > ip6af->ip6af_off)
418			break;
419
420#if 0
421	/*
422	 * If there is a preceding segment, it may provide some of
423	 * our data already.  If so, drop the data from the incoming
424	 * segment.  If it provides all of our data, drop us.
425	 */
426	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
427		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
428			- ip6af->ip6af_off;
429		if (i > 0) {
430			if (i >= ip6af->ip6af_frglen)
431				goto dropfrag;
432			m_adj(IP6_REASS_MBUF(ip6af), i);
433			ip6af->ip6af_off += i;
434			ip6af->ip6af_frglen -= i;
435		}
436	}
437
438	/*
439	 * While we overlap succeeding segments trim them or,
440	 * if they are completely covered, dequeue them.
441	 */
442	while (af6 != (struct ip6asfrag *)q6 &&
443	       ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
444		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
445		if (i < af6->ip6af_frglen) {
446			af6->ip6af_frglen -= i;
447			af6->ip6af_off += i;
448			m_adj(IP6_REASS_MBUF(af6), i);
449			break;
450		}
451		af6 = af6->ip6af_down;
452		m_freem(IP6_REASS_MBUF(af6->ip6af_up));
453		frag6_deq(af6->ip6af_up);
454	}
455#else
456	/*
457	 * If the incoming framgent overlaps some existing fragments in
458	 * the reassembly queue, drop it, since it is dangerous to override
459	 * existing fragments from a security point of view.
460	 * We don't know which fragment is the bad guy - here we trust
461	 * fragment that came in earlier, with no real reason.
462	 *
463	 * Note: due to changes after disabling this part, mbuf passed to
464	 * m_adj() below now does not meet the requirement.
465	 */
466	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
467		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
468			- ip6af->ip6af_off;
469		if (i > 0) {
470#if 0				/* suppress the noisy log */
471			log(LOG_ERR, "%d bytes of a fragment from %s "
472			    "overlaps the previous fragment\n",
473			    i, ip6_sprintf(ip6buf, &q6->ip6q_src));
474#endif
475			free(ip6af, M_FTABLE);
476			goto dropfrag;
477		}
478	}
479	if (af6 != (struct ip6asfrag *)q6) {
480		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
481		if (i > 0) {
482#if 0				/* suppress the noisy log */
483			log(LOG_ERR, "%d bytes of a fragment from %s "
484			    "overlaps the succeeding fragment",
485			    i, ip6_sprintf(ip6buf, &q6->ip6q_src));
486#endif
487			free(ip6af, M_FTABLE);
488			goto dropfrag;
489		}
490	}
491#endif
492
493insert:
494#ifdef MAC
495	if (!first_frag)
496		mac_ip6q_update(m, q6);
497#endif
498
499	/*
500	 * Stick new segment in its place;
501	 * check for complete reassembly.
502	 * Move to front of packet queue, as we are
503	 * the most recently active fragmented packet.
504	 */
505	frag6_enq(ip6af, af6->ip6af_up);
506	V_frag6_nfrags++;
507	q6->ip6q_nfrag++;
508#if 0 /* xxx */
509	if (q6 != V_ip6q.ip6q_next) {
510		frag6_remque(q6);
511		frag6_insque(q6, &V_ip6q);
512	}
513#endif
514	next = 0;
515	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
516	     af6 = af6->ip6af_down) {
517		if (af6->ip6af_off != next) {
518			IP6Q_UNLOCK();
519			return IPPROTO_DONE;
520		}
521		next += af6->ip6af_frglen;
522	}
523	if (af6->ip6af_up->ip6af_mff) {
524		IP6Q_UNLOCK();
525		return IPPROTO_DONE;
526	}
527
528	/*
529	 * Reassembly is complete; concatenate fragments.
530	 */
531	ip6af = q6->ip6q_down;
532	t = m = IP6_REASS_MBUF(ip6af);
533	af6 = ip6af->ip6af_down;
534	frag6_deq(ip6af);
535	while (af6 != (struct ip6asfrag *)q6) {
536		af6dwn = af6->ip6af_down;
537		frag6_deq(af6);
538		while (t->m_next)
539			t = t->m_next;
540		t->m_next = IP6_REASS_MBUF(af6);
541		m_adj(t->m_next, af6->ip6af_offset);
542		free(af6, M_FTABLE);
543		af6 = af6dwn;
544	}
545
546	/* adjust offset to point where the original next header starts */
547	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
548	free(ip6af, M_FTABLE);
549	ip6 = mtod(m, struct ip6_hdr *);
550	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
551	if (q6->ip6q_ecn == IPTOS_ECN_CE)
552		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
553	nxt = q6->ip6q_nxt;
554#ifdef notyet
555	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
556#endif
557
558	/* Delete frag6 header */
559	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
560		/* This is the only possible case with !PULLDOWN_TEST */
561		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
562		    offset);
563		m->m_data += sizeof(struct ip6_frag);
564		m->m_len -= sizeof(struct ip6_frag);
565	} else {
566		/* this comes with no copy if the boundary is on cluster */
567		if ((t = m_split(m, offset, M_NOWAIT)) == NULL) {
568			frag6_remque(q6);
569			V_frag6_nfrags -= q6->ip6q_nfrag;
570#ifdef MAC
571			mac_ip6q_destroy(q6);
572#endif
573			free(q6, M_FTABLE);
574			V_frag6_nfragpackets--;
575			goto dropfrag;
576		}
577		m_adj(t, sizeof(struct ip6_frag));
578		m_cat(m, t);
579	}
580
581	/*
582	 * Store NXT to the original.
583	 */
584	{
585		char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
586		*prvnxtp = nxt;
587	}
588
589	frag6_remque(q6);
590	V_frag6_nfrags -= q6->ip6q_nfrag;
591#ifdef MAC
592	mac_ip6q_reassemble(q6, m);
593	mac_ip6q_destroy(q6);
594#endif
595	free(q6, M_FTABLE);
596	V_frag6_nfragpackets--;
597
598	if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
599		int plen = 0;
600		for (t = m; t; t = t->m_next)
601			plen += t->m_len;
602		m->m_pkthdr.len = plen;
603	}
604
605	IP6STAT_INC(ip6s_reassembled);
606	in6_ifstat_inc(dstifp, ifs6_reass_ok);
607
608	/*
609	 * Tell launch routine the next header
610	 */
611
612	*mp = m;
613	*offp = offset;
614
615	IP6Q_UNLOCK();
616	return nxt;
617
618 dropfrag:
619	IP6Q_UNLOCK();
620	in6_ifstat_inc(dstifp, ifs6_reass_fail);
621	IP6STAT_INC(ip6s_fragdropped);
622	m_freem(m);
623	return IPPROTO_DONE;
624}
625
626/*
627 * Free a fragment reassembly header and all
628 * associated datagrams.
629 */
630void
631frag6_freef(struct ip6q *q6)
632{
633	struct ip6asfrag *af6, *down6;
634
635	IP6Q_LOCK_ASSERT();
636
637	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
638	     af6 = down6) {
639		struct mbuf *m = IP6_REASS_MBUF(af6);
640
641		down6 = af6->ip6af_down;
642		frag6_deq(af6);
643
644		/*
645		 * Return ICMP time exceeded error for the 1st fragment.
646		 * Just free other fragments.
647		 */
648		if (af6->ip6af_off == 0) {
649			struct ip6_hdr *ip6;
650
651			/* adjust pointer */
652			ip6 = mtod(m, struct ip6_hdr *);
653
654			/* restore source and destination addresses */
655			ip6->ip6_src = q6->ip6q_src;
656			ip6->ip6_dst = q6->ip6q_dst;
657
658			icmp6_error(m, ICMP6_TIME_EXCEEDED,
659				    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
660		} else
661			m_freem(m);
662		free(af6, M_FTABLE);
663	}
664	frag6_remque(q6);
665	V_frag6_nfrags -= q6->ip6q_nfrag;
666#ifdef MAC
667	mac_ip6q_destroy(q6);
668#endif
669	free(q6, M_FTABLE);
670	V_frag6_nfragpackets--;
671}
672
673/*
674 * Put an ip fragment on a reassembly chain.
675 * Like insque, but pointers in middle of structure.
676 */
677void
678frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
679{
680
681	IP6Q_LOCK_ASSERT();
682
683	af6->ip6af_up = up6;
684	af6->ip6af_down = up6->ip6af_down;
685	up6->ip6af_down->ip6af_up = af6;
686	up6->ip6af_down = af6;
687}
688
689/*
690 * To frag6_enq as remque is to insque.
691 */
692void
693frag6_deq(struct ip6asfrag *af6)
694{
695
696	IP6Q_LOCK_ASSERT();
697
698	af6->ip6af_up->ip6af_down = af6->ip6af_down;
699	af6->ip6af_down->ip6af_up = af6->ip6af_up;
700}
701
702void
703frag6_insque(struct ip6q *new, struct ip6q *old)
704{
705
706	IP6Q_LOCK_ASSERT();
707
708	new->ip6q_prev = old;
709	new->ip6q_next = old->ip6q_next;
710	old->ip6q_next->ip6q_prev= new;
711	old->ip6q_next = new;
712}
713
714void
715frag6_remque(struct ip6q *p6)
716{
717
718	IP6Q_LOCK_ASSERT();
719
720	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
721	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
722}
723
724/*
725 * IPv6 reassembling timer processing;
726 * if a timer expires on a reassembly
727 * queue, discard it.
728 */
729void
730frag6_slowtimo(void)
731{
732	VNET_ITERATOR_DECL(vnet_iter);
733	struct ip6q *q6;
734
735	VNET_LIST_RLOCK_NOSLEEP();
736	IP6Q_LOCK();
737	VNET_FOREACH(vnet_iter) {
738		CURVNET_SET(vnet_iter);
739		q6 = V_ip6q.ip6q_next;
740		if (q6)
741			while (q6 != &V_ip6q) {
742				--q6->ip6q_ttl;
743				q6 = q6->ip6q_next;
744				if (q6->ip6q_prev->ip6q_ttl == 0) {
745					IP6STAT_INC(ip6s_fragtimeout);
746					/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
747					frag6_freef(q6->ip6q_prev);
748				}
749			}
750		/*
751		 * If we are over the maximum number of fragments
752		 * (due to the limit being lowered), drain off
753		 * enough to get down to the new limit.
754		 */
755		while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets &&
756		    V_ip6q.ip6q_prev) {
757			IP6STAT_INC(ip6s_fragoverflow);
758			/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
759			frag6_freef(V_ip6q.ip6q_prev);
760		}
761		CURVNET_RESTORE();
762	}
763	IP6Q_UNLOCK();
764	VNET_LIST_RUNLOCK_NOSLEEP();
765}
766
767/*
768 * Drain off all datagram fragments.
769 */
770void
771frag6_drain(void)
772{
773	VNET_ITERATOR_DECL(vnet_iter);
774
775	VNET_LIST_RLOCK_NOSLEEP();
776	if (IP6Q_TRYLOCK() == 0) {
777		VNET_LIST_RUNLOCK_NOSLEEP();
778		return;
779	}
780	VNET_FOREACH(vnet_iter) {
781		CURVNET_SET(vnet_iter);
782		while (V_ip6q.ip6q_next != &V_ip6q) {
783			IP6STAT_INC(ip6s_fragdropped);
784			/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
785			frag6_freef(V_ip6q.ip6q_next);
786		}
787		CURVNET_RESTORE();
788	}
789	IP6Q_UNLOCK();
790	VNET_LIST_RUNLOCK_NOSLEEP();
791}
792