1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/netinet6/frag6.c 329158 2018-02-12 13:52:58Z ae $");
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/malloc.h>
38#include <sys/mbuf.h>
39#include <sys/domain.h>
40#include <sys/protosw.h>
41#include <sys/socket.h>
42#include <sys/errno.h>
43#include <sys/time.h>
44#include <sys/kernel.h>
45#include <sys/syslog.h>
46
47#include <net/if.h>
48#include <net/route.h>
49#include <net/vnet.h>
50
51#include <netinet/in.h>
52#include <netinet/in_var.h>
53#include <netinet/ip6.h>
54#include <netinet6/ip6_var.h>
55#include <netinet/icmp6.h>
56#include <netinet/in_systm.h>	/* for ECN definitions */
57#include <netinet/ip.h>		/* for ECN definitions */
58
59#include <security/mac/mac_framework.h>
60
61/*
62 * Define it to get a correct behavior on per-interface statistics.
63 * You will need to perform an extra routing table lookup, per fragment,
64 * to do it.  This may, or may not be, a performance hit.
65 */
66#define IN6_IFSTAT_STRICT
67
68static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
69static void frag6_deq(struct ip6asfrag *);
70static void frag6_insque(struct ip6q *, struct ip6q *);
71static void frag6_remque(struct ip6q *);
72static void frag6_freef(struct ip6q *);
73
74static struct mtx ip6qlock;
75/*
76 * These fields all protected by ip6qlock.
77 */
78static VNET_DEFINE(u_int, frag6_nfragpackets);
79static VNET_DEFINE(u_int, frag6_nfrags);
80static VNET_DEFINE(struct ip6q, ip6q);	/* ip6 reassemble queue */
81
82#define	V_frag6_nfragpackets		VNET(frag6_nfragpackets)
83#define	V_frag6_nfrags			VNET(frag6_nfrags)
84#define	V_ip6q				VNET(ip6q)
85
86#define	IP6Q_LOCK_INIT()	mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
87#define	IP6Q_LOCK()		mtx_lock(&ip6qlock)
88#define	IP6Q_TRYLOCK()		mtx_trylock(&ip6qlock)
89#define	IP6Q_LOCK_ASSERT()	mtx_assert(&ip6qlock, MA_OWNED)
90#define	IP6Q_UNLOCK()		mtx_unlock(&ip6qlock)
91
92static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
93
94/*
95 * Initialise reassembly queue and fragment identifier.
96 */
97static void
98frag6_change(void *tag)
99{
100
101	V_ip6_maxfragpackets = nmbclusters / 4;
102	V_ip6_maxfrags = nmbclusters / 4;
103}
104
105void
106frag6_init(void)
107{
108
109	V_ip6_maxfragpackets = nmbclusters / 4;
110	V_ip6_maxfrags = nmbclusters / 4;
111	V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q;
112
113	if (!IS_DEFAULT_VNET(curvnet))
114		return;
115
116	EVENTHANDLER_REGISTER(nmbclusters_change,
117	    frag6_change, NULL, EVENTHANDLER_PRI_ANY);
118
119	IP6Q_LOCK_INIT();
120}
121
122/*
123 * In RFC2460, fragment and reassembly rule do not agree with each other,
124 * in terms of next header field handling in fragment header.
125 * While the sender will use the same value for all of the fragmented packets,
126 * receiver is suggested not to check the consistency.
127 *
128 * fragment rule (p20):
129 *	(2) A Fragment header containing:
130 *	The Next Header value that identifies the first header of
131 *	the Fragmentable Part of the original packet.
132 *		-> next header field is same for all fragments
133 *
134 * reassembly rule (p21):
135 *	The Next Header field of the last header of the Unfragmentable
136 *	Part is obtained from the Next Header field of the first
137 *	fragment's Fragment header.
138 *		-> should grab it from the first fragment only
139 *
 * The following note also contradicts the fragment rule - no one is going to
 * send different fragments with different next header fields.
142 *
143 * additional note (p22):
144 *	The Next Header values in the Fragment headers of different
145 *	fragments of the same original packet may differ.  Only the value
146 *	from the Offset zero fragment packet is used for reassembly.
147 *		-> should grab it from the first fragment only
148 *
149 * There is no explicit reason given in the RFC.  Historical reason maybe?
150 */
151/*
152 * Fragment input
153 */
154int
155frag6_input(struct mbuf **mp, int *offp, int proto)
156{
157	struct mbuf *m = *mp, *t;
158	struct ip6_hdr *ip6;
159	struct ip6_frag *ip6f;
160	struct ip6q *q6;
161	struct ip6asfrag *af6, *ip6af, *af6dwn;
162#ifdef IN6_IFSTAT_STRICT
163	struct in6_ifaddr *ia;
164#endif
165	int offset = *offp, nxt, i, next;
166	int first_frag = 0;
167	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
168	struct ifnet *dstifp;
169	u_int8_t ecn, ecn0;
170#if 0
171	char ip6buf[INET6_ADDRSTRLEN];
172#endif
173
174	ip6 = mtod(m, struct ip6_hdr *);
175#ifndef PULLDOWN_TEST
176	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
177	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
178#else
179	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
180	if (ip6f == NULL)
181		return (IPPROTO_DONE);
182#endif
183
184	dstifp = NULL;
185#ifdef IN6_IFSTAT_STRICT
186	/* find the destination interface of the packet. */
187	if ((ia = ip6_getdstifaddr(m)) != NULL) {
188		dstifp = ia->ia_ifp;
189		ifa_free(&ia->ia_ifa);
190	}
191#else
192	/* we are violating the spec, this is not the destination interface */
193	if ((m->m_flags & M_PKTHDR) != 0)
194		dstifp = m->m_pkthdr.rcvif;
195#endif
196
197	/* jumbo payload can't contain a fragment header */
198	if (ip6->ip6_plen == 0) {
199		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
200		in6_ifstat_inc(dstifp, ifs6_reass_fail);
201		return IPPROTO_DONE;
202	}
203
204	/*
205	 * check whether fragment packet's fragment length is
206	 * multiple of 8 octets.
207	 * sizeof(struct ip6_frag) == 8
208	 * sizeof(struct ip6_hdr) = 40
209	 */
210	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
211	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
212		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
213		    offsetof(struct ip6_hdr, ip6_plen));
214		in6_ifstat_inc(dstifp, ifs6_reass_fail);
215		return IPPROTO_DONE;
216	}
217
218	IP6STAT_INC(ip6s_fragments);
219	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
220
221	/* offset now points to data portion */
222	offset += sizeof(struct ip6_frag);
223
224	/*
225	 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
226	 * upfront, unrelated to any reassembly.  Just skip the fragment header.
227	 */
228	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
229		/* XXX-BZ we want dedicated counters for this. */
230		IP6STAT_INC(ip6s_reassembled);
231		in6_ifstat_inc(dstifp, ifs6_reass_ok);
232		*offp = offset;
233		return (ip6f->ip6f_nxt);
234	}
235
236	IP6Q_LOCK();
237
238	/*
239	 * Enforce upper bound on number of fragments.
240	 * If maxfrag is 0, never accept fragments.
241	 * If maxfrag is -1, accept all fragments without limitation.
242	 */
243	if (V_ip6_maxfrags < 0)
244		;
245	else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags)
246		goto dropfrag;
247
248	for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next)
249		if (ip6f->ip6f_ident == q6->ip6q_ident &&
250		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
251		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
252#ifdef MAC
253		    && mac_ip6q_match(m, q6)
254#endif
255		    )
256			break;
257
258	if (q6 == &V_ip6q) {
259		/*
260		 * the first fragment to arrive, create a reassembly queue.
261		 */
262		first_frag = 1;
263
264		/*
265		 * Enforce upper bound on number of fragmented packets
266		 * for which we attempt reassembly;
267		 * If maxfragpackets is 0, never accept fragments.
268		 * If maxfragpackets is -1, accept all fragments without
269		 * limitation.
270		 */
271		if (V_ip6_maxfragpackets < 0)
272			;
273		else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets)
274			goto dropfrag;
275		V_frag6_nfragpackets++;
276		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
277		    M_NOWAIT);
278		if (q6 == NULL)
279			goto dropfrag;
280		bzero(q6, sizeof(*q6));
281#ifdef MAC
282		if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
283			free(q6, M_FTABLE);
284			goto dropfrag;
285		}
286		mac_ip6q_create(m, q6);
287#endif
288		frag6_insque(q6, &V_ip6q);
289
290		/* ip6q_nxt will be filled afterwards, from 1st fragment */
291		q6->ip6q_down	= q6->ip6q_up = (struct ip6asfrag *)q6;
292#ifdef notyet
293		q6->ip6q_nxtp	= (u_char *)nxtp;
294#endif
295		q6->ip6q_ident	= ip6f->ip6f_ident;
296		q6->ip6q_ttl	= IPV6_FRAGTTL;
297		q6->ip6q_src	= ip6->ip6_src;
298		q6->ip6q_dst	= ip6->ip6_dst;
299		q6->ip6q_ecn	=
300		    (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
301		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
302
303		q6->ip6q_nfrag = 0;
304	}
305
306	/*
307	 * If it's the 1st fragment, record the length of the
308	 * unfragmentable part and the next header of the fragment header.
309	 */
310	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
311	if (fragoff == 0) {
312		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
313		    sizeof(struct ip6_frag);
314		q6->ip6q_nxt = ip6f->ip6f_nxt;
315	}
316
317	/*
318	 * Check that the reassembled packet would not exceed 65535 bytes
319	 * in size.
320	 * If it would exceed, discard the fragment and return an ICMP error.
321	 */
322	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
323	if (q6->ip6q_unfrglen >= 0) {
324		/* The 1st fragment has already arrived. */
325		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
326			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
327			    offset - sizeof(struct ip6_frag) +
328			    offsetof(struct ip6_frag, ip6f_offlg));
329			IP6Q_UNLOCK();
330			return (IPPROTO_DONE);
331		}
332	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
333		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
334		    offset - sizeof(struct ip6_frag) +
335		    offsetof(struct ip6_frag, ip6f_offlg));
336		IP6Q_UNLOCK();
337		return (IPPROTO_DONE);
338	}
339	/*
340	 * If it's the first fragment, do the above check for each
341	 * fragment already stored in the reassembly queue.
342	 */
343	if (fragoff == 0) {
344		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
345		     af6 = af6dwn) {
346			af6dwn = af6->ip6af_down;
347
348			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
349			    IPV6_MAXPACKET) {
350				struct mbuf *merr = IP6_REASS_MBUF(af6);
351				struct ip6_hdr *ip6err;
352				int erroff = af6->ip6af_offset;
353
354				/* dequeue the fragment. */
355				frag6_deq(af6);
356				free(af6, M_FTABLE);
357
358				/* adjust pointer. */
359				ip6err = mtod(merr, struct ip6_hdr *);
360
361				/*
362				 * Restore source and destination addresses
363				 * in the erroneous IPv6 header.
364				 */
365				ip6err->ip6_src = q6->ip6q_src;
366				ip6err->ip6_dst = q6->ip6q_dst;
367
368				icmp6_error(merr, ICMP6_PARAM_PROB,
369				    ICMP6_PARAMPROB_HEADER,
370				    erroff - sizeof(struct ip6_frag) +
371				    offsetof(struct ip6_frag, ip6f_offlg));
372			}
373		}
374	}
375
376	ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
377	    M_NOWAIT);
378	if (ip6af == NULL)
379		goto dropfrag;
380	bzero(ip6af, sizeof(*ip6af));
381	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
382	ip6af->ip6af_off = fragoff;
383	ip6af->ip6af_frglen = frgpartlen;
384	ip6af->ip6af_offset = offset;
385	IP6_REASS_MBUF(ip6af) = m;
386
387	if (first_frag) {
388		af6 = (struct ip6asfrag *)q6;
389		goto insert;
390	}
391
392	/*
393	 * Handle ECN by comparing this segment with the first one;
394	 * if CE is set, do not lose CE.
395	 * drop if CE and not-ECT are mixed for the same packet.
396	 */
397	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
398	ecn0 = q6->ip6q_ecn;
399	if (ecn == IPTOS_ECN_CE) {
400		if (ecn0 == IPTOS_ECN_NOTECT) {
401			free(ip6af, M_FTABLE);
402			goto dropfrag;
403		}
404		if (ecn0 != IPTOS_ECN_CE)
405			q6->ip6q_ecn = IPTOS_ECN_CE;
406	}
407	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
408		free(ip6af, M_FTABLE);
409		goto dropfrag;
410	}
411
412	/*
413	 * Find a segment which begins after this one does.
414	 */
415	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
416	     af6 = af6->ip6af_down)
417		if (af6->ip6af_off > ip6af->ip6af_off)
418			break;
419
420#if 0
421	/*
422	 * If there is a preceding segment, it may provide some of
423	 * our data already.  If so, drop the data from the incoming
424	 * segment.  If it provides all of our data, drop us.
425	 */
426	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
427		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
428			- ip6af->ip6af_off;
429		if (i > 0) {
430			if (i >= ip6af->ip6af_frglen)
431				goto dropfrag;
432			m_adj(IP6_REASS_MBUF(ip6af), i);
433			ip6af->ip6af_off += i;
434			ip6af->ip6af_frglen -= i;
435		}
436	}
437
438	/*
439	 * While we overlap succeeding segments trim them or,
440	 * if they are completely covered, dequeue them.
441	 */
442	while (af6 != (struct ip6asfrag *)q6 &&
443	       ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
444		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
445		if (i < af6->ip6af_frglen) {
446			af6->ip6af_frglen -= i;
447			af6->ip6af_off += i;
448			m_adj(IP6_REASS_MBUF(af6), i);
449			break;
450		}
451		af6 = af6->ip6af_down;
452		m_freem(IP6_REASS_MBUF(af6->ip6af_up));
453		frag6_deq(af6->ip6af_up);
454	}
455#else
456	/*
457	 * If the incoming framgent overlaps some existing fragments in
458	 * the reassembly queue, drop it, since it is dangerous to override
459	 * existing fragments from a security point of view.
460	 * We don't know which fragment is the bad guy - here we trust
461	 * fragment that came in earlier, with no real reason.
462	 *
463	 * Note: due to changes after disabling this part, mbuf passed to
464	 * m_adj() below now does not meet the requirement.
465	 */
466	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
467		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
468			- ip6af->ip6af_off;
469		if (i > 0) {
470#if 0				/* suppress the noisy log */
471			log(LOG_ERR, "%d bytes of a fragment from %s "
472			    "overlaps the previous fragment\n",
473			    i, ip6_sprintf(ip6buf, &q6->ip6q_src));
474#endif
475			free(ip6af, M_FTABLE);
476			goto dropfrag;
477		}
478	}
479	if (af6 != (struct ip6asfrag *)q6) {
480		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
481		if (i > 0) {
482#if 0				/* suppress the noisy log */
483			log(LOG_ERR, "%d bytes of a fragment from %s "
484			    "overlaps the succeeding fragment",
485			    i, ip6_sprintf(ip6buf, &q6->ip6q_src));
486#endif
487			free(ip6af, M_FTABLE);
488			goto dropfrag;
489		}
490	}
491#endif
492
493insert:
494#ifdef MAC
495	if (!first_frag)
496		mac_ip6q_update(m, q6);
497#endif
498
499	/*
500	 * Stick new segment in its place;
501	 * check for complete reassembly.
502	 * Move to front of packet queue, as we are
503	 * the most recently active fragmented packet.
504	 */
505	frag6_enq(ip6af, af6->ip6af_up);
506	V_frag6_nfrags++;
507	q6->ip6q_nfrag++;
508#if 0 /* xxx */
509	if (q6 != V_ip6q.ip6q_next) {
510		frag6_remque(q6);
511		frag6_insque(q6, &V_ip6q);
512	}
513#endif
514	next = 0;
515	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
516	     af6 = af6->ip6af_down) {
517		if (af6->ip6af_off != next) {
518			IP6Q_UNLOCK();
519			return IPPROTO_DONE;
520		}
521		next += af6->ip6af_frglen;
522	}
523	if (af6->ip6af_up->ip6af_mff) {
524		IP6Q_UNLOCK();
525		return IPPROTO_DONE;
526	}
527
528	/*
529	 * Reassembly is complete; concatenate fragments.
530	 */
531	ip6af = q6->ip6q_down;
532	t = m = IP6_REASS_MBUF(ip6af);
533	af6 = ip6af->ip6af_down;
534	frag6_deq(ip6af);
535	while (af6 != (struct ip6asfrag *)q6) {
536		m->m_pkthdr.csum_flags &=
537		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
538		m->m_pkthdr.csum_data +=
539		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
540
541		af6dwn = af6->ip6af_down;
542		frag6_deq(af6);
543		while (t->m_next)
544			t = t->m_next;
545		t->m_next = IP6_REASS_MBUF(af6);
546		m_adj(t->m_next, af6->ip6af_offset);
547		free(af6, M_FTABLE);
548		af6 = af6dwn;
549	}
550
551	while (m->m_pkthdr.csum_data & 0xffff0000)
552		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
553		    (m->m_pkthdr.csum_data >> 16);
554
555	/* adjust offset to point where the original next header starts */
556	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
557	free(ip6af, M_FTABLE);
558	ip6 = mtod(m, struct ip6_hdr *);
559	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
560	if (q6->ip6q_ecn == IPTOS_ECN_CE)
561		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
562	nxt = q6->ip6q_nxt;
563#ifdef notyet
564	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
565#endif
566
567	if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
568		frag6_remque(q6);
569		V_frag6_nfrags -= q6->ip6q_nfrag;
570#ifdef MAC
571		mac_ip6q_destroy(q6);
572#endif
573		free(q6, M_FTABLE);
574		V_frag6_nfragpackets--;
575
576		goto dropfrag;
577	}
578
579	/*
580	 * Store NXT to the original.
581	 */
582	m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
583	    (caddr_t)&nxt);
584
585	frag6_remque(q6);
586	V_frag6_nfrags -= q6->ip6q_nfrag;
587#ifdef MAC
588	mac_ip6q_reassemble(q6, m);
589	mac_ip6q_destroy(q6);
590#endif
591	free(q6, M_FTABLE);
592	V_frag6_nfragpackets--;
593
594	if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
595		int plen = 0;
596		for (t = m; t; t = t->m_next)
597			plen += t->m_len;
598		m->m_pkthdr.len = plen;
599	}
600
601	IP6STAT_INC(ip6s_reassembled);
602	in6_ifstat_inc(dstifp, ifs6_reass_ok);
603
604	/*
605	 * Tell launch routine the next header
606	 */
607
608	*mp = m;
609	*offp = offset;
610
611	IP6Q_UNLOCK();
612	return nxt;
613
614 dropfrag:
615	IP6Q_UNLOCK();
616	in6_ifstat_inc(dstifp, ifs6_reass_fail);
617	IP6STAT_INC(ip6s_fragdropped);
618	m_freem(m);
619	return IPPROTO_DONE;
620}
621
622/*
623 * Free a fragment reassembly header and all
624 * associated datagrams.
625 */
626void
627frag6_freef(struct ip6q *q6)
628{
629	struct ip6asfrag *af6, *down6;
630
631	IP6Q_LOCK_ASSERT();
632
633	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
634	     af6 = down6) {
635		struct mbuf *m = IP6_REASS_MBUF(af6);
636
637		down6 = af6->ip6af_down;
638		frag6_deq(af6);
639
640		/*
641		 * Return ICMP time exceeded error for the 1st fragment.
642		 * Just free other fragments.
643		 */
644		if (af6->ip6af_off == 0) {
645			struct ip6_hdr *ip6;
646
647			/* adjust pointer */
648			ip6 = mtod(m, struct ip6_hdr *);
649
650			/* restore source and destination addresses */
651			ip6->ip6_src = q6->ip6q_src;
652			ip6->ip6_dst = q6->ip6q_dst;
653
654			icmp6_error(m, ICMP6_TIME_EXCEEDED,
655				    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
656		} else
657			m_freem(m);
658		free(af6, M_FTABLE);
659	}
660	frag6_remque(q6);
661	V_frag6_nfrags -= q6->ip6q_nfrag;
662#ifdef MAC
663	mac_ip6q_destroy(q6);
664#endif
665	free(q6, M_FTABLE);
666	V_frag6_nfragpackets--;
667}
668
669/*
670 * Put an ip fragment on a reassembly chain.
671 * Like insque, but pointers in middle of structure.
672 */
673void
674frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
675{
676
677	IP6Q_LOCK_ASSERT();
678
679	af6->ip6af_up = up6;
680	af6->ip6af_down = up6->ip6af_down;
681	up6->ip6af_down->ip6af_up = af6;
682	up6->ip6af_down = af6;
683}
684
685/*
686 * To frag6_enq as remque is to insque.
687 */
688void
689frag6_deq(struct ip6asfrag *af6)
690{
691
692	IP6Q_LOCK_ASSERT();
693
694	af6->ip6af_up->ip6af_down = af6->ip6af_down;
695	af6->ip6af_down->ip6af_up = af6->ip6af_up;
696}
697
698void
699frag6_insque(struct ip6q *new, struct ip6q *old)
700{
701
702	IP6Q_LOCK_ASSERT();
703
704	new->ip6q_prev = old;
705	new->ip6q_next = old->ip6q_next;
706	old->ip6q_next->ip6q_prev= new;
707	old->ip6q_next = new;
708}
709
710void
711frag6_remque(struct ip6q *p6)
712{
713
714	IP6Q_LOCK_ASSERT();
715
716	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
717	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
718}
719
720/*
721 * IPv6 reassembling timer processing;
722 * if a timer expires on a reassembly
723 * queue, discard it.
724 */
725void
726frag6_slowtimo(void)
727{
728	VNET_ITERATOR_DECL(vnet_iter);
729	struct ip6q *q6;
730
731	VNET_LIST_RLOCK_NOSLEEP();
732	IP6Q_LOCK();
733	VNET_FOREACH(vnet_iter) {
734		CURVNET_SET(vnet_iter);
735		q6 = V_ip6q.ip6q_next;
736		if (q6)
737			while (q6 != &V_ip6q) {
738				--q6->ip6q_ttl;
739				q6 = q6->ip6q_next;
740				if (q6->ip6q_prev->ip6q_ttl == 0) {
741					IP6STAT_INC(ip6s_fragtimeout);
742					/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
743					frag6_freef(q6->ip6q_prev);
744				}
745			}
746		/*
747		 * If we are over the maximum number of fragments
748		 * (due to the limit being lowered), drain off
749		 * enough to get down to the new limit.
750		 */
751		while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets &&
752		    V_ip6q.ip6q_prev) {
753			IP6STAT_INC(ip6s_fragoverflow);
754			/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
755			frag6_freef(V_ip6q.ip6q_prev);
756		}
757		CURVNET_RESTORE();
758	}
759	IP6Q_UNLOCK();
760	VNET_LIST_RUNLOCK_NOSLEEP();
761}
762
763/*
764 * Drain off all datagram fragments.
765 */
766void
767frag6_drain(void)
768{
769	VNET_ITERATOR_DECL(vnet_iter);
770
771	VNET_LIST_RLOCK_NOSLEEP();
772	if (IP6Q_TRYLOCK() == 0) {
773		VNET_LIST_RUNLOCK_NOSLEEP();
774		return;
775	}
776	VNET_FOREACH(vnet_iter) {
777		CURVNET_SET(vnet_iter);
778		while (V_ip6q.ip6q_next != &V_ip6q) {
779			IP6STAT_INC(ip6s_fragdropped);
780			/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
781			frag6_freef(V_ip6q.ip6q_next);
782		}
783		CURVNET_RESTORE();
784	}
785	IP6Q_UNLOCK();
786	VNET_LIST_RUNLOCK_NOSLEEP();
787}
788
789int
790ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
791{
792	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
793	struct mbuf *t;
794
795	/* Delete frag6 header. */
796	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
797		/* This is the only possible case with !PULLDOWN_TEST. */
798		bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
799		    offset);
800		m->m_data += sizeof(struct ip6_frag);
801		m->m_len -= sizeof(struct ip6_frag);
802	} else {
803		/* This comes with no copy if the boundary is on cluster. */
804		if ((t = m_split(m, offset, wait)) == NULL)
805			return (ENOMEM);
806		m_adj(t, sizeof(struct ip6_frag));
807		m_cat(m, t);
808	}
809
810	return (0);
811}
812