frag6.c revision 121355
/*	$FreeBSD: head/sys/netinet6/frag6.c 121355 2003-10-22 19:03:49Z ume $	*/
/*	$KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
32119418Sobrien
33119418Sobrien#include "opt_random_ip_id.h"
3473161Sjulian
3590995Sjulian#include <sys/param.h>
3690995Sjulian#include <sys/systm.h>
3773161Sjulian#include <sys/malloc.h>
3890995Sjulian#include <sys/mbuf.h>
3973161Sjulian#include <sys/domain.h>
40111899Sdas#include <sys/protosw.h>
41111899Sdas#include <sys/socket.h>
4273161Sjulian#include <sys/errno.h>
4373161Sjulian#include <sys/time.h>
4473161Sjulian#include <sys/kernel.h>
4573161Sjulian#include <sys/syslog.h>
4673161Sjulian
4773161Sjulian#include <net/if.h>
4873161Sjulian#include <net/route.h>
4973161Sjulian
50129879Sphk#include <netinet/in.h>
51131579Sphk#include <netinet/in_var.h>
5273161Sjulian#include <netinet/ip6.h>
5373161Sjulian#include <netinet6/ip6_var.h>
54129968Sphk#include <netinet/icmp6.h>
5573161Sjulian
5673161Sjulian#include <net/net_osdep.h>
5773161Sjulian
5890995Sjulian/*
5990995Sjulian * Define it to get a correct behavior on per-interface statistics.
60130585Sphk * You will need to perform an extra routing table lookup, per fragment,
61131579Sphk * to do it.  This may, or may not be, a performance hit.
6273161Sjulian */
6390995Sjulian#define IN6_IFSTAT_STRICT
6490995Sjulian
6573161Sjulianstatic void frag6_enq __P((struct ip6asfrag *, struct ip6asfrag *));
6673161Sjulianstatic void frag6_deq __P((struct ip6asfrag *));
67126080Sphkstatic void frag6_insque __P((struct ip6q *, struct ip6q *));
68111815Sphkstatic void frag6_remque __P((struct ip6q *));
69111815Sphkstatic void frag6_freef __P((struct ip6q *));
70126080Sphk
71126080Sphkstatic struct mtx ip6qlock;
7273161Sjulian/*
7373161Sjulian * These fields all protected by ip6qlock.
7490995Sjulian */
7590995Sjulianstatic u_int frag6_nfragpackets;
7690995Sjulianstatic u_int frag6_nfrags;
77126077Sphkstatic struct	ip6q ip6q;	/* ip6 reassemble queue */
7873161Sjulian
7973161Sjulian#define	IP6Q_LOCK_INIT()	mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
80131579Sphk#define	IP6Q_LOCK()		mtx_lock(&ip6qlock)
81131579Sphk#define	IP6Q_TRYLOCK()		mtx_trylock(&ip6qlock)
82131579Sphk#define	IP6Q_LOCK_ASSERT()	mtx_assert(&ip6qlock, MA_OWNED)
83129968Sphk#define	IP6Q_UNLOCK()		mtx_unlock(&ip6qlock)
84129968Sphk
8573161Sjulianstatic MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
8673161Sjulian
8773161Sjulian/*
88126077Sphk * Initialise reassembly queue and fragment identifier.
89126077Sphk */
90126077Sphkvoid
91126077Sphkfrag6_init()
9273161Sjulian{
9373161Sjulian
94126077Sphk	ip6_maxfragpackets = nmbclusters / 4;
95126077Sphk	ip6_maxfrags = nmbclusters / 4;
96126077Sphk
9773161Sjulian	IP6Q_LOCK_INIT();
98130585Sphk
99126077Sphk#ifndef RANDOM_IP_ID
100126077Sphk	ip6_id = arc4random();
101126077Sphk#endif
102130585Sphk	ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
103126077Sphk}
104130640Sphk
/*
 * In RFC2460, the fragmentation and reassembly rules do not agree with each
 * other in terms of next header field handling in the fragment header.
 * While the sender will use the same value for all of the fragmented packets,
 * the receiver is advised not to check the consistency.
 *
 * fragment rule (p20):
 *	(2) A Fragment header containing:
 *	The Next Header value that identifies the first header of
 *	the Fragmentable Part of the original packet.
 *		-> next header field is same for all fragments
 *
 * reassembly rule (p21):
 *	The Next Header field of the last header of the Unfragmentable
 *	Part is obtained from the Next Header field of the first
 *	fragment's Fragment header.
 *		-> should grab it from the first fragment only
 *
 * The following note also contradicts the fragment rule - no one is going to
 * send different fragments with different next header fields.
 *
 * additional note (p22):
 *	The Next Header values in the Fragment headers of different
 *	fragments of the same original packet may differ.  Only the value
 *	from the Offset zero fragment packet is used for reassembly.
 *		-> should grab it from the first fragment only
 *
 * There is no explicit reason given in the RFC.  Historical reason maybe?
 */
134126077Sphk/*
135126077Sphk * Fragment input
136126077Sphk */
137126077Sphkint
138126077Sphkfrag6_input(mp, offp, proto)
139126077Sphk	struct mbuf **mp;
140126077Sphk	int *offp, proto;
141126077Sphk{
142126077Sphk	struct mbuf *m = *mp, *t;
143126077Sphk	struct ip6_hdr *ip6;
144129968Sphk	struct ip6_frag *ip6f;
145129968Sphk	struct ip6q *q6;
146129968Sphk	struct ip6asfrag *af6, *ip6af, *af6dwn;
147129968Sphk	int offset = *offp, nxt, i, next;
148129968Sphk	int first_frag = 0;
14973161Sjulian	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
150129968Sphk	struct ifnet *dstifp;
151129968Sphk#ifdef IN6_IFSTAT_STRICT
152129968Sphk	static struct route_in6 ro;
153129968Sphk	struct sockaddr_in6 *dst;
154129968Sphk#endif
155131579Sphk
156129968Sphk	ip6 = mtod(m, struct ip6_hdr *);
157131579Sphk#ifndef PULLDOWN_TEST
158130077Sphk	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
159129968Sphk	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
160129968Sphk#else
161129968Sphk	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
162131579Sphk	if (ip6f == NULL)
163130077Sphk		return (IPPROTO_DONE);
164129968Sphk#endif
165129968Sphk
166129968Sphk	dstifp = NULL;
167129968Sphk#ifdef IN6_IFSTAT_STRICT
168129968Sphk	/* find the destination interface of the packet. */
169129968Sphk	dst = (struct sockaddr_in6 *)&ro.ro_dst;
170129968Sphk	if (ro.ro_rt
171129968Sphk	 && ((ro.ro_rt->rt_flags & RTF_UP) == 0
172129968Sphk	  || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
173130077Sphk		RTFREE(ro.ro_rt);
174129968Sphk		ro.ro_rt = (struct rtentry *)0;
175129968Sphk	}
176129968Sphk	if (ro.ro_rt == NULL) {
177129968Sphk		bzero(dst, sizeof(*dst));
17873161Sjulian		dst->sin6_family = AF_INET6;
17973161Sjulian		dst->sin6_len = sizeof(struct sockaddr_in6);
18073161Sjulian		dst->sin6_addr = ip6->ip6_dst;
18173161Sjulian	}
18273161Sjulian	rtalloc((struct route *)&ro);
183130585Sphk	if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL)
18473161Sjulian		dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp;
185130585Sphk#else
18673161Sjulian	/* we are violating the spec, this is not the destination interface */
18773161Sjulian	if ((m->m_flags & M_PKTHDR) != 0)
188126077Sphk		dstifp = m->m_pkthdr.rcvif;
18973161Sjulian#endif
190126077Sphk
191126077Sphk	/* jumbo payload can't contain a fragment header */
19273161Sjulian	if (ip6->ip6_plen == 0) {
193126077Sphk		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
194126077Sphk		in6_ifstat_inc(dstifp, ifs6_reass_fail);
195129968Sphk		return IPPROTO_DONE;
19673161Sjulian	}
197126077Sphk
198126077Sphk	/*
199126077Sphk	 * check whether fragment packet's fragment length is
200129968Sphk	 * multiple of 8 octets.
201129968Sphk	 * sizeof(struct ip6_frag) == 8
202129968Sphk	 * sizeof(struct ip6_hdr) = 40
203129968Sphk	 */
204131579Sphk	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
205129968Sphk	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
206129968Sphk		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
207129968Sphk		    offsetof(struct ip6_hdr, ip6_plen));
208129968Sphk		in6_ifstat_inc(dstifp, ifs6_reass_fail);
209129968Sphk		return IPPROTO_DONE;
210129968Sphk	}
211129968Sphk
212131579Sphk	ip6stat.ip6s_fragments++;
213129968Sphk	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
214129968Sphk
215129968Sphk	/* offset now points to data portion */
216129968Sphk	offset += sizeof(struct ip6_frag);
217129968Sphk
218129968Sphk	IP6Q_LOCK();
219129968Sphk
220129968Sphk	/*
221129968Sphk	 * Enforce upper bound on number of fragments.
222129968Sphk	 * If maxfrag is 0, never accept fragments.
223129968Sphk	 * If maxfrag is -1, accept all fragments without limitation.
224129968Sphk	 */
22573161Sjulian	if (ip6_maxfrags < 0)
22673161Sjulian		;
22790995Sjulian	else if (frag6_nfrags >= (u_int)ip6_maxfrags)
22890995Sjulian		goto dropfrag;
22990995Sjulian
23073161Sjulian	for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
231130585Sphk		if (ip6f->ip6f_ident == q6->ip6q_ident &&
23273161Sjulian		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
233129968Sphk		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
23473161Sjulian			break;
23573161Sjulian
236129968Sphk	if (q6 == &ip6q) {
23773161Sjulian		/*
238126077Sphk		 * the first fragment to arrive, create a reassembly queue.
239126077Sphk		 */
240126077Sphk		first_frag = 1;
241129968Sphk
242129968Sphk		/*
24373161Sjulian		 * Enforce upper bound on number of fragmented packets
244129968Sphk		 * for which we attempt reassembly;
245129968Sphk		 * If maxfragpackets is 0, never accept fragments.
246129968Sphk		 * If maxfragpackets is -1, accept all fragments without
24790995Sjulian		 * limitation.
24873161Sjulian		 */
249136680Sphk		if (ip6_maxfragpackets < 0)
250129968Sphk			;
25193593Sjhb		else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets)
25273161Sjulian			goto dropfrag;
25373161Sjulian		frag6_nfragpackets++;
25473161Sjulian		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
255130077Sphk		    M_DONTWAIT);
25673161Sjulian		if (q6 == NULL)
25773161Sjulian			goto dropfrag;
25873161Sjulian		bzero(q6, sizeof(*q6));
259129968Sphk
260131579Sphk		frag6_insque(q6, &ip6q);
261131579Sphk
262131579Sphk		/* ip6q_nxt will be filled afterwards, from 1st fragment */
263131579Sphk		q6->ip6q_down	= q6->ip6q_up = (struct ip6asfrag *)q6;
264131579Sphk#ifdef notyet
265131579Sphk		q6->ip6q_nxtp	= (u_char *)nxtp;
266131579Sphk#endif
267131579Sphk		q6->ip6q_ident	= ip6f->ip6f_ident;
268131579Sphk		q6->ip6q_arrive = 0; /* Is it used anywhere? */
269131579Sphk		q6->ip6q_ttl 	= IPV6_FRAGTTL;
270131579Sphk		q6->ip6q_src	= ip6->ip6_src;
271131579Sphk		q6->ip6q_dst	= ip6->ip6_dst;
272131579Sphk		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
273131579Sphk
274131579Sphk		q6->ip6q_nfrag = 0;
275131579Sphk	}
276131579Sphk
277131579Sphk	/*
278131579Sphk	 * If it's the 1st fragment, record the length of the
279131579Sphk	 * unfragmentable part and the next header of the fragment header.
280131579Sphk	 */
281131579Sphk	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
282131579Sphk	if (fragoff == 0) {
283131579Sphk		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
284131579Sphk		    sizeof(struct ip6_frag);
285131579Sphk		q6->ip6q_nxt = ip6f->ip6f_nxt;
286131579Sphk	}
287130585Sphk
28873161Sjulian	/*
28973161Sjulian	 * Check that the reassembled packet would not exceed 65535 bytes
290132226Sphk	 * in size.
29173161Sjulian	 * If it would exceed, discard the fragment and return an ICMP error.
29273161Sjulian	 */
29373161Sjulian	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
29490995Sjulian	if (q6->ip6q_unfrglen >= 0) {
29573161Sjulian		/* The 1st fragment has already arrived. */
296129968Sphk		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
29773161Sjulian			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
298129968Sphk			    offset - sizeof(struct ip6_frag) +
299129968Sphk			    offsetof(struct ip6_frag, ip6f_offlg));
30073161Sjulian			IP6Q_UNLOCK();
30173161Sjulian			return (IPPROTO_DONE);
30273161Sjulian		}
303129968Sphk	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
30473161Sjulian		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
305129968Sphk		    offset - sizeof(struct ip6_frag) +
30673161Sjulian		    offsetof(struct ip6_frag, ip6f_offlg));
307129968Sphk		IP6Q_UNLOCK();
308129968Sphk		return (IPPROTO_DONE);
30973161Sjulian	}
31073161Sjulian	/*
31190995Sjulian	 * If it's the first fragment, do the above check for each
31290995Sjulian	 * fragment already stored in the reassembly queue.
31390995Sjulian	 */
31490995Sjulian	if (fragoff == 0) {
31590995Sjulian		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
31690995Sjulian		     af6 = af6dwn) {
317126077Sphk			af6dwn = af6->ip6af_down;
318126077Sphk
31990995Sjulian			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
32073161Sjulian			    IPV6_MAXPACKET) {
32190995Sjulian				struct mbuf *merr = IP6_REASS_MBUF(af6);
322126077Sphk				struct ip6_hdr *ip6err;
323126845Sphk				int erroff = af6->ip6af_offset;
324126077Sphk
325126077Sphk				/* dequeue the fragment. */
326126077Sphk				frag6_deq(af6);
32790995Sjulian				free(af6, M_FTABLE);
32873161Sjulian
32990995Sjulian				/* adjust pointer. */
33090995Sjulian				ip6err = mtod(merr, struct ip6_hdr *);
33190995Sjulian
332126077Sphk				/*
333126077Sphk				 * Restore source and destination addresses
334126077Sphk				 * in the erroneous IPv6 header.
335126077Sphk				 */
336126077Sphk				ip6err->ip6_src = q6->ip6q_src;
337126077Sphk				ip6err->ip6_dst = q6->ip6q_dst;
338126077Sphk
33990995Sjulian				icmp6_error(merr, ICMP6_PARAM_PROB,
34090995Sjulian				    ICMP6_PARAMPROB_HEADER,
34190995Sjulian				    erroff - sizeof(struct ip6_frag) +
34290995Sjulian				    offsetof(struct ip6_frag, ip6f_offlg));
34390995Sjulian			}
34490995Sjulian		}
34573161Sjulian	}
34690995Sjulian
347	ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
348	    M_DONTWAIT);
349	if (ip6af == NULL)
350		goto dropfrag;
351	bzero(ip6af, sizeof(*ip6af));
352	ip6af->ip6af_head = ip6->ip6_flow;
353	ip6af->ip6af_len = ip6->ip6_plen;
354	ip6af->ip6af_nxt = ip6->ip6_nxt;
355	ip6af->ip6af_hlim = ip6->ip6_hlim;
356	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
357	ip6af->ip6af_off = fragoff;
358	ip6af->ip6af_frglen = frgpartlen;
359	ip6af->ip6af_offset = offset;
360	IP6_REASS_MBUF(ip6af) = m;
361
362	if (first_frag) {
363		af6 = (struct ip6asfrag *)q6;
364		goto insert;
365	}
366
367	/*
368	 * Find a segment which begins after this one does.
369	 */
370	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
371	     af6 = af6->ip6af_down)
372		if (af6->ip6af_off > ip6af->ip6af_off)
373			break;
374
375#if 0
376	/*
377	 * If there is a preceding segment, it may provide some of
378	 * our data already.  If so, drop the data from the incoming
379	 * segment.  If it provides all of our data, drop us.
380	 */
381	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
382		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
383			- ip6af->ip6af_off;
384		if (i > 0) {
385			if (i >= ip6af->ip6af_frglen)
386				goto dropfrag;
387			m_adj(IP6_REASS_MBUF(ip6af), i);
388			ip6af->ip6af_off += i;
389			ip6af->ip6af_frglen -= i;
390		}
391	}
392
393	/*
394	 * While we overlap succeeding segments trim them or,
395	 * if they are completely covered, dequeue them.
396	 */
397	while (af6 != (struct ip6asfrag *)q6 &&
398	       ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
399		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
400		if (i < af6->ip6af_frglen) {
401			af6->ip6af_frglen -= i;
402			af6->ip6af_off += i;
403			m_adj(IP6_REASS_MBUF(af6), i);
404			break;
405		}
406		af6 = af6->ip6af_down;
407		m_freem(IP6_REASS_MBUF(af6->ip6af_up));
408		frag6_deq(af6->ip6af_up);
409	}
410#else
411	/*
412	 * If the incoming framgent overlaps some existing fragments in
413	 * the reassembly queue, drop it, since it is dangerous to override
414	 * existing fragments from a security point of view.
415	 * We don't know which fragment is the bad guy - here we trust
416	 * fragment that came in earlier, with no real reason.
417	 */
418	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
419		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
420			- ip6af->ip6af_off;
421		if (i > 0) {
422#if 0				/* suppress the noisy log */
423			log(LOG_ERR, "%d bytes of a fragment from %s "
424			    "overlaps the previous fragment\n",
425			    i, ip6_sprintf(&q6->ip6q_src));
426#endif
427			free(ip6af, M_FTABLE);
428			goto dropfrag;
429		}
430	}
431	if (af6 != (struct ip6asfrag *)q6) {
432		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
433		if (i > 0) {
434#if 0				/* suppress the noisy log */
435			log(LOG_ERR, "%d bytes of a fragment from %s "
436			    "overlaps the succeeding fragment",
437			    i, ip6_sprintf(&q6->ip6q_src));
438#endif
439			free(ip6af, M_FTABLE);
440			goto dropfrag;
441		}
442	}
443#endif
444
445insert:
446
447	/*
448	 * Stick new segment in its place;
449	 * check for complete reassembly.
450	 * Move to front of packet queue, as we are
451	 * the most recently active fragmented packet.
452	 */
453	frag6_enq(ip6af, af6->ip6af_up);
454	frag6_nfrags++;
455	q6->ip6q_nfrag++;
456#if 0 /* xxx */
457	if (q6 != ip6q.ip6q_next) {
458		frag6_remque(q6);
459		frag6_insque(q6, &ip6q);
460	}
461#endif
462	next = 0;
463	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
464	     af6 = af6->ip6af_down) {
465		if (af6->ip6af_off != next) {
466			IP6Q_UNLOCK();
467			return IPPROTO_DONE;
468		}
469		next += af6->ip6af_frglen;
470	}
471	if (af6->ip6af_up->ip6af_mff) {
472		IP6Q_UNLOCK();
473		return IPPROTO_DONE;
474	}
475
476	/*
477	 * Reassembly is complete; concatenate fragments.
478	 */
479	ip6af = q6->ip6q_down;
480	t = m = IP6_REASS_MBUF(ip6af);
481	af6 = ip6af->ip6af_down;
482	frag6_deq(ip6af);
483	while (af6 != (struct ip6asfrag *)q6) {
484		af6dwn = af6->ip6af_down;
485		frag6_deq(af6);
486		while (t->m_next)
487			t = t->m_next;
488		t->m_next = IP6_REASS_MBUF(af6);
489		m_adj(t->m_next, af6->ip6af_offset);
490		free(af6, M_FTABLE);
491		af6 = af6dwn;
492	}
493
494	/* adjust offset to point where the original next header starts */
495	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
496	free(ip6af, M_FTABLE);
497	ip6 = mtod(m, struct ip6_hdr *);
498	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
499	ip6->ip6_src = q6->ip6q_src;
500	ip6->ip6_dst = q6->ip6q_dst;
501	nxt = q6->ip6q_nxt;
502#ifdef notyet
503	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
504#endif
505
506	/*
507	 * Delete frag6 header with as a few cost as possible.
508	 */
509	if (offset < m->m_len) {
510		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
511			offset);
512		m->m_data += sizeof(struct ip6_frag);
513		m->m_len -= sizeof(struct ip6_frag);
514	} else {
515		/* this comes with no copy if the boundary is on cluster */
516		if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
517			frag6_remque(q6);
518			frag6_nfrags -= q6->ip6q_nfrag;
519			free(q6, M_FTABLE);
520			frag6_nfragpackets--;
521			goto dropfrag;
522		}
523		m_adj(t, sizeof(struct ip6_frag));
524		m_cat(m, t);
525	}
526
527	/*
528	 * Store NXT to the original.
529	 */
530	{
531		char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
532		*prvnxtp = nxt;
533	}
534
535	frag6_remque(q6);
536	frag6_nfrags -= q6->ip6q_nfrag;
537	free(q6, M_FTABLE);
538	frag6_nfragpackets--;
539
540	if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
541		int plen = 0;
542		for (t = m; t; t = t->m_next)
543			plen += t->m_len;
544		m->m_pkthdr.len = plen;
545	}
546
547	ip6stat.ip6s_reassembled++;
548	in6_ifstat_inc(dstifp, ifs6_reass_ok);
549
550	/*
551	 * Tell launch routine the next header
552	 */
553
554	*mp = m;
555	*offp = offset;
556
557	IP6Q_UNLOCK();
558	return nxt;
559
560 dropfrag:
561	IP6Q_UNLOCK();
562	in6_ifstat_inc(dstifp, ifs6_reass_fail);
563	ip6stat.ip6s_fragdropped++;
564	m_freem(m);
565	return IPPROTO_DONE;
566}
567
568/*
569 * Free a fragment reassembly header and all
570 * associated datagrams.
571 */
572void
573frag6_freef(q6)
574	struct ip6q *q6;
575{
576	struct ip6asfrag *af6, *down6;
577
578	IP6Q_LOCK_ASSERT();
579
580	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
581	     af6 = down6) {
582		struct mbuf *m = IP6_REASS_MBUF(af6);
583
584		down6 = af6->ip6af_down;
585		frag6_deq(af6);
586
587		/*
588		 * Return ICMP time exceeded error for the 1st fragment.
589		 * Just free other fragments.
590		 */
591		if (af6->ip6af_off == 0) {
592			struct ip6_hdr *ip6;
593
594			/* adjust pointer */
595			ip6 = mtod(m, struct ip6_hdr *);
596
597			/* restore source and destination addresses */
598			ip6->ip6_src = q6->ip6q_src;
599			ip6->ip6_dst = q6->ip6q_dst;
600
601			icmp6_error(m, ICMP6_TIME_EXCEEDED,
602				    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
603		} else
604			m_freem(m);
605		free(af6, M_FTABLE);
606	}
607	frag6_remque(q6);
608	frag6_nfrags -= q6->ip6q_nfrag;
609	free(q6, M_FTABLE);
610	frag6_nfragpackets--;
611}
612
613/*
614 * Put an ip fragment on a reassembly chain.
615 * Like insque, but pointers in middle of structure.
616 */
617void
618frag6_enq(af6, up6)
619	struct ip6asfrag *af6, *up6;
620{
621
622	IP6Q_LOCK_ASSERT();
623
624	af6->ip6af_up = up6;
625	af6->ip6af_down = up6->ip6af_down;
626	up6->ip6af_down->ip6af_up = af6;
627	up6->ip6af_down = af6;
628}
629
630/*
631 * To frag6_enq as remque is to insque.
632 */
633void
634frag6_deq(af6)
635	struct ip6asfrag *af6;
636{
637
638	IP6Q_LOCK_ASSERT();
639
640	af6->ip6af_up->ip6af_down = af6->ip6af_down;
641	af6->ip6af_down->ip6af_up = af6->ip6af_up;
642}
643
644void
645frag6_insque(new, old)
646	struct ip6q *new, *old;
647{
648
649	IP6Q_LOCK_ASSERT();
650
651	new->ip6q_prev = old;
652	new->ip6q_next = old->ip6q_next;
653	old->ip6q_next->ip6q_prev= new;
654	old->ip6q_next = new;
655}
656
657void
658frag6_remque(p6)
659	struct ip6q *p6;
660{
661
662	IP6Q_LOCK_ASSERT();
663
664	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
665	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
666}
667
668/*
669 * IPv6 reassembling timer processing;
670 * if a timer expires on a reassembly
671 * queue, discard it.
672 */
673void
674frag6_slowtimo()
675{
676	struct ip6q *q6;
677	int s = splnet();
678
679	IP6Q_LOCK();
680	q6 = ip6q.ip6q_next;
681	if (q6)
682		while (q6 != &ip6q) {
683			--q6->ip6q_ttl;
684			q6 = q6->ip6q_next;
685			if (q6->ip6q_prev->ip6q_ttl == 0) {
686				ip6stat.ip6s_fragtimeout++;
687				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
688				frag6_freef(q6->ip6q_prev);
689			}
690		}
691	/*
692	 * If we are over the maximum number of fragments
693	 * (due to the limit being lowered), drain off
694	 * enough to get down to the new limit.
695	 */
696	while (frag6_nfragpackets > (u_int)ip6_maxfragpackets &&
697	    ip6q.ip6q_prev) {
698		ip6stat.ip6s_fragoverflow++;
699		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
700		frag6_freef(ip6q.ip6q_prev);
701	}
702	IP6Q_UNLOCK();
703
704#if 0
705	/*
706	 * Routing changes might produce a better route than we last used;
707	 * make sure we notice eventually, even if forwarding only for one
708	 * destination and the cache is never replaced.
709	 */
710	if (ip6_forward_rt.ro_rt) {
711		RTFREE(ip6_forward_rt.ro_rt);
712		ip6_forward_rt.ro_rt = 0;
713	}
714	if (ipsrcchk_rt.ro_rt) {
715		RTFREE(ipsrcchk_rt.ro_rt);
716		ipsrcchk_rt.ro_rt = 0;
717	}
718#endif
719
720	splx(s);
721}
722
723/*
724 * Drain off all datagram fragments.
725 */
726void
727frag6_drain()
728{
729
730	if (IP6Q_TRYLOCK() == 0)
731		return;
732	while (ip6q.ip6q_next != &ip6q) {
733		ip6stat.ip6s_fragdropped++;
734		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
735		frag6_freef(ip6q.ip6q_next);
736	}
737	IP6Q_UNLOCK();
738}
739