frag6.c revision 160981
167468Snon/*	$FreeBSD: head/sys/netinet6/frag6.c 160981 2006-08-04 21:27:40Z brooks $	*/
279697Snon/*	$KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $	*/
367468Snon
467468Snon/*-
567468Snon * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
679697Snon * All rights reserved.
779697Snon *
879697Snon * Redistribution and use in source and binary forms, with or without
979697Snon * modification, are permitted provided that the following conditions
1079697Snon * are met:
1179697Snon * 1. Redistributions of source code must retain the above copyright
1279697Snon *    notice, this list of conditions and the following disclaimer.
1379697Snon * 2. Redistributions in binary form must reproduce the above copyright
1467468Snon *    notice, this list of conditions and the following disclaimer in the
1567468Snon *    documentation and/or other materials provided with the distribution.
1679697Snon * 3. Neither the name of the project nor the names of its contributors
1767468Snon *    may be used to endorse or promote products derived from this software
1879697Snon *    without specific prior written permission.
1979697Snon *
2079697Snon * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
2179697Snon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2267468Snon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2367468Snon * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
2479697Snon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2567468Snon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2679697Snon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2767468Snon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2879697Snon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2979697Snon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3079697Snon * SUCH DAMAGE.
3179697Snon */
3279697Snon
3367468Snon#include <sys/param.h>
3467468Snon#include <sys/systm.h>
3567468Snon#include <sys/malloc.h>
3667468Snon#include <sys/mbuf.h>
3767468Snon#include <sys/domain.h>
3867468Snon#include <sys/protosw.h>
3967468Snon#include <sys/socket.h>
4067468Snon#include <sys/errno.h>
4167468Snon#include <sys/time.h>
4267468Snon#include <sys/kernel.h>
4367468Snon#include <sys/syslog.h>
4467468Snon
4567468Snon#include <net/if.h>
4667468Snon#include <net/route.h>
4767468Snon
4867468Snon#include <netinet/in.h>
4967468Snon#include <netinet/in_var.h>
5067468Snon#include <netinet/ip6.h>
5167468Snon#include <netinet6/ip6_var.h>
5267468Snon#include <netinet/icmp6.h>
5367468Snon#include <netinet/in_systm.h>	/* for ECN definitions */
5467468Snon#include <netinet/ip.h>		/* for ECN definitions */
5567468Snon
5667468Snon/*
5767468Snon * Define it to get a correct behavior on per-interface statistics.
5867468Snon * You will need to perform an extra routing table lookup, per fragment,
5967468Snon * to do it.  This may, or may not be, a performance hit.
6067468Snon */
6167468Snon#define IN6_IFSTAT_STRICT
6279697Snon
6379697Snonstatic void frag6_enq __P((struct ip6asfrag *, struct ip6asfrag *));
6479697Snonstatic void frag6_deq __P((struct ip6asfrag *));
6567468Snonstatic void frag6_insque __P((struct ip6q *, struct ip6q *));
6667468Snonstatic void frag6_remque __P((struct ip6q *));
6767468Snonstatic void frag6_freef __P((struct ip6q *));
6867468Snon
6967468Snonstatic struct mtx ip6qlock;
7067468Snon/*
7179697Snon * These fields all protected by ip6qlock.
7273025Snon */
7373025Snonstatic u_int frag6_nfragpackets;
7467468Snonstatic u_int frag6_nfrags;
7579697Snonstatic struct	ip6q ip6q;	/* ip6 reassemble queue */
7679697Snon
7773025Snon#define	IP6Q_LOCK_INIT()	mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
7869979Snon#define	IP6Q_LOCK()		mtx_lock(&ip6qlock)
7979697Snon#define	IP6Q_TRYLOCK()		mtx_trylock(&ip6qlock)
8079697Snon#define	IP6Q_LOCK_ASSERT()	mtx_assert(&ip6qlock, MA_OWNED)
8167468Snon#define	IP6Q_UNLOCK()		mtx_unlock(&ip6qlock)
8267468Snon
8367468Snonstatic MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
8467468Snon
8567468Snon/*
8679697Snon * Initialise reassembly queue and fragment identifier.
8779697Snon */
8867468Snonstatic void
8967468Snonfrag6_change(void *tag)
9067468Snon{
9167468Snon
9267468Snon	ip6_maxfragpackets = nmbclusters / 4;
9367468Snon	ip6_maxfrags = nmbclusters / 4;
9467468Snon}
9567468Snon
9667468Snonvoid
9767468Snonfrag6_init()
9867468Snon{
9967468Snon
10067468Snon	ip6_maxfragpackets = nmbclusters / 4;
10179697Snon	ip6_maxfrags = nmbclusters / 4;
10267468Snon	EVENTHANDLER_REGISTER(nmbclusters_change,
10367468Snon	    frag6_change, NULL, EVENTHANDLER_PRI_ANY);
10479697Snon
10579697Snon	IP6Q_LOCK_INIT();
10667468Snon
10767468Snon	ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
10867468Snon}
10967468Snon
11067468Snon/*
11167468Snon * In RFC2460, fragment and reassembly rule do not agree with each other,
11267468Snon * in terms of next header field handling in fragment header.
11367468Snon * While the sender will use the same value for all of the fragmented packets,
11473025Snon * receiver is suggested not to check the consistency.
11567468Snon *
11667468Snon * fragment rule (p20):
11767468Snon *	(2) A Fragment header containing:
11867468Snon *	The Next Header value that identifies the first header of
11979697Snon *	the Fragmentable Part of the original packet.
12067468Snon *		-> next header field is same for all fragments
12179697Snon *
12279697Snon * reassembly rule (p21):
12379697Snon *	The Next Header field of the last header of the Unfragmentable
12479697Snon *	Part is obtained from the Next Header field of the first
12567468Snon *	fragment's Fragment header.
12679697Snon *		-> should grab it from the first fragment only
12779697Snon *
12879697Snon * The following note also contradicts the fragment rule - no one is going to
12979697Snon * send different fragments with different next header fields.
13067468Snon *
13167468Snon * additional note (p22):
13267468Snon *	The Next Header values in the Fragment headers of different
13379697Snon *	fragments of the same original packet may differ.  Only the value
13479697Snon *	from the Offset zero fragment packet is used for reassembly.
13579697Snon *		-> should grab it from the first fragment only
13679697Snon *
13779697Snon * There is no explicit reason given in the RFC.  Historical reason maybe?
13879697Snon */
13979697Snon/*
14079697Snon * Fragment input
14179697Snon */
14279697Snonint
14379697Snonfrag6_input(mp, offp, proto)
14479697Snon	struct mbuf **mp;
14579697Snon	int *offp, proto;
14679697Snon{
14779697Snon	struct mbuf *m = *mp, *t;
14879697Snon	struct ip6_hdr *ip6;
14967468Snon	struct ip6_frag *ip6f;
15067468Snon	struct ip6q *q6;
15167468Snon	struct ip6asfrag *af6, *ip6af, *af6dwn;
15279697Snon#ifdef IN6_IFSTAT_STRICT
15379697Snon	struct in6_ifaddr *ia;
15467468Snon#endif
15567468Snon	int offset = *offp, nxt, i, next;
15679697Snon	int first_frag = 0;
15779697Snon	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
15879697Snon	struct ifnet *dstifp;
15979697Snon	u_int8_t ecn, ecn0;
16067468Snon
16179697Snon	ip6 = mtod(m, struct ip6_hdr *);
16279697Snon#ifndef PULLDOWN_TEST
16367468Snon	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
16467468Snon	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
16567468Snon#else
16667468Snon	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
16779697Snon	if (ip6f == NULL)
16879697Snon		return (IPPROTO_DONE);
16979697Snon#endif
17079697Snon
17179697Snon	dstifp = NULL;
17279697Snon#ifdef IN6_IFSTAT_STRICT
17379697Snon	/* find the destination interface of the packet. */
17479697Snon	if ((ia = ip6_getdstifaddr(m)) != NULL)
17579697Snon		dstifp = ia->ia_ifp;
17679697Snon#else
17779697Snon	/* we are violating the spec, this is not the destination interface */
17879697Snon	if ((m->m_flags & M_PKTHDR) != 0)
17979697Snon		dstifp = m->m_pkthdr.rcvif;
18079697Snon#endif
18179697Snon
18279697Snon	/* jumbo payload can't contain a fragment header */
18379697Snon	if (ip6->ip6_plen == 0) {
18479697Snon		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
18579697Snon		in6_ifstat_inc(dstifp, ifs6_reass_fail);
18679697Snon		return IPPROTO_DONE;
18779697Snon	}
18879697Snon
18979697Snon	/*
19079697Snon	 * check whether fragment packet's fragment length is
19179697Snon	 * multiple of 8 octets.
19279697Snon	 * sizeof(struct ip6_frag) == 8
19379697Snon	 * sizeof(struct ip6_hdr) = 40
19467468Snon	 */
19589113Smsmith	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
19667468Snon	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
19767468Snon		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
19867468Snon		    offsetof(struct ip6_hdr, ip6_plen));
19967468Snon		in6_ifstat_inc(dstifp, ifs6_reass_fail);
20067468Snon		return IPPROTO_DONE;
20167468Snon	}
20267468Snon
20379697Snon	ip6stat.ip6s_fragments++;
20479697Snon	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
20579697Snon
20679697Snon	/* offset now points to data portion */
20779697Snon	offset += sizeof(struct ip6_frag);
20879697Snon
20979697Snon	IP6Q_LOCK();
21079697Snon
21179697Snon	/*
21279697Snon	 * Enforce upper bound on number of fragments.
21379697Snon	 * If maxfrag is 0, never accept fragments.
21479697Snon	 * If maxfrag is -1, accept all fragments without limitation.
21579697Snon	 */
21679697Snon	if (ip6_maxfrags < 0)
21779697Snon		;
21879697Snon	else if (frag6_nfrags >= (u_int)ip6_maxfrags)
21979697Snon		goto dropfrag;
22079697Snon
22179697Snon	for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
22279697Snon		if (ip6f->ip6f_ident == q6->ip6q_ident &&
22379697Snon		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
22479697Snon		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
22579697Snon			break;
22679697Snon
22779697Snon	if (q6 == &ip6q) {
22879697Snon		/*
22967468Snon		 * the first fragment to arrive, create a reassembly queue.
23079697Snon		 */
23179697Snon		first_frag = 1;
23279697Snon
23379697Snon		/*
23479697Snon		 * Enforce upper bound on number of fragmented packets
23579697Snon		 * for which we attempt reassembly;
23679697Snon		 * If maxfragpackets is 0, never accept fragments.
23779697Snon		 * If maxfragpackets is -1, accept all fragments without
23879697Snon		 * limitation.
23979697Snon		 */
24079697Snon		if (ip6_maxfragpackets < 0)
24179697Snon			;
24279697Snon		else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets)
24379697Snon			goto dropfrag;
24479697Snon		frag6_nfragpackets++;
24579697Snon		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
24679697Snon		    M_NOWAIT);
24779697Snon		if (q6 == NULL)
24879697Snon			goto dropfrag;
24979697Snon		bzero(q6, sizeof(*q6));
25079697Snon
25179697Snon		frag6_insque(q6, &ip6q);
25279697Snon
25379697Snon		/* ip6q_nxt will be filled afterwards, from 1st fragment */
25479697Snon		q6->ip6q_down	= q6->ip6q_up = (struct ip6asfrag *)q6;
25579697Snon#ifdef notyet
25679697Snon		q6->ip6q_nxtp	= (u_char *)nxtp;
25779697Snon#endif
25879697Snon		q6->ip6q_ident	= ip6f->ip6f_ident;
25979697Snon		q6->ip6q_arrive = 0; /* Is it used anywhere? */
26079697Snon		q6->ip6q_ttl 	= IPV6_FRAGTTL;
26179697Snon		q6->ip6q_src	= ip6->ip6_src;
26279697Snon		q6->ip6q_dst	= ip6->ip6_dst;
26379697Snon		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
26479697Snon
26579697Snon		q6->ip6q_nfrag = 0;
26679697Snon	}
26779697Snon
26879697Snon	/*
26979697Snon	 * If it's the 1st fragment, record the length of the
27079697Snon	 * unfragmentable part and the next header of the fragment header.
27179697Snon	 */
27279697Snon	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
27379697Snon	if (fragoff == 0) {
27479697Snon		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
27579697Snon		    sizeof(struct ip6_frag);
27679697Snon		q6->ip6q_nxt = ip6f->ip6f_nxt;
27779697Snon	}
27879697Snon
27979697Snon	/*
28079697Snon	 * Check that the reassembled packet would not exceed 65535 bytes
28179697Snon	 * in size.
28279697Snon	 * If it would exceed, discard the fragment and return an ICMP error.
28379697Snon	 */
28479697Snon	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
28579697Snon	if (q6->ip6q_unfrglen >= 0) {
28679697Snon		/* The 1st fragment has already arrived. */
28779697Snon		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
28879697Snon			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
28979697Snon			    offset - sizeof(struct ip6_frag) +
29079697Snon			    offsetof(struct ip6_frag, ip6f_offlg));
29179697Snon			IP6Q_UNLOCK();
29279697Snon			return (IPPROTO_DONE);
29379697Snon		}
29479697Snon	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
29579697Snon		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
29679697Snon		    offset - sizeof(struct ip6_frag) +
29779697Snon		    offsetof(struct ip6_frag, ip6f_offlg));
29879697Snon		IP6Q_UNLOCK();
29979697Snon		return (IPPROTO_DONE);
30079697Snon	}
30179697Snon	/*
30279697Snon	 * If it's the first fragment, do the above check for each
30379697Snon	 * fragment already stored in the reassembly queue.
30479697Snon	 */
30579697Snon	if (fragoff == 0) {
30679697Snon		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
30779697Snon		     af6 = af6dwn) {
30879697Snon			af6dwn = af6->ip6af_down;
30979697Snon
31079697Snon			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
31179697Snon			    IPV6_MAXPACKET) {
31279697Snon				struct mbuf *merr = IP6_REASS_MBUF(af6);
31379697Snon				struct ip6_hdr *ip6err;
31479697Snon				int erroff = af6->ip6af_offset;
31579697Snon
31679697Snon				/* dequeue the fragment. */
31779697Snon				frag6_deq(af6);
31879697Snon				free(af6, M_FTABLE);
31979697Snon
32079697Snon				/* adjust pointer. */
32179697Snon				ip6err = mtod(merr, struct ip6_hdr *);
32279697Snon
32379697Snon				/*
32479697Snon				 * Restore source and destination addresses
32579697Snon				 * in the erroneous IPv6 header.
32679697Snon				 */
32779697Snon				ip6err->ip6_src = q6->ip6q_src;
32879697Snon				ip6err->ip6_dst = q6->ip6q_dst;
32979697Snon
33079697Snon				icmp6_error(merr, ICMP6_PARAM_PROB,
33179697Snon				    ICMP6_PARAMPROB_HEADER,
33279697Snon				    erroff - sizeof(struct ip6_frag) +
33379697Snon				    offsetof(struct ip6_frag, ip6f_offlg));
33479697Snon			}
33579697Snon		}
33679697Snon	}
33779697Snon
33879697Snon	ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
33979697Snon	    M_NOWAIT);
34079697Snon	if (ip6af == NULL)
34179697Snon		goto dropfrag;
34279697Snon	bzero(ip6af, sizeof(*ip6af));
34379697Snon	ip6af->ip6af_head = ip6->ip6_flow;
34479697Snon	ip6af->ip6af_len = ip6->ip6_plen;
34579697Snon	ip6af->ip6af_nxt = ip6->ip6_nxt;
34679697Snon	ip6af->ip6af_hlim = ip6->ip6_hlim;
34779697Snon	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
34879697Snon	ip6af->ip6af_off = fragoff;
34979697Snon	ip6af->ip6af_frglen = frgpartlen;
35079697Snon	ip6af->ip6af_offset = offset;
35179697Snon	IP6_REASS_MBUF(ip6af) = m;
35279697Snon
35379697Snon	if (first_frag) {
35479697Snon		af6 = (struct ip6asfrag *)q6;
35579697Snon		goto insert;
35679697Snon	}
35779697Snon
35879697Snon	/*
35979697Snon	 * Handle ECN by comparing this segment with the first one;
36079697Snon	 * if CE is set, do not lose CE.
36179697Snon	 * drop if CE and not-ECT are mixed for the same packet.
36279697Snon	 */
36379697Snon	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
36479697Snon	ecn0 = (ntohl(q6->ip6q_down->ip6af_head) >> 20) & IPTOS_ECN_MASK;
36579697Snon	if (ecn == IPTOS_ECN_CE) {
36679697Snon		if (ecn0 == IPTOS_ECN_NOTECT) {
36779697Snon			free(ip6af, M_FTABLE);
36879697Snon			goto dropfrag;
36979697Snon		}
37079697Snon		if (ecn0 != IPTOS_ECN_CE)
37179697Snon			q6->ip6q_down->ip6af_head |= htonl(IPTOS_ECN_CE << 20);
37279697Snon	}
37379697Snon	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
37479697Snon		free(ip6af, M_FTABLE);
37579697Snon		goto dropfrag;
37679697Snon	}
37779697Snon
37879697Snon	/*
37979697Snon	 * Find a segment which begins after this one does.
38079697Snon	 */
38179697Snon	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
38279697Snon	     af6 = af6->ip6af_down)
38379697Snon		if (af6->ip6af_off > ip6af->ip6af_off)
38479697Snon			break;
38579697Snon
38679697Snon#if 0
38779697Snon	/*
38879697Snon	 * If there is a preceding segment, it may provide some of
38979697Snon	 * our data already.  If so, drop the data from the incoming
39079697Snon	 * segment.  If it provides all of our data, drop us.
39179697Snon	 */
39279697Snon	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
39379697Snon		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
39479697Snon			- ip6af->ip6af_off;
39579697Snon		if (i > 0) {
39679697Snon			if (i >= ip6af->ip6af_frglen)
39779697Snon				goto dropfrag;
39879697Snon			m_adj(IP6_REASS_MBUF(ip6af), i);
39979697Snon			ip6af->ip6af_off += i;
40079697Snon			ip6af->ip6af_frglen -= i;
40179697Snon		}
40279697Snon	}
40379697Snon
40479697Snon	/*
40579697Snon	 * While we overlap succeeding segments trim them or,
40679697Snon	 * if they are completely covered, dequeue them.
40779697Snon	 */
40879697Snon	while (af6 != (struct ip6asfrag *)q6 &&
40979697Snon	       ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
41079697Snon		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
41179697Snon		if (i < af6->ip6af_frglen) {
41279697Snon			af6->ip6af_frglen -= i;
41379697Snon			af6->ip6af_off += i;
41479697Snon			m_adj(IP6_REASS_MBUF(af6), i);
41579697Snon			break;
41679697Snon		}
41779697Snon		af6 = af6->ip6af_down;
41879697Snon		m_freem(IP6_REASS_MBUF(af6->ip6af_up));
41979697Snon		frag6_deq(af6->ip6af_up);
42079697Snon	}
42179697Snon#else
42279697Snon	/*
42379697Snon	 * If the incoming framgent overlaps some existing fragments in
42479697Snon	 * the reassembly queue, drop it, since it is dangerous to override
42579697Snon	 * existing fragments from a security point of view.
42679697Snon	 * We don't know which fragment is the bad guy - here we trust
42779697Snon	 * fragment that came in earlier, with no real reason.
42879697Snon	 */
42979697Snon	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
43079697Snon		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
43179697Snon			- ip6af->ip6af_off;
43279697Snon		if (i > 0) {
43379697Snon#if 0				/* suppress the noisy log */
43479697Snon			log(LOG_ERR, "%d bytes of a fragment from %s "
43579697Snon			    "overlaps the previous fragment\n",
43679697Snon			    i, ip6_sprintf(&q6->ip6q_src));
43779697Snon#endif
43879697Snon			free(ip6af, M_FTABLE);
43979697Snon			goto dropfrag;
44079697Snon		}
44179697Snon	}
44279697Snon	if (af6 != (struct ip6asfrag *)q6) {
44379697Snon		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
44479697Snon		if (i > 0) {
44579697Snon#if 0				/* suppress the noisy log */
44679697Snon			log(LOG_ERR, "%d bytes of a fragment from %s "
44779697Snon			    "overlaps the succeeding fragment",
44879697Snon			    i, ip6_sprintf(&q6->ip6q_src));
44979697Snon#endif
45079697Snon			free(ip6af, M_FTABLE);
45179697Snon			goto dropfrag;
45279697Snon		}
45379697Snon	}
45479697Snon#endif
45579697Snon
45679697Snoninsert:
45779697Snon
45879697Snon	/*
45979697Snon	 * Stick new segment in its place;
46079697Snon	 * check for complete reassembly.
46179697Snon	 * Move to front of packet queue, as we are
46279697Snon	 * the most recently active fragmented packet.
46379697Snon	 */
46479697Snon	frag6_enq(ip6af, af6->ip6af_up);
46579697Snon	frag6_nfrags++;
46679697Snon	q6->ip6q_nfrag++;
46779697Snon#if 0 /* xxx */
46879697Snon	if (q6 != ip6q.ip6q_next) {
46979697Snon		frag6_remque(q6);
47079697Snon		frag6_insque(q6, &ip6q);
47179697Snon	}
47279697Snon#endif
47379697Snon	next = 0;
47479697Snon	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
47579697Snon	     af6 = af6->ip6af_down) {
47679697Snon		if (af6->ip6af_off != next) {
47779697Snon			IP6Q_UNLOCK();
47879697Snon			return IPPROTO_DONE;
47979697Snon		}
48079697Snon		next += af6->ip6af_frglen;
48179697Snon	}
48279697Snon	if (af6->ip6af_up->ip6af_mff) {
48379697Snon		IP6Q_UNLOCK();
48479697Snon		return IPPROTO_DONE;
48579697Snon	}
48679697Snon
48779697Snon	/*
48879697Snon	 * Reassembly is complete; concatenate fragments.
48979697Snon	 */
49079697Snon	ip6af = q6->ip6q_down;
49179697Snon	t = m = IP6_REASS_MBUF(ip6af);
49279697Snon	af6 = ip6af->ip6af_down;
49379697Snon	frag6_deq(ip6af);
49479697Snon	while (af6 != (struct ip6asfrag *)q6) {
49579697Snon		af6dwn = af6->ip6af_down;
49679697Snon		frag6_deq(af6);
49779697Snon		while (t->m_next)
49879697Snon			t = t->m_next;
49979697Snon		t->m_next = IP6_REASS_MBUF(af6);
50079697Snon		m_adj(t->m_next, af6->ip6af_offset);
50179697Snon		free(af6, M_FTABLE);
50279697Snon		af6 = af6dwn;
50379697Snon	}
50479697Snon
50579697Snon	/* adjust offset to point where the original next header starts */
50679697Snon	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
50779697Snon	free(ip6af, M_FTABLE);
50879697Snon	ip6 = mtod(m, struct ip6_hdr *);
50979697Snon	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
51079697Snon	ip6->ip6_src = q6->ip6q_src;
51179697Snon	ip6->ip6_dst = q6->ip6q_dst;
51279697Snon	nxt = q6->ip6q_nxt;
51379697Snon#ifdef notyet
51479697Snon	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
51579697Snon#endif
51679697Snon
51779697Snon	/*
51879697Snon	 * Delete frag6 header with as a few cost as possible.
51979697Snon	 */
52079697Snon	if (offset < m->m_len) {
52179697Snon		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
52279697Snon			offset);
52379697Snon		m->m_data += sizeof(struct ip6_frag);
52479697Snon		m->m_len -= sizeof(struct ip6_frag);
52579697Snon	} else {
52679697Snon		/* this comes with no copy if the boundary is on cluster */
52779697Snon		if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
52879697Snon			frag6_remque(q6);
52979697Snon			frag6_nfrags -= q6->ip6q_nfrag;
53079697Snon			free(q6, M_FTABLE);
53179697Snon			frag6_nfragpackets--;
53279697Snon			goto dropfrag;
53379697Snon		}
53479697Snon		m_adj(t, sizeof(struct ip6_frag));
53579697Snon		m_cat(m, t);
53679697Snon	}
53779697Snon
53879697Snon	/*
53979697Snon	 * Store NXT to the original.
54079697Snon	 */
54179697Snon	{
54279697Snon		char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
54379697Snon		*prvnxtp = nxt;
54479697Snon	}
54579697Snon
54679697Snon	frag6_remque(q6);
54779697Snon	frag6_nfrags -= q6->ip6q_nfrag;
54879697Snon	free(q6, M_FTABLE);
54979697Snon	frag6_nfragpackets--;
55079697Snon
55179697Snon	if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
55279697Snon		int plen = 0;
55379697Snon		for (t = m; t; t = t->m_next)
55479697Snon			plen += t->m_len;
55579697Snon		m->m_pkthdr.len = plen;
55679697Snon	}
55779697Snon
55879697Snon	ip6stat.ip6s_reassembled++;
55979697Snon	in6_ifstat_inc(dstifp, ifs6_reass_ok);
56079697Snon
56179697Snon	/*
56279697Snon	 * Tell launch routine the next header
56379697Snon	 */
56479697Snon
56579697Snon	*mp = m;
56679697Snon	*offp = offset;
56779697Snon
56879697Snon	IP6Q_UNLOCK();
56979697Snon	return nxt;
57079697Snon
57179697Snon dropfrag:
57279697Snon	IP6Q_UNLOCK();
57379697Snon	in6_ifstat_inc(dstifp, ifs6_reass_fail);
57479697Snon	ip6stat.ip6s_fragdropped++;
57579697Snon	m_freem(m);
57679697Snon	return IPPROTO_DONE;
57779697Snon}
57879697Snon
57979697Snon/*
58079697Snon * Free a fragment reassembly header and all
58179697Snon * associated datagrams.
58279697Snon */
58379697Snonvoid
58479697Snonfrag6_freef(q6)
58579697Snon	struct ip6q *q6;
58679697Snon{
58779697Snon	struct ip6asfrag *af6, *down6;
58879697Snon
58979697Snon	IP6Q_LOCK_ASSERT();
59079697Snon
59179697Snon	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
59279697Snon	     af6 = down6) {
59379697Snon		struct mbuf *m = IP6_REASS_MBUF(af6);
59479697Snon
59579697Snon		down6 = af6->ip6af_down;
59679697Snon		frag6_deq(af6);
59779697Snon
59879697Snon		/*
59979697Snon		 * Return ICMP time exceeded error for the 1st fragment.
60079697Snon		 * Just free other fragments.
60179697Snon		 */
60279697Snon		if (af6->ip6af_off == 0) {
60379697Snon			struct ip6_hdr *ip6;
60479697Snon
60579697Snon			/* adjust pointer */
60679697Snon			ip6 = mtod(m, struct ip6_hdr *);
60779697Snon
60879697Snon			/* restore source and destination addresses */
60979697Snon			ip6->ip6_src = q6->ip6q_src;
61079697Snon			ip6->ip6_dst = q6->ip6q_dst;
61179697Snon
61279697Snon			icmp6_error(m, ICMP6_TIME_EXCEEDED,
61379697Snon				    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
61479697Snon		} else
61579697Snon			m_freem(m);
61679697Snon		free(af6, M_FTABLE);
61779697Snon	}
61879697Snon	frag6_remque(q6);
61979697Snon	frag6_nfrags -= q6->ip6q_nfrag;
62079697Snon	free(q6, M_FTABLE);
62179697Snon	frag6_nfragpackets--;
62279697Snon}
62379697Snon
62479697Snon/*
62579697Snon * Put an ip fragment on a reassembly chain.
62679697Snon * Like insque, but pointers in middle of structure.
62779697Snon */
62879697Snonvoid
62979697Snonfrag6_enq(af6, up6)
63079697Snon	struct ip6asfrag *af6, *up6;
63179697Snon{
63279697Snon
63379697Snon	IP6Q_LOCK_ASSERT();
63479697Snon
63579697Snon	af6->ip6af_up = up6;
63679697Snon	af6->ip6af_down = up6->ip6af_down;
63779697Snon	up6->ip6af_down->ip6af_up = af6;
63879697Snon	up6->ip6af_down = af6;
63979697Snon}
64079697Snon
64179697Snon/*
64279697Snon * To frag6_enq as remque is to insque.
64379697Snon */
64479697Snonvoid
64579697Snonfrag6_deq(af6)
64679697Snon	struct ip6asfrag *af6;
64779697Snon{
64879697Snon
64979697Snon	IP6Q_LOCK_ASSERT();
65079697Snon
65179697Snon	af6->ip6af_up->ip6af_down = af6->ip6af_down;
65279697Snon	af6->ip6af_down->ip6af_up = af6->ip6af_up;
65379697Snon}
65479697Snon
65579697Snonvoid
65679697Snonfrag6_insque(new, old)
65779697Snon	struct ip6q *new, *old;
65879697Snon{
65979697Snon
66079697Snon	IP6Q_LOCK_ASSERT();
66179697Snon
66279697Snon	new->ip6q_prev = old;
66379697Snon	new->ip6q_next = old->ip6q_next;
66479697Snon	old->ip6q_next->ip6q_prev= new;
66579697Snon	old->ip6q_next = new;
66679697Snon}
66779697Snon
66879697Snonvoid
66979697Snonfrag6_remque(p6)
67079697Snon	struct ip6q *p6;
67179697Snon{
67279697Snon
67379697Snon	IP6Q_LOCK_ASSERT();
67479697Snon
67579697Snon	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
67679697Snon	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
67779697Snon}
67879697Snon
67979697Snon/*
68079697Snon * IPv6 reassembling timer processing;
68179697Snon * if a timer expires on a reassembly
68279697Snon * queue, discard it.
68379697Snon */
68479697Snonvoid
68579697Snonfrag6_slowtimo()
68679697Snon{
68779697Snon	struct ip6q *q6;
68879697Snon
68979697Snon#if 0
69079697Snon	GIANT_REQUIRED;	/* XXX bz: ip6_forward_rt */
69179697Snon#endif
69279697Snon
69379697Snon	IP6Q_LOCK();
69479697Snon	q6 = ip6q.ip6q_next;
69579697Snon	if (q6)
69679697Snon		while (q6 != &ip6q) {
69779697Snon			--q6->ip6q_ttl;
69879697Snon			q6 = q6->ip6q_next;
69979697Snon			if (q6->ip6q_prev->ip6q_ttl == 0) {
70079697Snon				ip6stat.ip6s_fragtimeout++;
70179697Snon				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
70279697Snon				frag6_freef(q6->ip6q_prev);
70379697Snon			}
70479697Snon		}
70579697Snon	/*
70679697Snon	 * If we are over the maximum number of fragments
70779697Snon	 * (due to the limit being lowered), drain off
70879697Snon	 * enough to get down to the new limit.
70979697Snon	 */
71079697Snon	while (frag6_nfragpackets > (u_int)ip6_maxfragpackets &&
71179697Snon	    ip6q.ip6q_prev) {
71279697Snon		ip6stat.ip6s_fragoverflow++;
71379697Snon		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
71479697Snon		frag6_freef(ip6q.ip6q_prev);
71579697Snon	}
71679697Snon	IP6Q_UNLOCK();
71779697Snon
71879697Snon#if 0
71979697Snon	/*
72079697Snon	 * Routing changes might produce a better route than we last used;
72179697Snon	 * make sure we notice eventually, even if forwarding only for one
72279697Snon	 * destination and the cache is never replaced.
72379697Snon	 */
72479697Snon	if (ip6_forward_rt.ro_rt) {
72579697Snon		RTFREE(ip6_forward_rt.ro_rt);
72679697Snon		ip6_forward_rt.ro_rt = 0;
72779697Snon	}
72879697Snon	if (ipsrcchk_rt.ro_rt) {
72979697Snon		RTFREE(ipsrcchk_rt.ro_rt);
73079697Snon		ipsrcchk_rt.ro_rt = 0;
73179697Snon	}
73279697Snon#endif
73379697Snon}
73479697Snon
73579697Snon/*
73679697Snon * Drain off all datagram fragments.
73779697Snon */
73879697Snonvoid
73979697Snonfrag6_drain()
74079697Snon{
74179697Snon
74279697Snon	if (IP6Q_TRYLOCK() == 0)
74379697Snon		return;
74479697Snon	while (ip6q.ip6q_next != &ip6q) {
74579697Snon		ip6stat.ip6s_fragdropped++;
74679697Snon		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
74779697Snon		frag6_freef(ip6q.ip6q_next);
74879697Snon	}
74979697Snon	IP6Q_UNLOCK();
75079697Snon}
75179697Snon