frag6.c revision 54350
1/*
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/netinet6/frag6.c 54350 1999-12-09 08:56:50Z shin $
30 */
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/malloc.h>
35#include <sys/mbuf.h>
36#include <sys/domain.h>
37#include <sys/protosw.h>
38#include <sys/socket.h>
39#include <sys/errno.h>
40#include <sys/time.h>
41#include <sys/kernel.h>
42#include <sys/syslog.h>
43
44#include <net/if.h>
45#include <net/route.h>
46
47#include <netinet/in.h>
48#include <netinet/in_var.h>
49#include <netinet6/ip6.h>
50#include <netinet6/ip6_var.h>
51#include <netinet6/icmp6.h>
52
53#include <net/net_osdep.h>
54
/*
 * Define it to get a correct behavior on per-interface statistics.
 * You will need to perform an extra routing table lookup, per fragment,
 * to do it.  This may, or may not be, a performance hit.
 *
 * When defined, frag6_input() charges statistics to the interface found
 * by a route lookup on the packet's destination address; otherwise it
 * charges them to the interface the packet was received on.
 */
#define	IN6_IFSTAT_STRICT

/*
 * File-local helpers: frag6_enq/frag6_deq link and unlink a fragment on
 * a per-packet fragment chain, frag6_insque/frag6_remque do the same for
 * a reassembly queue on the global queue list, and frag6_freef discards
 * a whole reassembly queue.
 */
static void	frag6_enq __P((struct ip6asfrag *, struct ip6asfrag *));
static void	frag6_deq __P((struct ip6asfrag *));
static void	frag6_insque __P((struct ip6q *, struct ip6q *));
static void	frag6_remque __P((struct ip6q *));
static void	frag6_freef __P((struct ip6q *));

/*
 * Set while frag6_slowtimo() walks the reassembly queues; frag6_drain()
 * returns without touching anything while it is non-zero.
 */
int	frag6_doing_reass;
/*
 * Count of reassembly queues: bumped in frag6_input() when a queue is
 * created, dropped again by frag6_freef() and on successful reassembly.
 */
u_int	frag6_nfragpackets;
/*
 * Head of the circular, doubly-linked list of reassembly queues.
 * frag6_init() links it to itself to represent the empty list.
 */
struct	ip6q	ip6q;	/* ip6 reassemble queue */

/* malloc type used for struct ip6q, unless one is already defined. */
#if !defined(M_FTABLE)
MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
#endif
75
76/*
77 * Initialise reassembly queue and fragment identifier.
78 */
79void
80frag6_init()
81{
82	struct timeval tv;
83
84	/*
85	 * in many cases, random() here does NOT return random number
86	 * as initialization during bootstrap time occur in fixed order.
87	 */
88	microtime(&tv);
89	ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
90	ip6_id = random() ^ tv.tv_usec;
91}
92
93/*
94 * Fragment input
95 */
96int
97frag6_input(mp, offp, proto)
98	struct mbuf **mp;
99	int *offp, proto;
100{
101	struct mbuf *m = *mp, *t;
102	struct ip6_hdr *ip6;
103	struct ip6_frag *ip6f;
104	struct ip6q *q6;
105	struct ip6asfrag *af6, *ip6af;
106	int offset = *offp, nxt, i, next;
107	int first_frag = 0;
108	u_short fragoff, frgpartlen;
109	struct ifnet *dstifp;
110#ifdef IN6_IFSTAT_STRICT
111	static struct route_in6 ro;
112	struct sockaddr_in6 *dst;
113#endif
114
115	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
116
117	ip6 = mtod(m, struct ip6_hdr *);
118	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
119
120	dstifp = NULL;
121#ifdef IN6_IFSTAT_STRICT
122	/* find the destination interface of the packet. */
123	dst = (struct sockaddr_in6 *)&ro.ro_dst;
124	if (ro.ro_rt
125	 && ((ro.ro_rt->rt_flags & RTF_UP) == 0
126	  || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
127		RTFREE(ro.ro_rt);
128		ro.ro_rt = (struct rtentry *)0;
129	}
130	if (ro.ro_rt == NULL) {
131		bzero(dst, sizeof(*dst));
132		dst->sin6_family = AF_INET6;
133		dst->sin6_len = sizeof(struct sockaddr_in6);
134		dst->sin6_addr = ip6->ip6_dst;
135	}
136	rtalloc((struct route *)&ro);
137	if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL)
138		dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp;
139#else
140	/* we are violating the spec, this is not the destination interface */
141	if ((m->m_flags & M_PKTHDR) != 0)
142		dstifp = m->m_pkthdr.rcvif;
143#endif
144
145	/* jumbo payload can't contain a fragment header */
146	if (ip6->ip6_plen == 0) {
147		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
148		in6_ifstat_inc(dstifp, ifs6_reass_fail);
149		return IPPROTO_DONE;
150	}
151
152	/*
153	 * check whether fragment packet's fragment length is
154	 * multiple of 8 octets.
155	 * sizeof(struct ip6_frag) == 8
156	 * sizeof(struct ip6_hdr) = 40
157	 */
158	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
159	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
160		icmp6_error(m, ICMP6_PARAM_PROB,
161			    ICMP6_PARAMPROB_HEADER,
162			    (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
163		in6_ifstat_inc(dstifp, ifs6_reass_fail);
164		return IPPROTO_DONE;
165	}
166
167	ip6stat.ip6s_fragments++;
168	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
169
170	/*
171	 * Presence of header sizes in mbufs
172	 * would confuse code below.
173	 */
174
175	offset += sizeof(struct ip6_frag);
176	m->m_data += offset;
177	m->m_len -= offset;
178
179	for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
180		if (ip6f->ip6f_ident == q6->ip6q_ident &&
181		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
182		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
183			break;
184
185	if (q6 == &ip6q) {
186		/*
187		 * the first fragment to arrive, create a reassembly queue.
188		 */
189		first_frag = 1;
190		frag6_nfragpackets++;
191
192		/*
193		 * Enforce upper bound on number of fragmented packets
194		 * for which we attempt reassembly;
195		 * If maxfrag is 0, never accept fragments.
196		 * If maxfrag is -1, accept all fragments without limitation.
197		 */
198		if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets) {
199			ip6stat.ip6s_fragoverflow++;
200			in6_ifstat_inc(dstifp, ifs6_reass_fail);
201			frag6_freef(ip6q.ip6q_prev);
202		}
203		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
204			M_DONTWAIT);
205		if (q6 == NULL)
206			goto dropfrag;
207
208		frag6_insque(q6, &ip6q);
209
210		q6->ip6q_down	= q6->ip6q_up = (struct ip6asfrag *)q6;
211		q6->ip6q_ident	= ip6f->ip6f_ident;
212		q6->ip6q_arrive = 0; /* Is it used anywhere? */
213		q6->ip6q_ttl 	= IPV6_FRAGTTL;
214		q6->ip6q_src	= ip6->ip6_src;
215		q6->ip6q_dst	= ip6->ip6_dst;
216		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
217	}
218
219	/*
220	 * If it's the 1st fragment, record the length of the
221	 * unfragmentable part and the next header of the fragment header.
222	 */
223	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
224	if (fragoff == 0) {
225		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr)
226			- sizeof(struct ip6_frag);
227		q6->ip6q_nxt = ip6f->ip6f_nxt;
228	}
229
230	/*
231	 * Check that the reassembled packet would not exceed 65535 bytes
232	 * in size.
233	 * If it would exceed, discard the fragment and return an ICMP error.
234	 */
235	frgpartlen =  sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
236	if (q6->ip6q_unfrglen >= 0) {
237		/* The 1st fragment has already arrived. */
238		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
239			m->m_data -= offset;
240			m->m_len += offset;
241			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
242				    offset - sizeof(struct ip6_frag) + 2);
243			return(IPPROTO_DONE);
244		}
245	}
246	else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
247		m->m_data -= offset;
248		m->m_len += offset;
249		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
250			    offset - sizeof(struct ip6_frag) + 2);
251		return(IPPROTO_DONE);
252	}
253	/*
254	 * If it's the first fragment, do the above check for each
255	 * fragment already stored in the reassembly queue.
256	 */
257	if (fragoff == 0) {
258		struct ip6asfrag *af6dwn;
259
260		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
261		     af6 = af6dwn) {
262			af6dwn = af6->ip6af_down;
263
264			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
265			    IPV6_MAXPACKET) {
266				struct mbuf *merr = IP6_REASS_MBUF(af6);
267				struct ip6_hdr *ip6err;
268				int erroff = af6->ip6af_offset;
269
270				/* dequeue the fragment. */
271				frag6_deq(af6);
272
273				/* adjust pointer. */
274				merr->m_data -= af6->ip6af_offset;
275				merr->m_len += af6->ip6af_offset;
276				ip6err = mtod(merr, struct ip6_hdr *);
277
278				/*
279				 * Restore source and destination addresses
280				 * in the erroneous IPv6 header.
281				 */
282				ip6err->ip6_src = q6->ip6q_src;
283				ip6err->ip6_dst = q6->ip6q_dst;
284
285				icmp6_error(merr, ICMP6_PARAM_PROB,
286					    ICMP6_PARAMPROB_HEADER,
287					    erroff - sizeof(struct ip6_frag) + 2);
288			}
289		}
290	}
291
292	/* Override the IPv6 header */
293	ip6af = (struct ip6asfrag *)ip6;
294	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
295	ip6af->ip6af_off = fragoff;
296	ip6af->ip6af_frglen = frgpartlen;
297	ip6af->ip6af_offset = offset;
298	IP6_REASS_MBUF(ip6af) = m;
299
300	if (first_frag) {
301		af6 = (struct ip6asfrag *)q6;
302		goto insert;
303	}
304
305	/*
306	 * Find a segment which begins after this one does.
307	 */
308	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
309	     af6 = af6->ip6af_down)
310		if (af6->ip6af_off > ip6af->ip6af_off)
311			break;
312
313	/*
314	 * If the incoming framgent overlaps some existing fragments in
315	 * the reassembly queue, drop it, since it is dangerous to override
316	 * existing fragments from a security point of view.
317	 */
318	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
319		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
320			- ip6af->ip6af_off;
321		if (i > 0) {
322			log(LOG_ERR, "%d bytes of a fragment from %s "
323			    "overlaps the previous fragment\n",
324			    i, ip6_sprintf(&q6->ip6q_src));
325			goto dropfrag;
326		}
327	}
328	if (af6 != (struct ip6asfrag *)q6) {
329		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
330		if (i > 0) {
331			log(LOG_ERR, "%d bytes of a fragment from %s "
332			    "overlaps the succeeding fragment",
333			    i, ip6_sprintf(&q6->ip6q_src));
334			goto dropfrag;
335		}
336	}
337
338insert:
339
340	/*
341	 * Stick new segment in its place;
342	 * check for complete reassembly.
343	 * Move to front of packet queue, as we are
344	 * the most recently active fragmented packet.
345	 */
346	frag6_enq(ip6af, af6->ip6af_up);
347	next = 0;
348	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
349	     af6 = af6->ip6af_down) {
350		if (af6->ip6af_off != next) {
351			frag6_doing_reass = 0;
352			return IPPROTO_DONE;
353		}
354		next += af6->ip6af_frglen;
355	}
356	if (af6->ip6af_up->ip6af_mff) {
357		frag6_doing_reass = 0;
358		return IPPROTO_DONE;
359	}
360
361	/*
362	 * Reassembly is complete; concatenate fragments.
363	 */
364
365	ip6af = q6->ip6q_down;
366	t = m = IP6_REASS_MBUF(ip6af);
367	af6 = ip6af->ip6af_down;
368	while (af6 != (struct ip6asfrag *)q6) {
369		while (t->m_next)
370			t = t->m_next;
371		t->m_next = IP6_REASS_MBUF(af6);
372		af6 = af6->ip6af_down;
373	}
374
375	/* adjust offset to point where the original next header starts */
376	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
377	ip6 = (struct ip6_hdr *)ip6af;
378	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
379	ip6->ip6_src = q6->ip6q_src;
380	ip6->ip6_dst = q6->ip6q_dst;
381	nxt = q6->ip6q_nxt;
382
383	/*
384	 * Delete frag6 header with as a few cost as possible.
385	 */
386
387	if (offset < m->m_len)
388		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
389			offset);
390	else {
391		ovbcopy(mtod(m, caddr_t), (caddr_t)ip6 + offset, m->m_len);
392		m->m_data -= sizeof(struct ip6_frag);
393	}
394	m->m_data -= offset;
395	m->m_len += offset;
396
397	/*
398	 * Store NXT to the original.
399	 */
400	{
401		char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
402		*prvnxtp = nxt;
403	}
404
405	frag6_remque(q6);
406	free(q6, M_FTABLE);
407	frag6_nfragpackets--;
408
409	if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
410		int plen = 0;
411		for (t = m; t; t = t->m_next)
412			plen += t->m_len;
413		m->m_pkthdr.len = plen;
414	}
415
416	ip6stat.ip6s_reassembled++;
417	in6_ifstat_inc(dstifp, ifs6_reass_ok);
418
419	/*
420	 * Tell launch routine the next header
421	 */
422
423	*mp = m;
424	*offp = offset;
425
426	frag6_doing_reass = 0;
427	return nxt;
428
429 dropfrag:
430	in6_ifstat_inc(dstifp, ifs6_reass_fail);
431	ip6stat.ip6s_fragdropped++;
432	m_freem(m);
433	return IPPROTO_DONE;
434}
435
436/*
437 * Free a fragment reassembly header and all
438 * associated datagrams.
439 */
440void
441frag6_freef(q6)
442	struct ip6q *q6;
443{
444	struct ip6asfrag *af6, *down6;
445
446	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
447	     af6 = down6) {
448		struct mbuf *m = IP6_REASS_MBUF(af6);
449
450		down6 = af6->ip6af_down;
451		frag6_deq(af6);
452
453		/*
454		 * Return ICMP time exceeded error for the 1st fragment.
455		 * Just free other fragments.
456		 */
457		if (af6->ip6af_off == 0) {
458			struct ip6_hdr *ip6;
459
460			/* adjust pointer */
461			m->m_data -= af6->ip6af_offset;
462			m->m_len += af6->ip6af_offset;
463			ip6 = mtod(m, struct ip6_hdr *);
464
465			/* restoure source and destination addresses */
466			ip6->ip6_src = q6->ip6q_src;
467			ip6->ip6_dst = q6->ip6q_dst;
468
469			icmp6_error(m, ICMP6_TIME_EXCEEDED,
470				    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
471		}
472		else
473			m_freem(m);
474	}
475	frag6_remque(q6);
476	free(q6, M_FTABLE);
477	frag6_nfragpackets--;
478}
479
480/*
481 * Put an ip fragment on a reassembly chain.
482 * Like insque, but pointers in middle of structure.
483 */
484void
485frag6_enq(af6, up6)
486	struct ip6asfrag *af6, *up6;
487{
488	af6->ip6af_up = up6;
489	af6->ip6af_down = up6->ip6af_down;
490	up6->ip6af_down->ip6af_up = af6;
491	up6->ip6af_down = af6;
492}
493
494/*
495 * To frag6_enq as remque is to insque.
496 */
497void
498frag6_deq(af6)
499	struct ip6asfrag *af6;
500{
501	af6->ip6af_up->ip6af_down = af6->ip6af_down;
502	af6->ip6af_down->ip6af_up = af6->ip6af_up;
503}
504
505void
506frag6_insque(new, old)
507	struct ip6q *new, *old;
508{
509	new->ip6q_prev = old;
510	new->ip6q_next = old->ip6q_next;
511	old->ip6q_next->ip6q_prev= new;
512	old->ip6q_next = new;
513}
514
515void
516frag6_remque(p6)
517	struct ip6q *p6;
518{
519	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
520	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
521}
522
523/*
524 * IP timer processing;
525 * if a timer expires on a reassembly
526 * queue, discard it.
527 */
528void
529frag6_slowtimo()
530{
531	struct ip6q *q6;
532	int s = splnet();
533
534	frag6_doing_reass = 1;
535	q6 = ip6q.ip6q_next;
536	if (q6)
537		while (q6 != &ip6q) {
538			--q6->ip6q_ttl;
539			q6 = q6->ip6q_next;
540			if (q6->ip6q_prev->ip6q_ttl == 0) {
541				ip6stat.ip6s_fragtimeout++;
542				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
543				frag6_freef(q6->ip6q_prev);
544			}
545		}
546	/*
547	 * If we are over the maximum number of fragments
548	 * (due to the limit being lowered), drain off
549	 * enough to get down to the new limit.
550	 */
551	while (frag6_nfragpackets > (u_int)ip6_maxfragpackets) {
552		ip6stat.ip6s_fragoverflow++;
553		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
554		frag6_freef(ip6q.ip6q_prev);
555	}
556	frag6_doing_reass = 0;
557	splx(s);
558}
559
560/*
561 * Drain off all datagram fragments.
562 */
563void
564frag6_drain()
565{
566	if (frag6_doing_reass)
567		return;
568	while (ip6q.ip6q_next != &ip6q) {
569		ip6stat.ip6s_fragdropped++;
570		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
571		frag6_freef(ip6q.ip6q_next);
572	}
573}
574