ip_reass.c revision 44677
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34 *	$Id: ip_input.c,v 1.115 1999/02/22 18:19:57 des Exp $
35 */
36
37#define	_IP_VHL
38
39#include "opt_bootp.h"
40#include "opt_ipfw.h"
41#include "opt_ipdn.h"
42#include "opt_ipdivert.h"
43#include "opt_ipfilter.h"
44
45#include <stddef.h>
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/mbuf.h>
50#include <sys/malloc.h>
51#include <sys/domain.h>
52#include <sys/protosw.h>
53#include <sys/socket.h>
54#include <sys/time.h>
55#include <sys/kernel.h>
56#include <sys/syslog.h>
57#include <sys/sysctl.h>
58
59#include <net/if.h>
60#include <net/if_var.h>
61#include <net/if_dl.h>
62#include <net/route.h>
63#include <net/netisr.h>
64
65#include <netinet/in.h>
66#include <netinet/in_systm.h>
67#include <netinet/in_var.h>
68#include <netinet/ip.h>
69#include <netinet/in_pcb.h>
70#include <netinet/ip_var.h>
71#include <netinet/ip_icmp.h>
72#include <machine/in_cksum.h>
73
74#include <sys/socketvar.h>
75
76#ifdef IPFIREWALL
77#include <netinet/ip_fw.h>
78#endif
79
80#ifdef DUMMYNET
81#include <netinet/ip_dummynet.h>
82#endif
83
/*
 * RSVP state.  ip_input() tests rsvp_on and, when it is non-zero, claims
 * every IPPROTO_RSVP packet for local delivery regardless of destination.
 * ip_rsvp_on and ip_rsvpd are not referenced in the part of the file
 * reviewed here.
 */
84int rsvp_on = 0;
85static int ip_rsvp_on;
86struct socket *ip_rsvpd;
87
/*
 * When zero, ip_input() counts and frees packets that are not addressed
 * to this host instead of handing them to ip_forward(); ip_dooptions()
 * also requires it before forwarding a source-routed packet.
 */
88int	ipforwarding = 0;
89SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
90	&ipforwarding, 0, "");
91
92static int	ipsendredirects = 1; /* XXX */
93SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
94	&ipsendredirects, 0, "");
95
/* Starts at IPDEFTTL and is writable through sysctl. */
96int	ip_defttl = IPDEFTTL;
97SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
98	&ip_defttl, 0, "");
99
/*
 * Source-routing policy consulted by ip_dooptions():
 * ip_dosourceroute gates forwarding of LSRR/SSRR packets to their next
 * hop, ip_acceptsourceroute gates accepting a packet whose source route
 * ends at this host.
 */
100static int	ip_dosourceroute = 0;
101SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
102	&ip_dosourceroute, 0, "");
103
104static int	ip_acceptsourceroute = 0;
105SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
106	CTLFLAG_RW, &ip_acceptsourceroute, 0, "");
107#ifdef DIAGNOSTIC
108static int	ipprintfs = 0;
109#endif
110
111extern	struct domain inetdomain;
112extern	struct protosw inetsw[];
/*
 * ip_protox[] maps an IP protocol number to an index into inetsw[];
 * ip_init() fills it and ip_input() uses it to pick the pr_input handler.
 */
113u_char	ip_protox[IPPROTO_MAX];
/* Copied into ipintrq.ifq_maxlen by ip_init(). */
114static int	ipqmaxlen = IFQ_MAXLEN;
115struct	in_ifaddrhead in_ifaddrhead; /* first inet address */
/* Input queue drained by ipintr(); its limit and drop count are read-only sysctls. */
116struct	ifqueue ipintrq;
117SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RD,
118	&ipintrq.ifq_maxlen, 0, "");
119SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
120	&ipintrq.ifq_drops, 0, "");
121
122struct ipstat ipstat;
123SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD,
124	&ipstat, ipstat, "");
125
126/* Packet reassembly stuff */
/*
 * Reassembly queues are spread over IPREASS_NHASH (64) buckets.
 * IPREASS_HASH(x,y) packs bits 0-3 and bits 8-11 of x into one byte,
 * XORs that with y and keeps the low IPREASS_NHASH_LOG2 bits.  ip_input()
 * calls it with the source address and the IP identification.
 */
127#define IPREASS_NHASH_LOG2      6
128#define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
129#define IPREASS_HMASK           (IPREASS_NHASH - 1)
130#define IPREASS_HASH(x,y) \
131	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
132
/*
 * ipq[] holds the bucket heads; ip_init() links each head to itself so an
 * empty bucket is a one-element circular list.  nipq is incremented in
 * ip_reass() when a queue is created and decremented when one is released.
 * maxnipq is set to nmbclusters/4 in ip_init(); ip_input() evicts a queue
 * before creating a new one once nipq exceeds it.
 */
133static struct ipq ipq[IPREASS_NHASH];
134static int    nipq = 0;         /* total # of reass queues */
135static int    maxnipq;
136
/*
 * Optional export of ip_mtu.
 * NOTE(review): ip_mtu is not declared in the part of the file reviewed
 * here -- confirm where it is defined.
 */
137#ifdef IPCTL_DEFMTU
138SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
139	&ip_mtu, 0, "");
140#endif
141
142#ifdef IPSTEALTH
143static int	ipstealth = 0;
144SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
145    &ipstealth, 0, "");
146#endif
147
/*
 * Normalise COMPAT_IPFW so the rest of the file can use a plain #ifdef:
 * undefined or defined as 1 leaves it defined as 1; any other value
 * leaves it undefined.
 */
148#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
149#undef COMPAT_IPFW
150#define COMPAT_IPFW 1
151#else
152#undef COMPAT_IPFW
153#endif
154
155#ifdef COMPAT_IPFW
156
157#include <netinet/ip_fw.h>
158
159/* Firewall hooks */
/* ip_input() calls through ip_fw_chk_ptr only when it is non-NULL. */
160ip_fw_chk_t *ip_fw_chk_ptr;
161ip_fw_ctl_t *ip_fw_ctl_ptr;
162
163#ifdef DUMMYNET
164ip_dn_ctl_t *ip_dn_ctl_ptr;
165#endif
166
167/* IP Network Address Translation (NAT) hooks */
/* ip_input() calls through ip_nat_ptr (with IP_NAT_IN) only when it is non-NULL. */
168ip_nat_t *ip_nat_ptr;
169ip_nat_ctl_t *ip_nat_ctl_ptr;
170#endif
171
172#if defined(IPFILTER_LKM) || defined(IPFILTER)
/*
 * ipfilter entry points.  fr_checkp starts out NULL; ip_input() calls it
 * when set and drops out if it returns non-zero or clears the mbuf pointer.
 */
173int iplattach __P((void));
174int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **)) = NULL;
175#endif
176
177
178/*
179 * We need to save the IP options in case a protocol wants to respond
180 * to an incoming packet over the same route if the packet got here
181 * using IP source routing.  This allows connection establishment and
182 * maintenance when the remote end is on a network that is not known
183 * to us.
184 */
/* ip_input() resets ip_nhops to 0 for every packet before option processing. */
185static int	ip_nhops = 0;
186static	struct ip_srcrt {
187	struct	in_addr dst;			/* final destination */
188	char	nop;				/* one NOP to align */
189	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
190	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
191} ip_srcrt;
192
193#ifdef IPDIVERT
194/*
195 * Shared variable between ip_input() and ip_reass() to communicate
196 * about which packets, once assembled from fragments, get diverted,
197 * and to which port.
198 */
199static u_short	frag_divert_port;
200#endif
201
/*
 * Forwarding target chosen by the firewall.  ip_input() passes its
 * address to the firewall check and resets it to NULL on its exit paths
 * when IPFIREWALL_FORWARD is defined.
 */
202struct sockaddr_in *ip_fw_fwd_addr;
203
/* Prototypes for the file-local helpers. */
204static void save_rte __P((u_char *, struct in_addr));
205static int	 ip_dooptions __P((struct mbuf *));
206static void	 ip_forward __P((struct mbuf *, int));
207static void	 ip_freef __P((struct ipq *));
208static struct ip *
209	 ip_reass __P((struct mbuf *, struct ipq *, struct ipq *));
210static struct in_ifaddr *
211	 ip_rtaddr __P((struct in_addr));
212static void	ipintr __P((void));
213/*
214 * IP initialization: fill in IP protocol switch table.
215 * All protocols not implemented in kernel go to raw IP protocol handler.
216 */
217void
218ip_init()
219{
220	register struct protosw *pr;
221	register int i;
222
223	TAILQ_INIT(&in_ifaddrhead);
224	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
225	if (pr == 0)
226		panic("ip_init");
227	for (i = 0; i < IPPROTO_MAX; i++)
228		ip_protox[i] = pr - inetsw;
229	for (pr = inetdomain.dom_protosw;
230	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
231		if (pr->pr_domain->dom_family == PF_INET &&
232		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
233			ip_protox[pr->pr_protocol] = pr - inetsw;
234
235	for (i = 0; i < IPREASS_NHASH; i++)
236	    ipq[i].next = ipq[i].prev = &ipq[i];
237
238	maxnipq = nmbclusters/4;
239
240	ip_id = time_second & 0xffff;
241	ipintrq.ifq_maxlen = ipqmaxlen;
242#ifdef DUMMYNET
243	ip_dn_init();
244#endif
245#ifdef IPNAT
246        ip_nat_init();
247#endif
248#ifdef IPFILTER
249        iplattach();
250#endif
251
252}
253
/*
 * ipaddr: AF_INET sockaddr with its length field preset; ip_dooptions()
 * overwrites sin_addr and uses it as a scratch argument for interface
 * lookups.
 * ipforward_rt is not referenced in the part of the file reviewed here.
 */
254static struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
255static struct	route ipforward_rt;
256
257/*
258 * Ip input routine.  Checksum and byte swap header.  If fragmented
259 * try to reassemble.  Process options.  Pass to next level.
260 */
/*
 * Parameter:
 *	m	received packet.  Nothing is returned to the caller; every
 *		exit path either frees the chain, hands it to another
 *		routine, or returns right after a callee that took it over.
 *
 * Stages, in order:
 *  1. (IPFIREWALL && DUMMYNET) a packet coming back from dummynet has a
 *     leading MT_DUMMYNET mbuf carrying the matching rule; that mbuf is
 *     removed and header validation is skipped (goto iphack).
 *  2. Validate length, version, header length and header checksum;
 *     convert ip_len, ip_id and ip_off to host order; trim the chain to
 *     ip_len.
 *  3. Run the optional packet-filter hooks (fr_checkp, ip_fw_chk_ptr,
 *     ip_nat_ptr).
 *  4. Process IP options when the header is longer than struct ip.
 *  5. Decide whether the packet is for this host ("ours"); otherwise
 *     forward it or drop it depending on ipforwarding.
 *  6. For fragments, look up or create a reassembly queue and call
 *     ip_reass(); continue only once a whole datagram is available.
 *  7. Subtract the header length from ip_len and dispatch to the
 *     protocol's pr_input routine (or to the divert protocol).
 */
261void
262ip_input(struct mbuf *m)
263{
264	struct ip *ip;
265	struct ipq *fp;
266	struct in_ifaddr *ia;
267	int    i, hlen, mff;
268	u_short sum;
269#ifndef IPDIVERT /* dummy variable for the firewall code to play with */
270        u_short ip_divert_cookie = 0 ;
271#endif
272#ifdef COMPAT_IPFW
273	struct ip_fw_chain *rule = NULL ;
274#endif
275
276#if defined(IPFIREWALL) && defined(DUMMYNET)
277        /*
278         * dummynet packet are prepended a vestigial mbuf with
279         * m_type = MT_DUMMYNET and m_data pointing to the matching
280         * rule.
281         */
282        if (m->m_type == MT_DUMMYNET) {
283            struct mbuf *m0 = m ;
284            rule = (struct ip_fw_chain *)(m->m_data) ;
285            m = m->m_next ;
286            free(m0, M_IPFW);
287            ip = mtod(m, struct ip *);
288            hlen = IP_VHL_HL(ip->ip_vhl) << 2;
	    /* ip and hlen are set here because the validation below is skipped. */
289            goto iphack ;
290        } else
291            rule = NULL ;
292#endif
293
	/*
	 * NOTE(review): when the DUMMYNET block above is compiled in it has
	 * already dereferenced m before this NULL check runs.
	 */
294#ifdef	DIAGNOSTIC
295	if (m == NULL || (m->m_flags & M_PKTHDR) == 0)
296		panic("ip_input no HDR");
297#endif
298	ipstat.ips_total++;
299
300	if (m->m_pkthdr.len < sizeof(struct ip))
301		goto tooshort;
302
	/* Make the fixed part of the header contiguous in the first mbuf. */
303	if (m->m_len < sizeof (struct ip) &&
304	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
305		ipstat.ips_toosmall++;
306		return;
307	}
308	ip = mtod(m, struct ip *);
309
310	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
311		ipstat.ips_badvers++;
312		goto bad;
313	}
314
	/* Header length field counts 32-bit words. */
315	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
316	if (hlen < sizeof(struct ip)) {	/* minimum header length */
317		ipstat.ips_badhlen++;
318		goto bad;
319	}
	/* Options present: pull the whole header into the first mbuf. */
320	if (hlen > m->m_len) {
321		if ((m = m_pullup(m, hlen)) == 0) {
322			ipstat.ips_badhlen++;
323			return;
324		}
325		ip = mtod(m, struct ip *);
326	}
	/* A correct header checksums to zero; option-less headers use in_cksum_hdr(). */
327	if (hlen == sizeof(struct ip)) {
328		sum = in_cksum_hdr(ip);
329	} else {
330		sum = in_cksum(m, hlen);
331	}
332	if (sum) {
333		ipstat.ips_badsum++;
334		goto bad;
335	}
336
337	/*
338	 * Convert fields to host representation.
339	 */
340	NTOHS(ip->ip_len);
341	if (ip->ip_len < hlen) {
342		ipstat.ips_badlen++;
343		goto bad;
344	}
345	NTOHS(ip->ip_id);
346	NTOHS(ip->ip_off);
347
348	/*
349	 * Check that the amount of data in the buffers
350	 * is as at least much as the IP header would have us expect.
351	 * Trim mbufs if longer than we expect.
352	 * Drop packet if shorter than we expect.
353	 */
354	if (m->m_pkthdr.len < ip->ip_len) {
	/* Also the target of the early "packet shorter than struct ip" check. */
355tooshort:
356		ipstat.ips_tooshort++;
357		goto bad;
358	}
359	if (m->m_pkthdr.len > ip->ip_len) {
		/* Single-mbuf packet: just shorten the lengths in place. */
360		if (m->m_len == m->m_pkthdr.len) {
361			m->m_len = ip->ip_len;
362			m->m_pkthdr.len = ip->ip_len;
363		} else
			/* The count passed to m_adj() is negative here. */
364			m_adj(m, ip->ip_len - m->m_pkthdr.len);
365	}
366	/*
367	 * IpHack's section.
368	 * Right now when no processing on packet has done
369	 * and it is still fresh out of network we do our black
370	 * deals with it.
371	 * - Firewall: deny/allow/divert
372	 * - Xlate: translate packet's addr/port (NAT).
373	 * - Pipe: pass pkt through dummynet.
374	 * - Wrap: fake packet's addr/port <unimpl.>
375	 * - Encapsulate: put it in another IP and send out. <unimp.>
376 	 */
377
378#if defined(IPFIREWALL) && defined(DUMMYNET)
379iphack:
380#endif
381#if defined(IPFILTER) || defined(IPFILTER_LKM)
382	/*
383	 * Check if we want to allow this packet to be processed.
384	 * Consider it to be bad if not.
385	 */
386	if (fr_checkp) {
387		struct	mbuf	*m1 = m;
388
		/* The hook may replace the mbuf; continue with whatever it left in m1. */
389		if ((*fr_checkp)(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1)
390			return;
391		ip = mtod(m = m1, struct ip *);
392	}
393#endif
394#ifdef COMPAT_IPFW
395	if (ip_fw_chk_ptr) {
396#ifdef IPFIREWALL_FORWARD
397		/*
398		 * If we've been forwarded from the output side, then
399		 * skip the firewall a second time
400		 */
401		if (ip_fw_fwd_addr)
402			goto ours;
403#endif	/* IPFIREWALL_FORWARD */
404		i = (*ip_fw_chk_ptr)(&ip, hlen, NULL, &ip_divert_cookie,
405					&m, &rule, &ip_fw_fwd_addr);
406		/*
407		 * see the comment in ip_output for the return values
408		 * produced by the firewall.
409		 */
		/*
		 * As handled below: 0 lets the packet through, bit 0x10000
		 * selects a dummynet pipe (number in the low 16 bits), a
		 * value in 1..0xffff is a divert port, and anything left
		 * over is dropped.
		 */
410		if (!m) /* packet discarded by firewall */
411			return ;
412		if (i == 0 && ip_fw_fwd_addr == NULL) /* common case */
413			goto pass ;
414#ifdef DUMMYNET
415                if (i & 0x10000) {
416                        /* send packet to the appropriate pipe */
417                        dummynet_io(i&0xffff,DN_TO_IP_IN,m,NULL,NULL,0, rule);
418			return ;
419		}
420#endif
421#ifdef IPDIVERT
422		if (i > 0 && i < 0x10000) {
423			/* Divert packet */
424			frag_divert_port = i & 0xffff ;
425			goto ours;
426		}
427#endif
428#ifdef IPFIREWALL_FORWARD
429		if (i == 0 && ip_fw_fwd_addr != NULL)
430			goto pass ;
431#endif
432		/*
433		 * if we get here, the packet must be dropped
434		 */
435			m_freem(m);
436			return;
437	}
438pass:
439
	/* A zero return from the NAT hook ends processing of this packet here. */
440        if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, m->m_pkthdr.rcvif, IP_NAT_IN)) {
441#ifdef IPFIREWALL_FORWARD
442		ip_fw_fwd_addr = NULL;
443#endif
444		return;
445	}
446#endif	/* !COMPAT_IPFW */
447
448	/*
449	 * Process options and, if not destined for us,
450	 * ship it on.  ip_dooptions returns 1 when an
451	 * error was detected (causing an icmp message
452	 * to be sent and the original packet to be freed).
453	 */
454	ip_nhops = 0;		/* for source routed packets */
455	if (hlen > sizeof (struct ip) && ip_dooptions(m)) {
456#ifdef IPFIREWALL_FORWARD
457		ip_fw_fwd_addr = NULL;
458#endif
459		return;
460	}
461
462        /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
463         * matter if it is destined to another node, or whether it is
464         * a multicast one, RSVP wants it! and prevents it from being forwarded
465         * anywhere else. Also checks if the rsvp daemon is running before
466	 * grabbing the packet.
467         */
468	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
469		goto ours;
470
471	/*
472	 * Check our list of addresses, to see if the packet is for us.
473	 * If we don't have any addresses, assume any unicast packet
474	 * we receive might be for us (and let the upper layers deal
475	 * with it).
476	 */
477	if (TAILQ_EMPTY(&in_ifaddrhead) &&
478	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
479		goto ours;
480
	/*
	 * Per configured address: accept on an exact match with the
	 * destination (or with the firewall's forward address), or on a
	 * match with that interface's broadcast addresses.
	 */
481	for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
482					ia = TAILQ_NEXT(ia, ia_link)) {
483#define	satosin(sa)	((struct sockaddr_in *)(sa))
484
485#ifdef BOOTP_COMPAT
486		if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
487			goto ours;
488#endif
489#ifdef IPFIREWALL_FORWARD
490		/*
491		 * If the addr to forward to is one of ours, we pretend to
492		 * be the destination for this packet.
493		 */
494		if (ip_fw_fwd_addr == NULL) {
495			if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
496				goto ours;
497		} else if (IA_SIN(ia)->sin_addr.s_addr ==
498					 ip_fw_fwd_addr->sin_addr.s_addr)
499			goto ours;
500#else
501		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
502			goto ours;
503#endif
504		if (ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) {
505			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
506			    ip->ip_dst.s_addr)
507				goto ours;
508			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
509				goto ours;
510		}
511	}
512	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
513		struct in_multi *inm;
514		if (ip_mrouter) {
515			/*
516			 * If we are acting as a multicast router, all
517			 * incoming multicast packets are passed to the
518			 * kernel-level multicast forwarding function.
519			 * The packet is returned (relatively) intact; if
520			 * ip_mforward() returns a non-zero value, the packet
521			 * must be discarded, else it may be accepted below.
522			 *
523			 * (The IP ident field is put in the same byte order
524			 * as expected when ip_mforward() is called from
525			 * ip_output().)
526			 */
527			ip->ip_id = htons(ip->ip_id);
528			if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
529				ipstat.ips_cantforward++;
530				m_freem(m);
531				return;
532			}
533			ip->ip_id = ntohs(ip->ip_id);
534
535			/*
536			 * The process-level routing demon needs to receive
537			 * all multicast IGMP packets, whether or not this
538			 * host belongs to their destination groups.
539			 */
540			if (ip->ip_p == IPPROTO_IGMP)
541				goto ours;
542			ipstat.ips_forward++;
543		}
544		/*
545		 * See if we belong to the destination multicast group on the
546		 * arrival interface.
547		 */
548		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
549		if (inm == NULL) {
550			ipstat.ips_notmember++;
551			m_freem(m);
552			return;
553		}
554		goto ours;
555	}
556	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
557		goto ours;
558	if (ip->ip_dst.s_addr == INADDR_ANY)
559		goto ours;
560
561	/*
562	 * Not for us; forward if possible and desirable.
563	 */
564	if (ipforwarding == 0) {
565		ipstat.ips_cantforward++;
566		m_freem(m);
567	} else
568		ip_forward(m, 0);
569#ifdef IPFIREWALL_FORWARD
570	ip_fw_fwd_addr = NULL;
571#endif
572	return;
573
574ours:
575
576	/*
577	 * If offset or IP_MF are set, must reassemble.
578	 * Otherwise, nothing need be done.
579	 * (We could look in the reassembly queue to see
580	 * if the packet was previously fragmented,
581	 * but it's not worth the time; just let them time out.)
582	 */
583	if (ip->ip_off & (IP_MF | IP_OFFMASK | IP_RF)) {
584		if (m->m_flags & M_EXT) {		/* XXX */
585			if ((m = m_pullup(m, hlen)) == 0) {
586				ipstat.ips_toosmall++;
587#ifdef IPDIVERT
588				frag_divert_port = 0;
589				ip_divert_cookie = 0;
590#endif
591#ifdef IPFIREWALL_FORWARD
592				ip_fw_fwd_addr = NULL;
593#endif
594				return;
595			}
596			ip = mtod(m, struct ip *);
597		}
		/* sum is reused from here on as the reassembly bucket index. */
598		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
599		/*
600		 * Look for queue of fragments
601		 * of this datagram.
602		 */
603		for (fp = ipq[sum].next; fp != &ipq[sum]; fp = fp->next)
604			if (ip->ip_id == fp->ipq_id &&
605			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
606			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
607			    ip->ip_p == fp->ipq_p)
608				goto found;
609
		/* No queue matched; fp == 0 asks ip_reass() to create one. */
610		fp = 0;
611
612		/* check if there's a place for the new queue */
		/* The test is strict, so nipq can reach maxnipq + 1 before eviction starts. */
613		if (nipq > maxnipq) {
614		    /*
615		     * drop something from the tail of the current queue
616		     * before proceeding further
617		     */
		    /* Own bucket empty: free the tail of the first non-empty bucket instead. */
618		    if (ipq[sum].prev == &ipq[sum]) {   /* gak */
619			for (i = 0; i < IPREASS_NHASH; i++) {
620			    if (ipq[i].prev != &ipq[i]) {
621				ip_freef(ipq[i].prev);
622				break;
623			    }
624			}
625		    } else
626			ip_freef(ipq[sum].prev);
627		}
628found:
629		/*
630		 * Adjust ip_len to not reflect header,
631		 * set ip_mff if more fragments are expected,
632		 * convert offset of this to bytes.
633		 */
634		ip->ip_len -= hlen;
635		mff = (ip->ip_off & IP_MF) != 0;
636		if (mff) {
637		        /*
638		         * Make sure that fragments have a data length
639			 * that's a non-zero multiple of 8 bytes.
640		         */
641			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
642				ipstat.ips_toosmall++; /* XXX */
643				goto bad;
644			}
			/* ip_reass() reads M_FRAG to tell whether the last queued piece still had IP_MF. */
645			m->m_flags |= M_FRAG;
646		}
647		ip->ip_off <<= 3;
648
649		/*
650		 * If datagram marked as having more fragments
651		 * or if this is not the first fragment,
652		 * attempt reassembly; if it succeeds, proceed.
653		 */
654		if (mff || ip->ip_off) {
655			ipstat.ips_fragments++;
			/* ip_reass() finds each fragment's IP header through m_pkthdr.header. */
656			m->m_pkthdr.header = ip;
657			ip = ip_reass(m, fp, &ipq[sum]);
			/* 0 means the datagram is not complete yet, or the fragment was dropped. */
658			if (ip == 0) {
659#ifdef	IPFIREWALL_FORWARD
660				ip_fw_fwd_addr = NULL;
661#endif
662				return;
663			}
664			/* Get the length of the reassembled packets header */
665			hlen = IP_VHL_HL(ip->ip_vhl) << 2;
666			ipstat.ips_reassembled++;
667			m = dtom(ip);
668#ifdef IPDIVERT
			/*
			 * For a diverted datagram, recompute the header
			 * checksum over the header in network byte order with
			 * the full length, then restore the host-order fields.
			 */
669			if (frag_divert_port) {
670				ip->ip_len += hlen;
671				HTONS(ip->ip_len);
672				HTONS(ip->ip_off);
673				HTONS(ip->ip_id);
674				ip->ip_sum = 0;
675				ip->ip_sum = in_cksum_hdr(ip);
676				NTOHS(ip->ip_id);
677				NTOHS(ip->ip_off);
678				NTOHS(ip->ip_len);
679				ip->ip_len -= hlen;
680			}
681#endif
682		} else
			/*
			 * Neither IP_MF nor an offset: the packet is treated as
			 * whole, and any queue that matched it is discarded.
			 */
683			if (fp)
684				ip_freef(fp);
685	} else
686		ip->ip_len -= hlen;
687
688#ifdef IPDIVERT
689	/*
690	 * Divert reassembled packets to the divert protocol if required
691	 *  If divert port is null then cookie should be too,
692	 * so we shouldn't need to clear them here. Assume ip_divert does so.
693	 */
694	if (frag_divert_port) {
695		ipstat.ips_delivered++;
696		ip_divert_port = frag_divert_port;
697		frag_divert_port = 0;
698		(*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, hlen);
699		return;
700	}
701
702	/* Don't let packets divert themselves */
703	if (ip->ip_p == IPPROTO_DIVERT) {
704		ipstat.ips_noproto++;
705		goto bad;
706	}
707
708#endif
709
710	/*
711	 * Switch out to protocol's input routine.
712	 */
	/* At this point ip_len no longer includes the header length. */
713	ipstat.ips_delivered++;
714	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
715#ifdef	IPFIREWALL_FORWARD
716	ip_fw_fwd_addr = NULL;	/* tcp needed it */
717#endif
718	return;
	/* Common drop path: clear the forward address and free the chain. */
719bad:
720#ifdef	IPFIREWALL_FORWARD
721	ip_fw_fwd_addr = NULL;
722#endif
723	m_freem(m);
724}
725
726/*
727 * IP software interrupt routine - to go away sometime soon
728 */
729static void
730ipintr(void)
731{
732	int s;
733	struct mbuf *m;
734
735	while(1) {
736		s = splimp();
737		IF_DEQUEUE(&ipintrq, m);
738		splx(s);
739		if (m == 0)
740			return;
741		ip_input(m);
742	}
743}
744
745NETISR_SET(NETISR_IP, ipintr);
746
747/*
748 * Take incoming datagram fragment and try to
749 * reassemble it into whole datagram.  If a chain for
750 * reassembly of this datagram already exists, then it
751 * is given as fp; otherwise have to make a chain.
752 */
/*
 * Parameters:
 *	m	the fragment.  The caller (ip_input) has already set
 *		m_pkthdr.header to the IP header, subtracted the header
 *		length from ip_len, converted ip_off to bytes, and set
 *		M_FRAG when IP_MF was present.
 *	fp	existing reassembly queue for this datagram, or 0.
 *	where	hash bucket head a newly created queue is linked after.
 *
 * Returns the IP header of the complete datagram, with ip_len holding
 * the total payload length, or 0 when the datagram is still incomplete
 * or the fragment or queue was dropped.  In the 0 case the fragment has
 * either been queued or freed.
 *
 * Fragments are kept on fp->ipq_frags, linked through m_nextpkt and
 * ordered by ip_off.
 */
753static struct ip *
754ip_reass(m, fp, where)
755	register struct mbuf *m;
756	register struct ipq *fp;
757	struct   ipq    *where;
758{
759	struct ip *ip = mtod(m, struct ip *);
760	register struct mbuf *p = 0, *q, *nq;
761	struct mbuf *t;
762	int hlen = IP_VHL_HL(ip->ip_vhl) << 2;
763	int i, next;
764
765	/*
766	 * Presence of header sizes in mbufs
767	 * would confuse code below.
768	 */
	/* Hide the header: the mbuf now describes payload only; ip still points at the header. */
769	m->m_data += hlen;
770	m->m_len -= hlen;
771
772	/*
773	 * If first fragment to arrive, create a reassembly queue.
774	 */
775	if (fp == 0) {
		/* The queue header itself is stored in an mbuf of type MT_FTABLE. */
776		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
777			goto dropfrag;
778		fp = mtod(t, struct ipq *);
779		insque(fp, where);
780		nipq++;
781		fp->ipq_ttl = IPFRAGTTL;
782		fp->ipq_p = ip->ip_p;
783		fp->ipq_id = ip->ip_id;
784		fp->ipq_src = ip->ip_src;
785		fp->ipq_dst = ip->ip_dst;
786		fp->ipq_frags = m;
787		m->m_nextpkt = NULL;
788#ifdef IPDIVERT
789		fp->ipq_divert = 0;
790		fp->ipq_div_cookie = 0;
791#endif
792		goto inserted;
793	}
794
/* IP header of a queued fragment, as recorded by ip_input() in m_pkthdr.header. */
795#define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
796
797	/*
798	 * Find a segment which begins after this one does.
799	 */
	/* On exit p is the last fragment starting at or before ours (or NULL), q the first one after. */
800	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
801		if (GETIP(q)->ip_off > ip->ip_off)
802			break;
803
804	/*
805	 * If there is a preceding segment, it may provide some of
806	 * our data already.  If so, drop the data from the incoming
807	 * segment.  If it provides all of our data, drop us, otherwise
808	 * stick new segment in the proper place.
809	 */
810	if (p) {
		/* i = number of our leading bytes already covered by p. */
811		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
812		if (i > 0) {
813			if (i >= ip->ip_len)
814				goto dropfrag;
815			m_adj(dtom(ip), i);
816			ip->ip_off += i;
817			ip->ip_len -= i;
818		}
819		m->m_nextpkt = p->m_nextpkt;
820		p->m_nextpkt = m;
821	} else {
822		m->m_nextpkt = fp->ipq_frags;
823		fp->ipq_frags = m;
824	}
825
826	/*
827	 * While we overlap succeeding segments trim them or,
828	 * if they are completely covered, dequeue them.
829	 */
830	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
831	     q = nq) {
		/* i = number of q's leading bytes that our fragment covers. */
832		i = (ip->ip_off + ip->ip_len) -
833		    GETIP(q)->ip_off;
834		if (i < GETIP(q)->ip_len) {
835			GETIP(q)->ip_len -= i;
836			GETIP(q)->ip_off += i;
837			m_adj(q, i);
838			break;
839		}
		/* q is entirely covered: unlink it from behind m and free it. */
840		nq = q->m_nextpkt;
841		m->m_nextpkt = nq;
842		m_freem(q);
843	}
844
845inserted:
846
847#ifdef IPDIVERT
848	/*
849	 * Any fragment diverting causes the whole packet to divert
850	 */
851	if (frag_divert_port) {
852		fp->ipq_divert = frag_divert_port;
853		fp->ipq_div_cookie = ip_divert_cookie;
854	}
855	frag_divert_port = 0;
856	ip_divert_cookie = 0;
857#endif
858
859	/*
860	 * Check for complete reassembly.
861	 */
	/* Complete means offsets run contiguously from 0 and the final piece lacks M_FRAG. */
862	next = 0;
863	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
864		if (GETIP(q)->ip_off != next)
865			return (0);
866		next += GETIP(q)->ip_len;
867	}
868	/* Make sure the last packet didn't have the IP_MF flag */
869	if (p->m_flags & M_FRAG)
870		return (0);
871
872	/*
873	 * Reassembly is complete.  Make sure the packet is a sane size.
874	 */
875	q = fp->ipq_frags;
876	ip = GETIP(q);
877	if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) {
878		ipstat.ips_toolong++;
879		ip_freef(fp);
880		return (0);
881	}
882
883	/*
884	 * Concatenate fragments.
885	 */
	/* First detach and re-append the head's own chain, then append every later fragment. */
886	m = q;
887	t = m->m_next;
888	m->m_next = 0;
889	m_cat(m, t);
890	nq = q->m_nextpkt;
891	q->m_nextpkt = 0;
892	for (q = nq; q != NULL; q = nq) {
893		nq = q->m_nextpkt;
894		q->m_nextpkt = NULL;
895		m_cat(m, q);
896	}
897
898#ifdef IPDIVERT
899	/*
900	 * extract divert port for packet, if any
901	 */
902	frag_divert_port = fp->ipq_divert;
903	ip_divert_cookie = fp->ipq_div_cookie;
904#endif
905
906	/*
907	 * Create header for new ip packet by
908	 * modifying header of first packet;
909	 * dequeue and discard fragment reassembly header.
910	 * Make header visible.
911	 */
912	ip->ip_len = next;
913	ip->ip_src = fp->ipq_src;
914	ip->ip_dst = fp->ipq_dst;
915	remque(fp);
916	nipq--;
917	(void) m_free(dtom(fp));
	/* Undo the header hiding done on entry, using the first fragment's header length. */
918	m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2);
919	m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2);
920	/* some debugging cruft by sklower, below, will go away soon */
	/* Recompute m_pkthdr.len as the sum of m_len over the chain; m is NULL after the loop. */
921	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
922		register int plen = 0;
923		for (t = m; m; m = m->m_next)
924			plen += m->m_len;
925		t->m_pkthdr.len = plen;
926	}
927	return (ip);
928
	/* Discard the incoming fragment only; an existing queue is left as it is. */
929dropfrag:
930#ifdef IPDIVERT
931	frag_divert_port = 0;
932	ip_divert_cookie = 0;
933#endif
934	ipstat.ips_fragdropped++;
935	m_freem(m);
936	return (0);
937
938#undef GETIP
939}
940
941/*
942 * Free a fragment reassembly header and all
943 * associated datagrams.
944 */
945static void
946ip_freef(fp)
947	struct ipq *fp;
948{
949	register struct mbuf *q;
950
951	while (fp->ipq_frags) {
952		q = fp->ipq_frags;
953		fp->ipq_frags = q->m_nextpkt;
954		m_freem(q);
955	}
956	remque(fp);
957	(void) m_free(dtom(fp));
958	nipq--;
959}
960
961/*
962 * IP timer processing;
963 * if a timer expires on a reassembly
964 * queue, discard it.
965 */
966void
967ip_slowtimo()
968{
969	register struct ipq *fp;
970	int s = splnet();
971	int i;
972
973	for (i = 0; i < IPREASS_NHASH; i++) {
974		fp = ipq[i].next;
975		if (fp == 0)
976			continue;
977		while (fp != &ipq[i]) {
978			--fp->ipq_ttl;
979			fp = fp->next;
980			if (fp->prev->ipq_ttl == 0) {
981				ipstat.ips_fragtimeout++;
982				ip_freef(fp->prev);
983			}
984		}
985	}
986	ipflow_slowtimo();
987	splx(s);
988}
989
990/*
991 * Drain off all datagram fragments.
992 */
993void
994ip_drain()
995{
996	int     i;
997
998	for (i = 0; i < IPREASS_NHASH; i++) {
999		while (ipq[i].next != &ipq[i]) {
1000			ipstat.ips_fragdropped++;
1001			ip_freef(ipq[i].next);
1002		}
1003	}
1004	in_rtqdrain();
1005}
1006
1007/*
1008 * Do option processing on a datagram,
1009 * possibly discarding it if bad options are encountered,
1010 * or forwarding it if source-routed.
1011 * Returns 1 if packet has been forwarded/freed,
1012 * 0 if the packet should be processed further.
1013 */
1014static int
1015ip_dooptions(m)
1016	struct mbuf *m;
1017{
1018	register struct ip *ip = mtod(m, struct ip *);
1019	register u_char *cp;
1020	register struct ip_timestamp *ipt;
1021	register struct in_ifaddr *ia;
1022	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1023	struct in_addr *sin, dst;
1024	n_time ntime;
1025
1026	dst = ip->ip_dst;
1027	cp = (u_char *)(ip + 1);
1028	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1029	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1030		opt = cp[IPOPT_OPTVAL];
1031		if (opt == IPOPT_EOL)
1032			break;
1033		if (opt == IPOPT_NOP)
1034			optlen = 1;
1035		else {
1036			optlen = cp[IPOPT_OLEN];
1037			if (optlen <= 0 || optlen > cnt) {
1038				code = &cp[IPOPT_OLEN] - (u_char *)ip;
1039				goto bad;
1040			}
1041		}
1042		switch (opt) {
1043
1044		default:
1045			break;
1046
1047		/*
1048		 * Source routing with record.
1049		 * Find interface with current destination address.
1050		 * If none on this machine then drop if strictly routed,
1051		 * or do nothing if loosely routed.
1052		 * Record interface address and bring up next address
1053		 * component.  If strictly routed make sure next
1054		 * address is on directly accessible net.
1055		 */
1056		case IPOPT_LSRR:
1057		case IPOPT_SSRR:
1058			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1059				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1060				goto bad;
1061			}
1062			ipaddr.sin_addr = ip->ip_dst;
1063			ia = (struct in_ifaddr *)
1064				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1065			if (ia == 0) {
1066				if (opt == IPOPT_SSRR) {
1067					type = ICMP_UNREACH;
1068					code = ICMP_UNREACH_SRCFAIL;
1069					goto bad;
1070				}
1071				if (!ip_dosourceroute)
1072					goto nosourcerouting;
1073				/*
1074				 * Loose routing, and not at next destination
1075				 * yet; nothing to do except forward.
1076				 */
1077				break;
1078			}
1079			off--;			/* 0 origin */
1080			if (off > optlen - sizeof(struct in_addr)) {
1081				/*
1082				 * End of source route.  Should be for us.
1083				 */
1084				if (!ip_acceptsourceroute)
1085					goto nosourcerouting;
1086				save_rte(cp, ip->ip_src);
1087				break;
1088			}
1089
1090			if (!ip_dosourceroute) {
1091				if (ipforwarding) {
1092					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
1093					/*
1094					 * Acting as a router, so generate ICMP
1095					 */
1096nosourcerouting:
1097					strcpy(buf, inet_ntoa(ip->ip_dst));
1098					log(LOG_WARNING,
1099					    "attempted source route from %s to %s\n",
1100					    inet_ntoa(ip->ip_src), buf);
1101					type = ICMP_UNREACH;
1102					code = ICMP_UNREACH_SRCFAIL;
1103					goto bad;
1104				} else {
1105					/*
1106					 * Not acting as a router, so silently drop.
1107					 */
1108					ipstat.ips_cantforward++;
1109					m_freem(m);
1110					return (1);
1111				}
1112			}
1113
1114			/*
1115			 * locate outgoing interface
1116			 */
1117			(void)memcpy(&ipaddr.sin_addr, cp + off,
1118			    sizeof(ipaddr.sin_addr));
1119
1120			if (opt == IPOPT_SSRR) {
1121#define	INA	struct in_ifaddr *
1122#define	SA	struct sockaddr *
1123			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1124				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1125			} else
1126				ia = ip_rtaddr(ipaddr.sin_addr);
1127			if (ia == 0) {
1128				type = ICMP_UNREACH;
1129				code = ICMP_UNREACH_SRCFAIL;
1130				goto bad;
1131			}
1132			ip->ip_dst = ipaddr.sin_addr;
1133			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1134			    sizeof(struct in_addr));
1135			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1136			/*
1137			 * Let ip_intr's mcast routing check handle mcast pkts
1138			 */
1139			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1140			break;
1141
1142		case IPOPT_RR:
1143			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1144				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1145				goto bad;
1146			}
1147			/*
1148			 * If no space remains, ignore.
1149			 */
1150			off--;			/* 0 origin */
1151			if (off > optlen - sizeof(struct in_addr))
1152				break;
1153			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1154			    sizeof(ipaddr.sin_addr));
1155			/*
1156			 * locate outgoing interface; if we're the destination,
1157			 * use the incoming interface (should be same).
1158			 */
1159			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1160			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
1161				type = ICMP_UNREACH;
1162				code = ICMP_UNREACH_HOST;
1163				goto bad;
1164			}
1165			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1166			    sizeof(struct in_addr));
1167			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1168			break;
1169
1170		case IPOPT_TS:
1171			code = cp - (u_char *)ip;
1172			ipt = (struct ip_timestamp *)cp;
1173			if (ipt->ipt_len < 5)
1174				goto bad;
1175			if (ipt->ipt_ptr > ipt->ipt_len - sizeof(int32_t)) {
1176				if (++ipt->ipt_oflw == 0)
1177					goto bad;
1178				break;
1179			}
1180			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
1181			switch (ipt->ipt_flg) {
1182
1183			case IPOPT_TS_TSONLY:
1184				break;
1185
1186			case IPOPT_TS_TSANDADDR:
1187				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1188				    sizeof(struct in_addr) > ipt->ipt_len)
1189					goto bad;
1190				ipaddr.sin_addr = dst;
1191				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1192							    m->m_pkthdr.rcvif);
1193				if (ia == 0)
1194					continue;
1195				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
1196				    sizeof(struct in_addr));
1197				ipt->ipt_ptr += sizeof(struct in_addr);
1198				break;
1199
1200			case IPOPT_TS_PRESPEC:
1201				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1202				    sizeof(struct in_addr) > ipt->ipt_len)
1203					goto bad;
1204				(void)memcpy(&ipaddr.sin_addr, sin,
1205				    sizeof(struct in_addr));
1206				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1207					continue;
1208				ipt->ipt_ptr += sizeof(struct in_addr);
1209				break;
1210
1211			default:
1212				goto bad;
1213			}
1214			ntime = iptime();
1215			(void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
1216			    sizeof(n_time));
1217			ipt->ipt_ptr += sizeof(n_time);
1218		}
1219	}
1220	if (forward && ipforwarding) {
1221		ip_forward(m, 1);
1222		return (1);
1223	}
1224	return (0);
1225bad:
1226	ip->ip_len -= IP_VHL_HL(ip->ip_vhl) << 2;   /* XXX icmp_error adds in hdr length */
1227	icmp_error(m, type, code, 0, 0);
1228	ipstat.ips_badoptions++;
1229	return (1);
1230}
1231
1232/*
1233 * Given address of next destination (final or next hop),
1234 * return internet address info of interface to be used to get there.
1235 */
1236static struct in_ifaddr *
1237ip_rtaddr(dst)
1238	 struct in_addr dst;
1239{
1240	register struct sockaddr_in *sin;
1241
1242	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
1243
1244	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
1245		if (ipforward_rt.ro_rt) {
1246			RTFREE(ipforward_rt.ro_rt);
1247			ipforward_rt.ro_rt = 0;
1248		}
1249		sin->sin_family = AF_INET;
1250		sin->sin_len = sizeof(*sin);
1251		sin->sin_addr = dst;
1252
1253		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1254	}
1255	if (ipforward_rt.ro_rt == 0)
1256		return ((struct in_ifaddr *)0);
1257	return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
1258}
1259
1260/*
1261 * Save incoming source route for use in replies,
1262 * to be picked up later by ip_srcroute if the receiver is interested.
1263 */
1264void
1265save_rte(option, dst)
1266	u_char *option;
1267	struct in_addr dst;
1268{
1269	unsigned olen;
1270
1271	olen = option[IPOPT_OLEN];
1272#ifdef DIAGNOSTIC
1273	if (ipprintfs)
1274		printf("save_rte: olen %d\n", olen);
1275#endif
1276	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1277		return;
1278	bcopy(option, ip_srcrt.srcopt, olen);
1279	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1280	ip_srcrt.dst = dst;
1281}
1282
1283/*
1284 * Retrieve incoming source route for use in replies,
1285 * in the same form used by setsockopt.
1286 * The first hop is placed before the options, will be removed later.
1287 */
1288struct mbuf *
1289ip_srcroute()
1290{
1291	register struct in_addr *p, *q;
1292	register struct mbuf *m;
1293
1294	if (ip_nhops == 0)
1295		return ((struct mbuf *)0);
1296	m = m_get(M_DONTWAIT, MT_HEADER);
1297	if (m == 0)
1298		return ((struct mbuf *)0);
1299
1300#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1301
1302	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1303	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1304	    OPTSIZ;
1305#ifdef DIAGNOSTIC
1306	if (ipprintfs)
1307		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1308#endif
1309
1310	/*
1311	 * First save first hop for return route
1312	 */
1313	p = &ip_srcrt.route[ip_nhops - 1];
1314	*(mtod(m, struct in_addr *)) = *p--;
1315#ifdef DIAGNOSTIC
1316	if (ipprintfs)
1317		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1318#endif
1319
1320	/*
1321	 * Copy option fields and padding (nop) to mbuf.
1322	 */
1323	ip_srcrt.nop = IPOPT_NOP;
1324	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1325	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
1326	    &ip_srcrt.nop, OPTSIZ);
1327	q = (struct in_addr *)(mtod(m, caddr_t) +
1328	    sizeof(struct in_addr) + OPTSIZ);
1329#undef OPTSIZ
1330	/*
1331	 * Record return path as an IP source route,
1332	 * reversing the path (pointers are now aligned).
1333	 */
1334	while (p >= ip_srcrt.route) {
1335#ifdef DIAGNOSTIC
1336		if (ipprintfs)
1337			printf(" %lx", (u_long)ntohl(q->s_addr));
1338#endif
1339		*q++ = *p--;
1340	}
1341	/*
1342	 * Last hop goes to final destination.
1343	 */
1344	*q = ip_srcrt.dst;
1345#ifdef DIAGNOSTIC
1346	if (ipprintfs)
1347		printf(" %lx\n", (u_long)ntohl(q->s_addr));
1348#endif
1349	return (m);
1350}
1351
1352/*
1353 * Strip out IP options, at higher
1354 * level protocol in the kernel.
1355 * Second argument is buffer to which options
1356 * will be moved, and return value is their length.
1357 * XXX should be deleted; last arg currently ignored.
1358 */
1359void
1360ip_stripoptions(m, mopt)
1361	register struct mbuf *m;
1362	struct mbuf *mopt;
1363{
1364	register int i;
1365	struct ip *ip = mtod(m, struct ip *);
1366	register caddr_t opts;
1367	int olen;
1368
1369	olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1370	opts = (caddr_t)(ip + 1);
1371	i = m->m_len - (sizeof (struct ip) + olen);
1372	bcopy(opts + olen, opts, (unsigned)i);
1373	m->m_len -= olen;
1374	if (m->m_flags & M_PKTHDR)
1375		m->m_pkthdr.len -= olen;
1376	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
1377}
1378
1379u_char inetctlerrmap[PRC_NCMDS] = {
1380	0,		0,		0,		0,
1381	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1382	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1383	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1384	0,		0,		0,		0,
1385	ENOPROTOOPT
1386};
1387
1388/*
1389 * Forward a packet.  If some error occurs return the sender
1390 * an icmp packet.  Note we can't always generate a meaningful
1391 * icmp message because icmp doesn't have a large enough repertoire
1392 * of codes and types.
1393 *
1394 * If not forwarding, just drop the packet.  This could be confusing
1395 * if ipforwarding was zero but some routing protocol was advancing
1396 * us as a gateway to somewhere.  However, we must let the routing
1397 * protocol deal with that.
1398 *
1399 * The srcrt parameter indicates whether the packet is being forwarded
1400 * via a source route.
1401 */
1402static void
1403ip_forward(m, srcrt)
1404	struct mbuf *m;
1405	int srcrt;
1406{
1407	register struct ip *ip = mtod(m, struct ip *);
1408	register struct sockaddr_in *sin;
1409	register struct rtentry *rt;
1410	int error, type = 0, code = 0;
1411	struct mbuf *mcopy;
1412	n_long dest;
1413	struct ifnet *destifp;
1414
1415	dest = 0;
1416#ifdef DIAGNOSTIC
1417	if (ipprintfs)
1418		printf("forward: src %lx dst %lx ttl %x\n",
1419		    (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr,
1420		    ip->ip_ttl);
1421#endif
1422
1423
1424	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1425		ipstat.ips_cantforward++;
1426		m_freem(m);
1427		return;
1428	}
1429	HTONS(ip->ip_id);
1430#ifdef IPSTEALTH
1431	if (!ipstealth) {
1432#endif
1433		if (ip->ip_ttl <= IPTTLDEC) {
1434			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
1435			    dest, 0);
1436			return;
1437		}
1438		ip->ip_ttl -= IPTTLDEC;
1439#ifdef IPSTEALTH
1440	}
1441#endif
1442
1443	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
1444	if ((rt = ipforward_rt.ro_rt) == 0 ||
1445	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1446		if (ipforward_rt.ro_rt) {
1447			RTFREE(ipforward_rt.ro_rt);
1448			ipforward_rt.ro_rt = 0;
1449		}
1450		sin->sin_family = AF_INET;
1451		sin->sin_len = sizeof(*sin);
1452		sin->sin_addr = ip->ip_dst;
1453
1454		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1455		if (ipforward_rt.ro_rt == 0) {
1456			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1457			return;
1458		}
1459		rt = ipforward_rt.ro_rt;
1460	}
1461
1462	/*
1463	 * Save at most 64 bytes of the packet in case
1464	 * we need to generate an ICMP message to the src.
1465	 */
1466	mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1467
1468	/*
1469	 * If forwarding packet using same interface that it came in on,
1470	 * perhaps should send a redirect to sender to shortcut a hop.
1471	 * Only send redirect if source is sending directly to us,
1472	 * and if packet was not source routed (or has any options).
1473	 * Also, don't send redirect if forwarding using a default route
1474	 * or a route modified by a redirect.
1475	 */
1476#define	satosin(sa)	((struct sockaddr_in *)(sa))
1477	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1478	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1479	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1480	    ipsendredirects && !srcrt) {
1481#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1482		u_long src = ntohl(ip->ip_src.s_addr);
1483
1484		if (RTA(rt) &&
1485		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1486		    if (rt->rt_flags & RTF_GATEWAY)
1487			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1488		    else
1489			dest = ip->ip_dst.s_addr;
1490		    /* Router requirements says to only send host redirects */
1491		    type = ICMP_REDIRECT;
1492		    code = ICMP_REDIRECT_HOST;
1493#ifdef DIAGNOSTIC
1494		    if (ipprintfs)
1495		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1496#endif
1497		}
1498	}
1499
1500	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1501			  IP_FORWARDING, 0);
1502	if (error)
1503		ipstat.ips_cantforward++;
1504	else {
1505		ipstat.ips_forward++;
1506		if (type)
1507			ipstat.ips_redirectsent++;
1508		else {
1509			if (mcopy) {
1510				ipflow_create(&ipforward_rt, mcopy);
1511				m_freem(mcopy);
1512			}
1513			return;
1514		}
1515	}
1516	if (mcopy == NULL)
1517		return;
1518	destifp = NULL;
1519
1520	switch (error) {
1521
1522	case 0:				/* forwarded, but need redirect */
1523		/* type, code set above */
1524		break;
1525
1526	case ENETUNREACH:		/* shouldn't happen, checked above */
1527	case EHOSTUNREACH:
1528	case ENETDOWN:
1529	case EHOSTDOWN:
1530	default:
1531		type = ICMP_UNREACH;
1532		code = ICMP_UNREACH_HOST;
1533		break;
1534
1535	case EMSGSIZE:
1536		type = ICMP_UNREACH;
1537		code = ICMP_UNREACH_NEEDFRAG;
1538		if (ipforward_rt.ro_rt)
1539			destifp = ipforward_rt.ro_rt->rt_ifp;
1540		ipstat.ips_cantfrag++;
1541		break;
1542
1543	case ENOBUFS:
1544		type = ICMP_SOURCEQUENCH;
1545		code = 0;
1546		break;
1547	}
1548	icmp_error(mcopy, type, code, dest, destifp);
1549}
1550
1551void
1552ip_savecontrol(inp, mp, ip, m)
1553	register struct inpcb *inp;
1554	register struct mbuf **mp;
1555	register struct ip *ip;
1556	register struct mbuf *m;
1557{
1558	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1559		struct timeval tv;
1560
1561		microtime(&tv);
1562		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1563			SCM_TIMESTAMP, SOL_SOCKET);
1564		if (*mp)
1565			mp = &(*mp)->m_next;
1566	}
1567	if (inp->inp_flags & INP_RECVDSTADDR) {
1568		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
1569		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
1570		if (*mp)
1571			mp = &(*mp)->m_next;
1572	}
1573#ifdef notyet
1574	/* XXX
1575	 * Moving these out of udp_input() made them even more broken
1576	 * than they already were.
1577	 */
1578	/* options were tossed already */
1579	if (inp->inp_flags & INP_RECVOPTS) {
1580		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
1581		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
1582		if (*mp)
1583			mp = &(*mp)->m_next;
1584	}
1585	/* ip_srcroute doesn't do what we want here, need to fix */
1586	if (inp->inp_flags & INP_RECVRETOPTS) {
1587		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
1588		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
1589		if (*mp)
1590			mp = &(*mp)->m_next;
1591	}
1592#endif
1593	if (inp->inp_flags & INP_RECVIF) {
1594		struct ifnet *ifp;
1595		struct sdlbuf {
1596			struct sockaddr_dl sdl;
1597			u_char	pad[32];
1598		} sdlbuf;
1599		struct sockaddr_dl *sdp;
1600		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
1601
1602		if (((ifp = m->m_pkthdr.rcvif))
1603		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
1604			sdp = (struct sockaddr_dl *)(ifnet_addrs
1605					[ifp->if_index - 1]->ifa_addr);
1606			/*
1607			 * Change our mind and don't try copy.
1608			 */
1609			if ((sdp->sdl_family != AF_LINK)
1610			|| (sdp->sdl_len > sizeof(sdlbuf))) {
1611				goto makedummy;
1612			}
1613			bcopy(sdp, sdl2, sdp->sdl_len);
1614		} else {
1615makedummy:
1616			sdl2->sdl_len
1617				= offsetof(struct sockaddr_dl, sdl_data[0]);
1618			sdl2->sdl_family = AF_LINK;
1619			sdl2->sdl_index = 0;
1620			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1621		}
1622		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
1623			IP_RECVIF, IPPROTO_IP);
1624		if (*mp)
1625			mp = &(*mp)->m_next;
1626	}
1627}
1628
1629int
1630ip_rsvp_init(struct socket *so)
1631{
1632	if (so->so_type != SOCK_RAW ||
1633	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1634	  return EOPNOTSUPP;
1635
1636	if (ip_rsvpd != NULL)
1637	  return EADDRINUSE;
1638
1639	ip_rsvpd = so;
1640	/*
1641	 * This may seem silly, but we need to be sure we don't over-increment
1642	 * the RSVP counter, in case something slips up.
1643	 */
1644	if (!ip_rsvp_on) {
1645		ip_rsvp_on = 1;
1646		rsvp_on++;
1647	}
1648
1649	return 0;
1650}
1651
1652int
1653ip_rsvp_done(void)
1654{
1655	ip_rsvpd = NULL;
1656	/*
1657	 * This may seem silly, but we need to be sure we don't over-decrement
1658	 * the RSVP counter, in case something slips up.
1659	 */
1660	if (ip_rsvp_on) {
1661		ip_rsvp_on = 0;
1662		rsvp_on--;
1663	}
1664	return 0;
1665}
1666