ip_input.c revision 15026
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34 * $Id: ip_input.c,v 1.39 1996/03/25 17:41:23 phk Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/domain.h>
42#include <sys/protosw.h>
43#include <sys/socket.h>
44#include <sys/errno.h>
45#include <sys/time.h>
46#include <sys/kernel.h>
47#include <sys/syslog.h>
48#include <sys/sysctl.h>
49
50#include <net/if.h>
51#include <net/route.h>
52#include <net/netisr.h>
53
54#include <netinet/in.h>
55#include <netinet/in_systm.h>
56#include <netinet/in_var.h>
57#include <netinet/ip.h>
58#include <netinet/in_pcb.h>
59#include <netinet/in_var.h>
60#include <netinet/ip_var.h>
61#include <netinet/ip_icmp.h>
62
63#include <sys/socketvar.h>
64int rsvp_on = 0;
65static int ip_rsvp_on;
66struct socket *ip_rsvpd;
67
68static int	ipforwarding = 0;
69SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
70	&ipforwarding, 0, "");
71
72static int	ipsendredirects = 1; /* XXX */
73SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
74	&ipsendredirects, 0, "");
75
76int	ip_defttl = IPDEFTTL;
77SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
78	&ip_defttl, 0, "");
79
80static int	ip_dosourceroute = 0;
81SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
82	&ip_dosourceroute, 0, "");
83#ifdef DIAGNOSTIC
84static int	ipprintfs = 0;
85#endif
86
87extern	struct domain inetdomain;
88extern	struct protosw inetsw[];
89u_char	ip_protox[IPPROTO_MAX];
90static int	ipqmaxlen = IFQ_MAXLEN;
91struct	in_ifaddr *in_ifaddr;			/* first inet address */
92struct	ifqueue ipintrq;
93SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RD,
94	&ipintrq.ifq_maxlen, 0, "");
95SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
96	&ipintrq.ifq_drops, 0, "");
97
98struct ipstat ipstat;
99static struct ipq ipq;
100
101#ifdef IPCTL_DEFMTU
102SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
103	&ip_mtu, 0, "");
104#endif
105
106/* Firewall hooks */
107ip_fw_chk_t *ip_fw_chk_ptr;
108ip_fw_ctl_t *ip_fw_ctl_ptr;
109
110/*
111 * We need to save the IP options in case a protocol wants to respond
112 * to an incoming packet over the same route if the packet got here
113 * using IP source routing.  This allows connection establishment and
114 * maintenance when the remote end is on a network that is not known
115 * to us.
116 */
117static int	ip_nhops = 0;
118static	struct ip_srcrt {
119	struct	in_addr dst;			/* final destination */
120	char	nop;				/* one NOP to align */
121	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
122	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
123} ip_srcrt;
124
125static void save_rte __P((u_char *, struct in_addr));
126static void	 ip_deq __P((struct ipasfrag *));
127static int	 ip_dooptions __P((struct mbuf *));
128static void	 ip_enq __P((struct ipasfrag *, struct ipasfrag *));
129static void	 ip_forward __P((struct mbuf *, int));
130static void	 ip_freef __P((struct ipq *));
131static struct ip *
132	 ip_reass __P((struct ipasfrag *, struct ipq *));
133static struct in_ifaddr *
134	 ip_rtaddr __P((struct in_addr));
135static void	 ipintr __P((void));
136/*
137 * IP initialization: fill in IP protocol switch table.
138 * All protocols not implemented in kernel go to raw IP protocol handler.
139 */
140void
141ip_init()
142{
143	register struct protosw *pr;
144	register int i;
145
146	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
147	if (pr == 0)
148		panic("ip_init");
149	for (i = 0; i < IPPROTO_MAX; i++)
150		ip_protox[i] = pr - inetsw;
151	for (pr = inetdomain.dom_protosw;
152	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
153		if (pr->pr_domain->dom_family == PF_INET &&
154		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
155			ip_protox[pr->pr_protocol] = pr - inetsw;
156	ipq.next = ipq.prev = &ipq;
157	ip_id = time.tv_sec & 0xffff;
158	ipintrq.ifq_maxlen = ipqmaxlen;
159#ifdef IPFIREWALL
160	ip_fw_init();
161#endif
162}
163
164static struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
165static struct	route ipforward_rt;
166
167/*
168 * Ip input routine.  Checksum and byte swap header.  If fragmented
169 * try to reassemble.  Process options.  Pass to next level.
170 */
171void
172ip_input(struct mbuf *m)
173{
174	struct ip *ip;
175	struct ipq *fp;
176	struct in_ifaddr *ia;
177	int hlen;
178
179#ifdef	DIAGNOSTIC
180	if ((m->m_flags & M_PKTHDR) == 0)
181		panic("ipintr no HDR");
182#endif
183	/*
184	 * If no IP addresses have been set yet but the interfaces
185	 * are receiving, can't do anything with incoming packets yet.
186	 */
187	if (in_ifaddr == NULL)
188		goto bad;
189	ipstat.ips_total++;
190	if (m->m_len < sizeof (struct ip) &&
191	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
192		ipstat.ips_toosmall++;
193		return;
194	}
195	ip = mtod(m, struct ip *);
196	if (ip->ip_v != IPVERSION) {
197		ipstat.ips_badvers++;
198		goto bad;
199	}
200	hlen = ip->ip_hl << 2;
201	if (hlen < sizeof(struct ip)) {	/* minimum header length */
202		ipstat.ips_badhlen++;
203		goto bad;
204	}
205	if (hlen > m->m_len) {
206		if ((m = m_pullup(m, hlen)) == 0) {
207			ipstat.ips_badhlen++;
208			return;
209		}
210		ip = mtod(m, struct ip *);
211	}
212	ip->ip_sum = in_cksum(m, hlen);
213	if (ip->ip_sum) {
214		ipstat.ips_badsum++;
215		goto bad;
216	}
217
218	/*
219	 * Convert fields to host representation.
220	 */
221	NTOHS(ip->ip_len);
222	if (ip->ip_len < hlen) {
223		ipstat.ips_badlen++;
224		goto bad;
225	}
226	NTOHS(ip->ip_id);
227	NTOHS(ip->ip_off);
228
229	/*
230	 * Check that the amount of data in the buffers
231	 * is as at least much as the IP header would have us expect.
232	 * Trim mbufs if longer than we expect.
233	 * Drop packet if shorter than we expect.
234	 */
235	if (m->m_pkthdr.len < ip->ip_len) {
236		ipstat.ips_tooshort++;
237		goto bad;
238	}
239	if (m->m_pkthdr.len > ip->ip_len) {
240		if (m->m_len == m->m_pkthdr.len) {
241			m->m_len = ip->ip_len;
242			m->m_pkthdr.len = ip->ip_len;
243		} else
244			m_adj(m, ip->ip_len - m->m_pkthdr.len);
245	}
246	/*
247	 * IpHack's section.
248	 * Right now when no processing on packet has done
249	 * and it is still fresh out of network we do our black
250	 * deals with it.
251	 * - Firewall: deny/allow
252	 * - Wrap: fake packet's addr/port <unimpl.>
253	 * - Encapsulate: put it in another IP and send out. <unimp.>
254 	 */
255
256	if (ip_fw_chk_ptr &&
257	    !(*ip_fw_chk_ptr)(&ip, hlen, m->m_pkthdr.rcvif, 0, &m))
258		goto bad;
259
260	/*
261	 * Process options and, if not destined for us,
262	 * ship it on.  ip_dooptions returns 1 when an
263	 * error was detected (causing an icmp message
264	 * to be sent and the original packet to be freed).
265	 */
266	ip_nhops = 0;		/* for source routed packets */
267	if (hlen > sizeof (struct ip) && ip_dooptions(m))
268		return;
269
270        /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
271         * matter if it is destined to another node, or whether it is
272         * a multicast one, RSVP wants it! and prevents it from being forwarded
273         * anywhere else. Also checks if the rsvp daemon is running before
274	 * grabbing the packet.
275         */
276	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
277		goto ours;
278
279	/*
280	 * Check our list of addresses, to see if the packet is for us.
281	 */
282	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
283#define	satosin(sa)	((struct sockaddr_in *)(sa))
284
285		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
286			goto ours;
287		if (ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) {
288#if 0
289			u_long t;
290#endif
291
292			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
293			    ip->ip_dst.s_addr)
294				goto ours;
295			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
296				goto ours;
297#if 0 /* XXX - this should go away */
298			/*
299			 * Look for all-0's host part (old broadcast addr),
300			 * either for subnet or net.
301			 */
302			t = ntohl(ip->ip_dst.s_addr);
303			if (t == ia->ia_subnet)
304				goto ours;
305			if (t == ia->ia_net)
306				goto ours;
307#endif /* compatibility cruft */
308		}
309	}
310	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
311		struct in_multi *inm;
312		if (ip_mrouter) {
313			/*
314			 * If we are acting as a multicast router, all
315			 * incoming multicast packets are passed to the
316			 * kernel-level multicast forwarding function.
317			 * The packet is returned (relatively) intact; if
318			 * ip_mforward() returns a non-zero value, the packet
319			 * must be discarded, else it may be accepted below.
320			 *
321			 * (The IP ident field is put in the same byte order
322			 * as expected when ip_mforward() is called from
323			 * ip_output().)
324			 */
325			ip->ip_id = htons(ip->ip_id);
326			if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
327				ipstat.ips_cantforward++;
328				m_freem(m);
329				return;
330			}
331			ip->ip_id = ntohs(ip->ip_id);
332
333			/*
334			 * The process-level routing demon needs to receive
335			 * all multicast IGMP packets, whether or not this
336			 * host belongs to their destination groups.
337			 */
338			if (ip->ip_p == IPPROTO_IGMP)
339				goto ours;
340			ipstat.ips_forward++;
341		}
342		/*
343		 * See if we belong to the destination multicast group on the
344		 * arrival interface.
345		 */
346		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
347		if (inm == NULL) {
348			ipstat.ips_cantforward++;
349			m_freem(m);
350			return;
351		}
352		goto ours;
353	}
354	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
355		goto ours;
356	if (ip->ip_dst.s_addr == INADDR_ANY)
357		goto ours;
358
359	/*
360	 * Not for us; forward if possible and desirable.
361	 */
362	if (ipforwarding == 0) {
363		ipstat.ips_cantforward++;
364		m_freem(m);
365	} else
366		ip_forward(m, 0);
367	return;
368
369ours:
370
371	/*
372	 * If offset or IP_MF are set, must reassemble.
373	 * Otherwise, nothing need be done.
374	 * (We could look in the reassembly queue to see
375	 * if the packet was previously fragmented,
376	 * but it's not worth the time; just let them time out.)
377	 */
378	if (ip->ip_off &~ IP_DF) {
379		if (m->m_flags & M_EXT) {		/* XXX */
380			if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
381				ipstat.ips_toosmall++;
382				return;
383			}
384			ip = mtod(m, struct ip *);
385		}
386		/*
387		 * Look for queue of fragments
388		 * of this datagram.
389		 */
390		for (fp = ipq.next; fp != &ipq; fp = fp->next)
391			if (ip->ip_id == fp->ipq_id &&
392			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
393			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
394			    ip->ip_p == fp->ipq_p)
395				goto found;
396		fp = 0;
397found:
398
399		/*
400		 * Adjust ip_len to not reflect header,
401		 * set ip_mff if more fragments are expected,
402		 * convert offset of this to bytes.
403		 */
404		ip->ip_len -= hlen;
405		((struct ipasfrag *)ip)->ipf_mff &= ~1;
406		if (ip->ip_off & IP_MF)
407			((struct ipasfrag *)ip)->ipf_mff |= 1;
408		ip->ip_off <<= 3;
409
410		/*
411		 * If datagram marked as having more fragments
412		 * or if this is not the first fragment,
413		 * attempt reassembly; if it succeeds, proceed.
414		 */
415		if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) {
416			ipstat.ips_fragments++;
417			ip = ip_reass((struct ipasfrag *)ip, fp);
418			if (ip == 0)
419				return;
420			ipstat.ips_reassembled++;
421			m = dtom(ip);
422		} else
423			if (fp)
424				ip_freef(fp);
425	} else
426		ip->ip_len -= hlen;
427
428	/*
429	 * Switch out to protocol's input routine.
430	 */
431	ipstat.ips_delivered++;
432	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
433	return;
434bad:
435	m_freem(m);
436}
437
438/*
439 * IP software interrupt routine - to go away sometime soon
440 */
441static void
442ipintr(void)
443{
444	int s;
445	struct mbuf *m;
446
447	while(1) {
448		s = splimp();
449		IF_DEQUEUE(&ipintrq, m);
450		splx(s);
451		if (m == 0)
452			return;
453		ip_input(m);
454	}
455}
456
457NETISR_SET(NETISR_IP, ipintr);
458
459/*
460 * Take incoming datagram fragment and try to
461 * reassemble it into whole datagram.  If a chain for
462 * reassembly of this datagram already exists, then it
463 * is given as fp; otherwise have to make a chain.
464 */
465static struct ip *
466ip_reass(ip, fp)
467	register struct ipasfrag *ip;
468	register struct ipq *fp;
469{
470	register struct mbuf *m = dtom(ip);
471	register struct ipasfrag *q;
472	struct mbuf *t;
473	int hlen = ip->ip_hl << 2;
474	int i, next;
475
476	/*
477	 * Presence of header sizes in mbufs
478	 * would confuse code below.
479	 */
480	m->m_data += hlen;
481	m->m_len -= hlen;
482
483	/*
484	 * If first fragment to arrive, create a reassembly queue.
485	 */
486	if (fp == 0) {
487		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
488			goto dropfrag;
489		fp = mtod(t, struct ipq *);
490		insque(fp, &ipq);
491		fp->ipq_ttl = IPFRAGTTL;
492		fp->ipq_p = ip->ip_p;
493		fp->ipq_id = ip->ip_id;
494		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
495		fp->ipq_src = ((struct ip *)ip)->ip_src;
496		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
497		q = (struct ipasfrag *)fp;
498		goto insert;
499	}
500
501	/*
502	 * Find a segment which begins after this one does.
503	 */
504	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
505		if (q->ip_off > ip->ip_off)
506			break;
507
508	/*
509	 * If there is a preceding segment, it may provide some of
510	 * our data already.  If so, drop the data from the incoming
511	 * segment.  If it provides all of our data, drop us.
512	 */
513	if (q->ipf_prev != (struct ipasfrag *)fp) {
514		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
515		if (i > 0) {
516			if (i >= ip->ip_len)
517				goto dropfrag;
518			m_adj(dtom(ip), i);
519			ip->ip_off += i;
520			ip->ip_len -= i;
521		}
522	}
523
524	/*
525	 * While we overlap succeeding segments trim them or,
526	 * if they are completely covered, dequeue them.
527	 */
528	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
529		i = (ip->ip_off + ip->ip_len) - q->ip_off;
530		if (i < q->ip_len) {
531			q->ip_len -= i;
532			q->ip_off += i;
533			m_adj(dtom(q), i);
534			break;
535		}
536		q = q->ipf_next;
537		m_freem(dtom(q->ipf_prev));
538		ip_deq(q->ipf_prev);
539	}
540
541insert:
542	/*
543	 * Stick new segment in its place;
544	 * check for complete reassembly.
545	 */
546	ip_enq(ip, q->ipf_prev);
547	next = 0;
548	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
549		if (q->ip_off != next)
550			return (0);
551		next += q->ip_len;
552	}
553	if (q->ipf_prev->ipf_mff & 1)
554		return (0);
555
556	/*
557	 * Reassembly is complete; concatenate fragments.
558	 */
559	q = fp->ipq_next;
560	m = dtom(q);
561	t = m->m_next;
562	m->m_next = 0;
563	m_cat(m, t);
564	q = q->ipf_next;
565	while (q != (struct ipasfrag *)fp) {
566		t = dtom(q);
567		q = q->ipf_next;
568		m_cat(m, t);
569	}
570
571	/*
572	 * Create header for new ip packet by
573	 * modifying header of first packet;
574	 * dequeue and discard fragment reassembly header.
575	 * Make header visible.
576	 */
577	ip = fp->ipq_next;
578	ip->ip_len = next;
579	ip->ipf_mff &= ~1;
580	((struct ip *)ip)->ip_src = fp->ipq_src;
581	((struct ip *)ip)->ip_dst = fp->ipq_dst;
582	remque(fp);
583	(void) m_free(dtom(fp));
584	m = dtom(ip);
585	m->m_len += (ip->ip_hl << 2);
586	m->m_data -= (ip->ip_hl << 2);
587	/* some debugging cruft by sklower, below, will go away soon */
588	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
589		register int plen = 0;
590		for (t = m; m; m = m->m_next)
591			plen += m->m_len;
592		t->m_pkthdr.len = plen;
593	}
594	return ((struct ip *)ip);
595
596dropfrag:
597	ipstat.ips_fragdropped++;
598	m_freem(m);
599	return (0);
600}
601
602/*
603 * Free a fragment reassembly header and all
604 * associated datagrams.
605 */
606static void
607ip_freef(fp)
608	struct ipq *fp;
609{
610	register struct ipasfrag *q, *p;
611
612	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
613		p = q->ipf_next;
614		ip_deq(q);
615		m_freem(dtom(q));
616	}
617	remque(fp);
618	(void) m_free(dtom(fp));
619}
620
621/*
622 * Put an ip fragment on a reassembly chain.
623 * Like insque, but pointers in middle of structure.
624 */
625static void
626ip_enq(p, prev)
627	register struct ipasfrag *p, *prev;
628{
629
630	p->ipf_prev = prev;
631	p->ipf_next = prev->ipf_next;
632	prev->ipf_next->ipf_prev = p;
633	prev->ipf_next = p;
634}
635
636/*
637 * To ip_enq as remque is to insque.
638 */
639static void
640ip_deq(p)
641	register struct ipasfrag *p;
642{
643
644	p->ipf_prev->ipf_next = p->ipf_next;
645	p->ipf_next->ipf_prev = p->ipf_prev;
646}
647
648/*
649 * IP timer processing;
650 * if a timer expires on a reassembly
651 * queue, discard it.
652 */
653void
654ip_slowtimo()
655{
656	register struct ipq *fp;
657	int s = splnet();
658
659	fp = ipq.next;
660	if (fp == 0) {
661		splx(s);
662		return;
663	}
664	while (fp != &ipq) {
665		--fp->ipq_ttl;
666		fp = fp->next;
667		if (fp->prev->ipq_ttl == 0) {
668			ipstat.ips_fragtimeout++;
669			ip_freef(fp->prev);
670		}
671	}
672	splx(s);
673}
674
675/*
676 * Drain off all datagram fragments.
677 */
678void
679ip_drain()
680{
681	while (ipq.next != &ipq) {
682		ipstat.ips_fragdropped++;
683		ip_freef(ipq.next);
684	}
685
686	in_rtqdrain();
687}
688
689/*
690 * Do option processing on a datagram,
691 * possibly discarding it if bad options are encountered,
692 * or forwarding it if source-routed.
693 * Returns 1 if packet has been forwarded/freed,
694 * 0 if the packet should be processed further.
695 */
696static int
697ip_dooptions(m)
698	struct mbuf *m;
699{
700	register struct ip *ip = mtod(m, struct ip *);
701	register u_char *cp;
702	register struct ip_timestamp *ipt;
703	register struct in_ifaddr *ia;
704	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
705	struct in_addr *sin, dst;
706	n_time ntime;
707
708	dst = ip->ip_dst;
709	cp = (u_char *)(ip + 1);
710	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
711	for (; cnt > 0; cnt -= optlen, cp += optlen) {
712		opt = cp[IPOPT_OPTVAL];
713		if (opt == IPOPT_EOL)
714			break;
715		if (opt == IPOPT_NOP)
716			optlen = 1;
717		else {
718			optlen = cp[IPOPT_OLEN];
719			if (optlen <= 0 || optlen > cnt) {
720				code = &cp[IPOPT_OLEN] - (u_char *)ip;
721				goto bad;
722			}
723		}
724		switch (opt) {
725
726		default:
727			break;
728
729		/*
730		 * Source routing with record.
731		 * Find interface with current destination address.
732		 * If none on this machine then drop if strictly routed,
733		 * or do nothing if loosely routed.
734		 * Record interface address and bring up next address
735		 * component.  If strictly routed make sure next
736		 * address is on directly accessible net.
737		 */
738		case IPOPT_LSRR:
739		case IPOPT_SSRR:
740			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
741				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
742				goto bad;
743			}
744			ipaddr.sin_addr = ip->ip_dst;
745			ia = (struct in_ifaddr *)
746				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
747			if (ia == 0) {
748				if (opt == IPOPT_SSRR) {
749					type = ICMP_UNREACH;
750					code = ICMP_UNREACH_SRCFAIL;
751					goto bad;
752				}
753				/*
754				 * Loose routing, and not at next destination
755				 * yet; nothing to do except forward.
756				 */
757				break;
758			}
759			off--;			/* 0 origin */
760			if (off > optlen - sizeof(struct in_addr)) {
761				/*
762				 * End of source route.  Should be for us.
763				 */
764				save_rte(cp, ip->ip_src);
765				break;
766			}
767
768			if (!ip_dosourceroute) {
769				char buf[4*sizeof "123"];
770				strcpy(buf, inet_ntoa(ip->ip_dst));
771
772				log(LOG_WARNING,
773				    "attempted source route from %s to %s\n",
774				    inet_ntoa(ip->ip_src), buf);
775				type = ICMP_UNREACH;
776				code = ICMP_UNREACH_SRCFAIL;
777				goto bad;
778			}
779
780			/*
781			 * locate outgoing interface
782			 */
783			(void)memcpy(&ipaddr.sin_addr, cp + off,
784			    sizeof(ipaddr.sin_addr));
785
786			if (opt == IPOPT_SSRR) {
787#define	INA	struct in_ifaddr *
788#define	SA	struct sockaddr *
789			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
790				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
791			} else
792				ia = ip_rtaddr(ipaddr.sin_addr);
793			if (ia == 0) {
794				type = ICMP_UNREACH;
795				code = ICMP_UNREACH_SRCFAIL;
796				goto bad;
797			}
798			ip->ip_dst = ipaddr.sin_addr;
799			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
800			    sizeof(struct in_addr));
801			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
802			/*
803			 * Let ip_intr's mcast routing check handle mcast pkts
804			 */
805			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
806			break;
807
808		case IPOPT_RR:
809			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
810				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
811				goto bad;
812			}
813			/*
814			 * If no space remains, ignore.
815			 */
816			off--;			/* 0 origin */
817			if (off > optlen - sizeof(struct in_addr))
818				break;
819			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
820			    sizeof(ipaddr.sin_addr));
821			/*
822			 * locate outgoing interface; if we're the destination,
823			 * use the incoming interface (should be same).
824			 */
825			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
826			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
827				type = ICMP_UNREACH;
828				code = ICMP_UNREACH_HOST;
829				goto bad;
830			}
831			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
832			    sizeof(struct in_addr));
833			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
834			break;
835
836		case IPOPT_TS:
837			code = cp - (u_char *)ip;
838			ipt = (struct ip_timestamp *)cp;
839			if (ipt->ipt_len < 5)
840				goto bad;
841			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
842				if (++ipt->ipt_oflw == 0)
843					goto bad;
844				break;
845			}
846			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
847			switch (ipt->ipt_flg) {
848
849			case IPOPT_TS_TSONLY:
850				break;
851
852			case IPOPT_TS_TSANDADDR:
853				if (ipt->ipt_ptr + sizeof(n_time) +
854				    sizeof(struct in_addr) > ipt->ipt_len)
855					goto bad;
856				ipaddr.sin_addr = dst;
857				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
858							    m->m_pkthdr.rcvif);
859				if (ia == 0)
860					continue;
861				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
862				    sizeof(struct in_addr));
863				ipt->ipt_ptr += sizeof(struct in_addr);
864				break;
865
866			case IPOPT_TS_PRESPEC:
867				if (ipt->ipt_ptr + sizeof(n_time) +
868				    sizeof(struct in_addr) > ipt->ipt_len)
869					goto bad;
870				(void)memcpy(&ipaddr.sin_addr, sin,
871				    sizeof(struct in_addr));
872				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
873					continue;
874				ipt->ipt_ptr += sizeof(struct in_addr);
875				break;
876
877			default:
878				goto bad;
879			}
880			ntime = iptime();
881			(void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
882			    sizeof(n_time));
883			ipt->ipt_ptr += sizeof(n_time);
884		}
885	}
886	if (forward) {
887		ip_forward(m, 1);
888		return (1);
889	}
890	return (0);
891bad:
892	ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
893	icmp_error(m, type, code, 0, 0);
894	ipstat.ips_badoptions++;
895	return (1);
896}
897
898/*
899 * Given address of next destination (final or next hop),
900 * return internet address info of interface to be used to get there.
901 */
902static struct in_ifaddr *
903ip_rtaddr(dst)
904	 struct in_addr dst;
905{
906	register struct sockaddr_in *sin;
907
908	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
909
910	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
911		if (ipforward_rt.ro_rt) {
912			RTFREE(ipforward_rt.ro_rt);
913			ipforward_rt.ro_rt = 0;
914		}
915		sin->sin_family = AF_INET;
916		sin->sin_len = sizeof(*sin);
917		sin->sin_addr = dst;
918
919		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
920	}
921	if (ipforward_rt.ro_rt == 0)
922		return ((struct in_ifaddr *)0);
923	return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
924}
925
926/*
927 * Save incoming source route for use in replies,
928 * to be picked up later by ip_srcroute if the receiver is interested.
929 */
930void
931save_rte(option, dst)
932	u_char *option;
933	struct in_addr dst;
934{
935	unsigned olen;
936
937	olen = option[IPOPT_OLEN];
938#ifdef DIAGNOSTIC
939	if (ipprintfs)
940		printf("save_rte: olen %d\n", olen);
941#endif
942	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
943		return;
944	(void)memcpy(ip_srcrt.srcopt, option, olen);
945	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
946	ip_srcrt.dst = dst;
947}
948
949/*
950 * Retrieve incoming source route for use in replies,
951 * in the same form used by setsockopt.
952 * The first hop is placed before the options, will be removed later.
953 */
954struct mbuf *
955ip_srcroute()
956{
957	register struct in_addr *p, *q;
958	register struct mbuf *m;
959
960	if (ip_nhops == 0)
961		return ((struct mbuf *)0);
962	m = m_get(M_DONTWAIT, MT_SOOPTS);
963	if (m == 0)
964		return ((struct mbuf *)0);
965
966#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
967
968	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
969	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
970	    OPTSIZ;
971#ifdef DIAGNOSTIC
972	if (ipprintfs)
973		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
974#endif
975
976	/*
977	 * First save first hop for return route
978	 */
979	p = &ip_srcrt.route[ip_nhops - 1];
980	*(mtod(m, struct in_addr *)) = *p--;
981#ifdef DIAGNOSTIC
982	if (ipprintfs)
983		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
984#endif
985
986	/*
987	 * Copy option fields and padding (nop) to mbuf.
988	 */
989	ip_srcrt.nop = IPOPT_NOP;
990	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
991	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
992	    &ip_srcrt.nop, OPTSIZ);
993	q = (struct in_addr *)(mtod(m, caddr_t) +
994	    sizeof(struct in_addr) + OPTSIZ);
995#undef OPTSIZ
996	/*
997	 * Record return path as an IP source route,
998	 * reversing the path (pointers are now aligned).
999	 */
1000	while (p >= ip_srcrt.route) {
1001#ifdef DIAGNOSTIC
1002		if (ipprintfs)
1003			printf(" %lx", ntohl(q->s_addr));
1004#endif
1005		*q++ = *p--;
1006	}
1007	/*
1008	 * Last hop goes to final destination.
1009	 */
1010	*q = ip_srcrt.dst;
1011#ifdef DIAGNOSTIC
1012	if (ipprintfs)
1013		printf(" %lx\n", ntohl(q->s_addr));
1014#endif
1015	return (m);
1016}
1017
1018/*
1019 * Strip out IP options, at higher
1020 * level protocol in the kernel.
1021 * Second argument is buffer to which options
1022 * will be moved, and return value is their length.
1023 * XXX should be deleted; last arg currently ignored.
1024 */
1025void
1026ip_stripoptions(m, mopt)
1027	register struct mbuf *m;
1028	struct mbuf *mopt;
1029{
1030	register int i;
1031	struct ip *ip = mtod(m, struct ip *);
1032	register caddr_t opts;
1033	int olen;
1034
1035	olen = (ip->ip_hl<<2) - sizeof (struct ip);
1036	opts = (caddr_t)(ip + 1);
1037	i = m->m_len - (sizeof (struct ip) + olen);
1038	bcopy(opts + olen, opts, (unsigned)i);
1039	m->m_len -= olen;
1040	if (m->m_flags & M_PKTHDR)
1041		m->m_pkthdr.len -= olen;
1042	ip->ip_hl = sizeof(struct ip) >> 2;
1043}
1044
/*
 * Error number for each protocol control command, indexed by command
 * number; 0 means no error is associated with that command.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		/*  0 */
	0,		/*  1 */
	0,		/*  2 */
	0,		/*  3 */
	0,		/*  4 */
	EMSGSIZE,	/*  5 */
	EHOSTDOWN,	/*  6 */
	EHOSTUNREACH,	/*  7 */
	EHOSTUNREACH,	/*  8 */
	EHOSTUNREACH,	/*  9 */
	ECONNREFUSED,	/* 10 */
	ECONNREFUSED,	/* 11 */
	EMSGSIZE,	/* 12 */
	EHOSTUNREACH,	/* 13 */
	0,		/* 14 */
	0,		/* 15 */
	0,		/* 16 */
	0,		/* 17 */
	0,		/* 18 */
	0,		/* 19 */
	ENOPROTOOPT	/* 20 */
};
1053
1054/*
1055 * Forward a packet.  If some error occurs return the sender
1056 * an icmp packet.  Note we can't always generate a meaningful
1057 * icmp message because icmp doesn't have a large enough repertoire
1058 * of codes and types.
1059 *
1060 * If not forwarding, just drop the packet.  This could be confusing
1061 * if ipforwarding was zero but some routing protocol was advancing
1062 * us as a gateway to somewhere.  However, we must let the routing
1063 * protocol deal with that.
1064 *
1065 * The srcrt parameter indicates whether the packet is being forwarded
1066 * via a source route.
1067 */
1068static void
1069ip_forward(m, srcrt)
1070	struct mbuf *m;
1071	int srcrt;
1072{
1073	register struct ip *ip = mtod(m, struct ip *);
1074	register struct sockaddr_in *sin;
1075	register struct rtentry *rt;
1076	int error, type = 0, code = 0;
1077	struct mbuf *mcopy;
1078	n_long dest;
1079	struct ifnet *destifp;
1080
1081	dest = 0;
1082#ifdef DIAGNOSTIC
1083	if (ipprintfs)
1084		printf("forward: src %lx dst %lx ttl %x\n",
1085			ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_ttl);
1086#endif
1087
1088
1089	if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
1090		ipstat.ips_cantforward++;
1091		m_freem(m);
1092		return;
1093	}
1094	HTONS(ip->ip_id);
1095	if (ip->ip_ttl <= IPTTLDEC) {
1096		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1097		return;
1098	}
1099	ip->ip_ttl -= IPTTLDEC;
1100
1101	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
1102	if ((rt = ipforward_rt.ro_rt) == 0 ||
1103	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1104		if (ipforward_rt.ro_rt) {
1105			RTFREE(ipforward_rt.ro_rt);
1106			ipforward_rt.ro_rt = 0;
1107		}
1108		sin->sin_family = AF_INET;
1109		sin->sin_len = sizeof(*sin);
1110		sin->sin_addr = ip->ip_dst;
1111
1112		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1113		if (ipforward_rt.ro_rt == 0) {
1114			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1115			return;
1116		}
1117		rt = ipforward_rt.ro_rt;
1118	}
1119
1120	/*
1121	 * Save at most 64 bytes of the packet in case
1122	 * we need to generate an ICMP message to the src.
1123	 */
1124	mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1125
1126	/*
1127	 * If forwarding packet using same interface that it came in on,
1128	 * perhaps should send a redirect to sender to shortcut a hop.
1129	 * Only send redirect if source is sending directly to us,
1130	 * and if packet was not source routed (or has any options).
1131	 * Also, don't send redirect if forwarding using a default route
1132	 * or a route modified by a redirect.
1133	 */
1134#define	satosin(sa)	((struct sockaddr_in *)(sa))
1135	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1136	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1137	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1138	    ipsendredirects && !srcrt) {
1139#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1140		u_long src = ntohl(ip->ip_src.s_addr);
1141
1142		if (RTA(rt) &&
1143		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1144		    if (rt->rt_flags & RTF_GATEWAY)
1145			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1146		    else
1147			dest = ip->ip_dst.s_addr;
1148		    /* Router requirements says to only send host redirects */
1149		    type = ICMP_REDIRECT;
1150		    code = ICMP_REDIRECT_HOST;
1151#ifdef DIAGNOSTIC
1152		    if (ipprintfs)
1153		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1154#endif
1155		}
1156	}
1157
1158	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1159			  IP_FORWARDING, 0);
1160	if (error)
1161		ipstat.ips_cantforward++;
1162	else {
1163		ipstat.ips_forward++;
1164		if (type)
1165			ipstat.ips_redirectsent++;
1166		else {
1167			if (mcopy)
1168				m_freem(mcopy);
1169			return;
1170		}
1171	}
1172	if (mcopy == NULL)
1173		return;
1174	destifp = NULL;
1175
1176	switch (error) {
1177
1178	case 0:				/* forwarded, but need redirect */
1179		/* type, code set above */
1180		break;
1181
1182	case ENETUNREACH:		/* shouldn't happen, checked above */
1183	case EHOSTUNREACH:
1184	case ENETDOWN:
1185	case EHOSTDOWN:
1186	default:
1187		type = ICMP_UNREACH;
1188		code = ICMP_UNREACH_HOST;
1189		break;
1190
1191	case EMSGSIZE:
1192		type = ICMP_UNREACH;
1193		code = ICMP_UNREACH_NEEDFRAG;
1194		if (ipforward_rt.ro_rt)
1195			destifp = ipforward_rt.ro_rt->rt_ifp;
1196		ipstat.ips_cantfrag++;
1197		break;
1198
1199	case ENOBUFS:
1200		type = ICMP_SOURCEQUENCH;
1201		code = 0;
1202		break;
1203	}
1204	icmp_error(mcopy, type, code, dest, destifp);
1205}
1206
1207int
1208ip_rsvp_init(struct socket *so)
1209{
1210	if (so->so_type != SOCK_RAW ||
1211	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1212	  return EOPNOTSUPP;
1213
1214	if (ip_rsvpd != NULL)
1215	  return EADDRINUSE;
1216
1217	ip_rsvpd = so;
1218	/*
1219	 * This may seem silly, but we need to be sure we don't over-increment
1220	 * the RSVP counter, in case something slips up.
1221	 */
1222	if (!ip_rsvp_on) {
1223		ip_rsvp_on = 1;
1224		rsvp_on++;
1225	}
1226
1227	return 0;
1228}
1229
1230int
1231ip_rsvp_done(void)
1232{
1233	ip_rsvpd = NULL;
1234	/*
1235	 * This may seem silly, but we need to be sure we don't over-decrement
1236	 * the RSVP counter, in case something slips up.
1237	 */
1238	if (ip_rsvp_on) {
1239		ip_rsvp_on = 0;
1240		rsvp_on--;
1241	}
1242	return 0;
1243}
1244