ip_reass.c revision 9209
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34 * $Id: ip_input.c,v 1.21 1995/05/11 00:13:18 wollman Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/domain.h>
42#include <sys/protosw.h>
43#include <sys/socket.h>
44#include <sys/errno.h>
45#include <sys/time.h>
46#include <sys/kernel.h>
47#include <sys/syslog.h>
48
49#include <vm/vm.h>
50#include <sys/sysctl.h>
51
52#include <net/if.h>
53#include <net/route.h>
54#include <net/netisr.h>
55
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/in_var.h>
59#include <netinet/ip.h>
60#include <netinet/in_pcb.h>
61#include <netinet/in_var.h>
62#include <netinet/ip_var.h>
63#include <netinet/ip_icmp.h>
64
65#include <netinet/ip_fw.h>
66
67#include <sys/socketvar.h>
/*
 * RSVP state.  rsvp_on is tested by ipintr() to decide whether RSVP
 * packets are claimed locally; ip_rsvp_on and ip_rsvpd are maintained
 * by ip_rsvp_init() and ip_rsvp_done().
 */
68int rsvp_on = 0;
69int ip_rsvp_on;
70struct socket *ip_rsvpd;
71
/*
 * Compile-time defaults for the tunables below: forwarding defaults to
 * on only when GATEWAY is defined, sending redirects defaults to on.
 */
72#ifndef	IPFORWARDING
73#ifdef GATEWAY
74#define	IPFORWARDING	1	/* forward IP packets not for us */
75#else /* GATEWAY */
76#define	IPFORWARDING	0	/* don't forward IP packets not for us */
77#endif /* GATEWAY */
78#endif /* IPFORWARDING */
79#ifndef	IPSENDREDIRECTS
80#define	IPSENDREDIRECTS	1
81#endif
/* Run-time tunables; each of these is exported through ip_sysctl(). */
82int	ipforwarding = IPFORWARDING;
83int	ipsendredirects = IPSENDREDIRECTS;
84int	ip_defttl = IPDEFTTL;
85int	ip_dosourceroute = 0;
86#ifdef DIAGNOSTIC
/* When non-zero, enables the debugging printf()s in this file. */
87int	ipprintfs = 0;
88#endif
89
90extern	struct domain inetdomain;
91extern	struct protosw inetsw[];
/* Maps an IP protocol number to an index into inetsw[]; filled by ip_init(). */
92u_char	ip_protox[IPPROTO_MAX];
93int	ipqmaxlen = IFQ_MAXLEN;
94struct	in_ifaddr *in_ifaddr;			/* first inet address */
/* Queue of received packets that ipintr() drains. */
95struct	ifqueue ipintrq;
96
97struct ipstat ipstat;
/* Head of the circular list of fragment reassembly queues (see ip_init()). */
98struct ipq ipq;
99
100/*
101 * We need to save the IP options in case a protocol wants to respond
102 * to an incoming packet over the same route if the packet got here
103 * using IP source routing.  This allows connection establishment and
104 * maintenance when the remote end is on a network that is not known
105 * to us.
106 */
/*
 * ip_nhops is reset by ipintr() for every packet and set by save_rte();
 * ip_srcrt holds the saved option and is read back by ip_srcroute().
 */
107int	ip_nhops = 0;
108static	struct ip_srcrt {
109	struct	in_addr dst;			/* final destination */
110	char	nop;				/* one NOP to align */
111	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
112	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
113} ip_srcrt;
114
115static void save_rte __P((u_char *, struct in_addr));
116/*
117 * IP initialization: fill in IP protocol switch table.
118 * All protocols not implemented in kernel go to raw IP protocol handler.
119 */
120void
121ip_init()
122{
123	register struct protosw *pr;
124	register int i;
125
126	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
127	if (pr == 0)
128		panic("ip_init");
129	for (i = 0; i < IPPROTO_MAX; i++)
130		ip_protox[i] = pr - inetsw;
131	for (pr = inetdomain.dom_protosw;
132	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
133		if (pr->pr_domain->dom_family == PF_INET &&
134		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
135			ip_protox[pr->pr_protocol] = pr - inetsw;
136	ipq.next = ipq.prev = &ipq;
137	ip_id = time.tv_sec & 0xffff;
138	ipintrq.ifq_maxlen = ipqmaxlen;
139}
140
/*
 * ipaddr is a scratch AF_INET sockaddr whose sin_addr is overwritten by
 * ip_dooptions() before each interface lookup.  ipforward_rt is the
 * single cached route shared by ip_forward() and ip_rtaddr().
 */
141struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
142struct	route ipforward_rt;
143
144/*
 * Ip input routine.  Checksum and byte swap header.  If fragmented
 * try to reassemble.  Process options.  Pass to next level.
 *
 * Drains ipintrq one packet at a time and returns when it is empty.
 * For each packet it:
 *   - validates the version, header length and header checksum;
 *   - converts ip_len, ip_id and ip_off to host byte order and checks
 *     ip_len against the amount of data actually received;
 *   - consults the firewall hook (ip_fw_chk_ptr) if one is installed;
 *   - processes IP options when the header is longer than the minimum;
 *   - decides whether the packet is for this host (local address,
 *     broadcast forms, multicast membership, INADDR_BROADCAST,
 *     INADDR_ANY, or RSVP when rsvp_on is set); otherwise it is
 *     forwarded when ipforwarding is non-zero and dropped when not;
 *   - for local packets, calls the accounting hook, reassembles
 *     fragments, and hands the datagram to the protocol input routine
 *     selected through ip_protox[] and inetsw[].
 *
 * Takes no arguments and returns nothing; outcomes are recorded in the
 * ipstat counters.
 */
148void
149ipintr(void)
150{
151	register struct ip *ip;
152	register struct mbuf *m;
153	register struct ipq *fp;
154	register struct in_ifaddr *ia;
155	int hlen, s;
156
157next:
158	/*
159	 * Get next datagram off input queue and get IP header
160	 * in first mbuf.
161	 */
162	s = splimp();
163	IF_DEQUEUE(&ipintrq, m);
164	splx(s);
165	if (m == 0)
166		return;
167#ifdef	DIAGNOSTIC
168	if ((m->m_flags & M_PKTHDR) == 0)
169		panic("ipintr no HDR");
170#endif
171	/*
172	 * If no IP addresses have been set yet but the interfaces
173	 * are receiving, can't do anything with incoming packets yet.
174	 */
175	if (in_ifaddr == NULL)
176		goto bad;
177	ipstat.ips_total++;
	/*
	 * On m_pullup() failure the code goes to "next" rather than "bad",
	 * i.e. it does not free m itself -- presumably m_pullup() has
	 * already released the chain (TODO confirm against mbuf code).
	 */
178	if (m->m_len < sizeof (struct ip) &&
179	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
180		ipstat.ips_toosmall++;
181		goto next;
182	}
183	ip = mtod(m, struct ip *);
184	if (ip->ip_v != IPVERSION) {
185		ipstat.ips_badvers++;
186		goto bad;
187	}
188	hlen = ip->ip_hl << 2;
189	if (hlen < sizeof(struct ip)) {	/* minimum header length */
190		ipstat.ips_badhlen++;
191		goto bad;
192	}
193	if (hlen > m->m_len) {
194		if ((m = m_pullup(m, hlen)) == 0) {
195			ipstat.ips_badhlen++;
196			goto next;
197		}
198		ip = mtod(m, struct ip *);
199	}
	/*
	 * The checksum result overwrites ip_sum; any non-zero result is
	 * counted as a bad checksum and the packet is dropped.
	 */
200	ip->ip_sum = in_cksum(m, hlen);
201	if (ip->ip_sum) {
202		ipstat.ips_badsum++;
203		goto bad;
204	}
205
206	/*
207	 * Convert fields to host representation.
208	 */
209	NTOHS(ip->ip_len);
210	if (ip->ip_len < hlen) {
211		ipstat.ips_badlen++;
212		goto bad;
213	}
214	NTOHS(ip->ip_id);
215	NTOHS(ip->ip_off);
216
217	/*
218	 * Check that the amount of data in the buffers
219	 * is as at least much as the IP header would have us expect.
220	 * Trim mbufs if longer than we expect.
221	 * Drop packet if shorter than we expect.
222	 */
223	if (m->m_pkthdr.len < ip->ip_len) {
224		ipstat.ips_tooshort++;
225		goto bad;
226	}
227	if (m->m_pkthdr.len > ip->ip_len) {
228		if (m->m_len == m->m_pkthdr.len) {
229			m->m_len = ip->ip_len;
230			m->m_pkthdr.len = ip->ip_len;
231		} else
			/*
			 * The count passed here is negative; presumably
			 * m_adj() then trims from the tail of the chain
			 * (TODO confirm).
			 */
232			m_adj(m, ip->ip_len - m->m_pkthdr.len);
233	}
234	/*
235	 * IpHack's section.
236	 * Right now when no processing on packet has done
237	 * and it is still fresh out of network we do our black
238	 * deals with it.
239	 * - Firewall: deny/allow
240	 * - Wrap: fake packet's addr/port <unimpl.>
241	 * - Encapsulate: put it in another IP and send out. <unimp.>
242 	 */
243
	/* A zero return from the firewall hook means "drop this packet". */
244        if (ip_fw_chk_ptr!=NULL)
245               if (!(*ip_fw_chk_ptr)(ip,m->m_pkthdr.rcvif,ip_fw_chain) ) {
246                       goto bad;
247               }
248
249	/*
250	 * Process options and, if not destined for us,
251	 * ship it on.  ip_dooptions returns 1 when an
252	 * error was detected (causing an icmp message
253	 * to be sent and the original packet to be freed).
254	 */
255	ip_nhops = 0;		/* for source routed packets */
256	if (hlen > sizeof (struct ip) && ip_dooptions(m))
257		goto next;
258
259        /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
260         * matter if it is destined to another node, or whether it is
261         * a multicast one, RSVP wants it! and prevents it from being forwarded
262         * anywhere else. Also checks if the rsvp daemon is running before
263	 * grabbing the packet.
264         */
265	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
266		goto ours;
267
268	/*
269	 * Check our list of addresses, to see if the packet is for us.
270	 */
271	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
272#define	satosin(sa)	((struct sockaddr_in *)(sa))
273
274		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
275			goto ours;
		/*
		 * With DIRECTED_BROADCAST defined, the broadcast forms below
		 * are only matched against the receiving interface; without
		 * it, any broadcast-capable interface's addresses match.
		 */
276		if (
277#ifdef	DIRECTED_BROADCAST
278		    ia->ia_ifp == m->m_pkthdr.rcvif &&
279#endif
280		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
281			u_long t;
282
283			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
284			    ip->ip_dst.s_addr)
285				goto ours;
286			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
287				goto ours;
288			/*
289			 * Look for all-0's host part (old broadcast addr),
290			 * either for subnet or net.
291			 */
292			t = ntohl(ip->ip_dst.s_addr);
293			if (t == ia->ia_subnet)
294				goto ours;
295			if (t == ia->ia_net)
296				goto ours;
297		}
298	}
299	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
300		struct in_multi *inm;
301		if (ip_mrouter) {
302			/*
303			 * If we are acting as a multicast router, all
304			 * incoming multicast packets are passed to the
305			 * kernel-level multicast forwarding function.
306			 * The packet is returned (relatively) intact; if
307			 * ip_mforward() returns a non-zero value, the packet
308			 * must be discarded, else it may be accepted below.
309			 *
310			 * (The IP ident field is put in the same byte order
311			 * as expected when ip_mforward() is called from
312			 * ip_output().)
313			 */
314			ip->ip_id = htons(ip->ip_id);
315			if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
316				ipstat.ips_cantforward++;
317				m_freem(m);
318				goto next;
319			}
320			ip->ip_id = ntohs(ip->ip_id);
321
322			/*
323			 * The process-level routing demon needs to receive
324			 * all multicast IGMP packets, whether or not this
325			 * host belongs to their destination groups.
326			 */
327			if (ip->ip_p == IPPROTO_IGMP)
328				goto ours;
329			ipstat.ips_forward++;
330		}
331		/*
332		 * See if we belong to the destination multicast group on the
333		 * arrival interface.
334		 */
335		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
336		if (inm == NULL) {
337			ipstat.ips_cantforward++;
338			m_freem(m);
339			goto next;
340		}
341		goto ours;
342	}
343	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
344		goto ours;
345	if (ip->ip_dst.s_addr == INADDR_ANY)
346		goto ours;
347
348	/*
349	 * Not for us; forward if possible and desirable.
350	 */
351	if (ipforwarding == 0) {
352		ipstat.ips_cantforward++;
353		m_freem(m);
354	} else
355		ip_forward(m, 0);
356	goto next;
357
358ours:
359
360		/*
361		 * If packet came to us we count it...
362		 * This way we count all incoming packets which has
363		 * not been forwarded...
364		 * Do not convert ip_len to host byte order when
365		 * counting,ppl already made it for us before..
366		 */
367	if (ip_acct_cnt_ptr!=NULL)
368		(*ip_acct_cnt_ptr)(ip,m->m_pkthdr.rcvif,ip_acct_chain,0);
369
370	/*
371	 * If offset or IP_MF are set, must reassemble.
372	 * Otherwise, nothing need be done.
373	 * (We could look in the reassembly queue to see
374	 * if the packet was previously fragmented,
375	 * but it's not worth the time; just let them time out.)
376	 */
377	if (ip->ip_off &~ IP_DF) {
378		if (m->m_flags & M_EXT) {		/* XXX */
379			if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
380				ipstat.ips_toosmall++;
381				goto next;
382			}
383			ip = mtod(m, struct ip *);
384		}
385		/*
386		 * Look for queue of fragments
387		 * of this datagram.
388		 */
		/* A queue matches on id, source, destination and protocol. */
389		for (fp = ipq.next; fp != &ipq; fp = fp->next)
390			if (ip->ip_id == fp->ipq_id &&
391			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
392			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
393			    ip->ip_p == fp->ipq_p)
394				goto found;
395		fp = 0;
396found:
397
398		/*
399		 * Adjust ip_len to not reflect header,
400		 * set ip_mff if more fragments are expected,
401		 * convert offset of this to bytes.
402		 */
403		ip->ip_len -= hlen;
404		((struct ipasfrag *)ip)->ipf_mff &= ~1;
405		if (ip->ip_off & IP_MF)
406			((struct ipasfrag *)ip)->ipf_mff |= 1;
407		ip->ip_off <<= 3;
408
409		/*
410		 * If datagram marked as having more fragments
411		 * or if this is not the first fragment,
412		 * attempt reassembly; if it succeeds, proceed.
413		 */
414		if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) {
415			ipstat.ips_fragments++;
416			ip = ip_reass((struct ipasfrag *)ip, fp);
417			if (ip == 0)
418				goto next;
419			ipstat.ips_reassembled++;
420			m = dtom(ip);
421		} else
			/*
			 * A complete datagram arrived while a reassembly
			 * queue with the same key exists: discard the queue.
			 */
422			if (fp)
423				ip_freef(fp);
424	} else
		/* Not a fragment: strip the header length as done above. */
425		ip->ip_len -= hlen;
426
427	/*
428	 * Switch out to protocol's input routine.
429	 */
430	ipstat.ips_delivered++;
431	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
432	goto next;
433bad:
434	m_freem(m);
435	goto next;
436}
437
/*
 * Associates ipintr() with NETISR_IP.  NETISR_SET is defined outside
 * this file; presumably it registers the software-interrupt handler
 * (TODO confirm against net/netisr.h).
 */
438NETISR_SET(NETISR_IP, ipintr);
439
440/*
 * Take incoming datagram fragment and try to
 * reassemble it into whole datagram.  If a chain for
 * reassembly of this datagram already exists, then it
 * is given as fp; otherwise have to make a chain.
 *
 * ip - the fragment's header, viewed as a struct ipasfrag.  As set up
 *      by ipintr(), ip_len no longer includes the header, ip_off is
 *      in bytes, and bit 0 of ipf_mff is set when more fragments
 *      follow.
 * fp - the existing reassembly queue for this datagram, or 0.
 *
 * Returns the header of the reassembled datagram when this fragment
 * completes it, otherwise 0 (fragment queued, or dropped and counted
 * in ips_fragdropped).
 *
 * NOTE(review): nothing visible in this function bounds the total
 * length accumulated in "next" before it is stored into ip_len;
 * confirm that oversized reassemblies are rejected elsewhere.
 */
446struct ip *
447ip_reass(ip, fp)
448	register struct ipasfrag *ip;
449	register struct ipq *fp;
450{
451	register struct mbuf *m = dtom(ip);
452	register struct ipasfrag *q;
453	struct mbuf *t;
454	int hlen = ip->ip_hl << 2;
455	int i, next;
456
457	/*
458	 * Presence of header sizes in mbufs
459	 * would confuse code below.
460	 */
461	m->m_data += hlen;
462	m->m_len -= hlen;
463
464	/*
465	 * If first fragment to arrive, create a reassembly queue.
466	 */
467	if (fp == 0) {
		/* The queue header itself lives in a freshly allocated mbuf. */
468		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
469			goto dropfrag;
470		fp = mtod(t, struct ipq *);
471		insque(fp, &ipq);
472		fp->ipq_ttl = IPFRAGTTL;
473		fp->ipq_p = ip->ip_p;
474		fp->ipq_id = ip->ip_id;
475		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
476		fp->ipq_src = ((struct ip *)ip)->ip_src;
477		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
478		q = (struct ipasfrag *)fp;
479		goto insert;
480	}
481
482	/*
483	 * Find a segment which begins after this one does.
484	 */
485	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
486		if (q->ip_off > ip->ip_off)
487			break;
488
489	/*
490	 * If there is a preceding segment, it may provide some of
491	 * our data already.  If so, drop the data from the incoming
492	 * segment.  If it provides all of our data, drop us.
493	 */
494	if (q->ipf_prev != (struct ipasfrag *)fp) {
		/* i = number of leading bytes already held by the predecessor. */
495		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
496		if (i > 0) {
497			if (i >= ip->ip_len)
498				goto dropfrag;
499			m_adj(dtom(ip), i);
500			ip->ip_off += i;
501			ip->ip_len -= i;
502		}
503	}
504
505	/*
506	 * While we overlap succeeding segments trim them or,
507	 * if they are completely covered, dequeue them.
508	 */
509	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
510		i = (ip->ip_off + ip->ip_len) - q->ip_off;
511		if (i < q->ip_len) {
512			q->ip_len -= i;
513			q->ip_off += i;
514			m_adj(dtom(q), i);
515			break;
516		}
		/* Advance first, so the covered fragment can be freed safely. */
517		q = q->ipf_next;
518		m_freem(dtom(q->ipf_prev));
519		ip_deq(q->ipf_prev);
520	}
521
522insert:
523	/*
524	 * Stick new segment in its place;
525	 * check for complete reassembly.
526	 */
527	ip_enq(ip, q->ipf_prev);
528	next = 0;
	/* Any gap between consecutive fragments means we are not done yet. */
529	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
530		if (q->ip_off != next)
531			return (0);
532		next += q->ip_len;
533	}
	/* The last queued fragment still announces more to come. */
534	if (q->ipf_prev->ipf_mff & 1)
535		return (0);
536
537	/*
538	 * Reassembly is complete; concatenate fragments.
539	 */
540	q = fp->ipq_next;
541	m = dtom(q);
542	t = m->m_next;
543	m->m_next = 0;
544	m_cat(m, t);
545	q = q->ipf_next;
546	while (q != (struct ipasfrag *)fp) {
547		t = dtom(q);
548		q = q->ipf_next;
549		m_cat(m, t);
550	}
551
552	/*
553	 * Create header for new ip packet by
554	 * modifying header of first packet;
555	 * dequeue and discard fragment reassembly header.
556	 * Make header visible.
557	 */
558	ip = fp->ipq_next;
559	ip->ip_len = next;
560	ip->ipf_mff &= ~1;
561	((struct ip *)ip)->ip_src = fp->ipq_src;
562	((struct ip *)ip)->ip_dst = fp->ipq_dst;
563	remque(fp);
564	(void) m_free(dtom(fp));
565	m = dtom(ip);
	/* Undo the m_data/m_len adjustment made on entry for this fragment. */
566	m->m_len += (ip->ip_hl << 2);
567	m->m_data -= (ip->ip_hl << 2);
568	/* some debugging cruft by sklower, below, will go away soon */
569	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
570		register int plen = 0;
571		for (t = m; m; m = m->m_next)
572			plen += m->m_len;
573		t->m_pkthdr.len = plen;
574	}
575	return ((struct ip *)ip);
576
577dropfrag:
578	ipstat.ips_fragdropped++;
579	m_freem(m);
580	return (0);
581}
582
583/*
584 * Free a fragment reassembly header and all
585 * associated datagrams.
586 */
587void
588ip_freef(fp)
589	struct ipq *fp;
590{
591	register struct ipasfrag *q, *p;
592
593	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
594		p = q->ipf_next;
595		ip_deq(q);
596		m_freem(dtom(q));
597	}
598	remque(fp);
599	(void) m_free(dtom(fp));
600}
601
602/*
603 * Put an ip fragment on a reassembly chain.
604 * Like insque, but pointers in middle of structure.
605 */
606void
607ip_enq(p, prev)
608	register struct ipasfrag *p, *prev;
609{
610
611	p->ipf_prev = prev;
612	p->ipf_next = prev->ipf_next;
613	prev->ipf_next->ipf_prev = p;
614	prev->ipf_next = p;
615}
616
617/*
618 * To ip_enq as remque is to insque.
619 */
620void
621ip_deq(p)
622	register struct ipasfrag *p;
623{
624
625	p->ipf_prev->ipf_next = p->ipf_next;
626	p->ipf_next->ipf_prev = p->ipf_prev;
627}
628
629/*
630 * IP timer processing;
631 * if a timer expires on a reassembly
632 * queue, discard it.
633 */
634void
635ip_slowtimo()
636{
637	register struct ipq *fp;
638	int s = splnet();
639
640	fp = ipq.next;
641	if (fp == 0) {
642		splx(s);
643		return;
644	}
645	while (fp != &ipq) {
646		--fp->ipq_ttl;
647		fp = fp->next;
648		if (fp->prev->ipq_ttl == 0) {
649			ipstat.ips_fragtimeout++;
650			ip_freef(fp->prev);
651		}
652	}
653	splx(s);
654}
655
656/*
657 * Drain off all datagram fragments.
658 */
659void
660ip_drain()
661{
662
663	while (ipq.next != &ipq) {
664		ipstat.ips_fragdropped++;
665		ip_freef(ipq.next);
666	}
667}
668
669/*
670 * Do option processing on a datagram,
671 * possibly discarding it if bad options are encountered,
672 * or forwarding it if source-routed.
673 * Returns 1 if packet has been forwarded/freed,
674 * 0 if the packet should be processed further.
675 */
676int
677ip_dooptions(m)
678	struct mbuf *m;
679{
680	register struct ip *ip = mtod(m, struct ip *);
681	register u_char *cp;
682	register struct ip_timestamp *ipt;
683	register struct in_ifaddr *ia;
684	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
685	struct in_addr *sin, dst;
686	n_time ntime;
687
688	dst = ip->ip_dst;
689	cp = (u_char *)(ip + 1);
690	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
691	for (; cnt > 0; cnt -= optlen, cp += optlen) {
692		opt = cp[IPOPT_OPTVAL];
693		if (opt == IPOPT_EOL)
694			break;
695		if (opt == IPOPT_NOP)
696			optlen = 1;
697		else {
698			optlen = cp[IPOPT_OLEN];
699			if (optlen <= 0 || optlen > cnt) {
700				code = &cp[IPOPT_OLEN] - (u_char *)ip;
701				goto bad;
702			}
703		}
704		switch (opt) {
705
706		default:
707			break;
708
709		/*
710		 * Source routing with record.
711		 * Find interface with current destination address.
712		 * If none on this machine then drop if strictly routed,
713		 * or do nothing if loosely routed.
714		 * Record interface address and bring up next address
715		 * component.  If strictly routed make sure next
716		 * address is on directly accessible net.
717		 */
718		case IPOPT_LSRR:
719		case IPOPT_SSRR:
720			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
721				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
722				goto bad;
723			}
724			ipaddr.sin_addr = ip->ip_dst;
725			ia = (struct in_ifaddr *)
726				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
727			if (ia == 0) {
728				if (opt == IPOPT_SSRR) {
729					type = ICMP_UNREACH;
730					code = ICMP_UNREACH_SRCFAIL;
731					goto bad;
732				}
733				/*
734				 * Loose routing, and not at next destination
735				 * yet; nothing to do except forward.
736				 */
737				break;
738			}
739			off--;			/* 0 origin */
740			if (off > optlen - sizeof(struct in_addr)) {
741				/*
742				 * End of source route.  Should be for us.
743				 */
744				save_rte(cp, ip->ip_src);
745				break;
746			}
747
748			if (!ip_dosourceroute) {
749				char buf[4*sizeof "123"];
750				strcpy(buf, inet_ntoa(ip->ip_dst));
751
752				log(LOG_WARNING,
753				    "attempted source route from %s to %s\n",
754				    inet_ntoa(ip->ip_src), buf);
755				type = ICMP_UNREACH;
756				code = ICMP_UNREACH_SRCFAIL;
757				goto bad;
758			}
759
760			/*
761			 * locate outgoing interface
762			 */
763			(void)memcpy(&ipaddr.sin_addr, cp + off,
764			    sizeof(ipaddr.sin_addr));
765
766			if (opt == IPOPT_SSRR) {
767#define	INA	struct in_ifaddr *
768#define	SA	struct sockaddr *
769			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
770				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
771			} else
772				ia = ip_rtaddr(ipaddr.sin_addr);
773			if (ia == 0) {
774				type = ICMP_UNREACH;
775				code = ICMP_UNREACH_SRCFAIL;
776				goto bad;
777			}
778			ip->ip_dst = ipaddr.sin_addr;
779			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
780			    sizeof(struct in_addr));
781			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
782			/*
783			 * Let ip_intr's mcast routing check handle mcast pkts
784			 */
785			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
786			break;
787
788		case IPOPT_RR:
789			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
790				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
791				goto bad;
792			}
793			/*
794			 * If no space remains, ignore.
795			 */
796			off--;			/* 0 origin */
797			if (off > optlen - sizeof(struct in_addr))
798				break;
799			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
800			    sizeof(ipaddr.sin_addr));
801			/*
802			 * locate outgoing interface; if we're the destination,
803			 * use the incoming interface (should be same).
804			 */
805			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
806			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
807				type = ICMP_UNREACH;
808				code = ICMP_UNREACH_HOST;
809				goto bad;
810			}
811			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
812			    sizeof(struct in_addr));
813			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
814			break;
815
816		case IPOPT_TS:
817			code = cp - (u_char *)ip;
818			ipt = (struct ip_timestamp *)cp;
819			if (ipt->ipt_len < 5)
820				goto bad;
821			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
822				if (++ipt->ipt_oflw == 0)
823					goto bad;
824				break;
825			}
826			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
827			switch (ipt->ipt_flg) {
828
829			case IPOPT_TS_TSONLY:
830				break;
831
832			case IPOPT_TS_TSANDADDR:
833				if (ipt->ipt_ptr + sizeof(n_time) +
834				    sizeof(struct in_addr) > ipt->ipt_len)
835					goto bad;
836				ipaddr.sin_addr = dst;
837				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
838							    m->m_pkthdr.rcvif);
839				if (ia == 0)
840					continue;
841				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
842				    sizeof(struct in_addr));
843				ipt->ipt_ptr += sizeof(struct in_addr);
844				break;
845
846			case IPOPT_TS_PRESPEC:
847				if (ipt->ipt_ptr + sizeof(n_time) +
848				    sizeof(struct in_addr) > ipt->ipt_len)
849					goto bad;
850				(void)memcpy(&ipaddr.sin_addr, sin,
851				    sizeof(struct in_addr));
852				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
853					continue;
854				ipt->ipt_ptr += sizeof(struct in_addr);
855				break;
856
857			default:
858				goto bad;
859			}
860			ntime = iptime();
861			(void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
862			    sizeof(n_time));
863			ipt->ipt_ptr += sizeof(n_time);
864		}
865	}
866	if (forward) {
867		ip_forward(m, 1);
868		return (1);
869	}
870	return (0);
871bad:
872	ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
873	icmp_error(m, type, code, 0, 0);
874	ipstat.ips_badoptions++;
875	return (1);
876}
877
878/*
879 * Given address of next destination (final or next hop),
880 * return internet address info of interface to be used to get there.
881 */
882struct in_ifaddr *
883ip_rtaddr(dst)
884	 struct in_addr dst;
885{
886	register struct sockaddr_in *sin;
887
888	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
889
890	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
891		if (ipforward_rt.ro_rt) {
892			RTFREE(ipforward_rt.ro_rt);
893			ipforward_rt.ro_rt = 0;
894		}
895		sin->sin_family = AF_INET;
896		sin->sin_len = sizeof(*sin);
897		sin->sin_addr = dst;
898
899		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
900	}
901	if (ipforward_rt.ro_rt == 0)
902		return ((struct in_ifaddr *)0);
903	return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
904}
905
906/*
907 * Save incoming source route for use in replies,
908 * to be picked up later by ip_srcroute if the receiver is interested.
909 */
910void
911save_rte(option, dst)
912	u_char *option;
913	struct in_addr dst;
914{
915	unsigned olen;
916
917	olen = option[IPOPT_OLEN];
918#ifdef DIAGNOSTIC
919	if (ipprintfs)
920		printf("save_rte: olen %d\n", olen);
921#endif
922	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
923		return;
924	(void)memcpy(ip_srcrt.srcopt, option, olen);
925	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
926	ip_srcrt.dst = dst;
927}
928
929/*
930 * Retrieve incoming source route for use in replies,
931 * in the same form used by setsockopt.
932 * The first hop is placed before the options, will be removed later.
933 */
934struct mbuf *
935ip_srcroute()
936{
937	register struct in_addr *p, *q;
938	register struct mbuf *m;
939
940	if (ip_nhops == 0)
941		return ((struct mbuf *)0);
942	m = m_get(M_DONTWAIT, MT_SOOPTS);
943	if (m == 0)
944		return ((struct mbuf *)0);
945
946#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
947
948	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
949	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
950	    OPTSIZ;
951#ifdef DIAGNOSTIC
952	if (ipprintfs)
953		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
954#endif
955
956	/*
957	 * First save first hop for return route
958	 */
959	p = &ip_srcrt.route[ip_nhops - 1];
960	*(mtod(m, struct in_addr *)) = *p--;
961#ifdef DIAGNOSTIC
962	if (ipprintfs)
963		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
964#endif
965
966	/*
967	 * Copy option fields and padding (nop) to mbuf.
968	 */
969	ip_srcrt.nop = IPOPT_NOP;
970	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
971	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
972	    &ip_srcrt.nop, OPTSIZ);
973	q = (struct in_addr *)(mtod(m, caddr_t) +
974	    sizeof(struct in_addr) + OPTSIZ);
975#undef OPTSIZ
976	/*
977	 * Record return path as an IP source route,
978	 * reversing the path (pointers are now aligned).
979	 */
980	while (p >= ip_srcrt.route) {
981#ifdef DIAGNOSTIC
982		if (ipprintfs)
983			printf(" %lx", ntohl(q->s_addr));
984#endif
985		*q++ = *p--;
986	}
987	/*
988	 * Last hop goes to final destination.
989	 */
990	*q = ip_srcrt.dst;
991#ifdef DIAGNOSTIC
992	if (ipprintfs)
993		printf(" %lx\n", ntohl(q->s_addr));
994#endif
995	return (m);
996}
997
998/*
999 * Strip out IP options, at higher
1000 * level protocol in the kernel.
1001 * Second argument is buffer to which options
1002 * will be moved, and return value is their length.
1003 * XXX should be deleted; last arg currently ignored.
1004 */
1005void
1006ip_stripoptions(m, mopt)
1007	register struct mbuf *m;
1008	struct mbuf *mopt;
1009{
1010	register int i;
1011	struct ip *ip = mtod(m, struct ip *);
1012	register caddr_t opts;
1013	int olen;
1014
1015	olen = (ip->ip_hl<<2) - sizeof (struct ip);
1016	opts = (caddr_t)(ip + 1);
1017	i = m->m_len - (sizeof (struct ip) + olen);
1018	bcopy(opts + olen, opts, (unsigned)i);
1019	m->m_len -= olen;
1020	if (m->m_flags & M_PKTHDR)
1021		m->m_pkthdr.len -= olen;
1022	ip->ip_hl = sizeof(struct ip) >> 2;
1023}
1024
/*
 * Table of errno values with PRC_NCMDS entries; any entries beyond
 * the initializers listed here are zero.  Presumably indexed by a
 * PRC_* control command to obtain the error reported for it, with 0
 * meaning "no error" -- TODO confirm against the users of this table,
 * none of which are in this file.
 */
1025u_char inetctlerrmap[PRC_NCMDS] = {
1026	0,		0,		0,		0,
1027	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1028	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1029	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1030	0,		0,		0,		0,
1031	ENOPROTOOPT
1032};
1033
1034/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * m     - the packet to forward; it is handed on to ip_output() or
 *         icmp_error(), or freed here when it cannot be forwarded.
 * srcrt - indicates whether the packet is being forwarded via a
 *         source route; when non-zero no redirect is generated.
 *
 * The route is looked up through the shared cache ipforward_rt.  A
 * copy of at most the first 64 bytes is kept so that an ICMP error or
 * redirect can still be sent after ip_output() has taken the packet.
 */
1048void
1049ip_forward(m, srcrt)
1050	struct mbuf *m;
1051	int srcrt;
1052{
1053	register struct ip *ip = mtod(m, struct ip *);
1054	register struct sockaddr_in *sin;
1055	register struct rtentry *rt;
1056	int error, type = 0, code = 0;
1057	struct mbuf *mcopy;
1058	n_long dest;
1059	struct ifnet *destifp;
1060
1061	dest = 0;
1062#ifdef DIAGNOSTIC
1063	if (ipprintfs)
1064		printf("forward: src %lx dst %lx ttl %x\n",
1065			ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_ttl);
1066#endif
1067
1068
	/* Never forward link-level broadcasts or unforwardable destinations. */
1069	if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
1070		ipstat.ips_cantforward++;
1071		m_freem(m);
1072		return;
1073	}
	/* ip_id was converted to host order by ipintr(); put it back. */
1074	HTONS(ip->ip_id);
1075	if (ip->ip_ttl <= IPTTLDEC) {
1076		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1077		return;
1078	}
1079	ip->ip_ttl -= IPTTLDEC;
1080
	/*
	 * Reuse the cached route when it exists and was made for this
	 * destination; otherwise release it and look the route up again.
	 */
1081	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
1082	if ((rt = ipforward_rt.ro_rt) == 0 ||
1083	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1084		if (ipforward_rt.ro_rt) {
1085			RTFREE(ipforward_rt.ro_rt);
1086			ipforward_rt.ro_rt = 0;
1087		}
1088		sin->sin_family = AF_INET;
1089		sin->sin_len = sizeof(*sin);
1090		sin->sin_addr = ip->ip_dst;
1091
1092		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1093		if (ipforward_rt.ro_rt == 0) {
1094			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1095			return;
1096		}
1097		rt = ipforward_rt.ro_rt;
1098	}
1099
1100	/*
1101	 * Save at most 64 bytes of the packet in case
1102	 * we need to generate an ICMP message to the src.
1103	 */
1104	mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1105
1106	/*
1107	 * If forwarding packet using same interface that it came in on,
1108	 * perhaps should send a redirect to sender to shortcut a hop.
1109	 * Only send redirect if source is sending directly to us,
1110	 * and if packet was not source routed (or has any options).
1111	 * Also, don't send redirect if forwarding using a default route
1112	 * or a route modified by a redirect.
1113	 */
1114#define	satosin(sa)	((struct sockaddr_in *)(sa))
1115	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1116	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1117	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1118	    ipsendredirects && !srcrt) {
1119#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1120		u_long src = ntohl(ip->ip_src.s_addr);
1121
		/* Redirect only senders on the outgoing interface's subnet. */
1122		if (RTA(rt) &&
1123		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1124		    if (rt->rt_flags & RTF_GATEWAY)
1125			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1126		    else
1127			dest = ip->ip_dst.s_addr;
1128		    /* Router requirements says to only send host redirects */
1129		    type = ICMP_REDIRECT;
1130		    code = ICMP_REDIRECT_HOST;
1131#ifdef DIAGNOSTIC
1132		    if (ipprintfs)
1133		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1134#endif
1135		}
1136	}
1137
1138	error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING
1139#ifdef DIRECTED_BROADCAST
1140			    | IP_ALLOWBROADCAST
1141#endif
1142						, 0);
1143	if (error)
1144		ipstat.ips_cantforward++;
1145	else {
1146		ipstat.ips_forward++;
1147		if (type)
1148			ipstat.ips_redirectsent++;
1149		else {
			/* Forwarded with nothing to report: the copy is unneeded. */
1150			if (mcopy)
1151				m_freem(mcopy);
1152			return;
1153		}
1154	}
	/* Without the saved copy no ICMP message can be built. */
1155	if (mcopy == NULL)
1156		return;
1157	destifp = NULL;
1158
	/* Translate the ip_output() result into an ICMP type and code. */
1159	switch (error) {
1160
1161	case 0:				/* forwarded, but need redirect */
1162		/* type, code set above */
1163		break;
1164
1165	case ENETUNREACH:		/* shouldn't happen, checked above */
1166	case EHOSTUNREACH:
1167	case ENETDOWN:
1168	case EHOSTDOWN:
1169	default:
1170		type = ICMP_UNREACH;
1171		code = ICMP_UNREACH_HOST;
1172		break;
1173
1174	case EMSGSIZE:
1175		type = ICMP_UNREACH;
1176		code = ICMP_UNREACH_NEEDFRAG;
1177		if (ipforward_rt.ro_rt)
1178			destifp = ipforward_rt.ro_rt->rt_ifp;
1179		ipstat.ips_cantfrag++;
1180		break;
1181
1182	case ENOBUFS:
1183		type = ICMP_SOURCEQUENCH;
1184		code = 0;
1185		break;
1186	}
1187	icmp_error(mcopy, type, code, dest, destifp);
1188}
1189
1190int
1191ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
1192	int *name;
1193	u_int namelen;
1194	void *oldp;
1195	size_t *oldlenp;
1196	void *newp;
1197	size_t newlen;
1198{
1199	/* All sysctl names at this level are terminal. */
1200	if (namelen != 1)
1201		return (ENOTDIR);
1202
1203	switch (name[0]) {
1204	case IPCTL_FORWARDING:
1205		return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
1206	case IPCTL_SENDREDIRECTS:
1207		return (sysctl_int(oldp, oldlenp, newp, newlen,
1208			&ipsendredirects));
1209	case IPCTL_DEFTTL:
1210		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
1211	case IPCTL_SOURCEROUTE:
1212		return (sysctl_int(oldp, oldlenp, newp, newlen,
1213				   &ip_dosourceroute));
1214#ifdef notyet
1215	case IPCTL_DEFMTU:
1216		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
1217#endif
1218	case IPCTL_RTEXPIRE:
1219		return (sysctl_int(oldp, oldlenp, newp, newlen,
1220				   &rtq_reallyold));
1221	case IPCTL_RTMINEXPIRE:
1222		return (sysctl_int(oldp, oldlenp, newp, newlen,
1223				   &rtq_minreallyold));
1224	case IPCTL_RTMAXCACHE:
1225		return (sysctl_int(oldp, oldlenp, newp, newlen,
1226				   &rtq_toomany));
1227	default:
1228		return (EOPNOTSUPP);
1229	}
1230	/* NOTREACHED */
1231}
1232
1233int
1234ip_rsvp_init(struct socket *so)
1235{
1236	if (so->so_type != SOCK_RAW ||
1237	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1238	  return EOPNOTSUPP;
1239
1240	if (ip_rsvpd != NULL)
1241	  return EADDRINUSE;
1242
1243	ip_rsvpd = so;
1244	/*
1245	 * This may seem silly, but we need to be sure we don't over-increment
1246	 * the RSVP counter, in case something slips up.
1247	 */
1248	if (!ip_rsvp_on) {
1249		ip_rsvp_on = 1;
1250		rsvp_on++;
1251	}
1252
1253	return 0;
1254}
1255
1256int
1257ip_rsvp_done(void)
1258{
1259	ip_rsvpd = NULL;
1260	/*
1261	 * This may seem silly, but we need to be sure we don't over-decrement
1262	 * the RSVP counter, in case something slips up.
1263	 */
1264	if (ip_rsvp_on) {
1265		ip_rsvp_on = 0;
1266		rsvp_on--;
1267	}
1268	return 0;
1269}
1270