ip_input.c revision 1.1
1/*
2 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_input.c	7.19 (Berkeley) 5/25/91
34 */
35
36#include "param.h"
37#include "systm.h"
38#include "malloc.h"
39#include "mbuf.h"
40#include "domain.h"
41#include "protosw.h"
42#include "socket.h"
43#include "errno.h"
44#include "time.h"
45#include "kernel.h"
46
47#include "../net/if.h"
48#include "../net/route.h"
49
50#include "in.h"
51#include "in_systm.h"
52#include "ip.h"
53#include "in_pcb.h"
54#include "in_var.h"
55#include "ip_var.h"
56#include "ip_icmp.h"
57
/*
 * Compile-time defaults for the run-time switches defined below.
 * IPFORWARDING defaults to 1 only when the kernel is built with GATEWAY;
 * IPSENDREDIRECTS defaults to 1.  Either macro may be predefined to
 * override its default.
 */
#ifndef	IPFORWARDING
#ifdef GATEWAY
#define	IPFORWARDING	1	/* forward IP packets not for us */
#else /* GATEWAY */
#define	IPFORWARDING	0	/* don't forward IP packets not for us */
#endif /* GATEWAY */
#endif /* IPFORWARDING */
#ifndef	IPSENDREDIRECTS
#define	IPSENDREDIRECTS	1
#endif
/* Nonzero: ipintr hands packets not addressed to us to ip_forward. */
int	ipforwarding = IPFORWARDING;
/* Nonzero: ip_forward may generate ICMP redirects. */
int	ipsendredirects = IPSENDREDIRECTS;
#ifdef DIAGNOSTIC
/* Nonzero: enable the diagnostic printfs scattered through this file. */
int	ipprintfs = 0;
#endif

extern	struct domain inetdomain;
extern	struct protosw inetsw[];
/* Maps an IP protocol number to an index into inetsw[]; filled by ip_init. */
u_char	ip_protox[IPPROTO_MAX];
/* Copied into ipintrq.ifq_maxlen by ip_init. */
int	ipqmaxlen = IFQ_MAXLEN;
struct	in_ifaddr *in_ifaddr;			/* first inet address */
79
/*
 * We need to save the IP options in case a protocol wants to respond
 * to an incoming packet over the same route if the packet got here
 * using IP source routing.  This allows connection establishment and
 * maintenance when the remote end is on a network that is not known
 * to us.
 *
 * ip_nhops is the number of hops held in ip_srcrt.  ipintr resets it to 0
 * for every datagram, save_rte sets it when a source route ends at this
 * host, and ip_srcroute turns the saved route into a reply route.
 */
int	ip_nhops = 0;
static	struct ip_srcrt {
	struct	in_addr dst;			/* final destination */
	char	nop;				/* one NOP to align */
	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
} ip_srcrt;

#ifdef GATEWAY
extern	int if_index;
/*
 * Counter matrix allocated by ip_init with (if_index + 1)^2 entries and
 * incremented by ip_forward for each forwarded packet, indexed by the
 * receiving and outgoing interface indices.
 */
u_long	*ip_ifmatrix;
#endif
99
100/*
101 * IP initialization: fill in IP protocol switch table.
102 * All protocols not implemented in kernel go to raw IP protocol handler.
103 */
104ip_init()
105{
106	register struct protosw *pr;
107	register int i;
108
109	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
110	if (pr == 0)
111		panic("ip_init");
112	for (i = 0; i < IPPROTO_MAX; i++)
113		ip_protox[i] = pr - inetsw;
114	for (pr = inetdomain.dom_protosw;
115	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
116		if (pr->pr_domain->dom_family == PF_INET &&
117		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
118			ip_protox[pr->pr_protocol] = pr - inetsw;
119	ipq.next = ipq.prev = &ipq;
120	ip_id = time.tv_sec & 0xffff;
121	ipintrq.ifq_maxlen = ipqmaxlen;
122#ifdef GATEWAY
123	i = (if_index + 1) * (if_index + 1) * sizeof (u_long);
124	if ((ip_ifmatrix = (u_long *) malloc(i, M_RTABLE, M_WAITOK)) == 0)
125		panic("no memory for ip_ifmatrix");
126#endif
127}
128
struct	ip *ip_reass();
/*
 * Scratch sockaddr reused by ip_dooptions for interface address lookups;
 * only sin_addr is rewritten between uses.
 */
struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
/* One-entry route cache shared by ip_forward and ip_rtaddr. */
struct	route ipforward_rt;
132
133/*
134 * Ip input routine.  Checksum and byte swap header.  If fragmented
135 * try to reassemble.  Process options.  Pass to next level.
136 */
137ipintr()
138{
139	register struct ip *ip;
140	register struct mbuf *m;
141	register struct ipq *fp;
142	register struct in_ifaddr *ia;
143	int hlen, s;
144
145next:
146	/*
147	 * Get next datagram off input queue and get IP header
148	 * in first mbuf.
149	 */
150	s = splimp();
151	IF_DEQUEUE(&ipintrq, m);
152	splx(s);
153	if (m == 0)
154		return;
155#ifdef	DIAGNOSTIC
156	if ((m->m_flags & M_PKTHDR) == 0)
157		panic("ipintr no HDR");
158#endif
159	/*
160	 * If no IP addresses have been set yet but the interfaces
161	 * are receiving, can't do anything with incoming packets yet.
162	 */
163	if (in_ifaddr == NULL)
164		goto bad;
165	ipstat.ips_total++;
166	if (m->m_len < sizeof (struct ip) &&
167	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
168		ipstat.ips_toosmall++;
169		goto next;
170	}
171	ip = mtod(m, struct ip *);
172	hlen = ip->ip_hl << 2;
173	if (hlen < sizeof(struct ip)) {	/* minimum header length */
174		ipstat.ips_badhlen++;
175		goto bad;
176	}
177	if (hlen > m->m_len) {
178		if ((m = m_pullup(m, hlen)) == 0) {
179			ipstat.ips_badhlen++;
180			goto next;
181		}
182		ip = mtod(m, struct ip *);
183	}
184	if (ip->ip_sum = in_cksum(m, hlen)) {
185		ipstat.ips_badsum++;
186		goto bad;
187	}
188
189	/*
190	 * Convert fields to host representation.
191	 */
192	NTOHS(ip->ip_len);
193	if (ip->ip_len < hlen) {
194		ipstat.ips_badlen++;
195		goto bad;
196	}
197	NTOHS(ip->ip_id);
198	NTOHS(ip->ip_off);
199
200	/*
201	 * Check that the amount of data in the buffers
202	 * is as at least much as the IP header would have us expect.
203	 * Trim mbufs if longer than we expect.
204	 * Drop packet if shorter than we expect.
205	 */
206	if (m->m_pkthdr.len < ip->ip_len) {
207		ipstat.ips_tooshort++;
208		goto bad;
209	}
210	if (m->m_pkthdr.len > ip->ip_len) {
211		if (m->m_len == m->m_pkthdr.len) {
212			m->m_len = ip->ip_len;
213			m->m_pkthdr.len = ip->ip_len;
214		} else
215			m_adj(m, ip->ip_len - m->m_pkthdr.len);
216	}
217
218	/*
219	 * Process options and, if not destined for us,
220	 * ship it on.  ip_dooptions returns 1 when an
221	 * error was detected (causing an icmp message
222	 * to be sent and the original packet to be freed).
223	 */
224	ip_nhops = 0;		/* for source routed packets */
225	if (hlen > sizeof (struct ip) && ip_dooptions(m))
226		goto next;
227
228	/*
229	 * Check our list of addresses, to see if the packet is for us.
230	 */
231	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
232#define	satosin(sa)	((struct sockaddr_in *)(sa))
233
234		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
235			goto ours;
236		if (
237#ifdef	DIRECTED_BROADCAST
238		    ia->ia_ifp == m->m_pkthdr.rcvif &&
239#endif
240		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
241			u_long t;
242
243			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
244			    ip->ip_dst.s_addr)
245				goto ours;
246			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
247				goto ours;
248			/*
249			 * Look for all-0's host part (old broadcast addr),
250			 * either for subnet or net.
251			 */
252			t = ntohl(ip->ip_dst.s_addr);
253			if (t == ia->ia_subnet)
254				goto ours;
255			if (t == ia->ia_net)
256				goto ours;
257		}
258	}
259	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
260		goto ours;
261	if (ip->ip_dst.s_addr == INADDR_ANY)
262		goto ours;
263
264	/*
265	 * Not for us; forward if possible and desirable.
266	 */
267	if (ipforwarding == 0) {
268		ipstat.ips_cantforward++;
269		m_freem(m);
270	} else
271		ip_forward(m, 0);
272	goto next;
273
274ours:
275	/*
276	 * If offset or IP_MF are set, must reassemble.
277	 * Otherwise, nothing need be done.
278	 * (We could look in the reassembly queue to see
279	 * if the packet was previously fragmented,
280	 * but it's not worth the time; just let them time out.)
281	 */
282	if (ip->ip_off &~ IP_DF) {
283		if (m->m_flags & M_EXT) {		/* XXX */
284			if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
285				ipstat.ips_toosmall++;
286				goto next;
287			}
288			ip = mtod(m, struct ip *);
289		}
290		/*
291		 * Look for queue of fragments
292		 * of this datagram.
293		 */
294		for (fp = ipq.next; fp != &ipq; fp = fp->next)
295			if (ip->ip_id == fp->ipq_id &&
296			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
297			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
298			    ip->ip_p == fp->ipq_p)
299				goto found;
300		fp = 0;
301found:
302
303		/*
304		 * Adjust ip_len to not reflect header,
305		 * set ip_mff if more fragments are expected,
306		 * convert offset of this to bytes.
307		 */
308		ip->ip_len -= hlen;
309		((struct ipasfrag *)ip)->ipf_mff = 0;
310		if (ip->ip_off & IP_MF)
311			((struct ipasfrag *)ip)->ipf_mff = 1;
312		ip->ip_off <<= 3;
313
314		/*
315		 * If datagram marked as having more fragments
316		 * or if this is not the first fragment,
317		 * attempt reassembly; if it succeeds, proceed.
318		 */
319		if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
320			ipstat.ips_fragments++;
321			ip = ip_reass((struct ipasfrag *)ip, fp);
322			if (ip == 0)
323				goto next;
324			else
325				ipstat.ips_reassembled++;
326			m = dtom(ip);
327		} else
328			if (fp)
329				ip_freef(fp);
330	} else
331		ip->ip_len -= hlen;
332
333	/*
334	 * Switch out to protocol's input routine.
335	 */
336	ipstat.ips_delivered++;
337	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
338	goto next;
339bad:
340	m_freem(m);
341	goto next;
342}
343
344/*
345 * Take incoming datagram fragment and try to
346 * reassemble it into whole datagram.  If a chain for
347 * reassembly of this datagram already exists, then it
348 * is given as fp; otherwise have to make a chain.
349 */
350struct ip *
351ip_reass(ip, fp)
352	register struct ipasfrag *ip;
353	register struct ipq *fp;
354{
355	register struct mbuf *m = dtom(ip);
356	register struct ipasfrag *q;
357	struct mbuf *t;
358	int hlen = ip->ip_hl << 2;
359	int i, next;
360
361	/*
362	 * Presence of header sizes in mbufs
363	 * would confuse code below.
364	 */
365	m->m_data += hlen;
366	m->m_len -= hlen;
367
368	/*
369	 * If first fragment to arrive, create a reassembly queue.
370	 */
371	if (fp == 0) {
372		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
373			goto dropfrag;
374		fp = mtod(t, struct ipq *);
375		insque(fp, &ipq);
376		fp->ipq_ttl = IPFRAGTTL;
377		fp->ipq_p = ip->ip_p;
378		fp->ipq_id = ip->ip_id;
379		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
380		fp->ipq_src = ((struct ip *)ip)->ip_src;
381		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
382		q = (struct ipasfrag *)fp;
383		goto insert;
384	}
385
386	/*
387	 * Find a segment which begins after this one does.
388	 */
389	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
390		if (q->ip_off > ip->ip_off)
391			break;
392
393	/*
394	 * If there is a preceding segment, it may provide some of
395	 * our data already.  If so, drop the data from the incoming
396	 * segment.  If it provides all of our data, drop us.
397	 */
398	if (q->ipf_prev != (struct ipasfrag *)fp) {
399		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
400		if (i > 0) {
401			if (i >= ip->ip_len)
402				goto dropfrag;
403			m_adj(dtom(ip), i);
404			ip->ip_off += i;
405			ip->ip_len -= i;
406		}
407	}
408
409	/*
410	 * While we overlap succeeding segments trim them or,
411	 * if they are completely covered, dequeue them.
412	 */
413	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
414		i = (ip->ip_off + ip->ip_len) - q->ip_off;
415		if (i < q->ip_len) {
416			q->ip_len -= i;
417			q->ip_off += i;
418			m_adj(dtom(q), i);
419			break;
420		}
421		q = q->ipf_next;
422		m_freem(dtom(q->ipf_prev));
423		ip_deq(q->ipf_prev);
424	}
425
426insert:
427	/*
428	 * Stick new segment in its place;
429	 * check for complete reassembly.
430	 */
431	ip_enq(ip, q->ipf_prev);
432	next = 0;
433	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
434		if (q->ip_off != next)
435			return (0);
436		next += q->ip_len;
437	}
438	if (q->ipf_prev->ipf_mff)
439		return (0);
440
441	/*
442	 * Reassembly is complete; concatenate fragments.
443	 */
444	q = fp->ipq_next;
445	m = dtom(q);
446	t = m->m_next;
447	m->m_next = 0;
448	m_cat(m, t);
449	q = q->ipf_next;
450	while (q != (struct ipasfrag *)fp) {
451		t = dtom(q);
452		q = q->ipf_next;
453		m_cat(m, t);
454	}
455
456	/*
457	 * Create header for new ip packet by
458	 * modifying header of first packet;
459	 * dequeue and discard fragment reassembly header.
460	 * Make header visible.
461	 */
462	ip = fp->ipq_next;
463	ip->ip_len = next;
464	((struct ip *)ip)->ip_src = fp->ipq_src;
465	((struct ip *)ip)->ip_dst = fp->ipq_dst;
466	remque(fp);
467	(void) m_free(dtom(fp));
468	m = dtom(ip);
469	m->m_len += (ip->ip_hl << 2);
470	m->m_data -= (ip->ip_hl << 2);
471	/* some debugging cruft by sklower, below, will go away soon */
472	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
473		register int plen = 0;
474		for (t = m; m; m = m->m_next)
475			plen += m->m_len;
476		t->m_pkthdr.len = plen;
477	}
478	return ((struct ip *)ip);
479
480dropfrag:
481	ipstat.ips_fragdropped++;
482	m_freem(m);
483	return (0);
484}
485
486/*
487 * Free a fragment reassembly header and all
488 * associated datagrams.
489 */
490ip_freef(fp)
491	struct ipq *fp;
492{
493	register struct ipasfrag *q, *p;
494
495	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
496		p = q->ipf_next;
497		ip_deq(q);
498		m_freem(dtom(q));
499	}
500	remque(fp);
501	(void) m_free(dtom(fp));
502}
503
504/*
505 * Put an ip fragment on a reassembly chain.
506 * Like insque, but pointers in middle of structure.
507 */
508ip_enq(p, prev)
509	register struct ipasfrag *p, *prev;
510{
511
512	p->ipf_prev = prev;
513	p->ipf_next = prev->ipf_next;
514	prev->ipf_next->ipf_prev = p;
515	prev->ipf_next = p;
516}
517
518/*
519 * To ip_enq as remque is to insque.
520 */
521ip_deq(p)
522	register struct ipasfrag *p;
523{
524
525	p->ipf_prev->ipf_next = p->ipf_next;
526	p->ipf_next->ipf_prev = p->ipf_prev;
527}
528
529/*
530 * IP timer processing;
531 * if a timer expires on a reassembly
532 * queue, discard it.
533 */
534ip_slowtimo()
535{
536	register struct ipq *fp;
537	int s = splnet();
538
539	fp = ipq.next;
540	if (fp == 0) {
541		splx(s);
542		return;
543	}
544	while (fp != &ipq) {
545		--fp->ipq_ttl;
546		fp = fp->next;
547		if (fp->prev->ipq_ttl == 0) {
548			ipstat.ips_fragtimeout++;
549			ip_freef(fp->prev);
550		}
551	}
552	splx(s);
553}
554
555/*
556 * Drain off all datagram fragments.
557 */
558ip_drain()
559{
560
561	while (ipq.next != &ipq) {
562		ipstat.ips_fragdropped++;
563		ip_freef(ipq.next);
564	}
565}
566
/* Interface-address lookups used by the option and forwarding code. */
extern struct in_ifaddr *ifptoia();
struct in_ifaddr *ip_rtaddr();
569
570/*
571 * Do option processing on a datagram,
572 * possibly discarding it if bad options are encountered,
573 * or forwarding it if source-routed.
574 * Returns 1 if packet has been forwarded/freed,
575 * 0 if the packet should be processed further.
576 */
577ip_dooptions(m)
578	struct mbuf *m;
579{
580	register struct ip *ip = mtod(m, struct ip *);
581	register u_char *cp;
582	register struct ip_timestamp *ipt;
583	register struct in_ifaddr *ia;
584	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
585	struct in_addr *sin;
586	n_time ntime;
587
588	cp = (u_char *)(ip + 1);
589	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
590	for (; cnt > 0; cnt -= optlen, cp += optlen) {
591		opt = cp[IPOPT_OPTVAL];
592		if (opt == IPOPT_EOL)
593			break;
594		if (opt == IPOPT_NOP)
595			optlen = 1;
596		else {
597			optlen = cp[IPOPT_OLEN];
598			if (optlen <= 0 || optlen > cnt) {
599				code = &cp[IPOPT_OLEN] - (u_char *)ip;
600				goto bad;
601			}
602		}
603		switch (opt) {
604
605		default:
606			break;
607
608		/*
609		 * Source routing with record.
610		 * Find interface with current destination address.
611		 * If none on this machine then drop if strictly routed,
612		 * or do nothing if loosely routed.
613		 * Record interface address and bring up next address
614		 * component.  If strictly routed make sure next
615		 * address is on directly accessible net.
616		 */
617		case IPOPT_LSRR:
618		case IPOPT_SSRR:
619			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
620				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
621				goto bad;
622			}
623			ipaddr.sin_addr = ip->ip_dst;
624			ia = (struct in_ifaddr *)
625				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
626			if (ia == 0) {
627				if (opt == IPOPT_SSRR) {
628					type = ICMP_UNREACH;
629					code = ICMP_UNREACH_SRCFAIL;
630					goto bad;
631				}
632				/*
633				 * Loose routing, and not at next destination
634				 * yet; nothing to do except forward.
635				 */
636				break;
637			}
638			off--;			/* 0 origin */
639			if (off > optlen - sizeof(struct in_addr)) {
640				/*
641				 * End of source route.  Should be for us.
642				 */
643				save_rte(cp, ip->ip_src);
644				break;
645			}
646			/*
647			 * locate outgoing interface
648			 */
649			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
650			    sizeof(ipaddr.sin_addr));
651			if (opt == IPOPT_SSRR) {
652#define	INA	struct in_ifaddr *
653#define	SA	struct sockaddr *
654			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
655				ia = in_iaonnetof(in_netof(ipaddr.sin_addr));
656			} else
657				ia = ip_rtaddr(ipaddr.sin_addr);
658			if (ia == 0) {
659				type = ICMP_UNREACH;
660				code = ICMP_UNREACH_SRCFAIL;
661				goto bad;
662			}
663			ip->ip_dst = ipaddr.sin_addr;
664			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
665			    (caddr_t)(cp + off), sizeof(struct in_addr));
666			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
667			forward = 1;
668			break;
669
670		case IPOPT_RR:
671			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
672				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
673				goto bad;
674			}
675			/*
676			 * If no space remains, ignore.
677			 */
678			off--;			/* 0 origin */
679			if (off > optlen - sizeof(struct in_addr))
680				break;
681			bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
682			    sizeof(ipaddr.sin_addr));
683			/*
684			 * locate outgoing interface; if we're the destination,
685			 * use the incoming interface (should be same).
686			 */
687			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
688			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
689				type = ICMP_UNREACH;
690				code = ICMP_UNREACH_HOST;
691				goto bad;
692			}
693			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
694			    (caddr_t)(cp + off), sizeof(struct in_addr));
695			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
696			break;
697
698		case IPOPT_TS:
699			code = cp - (u_char *)ip;
700			ipt = (struct ip_timestamp *)cp;
701			if (ipt->ipt_len < 5)
702				goto bad;
703			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
704				if (++ipt->ipt_oflw == 0)
705					goto bad;
706				break;
707			}
708			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
709			switch (ipt->ipt_flg) {
710
711			case IPOPT_TS_TSONLY:
712				break;
713
714			case IPOPT_TS_TSANDADDR:
715				if (ipt->ipt_ptr + sizeof(n_time) +
716				    sizeof(struct in_addr) > ipt->ipt_len)
717					goto bad;
718				ia = ifptoia(m->m_pkthdr.rcvif);
719				bcopy((caddr_t)&IA_SIN(ia)->sin_addr,
720				    (caddr_t)sin, sizeof(struct in_addr));
721				ipt->ipt_ptr += sizeof(struct in_addr);
722				break;
723
724			case IPOPT_TS_PRESPEC:
725				if (ipt->ipt_ptr + sizeof(n_time) +
726				    sizeof(struct in_addr) > ipt->ipt_len)
727					goto bad;
728				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
729				    sizeof(struct in_addr));
730				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
731					continue;
732				ipt->ipt_ptr += sizeof(struct in_addr);
733				break;
734
735			default:
736				goto bad;
737			}
738			ntime = iptime();
739			bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
740			    sizeof(n_time));
741			ipt->ipt_ptr += sizeof(n_time);
742		}
743	}
744	if (forward) {
745		ip_forward(m, 1);
746		return (1);
747	} else
748		return (0);
749bad:
750	icmp_error(m, type, code);
751	return (1);
752}
753
754/*
755 * Given address of next destination (final or next hop),
756 * return internet address info of interface to be used to get there.
757 */
758struct in_ifaddr *
759ip_rtaddr(dst)
760	 struct in_addr dst;
761{
762	register struct sockaddr_in *sin;
763
764	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
765
766	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
767		if (ipforward_rt.ro_rt) {
768			RTFREE(ipforward_rt.ro_rt);
769			ipforward_rt.ro_rt = 0;
770		}
771		sin->sin_family = AF_INET;
772		sin->sin_len = sizeof(*sin);
773		sin->sin_addr = dst;
774
775		rtalloc(&ipforward_rt);
776	}
777	if (ipforward_rt.ro_rt == 0)
778		return ((struct in_ifaddr *)0);
779	return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
780}
781
782/*
783 * Save incoming source route for use in replies,
784 * to be picked up later by ip_srcroute if the receiver is interested.
785 */
786save_rte(option, dst)
787	u_char *option;
788	struct in_addr dst;
789{
790	unsigned olen;
791
792	olen = option[IPOPT_OLEN];
793#ifdef DIAGNOSTIC
794	if (ipprintfs)
795		printf("save_rte: olen %d\n", olen);
796#endif
797	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
798		return;
799	bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
800	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
801	ip_srcrt.dst = dst;
802}
803
804/*
805 * Retrieve incoming source route for use in replies,
806 * in the same form used by setsockopt.
807 * The first hop is placed before the options, will be removed later.
808 */
809struct mbuf *
810ip_srcroute()
811{
812	register struct in_addr *p, *q;
813	register struct mbuf *m;
814
815	if (ip_nhops == 0)
816		return ((struct mbuf *)0);
817	m = m_get(M_DONTWAIT, MT_SOOPTS);
818	if (m == 0)
819		return ((struct mbuf *)0);
820
821#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
822
823	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
824	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
825	    OPTSIZ;
826#ifdef DIAGNOSTIC
827	if (ipprintfs)
828		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
829#endif
830
831	/*
832	 * First save first hop for return route
833	 */
834	p = &ip_srcrt.route[ip_nhops - 1];
835	*(mtod(m, struct in_addr *)) = *p--;
836#ifdef DIAGNOSTIC
837	if (ipprintfs)
838		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
839#endif
840
841	/*
842	 * Copy option fields and padding (nop) to mbuf.
843	 */
844	ip_srcrt.nop = IPOPT_NOP;
845	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
846	bcopy((caddr_t)&ip_srcrt.nop,
847	    mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
848	q = (struct in_addr *)(mtod(m, caddr_t) +
849	    sizeof(struct in_addr) + OPTSIZ);
850#undef OPTSIZ
851	/*
852	 * Record return path as an IP source route,
853	 * reversing the path (pointers are now aligned).
854	 */
855	while (p >= ip_srcrt.route) {
856#ifdef DIAGNOSTIC
857		if (ipprintfs)
858			printf(" %lx", ntohl(q->s_addr));
859#endif
860		*q++ = *p--;
861	}
862	/*
863	 * Last hop goes to final destination.
864	 */
865	*q = ip_srcrt.dst;
866#ifdef DIAGNOSTIC
867	if (ipprintfs)
868		printf(" %lx\n", ntohl(q->s_addr));
869#endif
870	return (m);
871}
872
873/*
874 * Strip out IP options, at higher
875 * level protocol in the kernel.
876 * Second argument is buffer to which options
877 * will be moved, and return value is their length.
878 * XXX should be deleted; last arg currently ignored.
879 */
880ip_stripoptions(m, mopt)
881	register struct mbuf *m;
882	struct mbuf *mopt;
883{
884	register int i;
885	struct ip *ip = mtod(m, struct ip *);
886	register caddr_t opts;
887	int olen;
888
889	olen = (ip->ip_hl<<2) - sizeof (struct ip);
890	opts = (caddr_t)(ip + 1);
891	i = m->m_len - (sizeof (struct ip) + olen);
892	bcopy(opts  + olen, opts, (unsigned)i);
893	m->m_len -= olen;
894	if (m->m_flags & M_PKTHDR)
895		m->m_pkthdr.len -= olen;
896	ip->ip_hl = sizeof(struct ip) >> 2;
897}
898
/*
 * Error numbers, one per protocol control command (PRC_NCMDS entries;
 * entries past the last initializer are zero).  Not referenced in this
 * file -- presumably indexed by PRC_* code from the protocols' control
 * input routines, with 0 meaning no error to report; confirm against
 * the PRC_* definitions and the callers.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT
};
907
908/*
909 * Forward a packet.  If some error occurs return the sender
910 * an icmp packet.  Note we can't always generate a meaningful
911 * icmp message because icmp doesn't have a large enough repertoire
912 * of codes and types.
913 *
914 * If not forwarding, just drop the packet.  This could be confusing
915 * if ipforwarding was zero but some routing protocol was advancing
916 * us as a gateway to somewhere.  However, we must let the routing
917 * protocol deal with that.
918 *
919 * The srcrt parameter indicates whether the packet is being forwarded
920 * via a source route.
921 */
922ip_forward(m, srcrt)
923	struct mbuf *m;
924	int srcrt;
925{
926	register struct ip *ip = mtod(m, struct ip *);
927	register struct sockaddr_in *sin;
928	register struct rtentry *rt;
929	int error, type = 0, code;
930	struct mbuf *mcopy;
931	struct in_addr dest;
932
933	dest.s_addr = 0;
934#ifdef DIAGNOSTIC
935	if (ipprintfs)
936		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
937			ip->ip_dst, ip->ip_ttl);
938#endif
939	if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
940		ipstat.ips_cantforward++;
941		m_freem(m);
942		return;
943	}
944	HTONS(ip->ip_id);
945	if (ip->ip_ttl <= IPTTLDEC) {
946		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest);
947		return;
948	}
949	ip->ip_ttl -= IPTTLDEC;
950
951	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
952	if ((rt = ipforward_rt.ro_rt) == 0 ||
953	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
954		if (ipforward_rt.ro_rt) {
955			RTFREE(ipforward_rt.ro_rt);
956			ipforward_rt.ro_rt = 0;
957		}
958		sin->sin_family = AF_INET;
959		sin->sin_len = sizeof(*sin);
960		sin->sin_addr = ip->ip_dst;
961
962		rtalloc(&ipforward_rt);
963		if (ipforward_rt.ro_rt == 0) {
964			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest);
965			return;
966		}
967		rt = ipforward_rt.ro_rt;
968	}
969
970	/*
971	 * Save at most 64 bytes of the packet in case
972	 * we need to generate an ICMP message to the src.
973	 */
974	mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
975
976#ifdef GATEWAY
977	ip_ifmatrix[rt->rt_ifp->if_index +
978	     if_index * m->m_pkthdr.rcvif->if_index]++;
979#endif
980	/*
981	 * If forwarding packet using same interface that it came in on,
982	 * perhaps should send a redirect to sender to shortcut a hop.
983	 * Only send redirect if source is sending directly to us,
984	 * and if packet was not source routed (or has any options).
985	 * Also, don't send redirect if forwarding using a default route
986	 * or a route modified by a redirect.
987	 */
988#define	satosin(sa)	((struct sockaddr_in *)(sa))
989	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
990	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
991	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
992	    ipsendredirects && !srcrt) {
993		struct in_ifaddr *ia;
994		u_long src = ntohl(ip->ip_src.s_addr);
995		u_long dst = ntohl(ip->ip_dst.s_addr);
996
997		if ((ia = ifptoia(m->m_pkthdr.rcvif)) &&
998		   (src & ia->ia_subnetmask) == ia->ia_subnet) {
999		    if (rt->rt_flags & RTF_GATEWAY)
1000			dest = satosin(rt->rt_gateway)->sin_addr;
1001		    else
1002			dest = ip->ip_dst;
1003		    /*
1004		     * If the destination is reached by a route to host,
1005		     * is on a subnet of a local net, or is directly
1006		     * on the attached net (!), use host redirect.
1007		     * (We may be the correct first hop for other subnets.)
1008		     */
1009#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1010		    type = ICMP_REDIRECT;
1011		    if ((rt->rt_flags & RTF_HOST) ||
1012		        (rt->rt_flags & RTF_GATEWAY) == 0)
1013			    code = ICMP_REDIRECT_HOST;
1014		    else if (RTA(rt)->ia_subnetmask != RTA(rt)->ia_netmask &&
1015		        (dst & RTA(rt)->ia_netmask) ==  RTA(rt)->ia_net)
1016			    code = ICMP_REDIRECT_HOST;
1017		    else
1018			    code = ICMP_REDIRECT_NET;
1019#ifdef DIAGNOSTIC
1020		    if (ipprintfs)
1021		        printf("redirect (%d) to %x\n", code, dest.s_addr);
1022#endif
1023		}
1024	}
1025
1026	error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING);
1027	if (error)
1028		ipstat.ips_cantforward++;
1029	else {
1030		ipstat.ips_forward++;
1031		if (type)
1032			ipstat.ips_redirectsent++;
1033		else {
1034			if (mcopy)
1035				m_freem(mcopy);
1036			return;
1037		}
1038	}
1039	if (mcopy == NULL)
1040		return;
1041	switch (error) {
1042
1043	case 0:				/* forwarded, but need redirect */
1044		/* type, code set above */
1045		break;
1046
1047	case ENETUNREACH:		/* shouldn't happen, checked above */
1048	case EHOSTUNREACH:
1049	case ENETDOWN:
1050	case EHOSTDOWN:
1051	default:
1052		type = ICMP_UNREACH;
1053		code = ICMP_UNREACH_HOST;
1054		break;
1055
1056	case EMSGSIZE:
1057		type = ICMP_UNREACH;
1058		code = ICMP_UNREACH_NEEDFRAG;
1059		ipstat.ips_cantfrag++;
1060		break;
1061
1062	case ENOBUFS:
1063		type = ICMP_SOURCEQUENCH;
1064		code = 0;
1065		break;
1066	}
1067	icmp_error(mcopy, type, code, dest);
1068}
1069