ip_icmp.c revision 125360
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
34 * $FreeBSD: head/sys/netinet/ip_icmp.c 125360 2004-02-02 22:53:16Z andre $
35 */
36
37#include "opt_ipsec.h"
38#include "opt_mac.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/mac.h>
43#include <sys/mbuf.h>
44#include <sys/protosw.h>
45#include <sys/socket.h>
46#include <sys/time.h>
47#include <sys/kernel.h>
48#include <sys/sysctl.h>
49
50#include <net/if.h>
51#include <net/if_types.h>
52#include <net/route.h>
53
54#include <netinet/in.h>
55#include <netinet/in_pcb.h>
56#include <netinet/in_systm.h>
57#include <netinet/in_var.h>
58#include <netinet/ip.h>
59#include <netinet/ip_icmp.h>
60#include <netinet/ip_var.h>
61#include <netinet/tcp.h>
62#include <netinet/tcp_var.h>
63#include <netinet/tcpip.h>
64#include <netinet/icmp_var.h>
65
66#ifdef IPSEC
67#include <netinet6/ipsec.h>
68#include <netkey/key.h>
69#endif
70
71#ifdef FAST_IPSEC
72#include <netipsec/ipsec.h>
73#include <netipsec/key.h>
74#define	IPSEC
75#endif
76
77#include <machine/in_cksum.h>
78
/*
 * ICMP routines: error generation, receive packet processing,
 * routines to turn packets around and send them back to the
 * originator, and host table maintenance routines.
 */
84
85static struct	icmpstat icmpstat;
86SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
87	&icmpstat, icmpstat, "");
88
89static int	icmpmaskrepl = 0;
90SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
91	&icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets.");
92
93static u_int	icmpmaskfake = 0;
94SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW,
95	&icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets.");
96
97static int	drop_redirect = 0;
98SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
99	&drop_redirect, 0, "");
100
101static int	log_redirect = 0;
102SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
103	&log_redirect, 0, "");
104
105static int      icmplim = 200;
106SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
107	&icmplim, 0, "");
108
109static int	icmplim_output = 1;
110SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
111	&icmplim_output, 0, "");
112
113static char	reply_src[IFNAMSIZ+1];
114SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
115	&reply_src, IFNAMSIZ, "icmp reply source for non-local packets.");
116
117/*
118 * ICMP broadcast echo sysctl
119 */
120
121static int	icmpbmcastecho = 0;
122SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
123	&icmpbmcastecho, 0, "");
124
125
126#ifdef ICMPPRINTFS
127int	icmpprintfs = 0;
128#endif
129
130static void	icmp_reflect(struct mbuf *);
131static void	icmp_send(struct mbuf *, struct mbuf *);
132static int	ip_next_mtu(int, int);
133
134extern	struct protosw inetsw[];
135
136/*
137 * Generate an error packet of type error
138 * in response to bad packet ip.
139 */
140void
141icmp_error(n, type, code, dest, destifp)
142	struct mbuf *n;
143	int type, code;
144	n_long dest;
145	struct ifnet *destifp;
146{
147	register struct ip *oip = mtod(n, struct ip *), *nip;
148	register unsigned oiplen = oip->ip_hl << 2;
149	register struct icmp *icp;
150	register struct mbuf *m;
151	unsigned icmplen;
152
153#ifdef ICMPPRINTFS
154	if (icmpprintfs)
155		printf("icmp_error(%p, %x, %d)\n", oip, type, code);
156#endif
157	if (type != ICMP_REDIRECT)
158		icmpstat.icps_error++;
159	/*
160	 * Don't send error if not the first fragment of message.
161	 * Don't error if the old packet protocol was ICMP
162	 * error message, only known informational types.
163	 */
164	if (oip->ip_off &~ (IP_MF|IP_DF))
165		goto freeit;
166	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
167	  n->m_len >= oiplen + ICMP_MINLEN &&
168	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
169		icmpstat.icps_oldicmp++;
170		goto freeit;
171	}
172	/* Don't send error in response to a multicast or broadcast packet */
173	if (n->m_flags & (M_BCAST|M_MCAST))
174		goto freeit;
175	/*
176	 * First, formulate icmp message
177	 */
178	m = m_gethdr(M_DONTWAIT, MT_HEADER);
179	if (m == NULL)
180		goto freeit;
181#ifdef MAC
182	mac_create_mbuf_netlayer(n, m);
183#endif
184	icmplen = min(oiplen + 8, oip->ip_len);
185	if (icmplen < sizeof(struct ip))
186		panic("icmp_error: bad length");
187	m->m_len = icmplen + ICMP_MINLEN;
188	MH_ALIGN(m, m->m_len);
189	icp = mtod(m, struct icmp *);
190	if ((u_int)type > ICMP_MAXTYPE)
191		panic("icmp_error");
192	icmpstat.icps_outhist[type]++;
193	icp->icmp_type = type;
194	if (type == ICMP_REDIRECT)
195		icp->icmp_gwaddr.s_addr = dest;
196	else {
197		icp->icmp_void = 0;
198		/*
199		 * The following assignments assume an overlay with the
200		 * zeroed icmp_void field.
201		 */
202		if (type == ICMP_PARAMPROB) {
203			icp->icmp_pptr = code;
204			code = 0;
205		} else if (type == ICMP_UNREACH &&
206			code == ICMP_UNREACH_NEEDFRAG && destifp) {
207			icp->icmp_nextmtu = htons(destifp->if_mtu);
208		}
209	}
210
211	icp->icmp_code = code;
212	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
213	nip = &icp->icmp_ip;
214
215	/*
216	 * Convert fields to network representation.
217	 */
218	nip->ip_len = htons(nip->ip_len);
219	nip->ip_off = htons(nip->ip_off);
220
221	/*
222	 * Now, copy old ip header (without options)
223	 * in front of icmp message.
224	 */
225	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
226		panic("icmp len");
227	m->m_data -= sizeof(struct ip);
228	m->m_len += sizeof(struct ip);
229	m->m_pkthdr.len = m->m_len;
230	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
231	nip = mtod(m, struct ip *);
232	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
233	nip->ip_len = m->m_len;
234	nip->ip_v = IPVERSION;
235	nip->ip_hl = 5;
236	nip->ip_p = IPPROTO_ICMP;
237	nip->ip_tos = 0;
238	icmp_reflect(m);
239
240freeit:
241	m_freem(n);
242}
243
244/*
245 * Process a received ICMP message.
246 */
247void
248icmp_input(m, off)
249	struct mbuf *m;
250	int off;
251{
252	struct icmp *icp;
253	struct in_ifaddr *ia;
254	struct ip *ip = mtod(m, struct ip *);
255	struct sockaddr_in icmpsrc, icmpdst, icmpgw;
256	int hlen = off;
257	int icmplen = ip->ip_len;
258	int i, code;
259	void (*ctlfunc)(int, struct sockaddr *, void *);
260
261	/*
262	 * Locate icmp structure in mbuf, and check
263	 * that not corrupted and of at least minimum length.
264	 */
265#ifdef ICMPPRINTFS
266	if (icmpprintfs) {
267		char buf[4 * sizeof "123"];
268		strcpy(buf, inet_ntoa(ip->ip_src));
269		printf("icmp_input from %s to %s, len %d\n",
270		       buf, inet_ntoa(ip->ip_dst), icmplen);
271	}
272#endif
273	if (icmplen < ICMP_MINLEN) {
274		icmpstat.icps_tooshort++;
275		goto freeit;
276	}
277	i = hlen + min(icmplen, ICMP_ADVLENMIN);
278	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
279		icmpstat.icps_tooshort++;
280		return;
281	}
282	ip = mtod(m, struct ip *);
283	m->m_len -= hlen;
284	m->m_data += hlen;
285	icp = mtod(m, struct icmp *);
286	if (in_cksum(m, icmplen)) {
287		icmpstat.icps_checksum++;
288		goto freeit;
289	}
290	m->m_len += hlen;
291	m->m_data -= hlen;
292
293	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
294		/*
295		 * Deliver very specific ICMP type only.
296		 */
297		switch (icp->icmp_type) {
298		case ICMP_UNREACH:
299		case ICMP_TIMXCEED:
300			break;
301		default:
302			goto freeit;
303		}
304	}
305
306#ifdef ICMPPRINTFS
307	if (icmpprintfs)
308		printf("icmp_input, type %d code %d\n", icp->icmp_type,
309		    icp->icmp_code);
310#endif
311
312	/*
313	 * Message type specific processing.
314	 */
315	if (icp->icmp_type > ICMP_MAXTYPE)
316		goto raw;
317
318	/* Initialize */
319	bzero(&icmpsrc, sizeof(icmpsrc));
320	icmpsrc.sin_len = sizeof(struct sockaddr_in);
321	icmpsrc.sin_family = AF_INET;
322	bzero(&icmpdst, sizeof(icmpdst));
323	icmpdst.sin_len = sizeof(struct sockaddr_in);
324	icmpdst.sin_family = AF_INET;
325	bzero(&icmpgw, sizeof(icmpgw));
326	icmpgw.sin_len = sizeof(struct sockaddr_in);
327	icmpgw.sin_family = AF_INET;
328
329	icmpstat.icps_inhist[icp->icmp_type]++;
330	code = icp->icmp_code;
331	switch (icp->icmp_type) {
332
333	case ICMP_UNREACH:
334		switch (code) {
335			case ICMP_UNREACH_NET:
336			case ICMP_UNREACH_HOST:
337			case ICMP_UNREACH_SRCFAIL:
338			case ICMP_UNREACH_NET_UNKNOWN:
339			case ICMP_UNREACH_HOST_UNKNOWN:
340			case ICMP_UNREACH_ISOLATED:
341			case ICMP_UNREACH_TOSNET:
342			case ICMP_UNREACH_TOSHOST:
343			case ICMP_UNREACH_HOST_PRECEDENCE:
344			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
345				code = PRC_UNREACH_NET;
346				break;
347
348			case ICMP_UNREACH_NEEDFRAG:
349				code = PRC_MSGSIZE;
350				break;
351
352			/*
353			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
354			 * Treat subcodes 2,3 as immediate RST
355			 */
356			case ICMP_UNREACH_PROTOCOL:
357			case ICMP_UNREACH_PORT:
358				code = PRC_UNREACH_PORT;
359				break;
360
361			case ICMP_UNREACH_NET_PROHIB:
362			case ICMP_UNREACH_HOST_PROHIB:
363			case ICMP_UNREACH_FILTER_PROHIB:
364				code = PRC_UNREACH_ADMIN_PROHIB;
365				break;
366
367			default:
368				goto badcode;
369		}
370		goto deliver;
371
372	case ICMP_TIMXCEED:
373		if (code > 1)
374			goto badcode;
375		code += PRC_TIMXCEED_INTRANS;
376		goto deliver;
377
378	case ICMP_PARAMPROB:
379		if (code > 1)
380			goto badcode;
381		code = PRC_PARAMPROB;
382		goto deliver;
383
384	case ICMP_SOURCEQUENCH:
385		if (code)
386			goto badcode;
387		code = PRC_QUENCH;
388	deliver:
389		/*
390		 * Problem with datagram; advise higher level routines.
391		 */
392		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
393		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
394			icmpstat.icps_badlen++;
395			goto freeit;
396		}
397		icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
398		/* Discard ICMP's in response to multicast packets */
399		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
400			goto badcode;
401#ifdef ICMPPRINTFS
402		if (icmpprintfs)
403			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
404#endif
405		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
406
407		/*
408		 * MTU discovery:
409		 * If we got a needfrag and there is a host route to the
410		 * original destination, and the MTU is not locked, then
411		 * set the MTU in the route to the suggested new value
412		 * (if given) and then notify as usual.  The ULPs will
413		 * notice that the MTU has changed and adapt accordingly.
414		 * If no new MTU was suggested, then we guess a new one
415		 * less than the current value.  If the new MTU is
416		 * unreasonably small (defined by sysctl tcp_minmss), then
417		 * we don't update the MTU value.
418		 *
419		 * XXX: All this should be done in tcp_mtudisc() because
420		 * the way we do it now, everyone can send us bogus ICMP
421		 * MSGSIZE packets for any destination. By doing this far
422		 * higher in the chain we have a matching tcp connection.
423		 * Thus spoofing is much harder. However there is no easy
424		 * non-hackish way to pass the new MTU up to tcp_mtudisc().
425		 * Also see next XXX regarding IPv4 AH TCP.
426		 */
427		if (code == PRC_MSGSIZE) {
428			int mtu;
429			struct in_conninfo inc;
430
431			bzero(&inc, sizeof(inc));
432			inc.inc_flags = 0; /* IPv4 */
433			inc.inc_faddr = icmpsrc.sin_addr;
434
435			mtu = ntohs(icp->icmp_nextmtu);
436			if (!mtu)
437				mtu = ip_next_mtu(mtu, 1);
438
439			if (mtu >= max(296, (tcp_minmss +
440					sizeof(struct tcpiphdr))))
441				tcp_hc_updatemtu(&inc, mtu);
442
443#ifdef DEBUG_MTUDISC
444			printf("MTU for %s reduced to %d\n",
445				inet_ntoa(icmpsrc.sin_addr), mtu);
446#endif
447		}
448
449		/*
450		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
451		 * notification to TCP layer.
452		 */
453		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
454		if (ctlfunc)
455			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
456				   (void *)&icp->icmp_ip);
457		break;
458
459	badcode:
460		icmpstat.icps_badcode++;
461		break;
462
463	case ICMP_ECHO:
464		if (!icmpbmcastecho
465		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
466			icmpstat.icps_bmcastecho++;
467			break;
468		}
469		icp->icmp_type = ICMP_ECHOREPLY;
470		if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
471			goto freeit;
472		else
473			goto reflect;
474
475	case ICMP_TSTAMP:
476		if (!icmpbmcastecho
477		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
478			icmpstat.icps_bmcasttstamp++;
479			break;
480		}
481		if (icmplen < ICMP_TSLEN) {
482			icmpstat.icps_badlen++;
483			break;
484		}
485		icp->icmp_type = ICMP_TSTAMPREPLY;
486		icp->icmp_rtime = iptime();
487		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
488		if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
489			goto freeit;
490		else
491			goto reflect;
492
493	case ICMP_MASKREQ:
494		if (icmpmaskrepl == 0)
495			break;
496		/*
497		 * We are not able to respond with all ones broadcast
498		 * unless we receive it over a point-to-point interface.
499		 */
500		if (icmplen < ICMP_MASKLEN)
501			break;
502		switch (ip->ip_dst.s_addr) {
503
504		case INADDR_BROADCAST:
505		case INADDR_ANY:
506			icmpdst.sin_addr = ip->ip_src;
507			break;
508
509		default:
510			icmpdst.sin_addr = ip->ip_dst;
511		}
512		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
513			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
514		if (ia == 0)
515			break;
516		if (ia->ia_ifp == 0)
517			break;
518		icp->icmp_type = ICMP_MASKREPLY;
519		if (icmpmaskfake == 0)
520			icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
521		else
522			icp->icmp_mask = icmpmaskfake;
523		if (ip->ip_src.s_addr == 0) {
524			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
525			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
526			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
527			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
528		}
529reflect:
530		ip->ip_len += hlen;	/* since ip_input deducts this */
531		icmpstat.icps_reflect++;
532		icmpstat.icps_outhist[icp->icmp_type]++;
533		icmp_reflect(m);
534		return;
535
536	case ICMP_REDIRECT:
537		if (log_redirect) {
538			u_long src, dst, gw;
539
540			src = ntohl(ip->ip_src.s_addr);
541			dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
542			gw = ntohl(icp->icmp_gwaddr.s_addr);
543			printf("icmp redirect from %d.%d.%d.%d: "
544			       "%d.%d.%d.%d => %d.%d.%d.%d\n",
545			       (int)(src >> 24), (int)((src >> 16) & 0xff),
546			       (int)((src >> 8) & 0xff), (int)(src & 0xff),
547			       (int)(dst >> 24), (int)((dst >> 16) & 0xff),
548			       (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
549			       (int)(gw >> 24), (int)((gw >> 16) & 0xff),
550			       (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
551		}
552		/*
553		 * RFC1812 says we must ignore ICMP redirects if we
554		 * are acting as router.
555		 */
556		if (drop_redirect || ipforwarding)
557			break;
558		if (code > 3)
559			goto badcode;
560		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
561		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
562			icmpstat.icps_badlen++;
563			break;
564		}
565		/*
566		 * Short circuit routing redirects to force
567		 * immediate change in the kernel's routing
568		 * tables.  The message is also handed to anyone
569		 * listening on a raw socket (e.g. the routing
570		 * daemon for use in updating its tables).
571		 */
572		icmpgw.sin_addr = ip->ip_src;
573		icmpdst.sin_addr = icp->icmp_gwaddr;
574#ifdef	ICMPPRINTFS
575		if (icmpprintfs) {
576			char buf[4 * sizeof "123"];
577			strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
578
579			printf("redirect dst %s to %s\n",
580			       buf, inet_ntoa(icp->icmp_gwaddr));
581		}
582#endif
583		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
584		rtredirect((struct sockaddr *)&icmpsrc,
585		  (struct sockaddr *)&icmpdst,
586		  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
587		  (struct sockaddr *)&icmpgw);
588		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
589#ifdef IPSEC
590		key_sa_routechange((struct sockaddr *)&icmpsrc);
591#endif
592		break;
593
594	/*
595	 * No kernel processing for the following;
596	 * just fall through to send to raw listener.
597	 */
598	case ICMP_ECHOREPLY:
599	case ICMP_ROUTERADVERT:
600	case ICMP_ROUTERSOLICIT:
601	case ICMP_TSTAMPREPLY:
602	case ICMP_IREQREPLY:
603	case ICMP_MASKREPLY:
604	default:
605		break;
606	}
607
608raw:
609	rip_input(m, off);
610	return;
611
612freeit:
613	m_freem(m);
614}
615
616/*
617 * Reflect the ip packet back to the source
618 */
619static void
620icmp_reflect(m)
621	struct mbuf *m;
622{
623	struct ip *ip = mtod(m, struct ip *);
624	struct ifaddr *ifa;
625	struct ifnet *ifn;
626	struct in_ifaddr *ia;
627	struct in_addr t;
628	struct mbuf *opts = 0;
629	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
630
631	if (!in_canforward(ip->ip_src) &&
632	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
633	     (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
634		m_freem(m);	/* Bad return address */
635		icmpstat.icps_badaddr++;
636		goto done;	/* Ip_output() will check for broadcast */
637	}
638	t = ip->ip_dst;
639	ip->ip_dst = ip->ip_src;
640
641	/*
642	 * Source selection for ICMP replies:
643	 *
644	 * If the incoming packet was addressed directly to one of our
645	 * own addresses, use dst as the src for the reply.
646	 */
647	LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash)
648		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
649			goto match;
650	/*
651	 * If the incoming packet was addressed to one of our broadcast
652	 * addresses, use the first non-broadcast address which corresponds
653	 * to the incoming interface.
654	 */
655	if (m->m_pkthdr.rcvif != NULL &&
656	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
657		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
658			if (ifa->ifa_addr->sa_family != AF_INET)
659				continue;
660			ia = ifatoia(ifa);
661			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
662			    t.s_addr)
663				goto match;
664		}
665	}
666	/*
667	 * If the incoming packet was not addressed directly to us, use
668	 * designated interface for icmp replies specified by sysctl
669	 * net.inet.icmp.reply_src (default not set). Otherwise continue
670	 * with normal source selection.
671	 */
672	if (reply_src[0] != '\0' && (ifn = ifunit(reply_src))) {
673		TAILQ_FOREACH(ifa, &ifn->if_addrhead, ifa_link) {
674			if (ifa->ifa_addr->sa_family != AF_INET)
675				continue;
676			ia = ifatoia(ifa);
677			goto match;
678		}
679	}
680	/*
681	 * If the packet was transiting through us, use the address of
682	 * the interface that is the closest to the packet source.
683	 * When we don't have a route back to the packet source, stop here
684	 * and drop the packet.
685	 */
686	ia = ip_rtaddr(ip->ip_dst);
687	if (ia == NULL) {
688		m_freem(m);
689		icmpstat.icps_noroute++;
690		goto done;
691	}
692match:
693#ifdef MAC
694	mac_reflect_mbuf_icmp(m);
695#endif
696	t = IA_SIN(ia)->sin_addr;
697	ip->ip_src = t;
698	ip->ip_ttl = ip_defttl;
699
700	if (optlen > 0) {
701		register u_char *cp;
702		int opt, cnt;
703		u_int len;
704
705		/*
706		 * Retrieve any source routing from the incoming packet;
707		 * add on any record-route or timestamp options.
708		 */
709		cp = (u_char *) (ip + 1);
710		if ((opts = ip_srcroute()) == 0 &&
711		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
712			opts->m_len = sizeof(struct in_addr);
713			mtod(opts, struct in_addr *)->s_addr = 0;
714		}
715		if (opts) {
716#ifdef ICMPPRINTFS
717		    if (icmpprintfs)
718			    printf("icmp_reflect optlen %d rt %d => ",
719				optlen, opts->m_len);
720#endif
721		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
722			    opt = cp[IPOPT_OPTVAL];
723			    if (opt == IPOPT_EOL)
724				    break;
725			    if (opt == IPOPT_NOP)
726				    len = 1;
727			    else {
728				    if (cnt < IPOPT_OLEN + sizeof(*cp))
729					    break;
730				    len = cp[IPOPT_OLEN];
731				    if (len < IPOPT_OLEN + sizeof(*cp) ||
732				        len > cnt)
733					    break;
734			    }
735			    /*
736			     * Should check for overflow, but it "can't happen"
737			     */
738			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
739				opt == IPOPT_SECURITY) {
740				    bcopy((caddr_t)cp,
741					mtod(opts, caddr_t) + opts->m_len, len);
742				    opts->m_len += len;
743			    }
744		    }
745		    /* Terminate & pad, if necessary */
746		    cnt = opts->m_len % 4;
747		    if (cnt) {
748			    for (; cnt < 4; cnt++) {
749				    *(mtod(opts, caddr_t) + opts->m_len) =
750					IPOPT_EOL;
751				    opts->m_len++;
752			    }
753		    }
754#ifdef ICMPPRINTFS
755		    if (icmpprintfs)
756			    printf("%d\n", opts->m_len);
757#endif
758		}
759		/*
760		 * Now strip out original options by copying rest of first
761		 * mbuf's data back, and adjust the IP length.
762		 */
763		ip->ip_len -= optlen;
764		ip->ip_v = IPVERSION;
765		ip->ip_hl = 5;
766		m->m_len -= optlen;
767		if (m->m_flags & M_PKTHDR)
768			m->m_pkthdr.len -= optlen;
769		optlen += sizeof(struct ip);
770		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
771			 (unsigned)(m->m_len - sizeof(struct ip)));
772	}
773	m_tag_delete_nonpersistent(m);
774	m->m_flags &= ~(M_BCAST|M_MCAST);
775	icmp_send(m, opts);
776done:
777	if (opts)
778		(void)m_free(opts);
779}
780
781/*
782 * Send an icmp packet back to the ip level,
783 * after supplying a checksum.
784 */
785static void
786icmp_send(m, opts)
787	register struct mbuf *m;
788	struct mbuf *opts;
789{
790	register struct ip *ip = mtod(m, struct ip *);
791	register int hlen;
792	register struct icmp *icp;
793
794	hlen = ip->ip_hl << 2;
795	m->m_data += hlen;
796	m->m_len -= hlen;
797	icp = mtod(m, struct icmp *);
798	icp->icmp_cksum = 0;
799	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
800	m->m_data -= hlen;
801	m->m_len += hlen;
802	m->m_pkthdr.rcvif = (struct ifnet *)0;
803#ifdef ICMPPRINTFS
804	if (icmpprintfs) {
805		char buf[4 * sizeof "123"];
806		strcpy(buf, inet_ntoa(ip->ip_dst));
807		printf("icmp_send dst %s src %s\n",
808		       buf, inet_ntoa(ip->ip_src));
809	}
810#endif
811	(void) ip_output(m, opts, NULL, 0, NULL, NULL);
812}
813
814n_time
815iptime()
816{
817	struct timeval atv;
818	u_long t;
819
820	getmicrotime(&atv);
821	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
822	return (htonl(t));
823}
824
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Zero is returned when there is no such plateau: no larger one for an
 * MTU of 65535 or more, no smaller one for an MTU of 68 or less.  A
 * negative MTU is not a valid input and also yields zero.
 */
static int
ip_next_mtu(int mtu, int dir)
{
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/* Find the first (largest) plateau that does not exceed mtu. */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	/*
	 * Only a negative mtu gets past the terminating 0 entry.  Bail
	 * out here rather than index one past the end of the table.
	 */
	if (i == ntab)
		return 0;

	if (dir < 0) {
		/* Next larger plateau, if any. */
		if (i == 0)
			return 0;
		return mtutab[i - 1];
	}

	/* Next smaller plateau, if any. */
	if (mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];	/* mtu lies between two plateaus */
	return mtutab[i + 1];		/* mtu sits exactly on a plateau */
}
862
863
864/*
865 * badport_bandlim() - check for ICMP bandwidth limit
866 *
867 *	Return 0 if it is ok to send an ICMP error response, -1 if we have
868 *	hit our bandwidth limit and it is not ok.
869 *
870 *	If icmplim is <= 0, the feature is disabled and 0 is returned.
871 *
872 *	For now we separate the TCP and UDP subsystems w/ different 'which'
873 *	values.  We may eventually remove this separation (and simplify the
874 *	code further).
875 *
876 *	Note that the printing of the error message is delayed so we can
877 *	properly print the icmp error rate that the system was trying to do
878 *	(i.e. 22000/100 pps, etc...).  This can cause long delays in printing
879 *	the 'final' error, but it doesn't make sense to solve the printing
880 *	delay with more complex code.
881 */
882
883int
884badport_bandlim(int which)
885{
886#define	N(a)	(sizeof (a) / sizeof (a[0]))
887	static struct rate {
888		const char	*type;
889		struct timeval	lasttime;
890		int		curpps;;
891	} rates[BANDLIM_MAX+1] = {
892		{ "icmp unreach response" },
893		{ "icmp ping response" },
894		{ "icmp tstamp response" },
895		{ "closed port RST response" },
896		{ "open port RST response" }
897	};
898
899	/*
900	 * Return ok status if feature disabled or argument out of range.
901	 */
902	if (icmplim > 0 && (u_int) which < N(rates)) {
903		struct rate *r = &rates[which];
904		int opps = r->curpps;
905
906		if (!ppsratecheck(&r->lasttime, &r->curpps, icmplim))
907			return -1;	/* discard packet */
908		/*
909		 * If we've dropped below the threshold after having
910		 * rate-limited traffic print the message.  This preserves
911		 * the previous behaviour at the expense of added complexity.
912		 */
913		if (icmplim_output && opps > icmplim)
914			printf("Limiting %s from %d to %d packets/sec\n",
915				r->type, opps, icmplim);
916	}
917	return 0;			/* okay to send packet */
918#undef N
919}
920