ip_icmp.c revision 133874
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
30 * $FreeBSD: head/sys/netinet/ip_icmp.c 133874 2004-08-16 18:32:07Z rwatson $
31 */
32
33#include "opt_ipsec.h"
34#include "opt_mac.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/mac.h>
39#include <sys/mbuf.h>
40#include <sys/protosw.h>
41#include <sys/socket.h>
42#include <sys/time.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45
46#include <net/if.h>
47#include <net/if_types.h>
48#include <net/route.h>
49
50#include <netinet/in.h>
51#include <netinet/in_pcb.h>
52#include <netinet/in_systm.h>
53#include <netinet/in_var.h>
54#include <netinet/ip.h>
55#include <netinet/ip_icmp.h>
56#include <netinet/ip_var.h>
57#include <netinet/tcp.h>
58#include <netinet/tcp_var.h>
59#include <netinet/tcpip.h>
60#include <netinet/icmp_var.h>
61
62#ifdef IPSEC
63#include <netinet6/ipsec.h>
64#include <netkey/key.h>
65#endif
66
67#ifdef FAST_IPSEC
68#include <netipsec/ipsec.h>
69#include <netipsec/key.h>
70#define	IPSEC
71#endif
72
73#include <machine/in_cksum.h>
74
/*
 * ICMP routines: error generation, receive packet processing,
 * routines to turn packets around and send them back to the
 * originator, and host table maintenance routines.
 */
80
81struct	icmpstat icmpstat;
82SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
83	&icmpstat, icmpstat, "");
84
85static int	icmpmaskrepl = 0;
86SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
87	&icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets.");
88
89static u_int	icmpmaskfake = 0;
90SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW,
91	&icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets.");
92
93static int	drop_redirect = 0;
94SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
95	&drop_redirect, 0, "");
96
97static int	log_redirect = 0;
98SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
99	&log_redirect, 0, "");
100
101static int      icmplim = 200;
102SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
103	&icmplim, 0, "");
104
105static int	icmplim_output = 1;
106SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
107	&icmplim_output, 0, "");
108
109static char	reply_src[IFNAMSIZ];
110SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
111	&reply_src, IFNAMSIZ, "icmp reply source for non-local packets.");
112
113/*
114 * ICMP broadcast echo sysctl
115 */
116
117static int	icmpbmcastecho = 0;
118SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
119	&icmpbmcastecho, 0, "");
120
121
122#ifdef ICMPPRINTFS
123int	icmpprintfs = 0;
124#endif
125
126static void	icmp_reflect(struct mbuf *);
127static void	icmp_send(struct mbuf *, struct mbuf *);
128static int	ip_next_mtu(int, int);
129
130extern	struct protosw inetsw[];
131
132/*
133 * Generate an error packet of type error
134 * in response to bad packet ip.
135 */
136void
137icmp_error(n, type, code, dest, destifp)
138	struct mbuf *n;
139	int type, code;
140	n_long dest;
141	struct ifnet *destifp;
142{
143	register struct ip *oip = mtod(n, struct ip *), *nip;
144	register unsigned oiplen = oip->ip_hl << 2;
145	register struct icmp *icp;
146	register struct mbuf *m;
147	unsigned icmplen;
148
149#ifdef ICMPPRINTFS
150	if (icmpprintfs)
151		printf("icmp_error(%p, %x, %d)\n", oip, type, code);
152#endif
153	if (type != ICMP_REDIRECT)
154		icmpstat.icps_error++;
155	/*
156	 * Don't send error if the original packet was encrypted.
157	 * Don't send error if not the first fragment of message.
158	 * Don't error if the old packet protocol was ICMP
159	 * error message, only known informational types.
160	 */
161	if (n->m_flags & M_DECRYPTED)
162		goto freeit;
163	if (oip->ip_off &~ (IP_MF|IP_DF))
164		goto freeit;
165	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
166	  n->m_len >= oiplen + ICMP_MINLEN &&
167	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
168		icmpstat.icps_oldicmp++;
169		goto freeit;
170	}
171	/* Don't send error in response to a multicast or broadcast packet */
172	if (n->m_flags & (M_BCAST|M_MCAST))
173		goto freeit;
174	/*
175	 * First, formulate icmp message
176	 */
177	m = m_gethdr(M_DONTWAIT, MT_HEADER);
178	if (m == NULL)
179		goto freeit;
180#ifdef MAC
181	mac_create_mbuf_netlayer(n, m);
182#endif
183	icmplen = min(oiplen + 8, oip->ip_len);
184	if (icmplen < sizeof(struct ip))
185		panic("icmp_error: bad length");
186	m->m_len = icmplen + ICMP_MINLEN;
187	MH_ALIGN(m, m->m_len);
188	icp = mtod(m, struct icmp *);
189	if ((u_int)type > ICMP_MAXTYPE)
190		panic("icmp_error");
191	icmpstat.icps_outhist[type]++;
192	icp->icmp_type = type;
193	if (type == ICMP_REDIRECT)
194		icp->icmp_gwaddr.s_addr = dest;
195	else {
196		icp->icmp_void = 0;
197		/*
198		 * The following assignments assume an overlay with the
199		 * zeroed icmp_void field.
200		 */
201		if (type == ICMP_PARAMPROB) {
202			icp->icmp_pptr = code;
203			code = 0;
204		} else if (type == ICMP_UNREACH &&
205			code == ICMP_UNREACH_NEEDFRAG && destifp) {
206			icp->icmp_nextmtu = htons(destifp->if_mtu);
207		}
208	}
209
210	icp->icmp_code = code;
211	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
212	nip = &icp->icmp_ip;
213
214	/*
215	 * Convert fields to network representation.
216	 */
217	nip->ip_len = htons(nip->ip_len);
218	nip->ip_off = htons(nip->ip_off);
219
220	/*
221	 * Now, copy old ip header (without options)
222	 * in front of icmp message.
223	 */
224	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
225		panic("icmp len");
226	/*
227	 * If the original mbuf was meant to bypass the firewall, the error
228	 * reply should bypass as well.
229	 */
230	m->m_flags |= n->m_flags & M_SKIP_FIREWALL;
231	m->m_data -= sizeof(struct ip);
232	m->m_len += sizeof(struct ip);
233	m->m_pkthdr.len = m->m_len;
234	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
235	nip = mtod(m, struct ip *);
236	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
237	nip->ip_len = m->m_len;
238	nip->ip_v = IPVERSION;
239	nip->ip_hl = 5;
240	nip->ip_p = IPPROTO_ICMP;
241	nip->ip_tos = 0;
242	icmp_reflect(m);
243
244freeit:
245	m_freem(n);
246}
247
248/*
249 * Process a received ICMP message.
250 */
251void
252icmp_input(m, off)
253	struct mbuf *m;
254	int off;
255{
256	struct icmp *icp;
257	struct in_ifaddr *ia;
258	struct ip *ip = mtod(m, struct ip *);
259	struct sockaddr_in icmpsrc, icmpdst, icmpgw;
260	int hlen = off;
261	int icmplen = ip->ip_len;
262	int i, code;
263	void (*ctlfunc)(int, struct sockaddr *, void *);
264
265	/*
266	 * Locate icmp structure in mbuf, and check
267	 * that not corrupted and of at least minimum length.
268	 */
269#ifdef ICMPPRINTFS
270	if (icmpprintfs) {
271		char buf[4 * sizeof "123"];
272		strcpy(buf, inet_ntoa(ip->ip_src));
273		printf("icmp_input from %s to %s, len %d\n",
274		       buf, inet_ntoa(ip->ip_dst), icmplen);
275	}
276#endif
277	if (icmplen < ICMP_MINLEN) {
278		icmpstat.icps_tooshort++;
279		goto freeit;
280	}
281	i = hlen + min(icmplen, ICMP_ADVLENMIN);
282	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
283		icmpstat.icps_tooshort++;
284		return;
285	}
286	ip = mtod(m, struct ip *);
287	m->m_len -= hlen;
288	m->m_data += hlen;
289	icp = mtod(m, struct icmp *);
290	if (in_cksum(m, icmplen)) {
291		icmpstat.icps_checksum++;
292		goto freeit;
293	}
294	m->m_len += hlen;
295	m->m_data -= hlen;
296
297	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
298		/*
299		 * Deliver very specific ICMP type only.
300		 */
301		switch (icp->icmp_type) {
302		case ICMP_UNREACH:
303		case ICMP_TIMXCEED:
304			break;
305		default:
306			goto freeit;
307		}
308	}
309
310#ifdef ICMPPRINTFS
311	if (icmpprintfs)
312		printf("icmp_input, type %d code %d\n", icp->icmp_type,
313		    icp->icmp_code);
314#endif
315
316	/*
317	 * Message type specific processing.
318	 */
319	if (icp->icmp_type > ICMP_MAXTYPE)
320		goto raw;
321
322	/* Initialize */
323	bzero(&icmpsrc, sizeof(icmpsrc));
324	icmpsrc.sin_len = sizeof(struct sockaddr_in);
325	icmpsrc.sin_family = AF_INET;
326	bzero(&icmpdst, sizeof(icmpdst));
327	icmpdst.sin_len = sizeof(struct sockaddr_in);
328	icmpdst.sin_family = AF_INET;
329	bzero(&icmpgw, sizeof(icmpgw));
330	icmpgw.sin_len = sizeof(struct sockaddr_in);
331	icmpgw.sin_family = AF_INET;
332
333	icmpstat.icps_inhist[icp->icmp_type]++;
334	code = icp->icmp_code;
335	switch (icp->icmp_type) {
336
337	case ICMP_UNREACH:
338		switch (code) {
339			case ICMP_UNREACH_NET:
340			case ICMP_UNREACH_HOST:
341			case ICMP_UNREACH_SRCFAIL:
342			case ICMP_UNREACH_NET_UNKNOWN:
343			case ICMP_UNREACH_HOST_UNKNOWN:
344			case ICMP_UNREACH_ISOLATED:
345			case ICMP_UNREACH_TOSNET:
346			case ICMP_UNREACH_TOSHOST:
347			case ICMP_UNREACH_HOST_PRECEDENCE:
348			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
349				code = PRC_UNREACH_NET;
350				break;
351
352			case ICMP_UNREACH_NEEDFRAG:
353				code = PRC_MSGSIZE;
354				break;
355
356			/*
357			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
358			 * Treat subcodes 2,3 as immediate RST
359			 */
360			case ICMP_UNREACH_PROTOCOL:
361			case ICMP_UNREACH_PORT:
362				code = PRC_UNREACH_PORT;
363				break;
364
365			case ICMP_UNREACH_NET_PROHIB:
366			case ICMP_UNREACH_HOST_PROHIB:
367			case ICMP_UNREACH_FILTER_PROHIB:
368				code = PRC_UNREACH_ADMIN_PROHIB;
369				break;
370
371			default:
372				goto badcode;
373		}
374		goto deliver;
375
376	case ICMP_TIMXCEED:
377		if (code > 1)
378			goto badcode;
379		code += PRC_TIMXCEED_INTRANS;
380		goto deliver;
381
382	case ICMP_PARAMPROB:
383		if (code > 1)
384			goto badcode;
385		code = PRC_PARAMPROB;
386		goto deliver;
387
388	case ICMP_SOURCEQUENCH:
389		if (code)
390			goto badcode;
391		code = PRC_QUENCH;
392	deliver:
393		/*
394		 * Problem with datagram; advise higher level routines.
395		 */
396		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
397		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
398			icmpstat.icps_badlen++;
399			goto freeit;
400		}
401		icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
402		/* Discard ICMP's in response to multicast packets */
403		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
404			goto badcode;
405#ifdef ICMPPRINTFS
406		if (icmpprintfs)
407			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
408#endif
409		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
410
411		/*
412		 * MTU discovery:
413		 * If we got a needfrag and there is a host route to the
414		 * original destination, and the MTU is not locked, then
415		 * set the MTU in the route to the suggested new value
416		 * (if given) and then notify as usual.  The ULPs will
417		 * notice that the MTU has changed and adapt accordingly.
418		 * If no new MTU was suggested, then we guess a new one
419		 * less than the current value.  If the new MTU is
420		 * unreasonably small (defined by sysctl tcp_minmss), then
421		 * we don't update the MTU value.
422		 *
423		 * XXX: All this should be done in tcp_mtudisc() because
424		 * the way we do it now, everyone can send us bogus ICMP
425		 * MSGSIZE packets for any destination. By doing this far
426		 * higher in the chain we have a matching tcp connection.
427		 * Thus spoofing is much harder. However there is no easy
428		 * non-hackish way to pass the new MTU up to tcp_mtudisc().
429		 * Also see next XXX regarding IPv4 AH TCP.
430		 */
431		if (code == PRC_MSGSIZE) {
432			int mtu;
433			struct in_conninfo inc;
434
435			bzero(&inc, sizeof(inc));
436			inc.inc_flags = 0; /* IPv4 */
437			inc.inc_faddr = icmpsrc.sin_addr;
438
439			mtu = ntohs(icp->icmp_nextmtu);
440			if (!mtu)
441				mtu = ip_next_mtu(mtu, 1);
442
443			if (mtu >= max(296, (tcp_minmss +
444					sizeof(struct tcpiphdr))))
445				tcp_hc_updatemtu(&inc, mtu);
446
447#ifdef DEBUG_MTUDISC
448			printf("MTU for %s reduced to %d\n",
449				inet_ntoa(icmpsrc.sin_addr), mtu);
450#endif
451		}
452
453		/*
454		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
455		 * notification to TCP layer.
456		 */
457		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
458		if (ctlfunc)
459			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
460				   (void *)&icp->icmp_ip);
461		break;
462
463	badcode:
464		icmpstat.icps_badcode++;
465		break;
466
467	case ICMP_ECHO:
468		if (!icmpbmcastecho
469		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
470			icmpstat.icps_bmcastecho++;
471			break;
472		}
473		icp->icmp_type = ICMP_ECHOREPLY;
474		if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
475			goto freeit;
476		else
477			goto reflect;
478
479	case ICMP_TSTAMP:
480		if (!icmpbmcastecho
481		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
482			icmpstat.icps_bmcasttstamp++;
483			break;
484		}
485		if (icmplen < ICMP_TSLEN) {
486			icmpstat.icps_badlen++;
487			break;
488		}
489		icp->icmp_type = ICMP_TSTAMPREPLY;
490		icp->icmp_rtime = iptime();
491		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
492		if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
493			goto freeit;
494		else
495			goto reflect;
496
497	case ICMP_MASKREQ:
498		if (icmpmaskrepl == 0)
499			break;
500		/*
501		 * We are not able to respond with all ones broadcast
502		 * unless we receive it over a point-to-point interface.
503		 */
504		if (icmplen < ICMP_MASKLEN)
505			break;
506		switch (ip->ip_dst.s_addr) {
507
508		case INADDR_BROADCAST:
509		case INADDR_ANY:
510			icmpdst.sin_addr = ip->ip_src;
511			break;
512
513		default:
514			icmpdst.sin_addr = ip->ip_dst;
515		}
516		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
517			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
518		if (ia == 0)
519			break;
520		if (ia->ia_ifp == 0)
521			break;
522		icp->icmp_type = ICMP_MASKREPLY;
523		if (icmpmaskfake == 0)
524			icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
525		else
526			icp->icmp_mask = icmpmaskfake;
527		if (ip->ip_src.s_addr == 0) {
528			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
529			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
530			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
531			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
532		}
533reflect:
534		ip->ip_len += hlen;	/* since ip_input deducts this */
535		icmpstat.icps_reflect++;
536		icmpstat.icps_outhist[icp->icmp_type]++;
537		icmp_reflect(m);
538		return;
539
540	case ICMP_REDIRECT:
541		if (log_redirect) {
542			u_long src, dst, gw;
543
544			src = ntohl(ip->ip_src.s_addr);
545			dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
546			gw = ntohl(icp->icmp_gwaddr.s_addr);
547			printf("icmp redirect from %d.%d.%d.%d: "
548			       "%d.%d.%d.%d => %d.%d.%d.%d\n",
549			       (int)(src >> 24), (int)((src >> 16) & 0xff),
550			       (int)((src >> 8) & 0xff), (int)(src & 0xff),
551			       (int)(dst >> 24), (int)((dst >> 16) & 0xff),
552			       (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
553			       (int)(gw >> 24), (int)((gw >> 16) & 0xff),
554			       (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
555		}
556		/*
557		 * RFC1812 says we must ignore ICMP redirects if we
558		 * are acting as router.
559		 */
560		if (drop_redirect || ipforwarding)
561			break;
562		if (code > 3)
563			goto badcode;
564		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
565		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
566			icmpstat.icps_badlen++;
567			break;
568		}
569		/*
570		 * Short circuit routing redirects to force
571		 * immediate change in the kernel's routing
572		 * tables.  The message is also handed to anyone
573		 * listening on a raw socket (e.g. the routing
574		 * daemon for use in updating its tables).
575		 */
576		icmpgw.sin_addr = ip->ip_src;
577		icmpdst.sin_addr = icp->icmp_gwaddr;
578#ifdef	ICMPPRINTFS
579		if (icmpprintfs) {
580			char buf[4 * sizeof "123"];
581			strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
582
583			printf("redirect dst %s to %s\n",
584			       buf, inet_ntoa(icp->icmp_gwaddr));
585		}
586#endif
587		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
588		rtredirect((struct sockaddr *)&icmpsrc,
589		  (struct sockaddr *)&icmpdst,
590		  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
591		  (struct sockaddr *)&icmpgw);
592		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
593#ifdef IPSEC
594		key_sa_routechange((struct sockaddr *)&icmpsrc);
595#endif
596		break;
597
598	/*
599	 * No kernel processing for the following;
600	 * just fall through to send to raw listener.
601	 */
602	case ICMP_ECHOREPLY:
603	case ICMP_ROUTERADVERT:
604	case ICMP_ROUTERSOLICIT:
605	case ICMP_TSTAMPREPLY:
606	case ICMP_IREQREPLY:
607	case ICMP_MASKREPLY:
608	default:
609		break;
610	}
611
612raw:
613	rip_input(m, off);
614	return;
615
616freeit:
617	m_freem(m);
618}
619
620/*
621 * Reflect the ip packet back to the source
622 */
623static void
624icmp_reflect(m)
625	struct mbuf *m;
626{
627	struct ip *ip = mtod(m, struct ip *);
628	struct ifaddr *ifa;
629	struct ifnet *ifn;
630	struct in_ifaddr *ia;
631	struct in_addr t;
632	struct mbuf *opts = 0;
633	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
634
635	if (!in_canforward(ip->ip_src) &&
636	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
637	     (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
638		m_freem(m);	/* Bad return address */
639		icmpstat.icps_badaddr++;
640		goto done;	/* Ip_output() will check for broadcast */
641	}
642	t = ip->ip_dst;
643	ip->ip_dst = ip->ip_src;
644
645	/*
646	 * Source selection for ICMP replies:
647	 *
648	 * If the incoming packet was addressed directly to one of our
649	 * own addresses, use dst as the src for the reply.
650	 */
651	LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash)
652		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
653			goto match;
654	/*
655	 * If the incoming packet was addressed to one of our broadcast
656	 * addresses, use the first non-broadcast address which corresponds
657	 * to the incoming interface.
658	 */
659	if (m->m_pkthdr.rcvif != NULL &&
660	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
661		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
662			if (ifa->ifa_addr->sa_family != AF_INET)
663				continue;
664			ia = ifatoia(ifa);
665			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
666			    t.s_addr)
667				goto match;
668		}
669	}
670	/*
671	 * If the incoming packet was not addressed directly to us, use
672	 * designated interface for icmp replies specified by sysctl
673	 * net.inet.icmp.reply_src (default not set). Otherwise continue
674	 * with normal source selection.
675	 */
676	if (reply_src[0] != '\0' && (ifn = ifunit(reply_src))) {
677		TAILQ_FOREACH(ifa, &ifn->if_addrhead, ifa_link) {
678			if (ifa->ifa_addr->sa_family != AF_INET)
679				continue;
680			ia = ifatoia(ifa);
681			goto match;
682		}
683	}
684	/*
685	 * If the packet was transiting through us, use the address of
686	 * the interface that is the closest to the packet source.
687	 * When we don't have a route back to the packet source, stop here
688	 * and drop the packet.
689	 */
690	ia = ip_rtaddr(ip->ip_dst);
691	if (ia == NULL) {
692		m_freem(m);
693		icmpstat.icps_noroute++;
694		goto done;
695	}
696match:
697#ifdef MAC
698	mac_reflect_mbuf_icmp(m);
699#endif
700	t = IA_SIN(ia)->sin_addr;
701	ip->ip_src = t;
702	ip->ip_ttl = ip_defttl;
703
704	if (optlen > 0) {
705		register u_char *cp;
706		int opt, cnt;
707		u_int len;
708
709		/*
710		 * Retrieve any source routing from the incoming packet;
711		 * add on any record-route or timestamp options.
712		 */
713		cp = (u_char *) (ip + 1);
714		if ((opts = ip_srcroute()) == 0 &&
715		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
716			opts->m_len = sizeof(struct in_addr);
717			mtod(opts, struct in_addr *)->s_addr = 0;
718		}
719		if (opts) {
720#ifdef ICMPPRINTFS
721		    if (icmpprintfs)
722			    printf("icmp_reflect optlen %d rt %d => ",
723				optlen, opts->m_len);
724#endif
725		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
726			    opt = cp[IPOPT_OPTVAL];
727			    if (opt == IPOPT_EOL)
728				    break;
729			    if (opt == IPOPT_NOP)
730				    len = 1;
731			    else {
732				    if (cnt < IPOPT_OLEN + sizeof(*cp))
733					    break;
734				    len = cp[IPOPT_OLEN];
735				    if (len < IPOPT_OLEN + sizeof(*cp) ||
736				        len > cnt)
737					    break;
738			    }
739			    /*
740			     * Should check for overflow, but it "can't happen"
741			     */
742			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
743				opt == IPOPT_SECURITY) {
744				    bcopy((caddr_t)cp,
745					mtod(opts, caddr_t) + opts->m_len, len);
746				    opts->m_len += len;
747			    }
748		    }
749		    /* Terminate & pad, if necessary */
750		    cnt = opts->m_len % 4;
751		    if (cnt) {
752			    for (; cnt < 4; cnt++) {
753				    *(mtod(opts, caddr_t) + opts->m_len) =
754					IPOPT_EOL;
755				    opts->m_len++;
756			    }
757		    }
758#ifdef ICMPPRINTFS
759		    if (icmpprintfs)
760			    printf("%d\n", opts->m_len);
761#endif
762		}
763		/*
764		 * Now strip out original options by copying rest of first
765		 * mbuf's data back, and adjust the IP length.
766		 */
767		ip->ip_len -= optlen;
768		ip->ip_v = IPVERSION;
769		ip->ip_hl = 5;
770		m->m_len -= optlen;
771		if (m->m_flags & M_PKTHDR)
772			m->m_pkthdr.len -= optlen;
773		optlen += sizeof(struct ip);
774		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
775			 (unsigned)(m->m_len - sizeof(struct ip)));
776	}
777	m_tag_delete_nonpersistent(m);
778	m->m_flags &= ~(M_BCAST|M_MCAST);
779	icmp_send(m, opts);
780done:
781	if (opts)
782		(void)m_free(opts);
783}
784
785/*
786 * Send an icmp packet back to the ip level,
787 * after supplying a checksum.
788 */
789static void
790icmp_send(m, opts)
791	register struct mbuf *m;
792	struct mbuf *opts;
793{
794	register struct ip *ip = mtod(m, struct ip *);
795	register int hlen;
796	register struct icmp *icp;
797
798	hlen = ip->ip_hl << 2;
799	m->m_data += hlen;
800	m->m_len -= hlen;
801	icp = mtod(m, struct icmp *);
802	icp->icmp_cksum = 0;
803	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
804	m->m_data -= hlen;
805	m->m_len += hlen;
806	m->m_pkthdr.rcvif = (struct ifnet *)0;
807#ifdef ICMPPRINTFS
808	if (icmpprintfs) {
809		char buf[4 * sizeof "123"];
810		strcpy(buf, inet_ntoa(ip->ip_dst));
811		printf("icmp_send dst %s src %s\n",
812		       buf, inet_ntoa(ip->ip_src));
813	}
814#endif
815	(void) ip_output(m, opts, NULL, 0, NULL, NULL);
816}
817
818n_time
819iptime()
820{
821	struct timeval atv;
822	u_long t;
823
824	getmicrotime(&atv);
825	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
826	return (htonl(t));
827}
828
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no plateau in the requested direction: MTU is
 * already at or above the largest entry (DIR < 0), or at or below the
 * smallest one (DIR >= 0).  A negative MTU also yields 0.
 */
static int
ip_next_mtu(int mtu, int dir)
{
	/* Plateaus in descending order; the trailing 0 is a terminator. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/* Find the first (largest) plateau that does not exceed mtu. */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* i == 0: nothing larger than the top plateau exists. */
		if (i == 0)
			return 0;
		return mtutab[i - 1];
	}

	/*
	 * A negative mtu matches no entry and leaves i == ntab; indexing
	 * the table there would read past its end, so stop here.  The
	 * terminator entry likewise means nothing smaller is available.
	 */
	if (i == ntab || mtutab[i] == 0)
		return 0;

	/* Between plateaus: the one found is already the next smaller. */
	if (mtu > mtutab[i])
		return mtutab[i];

	/* Exactly on a plateau: step down one (may be the 0 terminator). */
	return mtutab[i + 1];
}
866
867
868/*
869 * badport_bandlim() - check for ICMP bandwidth limit
870 *
871 *	Return 0 if it is ok to send an ICMP error response, -1 if we have
872 *	hit our bandwidth limit and it is not ok.
873 *
874 *	If icmplim is <= 0, the feature is disabled and 0 is returned.
875 *
876 *	For now we separate the TCP and UDP subsystems w/ different 'which'
877 *	values.  We may eventually remove this separation (and simplify the
878 *	code further).
879 *
880 *	Note that the printing of the error message is delayed so we can
881 *	properly print the icmp error rate that the system was trying to do
882 *	(i.e. 22000/100 pps, etc...).  This can cause long delays in printing
883 *	the 'final' error, but it doesn't make sense to solve the printing
884 *	delay with more complex code.
885 */
886
887int
888badport_bandlim(int which)
889{
890#define	N(a)	(sizeof (a) / sizeof (a[0]))
891	static struct rate {
892		const char	*type;
893		struct timeval	lasttime;
894		int		curpps;
895	} rates[BANDLIM_MAX+1] = {
896		{ "icmp unreach response" },
897		{ "icmp ping response" },
898		{ "icmp tstamp response" },
899		{ "closed port RST response" },
900		{ "open port RST response" }
901	};
902
903	/*
904	 * Return ok status if feature disabled or argument out of range.
905	 */
906	if (icmplim > 0 && (u_int) which < N(rates)) {
907		struct rate *r = &rates[which];
908		int opps = r->curpps;
909
910		if (!ppsratecheck(&r->lasttime, &r->curpps, icmplim))
911			return -1;	/* discard packet */
912		/*
913		 * If we've dropped below the threshold after having
914		 * rate-limited traffic print the message.  This preserves
915		 * the previous behaviour at the expense of added complexity.
916		 */
917		if (icmplim_output && opps > icmplim)
918			printf("Limiting %s from %d to %d packets/sec\n",
919				r->type, opps, icmplim);
920	}
921	return 0;			/* okay to send packet */
922#undef N
923}
924