1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
61 */
62/*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections.  This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/mbuf.h>
72#include <sys/mcache.h>
73#include <sys/protosw.h>
74#include <sys/socket.h>
75#include <sys/time.h>
76#include <sys/kernel.h>
77#include <sys/sysctl.h>
78
79#include <machine/endian.h>
80
81#include <net/if.h>
82#include <net/route.h>
83
84#define _IP_VHL
85#include <netinet/in.h>
86#include <netinet/in_systm.h>
87#include <netinet/in_var.h>
88#include <netinet/ip.h>
89#include <netinet/ip_icmp.h>
90#include <netinet/ip_var.h>
91#include <netinet/icmp_var.h>
92#include <netinet/tcp.h>
93#include <netinet/tcp_fsm.h>
94#include <netinet/tcp_seq.h>
95#include <netinet/tcp_timer.h>
96#include <netinet/tcp_var.h>
97#include <netinet/tcpip.h>
98
99#if IPSEC
100#include <netinet6/ipsec.h>
101#include <netkey/key.h>
102#endif
103
/*
 * XXX This definition belongs in sys/mbuf.h.  The flag marks packets
 * generated by the firewall so that they are not passed through the
 * firewall again and loop forever.
 */
107#ifndef M_SKIP_FIREWALL
108#define M_SKIP_FIREWALL         0x4000
109#endif
110
111#if CONFIG_MACF_NET
112#include <security/mac_framework.h>
113#endif /* MAC_NET */
114
115
116/*
117 * ICMP routines: error generation, receive packet processing, and
118 * routines to turnaround packets back to the originator, and
119 * host table maintenance routines.
120 */
121
/* ICMP statistics counters; exported read-only through sysctl. */
struct	icmpstat icmpstat;
SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
	&icmpstat, icmpstat, "");

/* When zero (the default), icmp_input() does not answer ICMP_MASKREQ. */
static int	icmpmaskrepl = 0;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW | CTLFLAG_LOCKED,
	&icmpmaskrepl, 0, "");

/* When zero (the default), icmp_input() does not answer ICMP_TSTAMP. */
static int	icmptimestamp = 0;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW | CTLFLAG_LOCKED,
	&icmptimestamp, 0, "");

/* When non-zero, icmp_input() does not act on received ICMP_REDIRECT messages. */
static int	drop_redirect = 0;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
	&drop_redirect, 0, "");

/* When non-zero, icmp_input() printf()s every received ICMP_REDIRECT. */
static int	log_redirect = 0;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
	&log_redirect, 0, "");
141
#if ICMP_BANDLIM

/* Default values in case CONFIG_ICMP_BANDLIM is not defined in the MASTER file */
#ifndef CONFIG_ICMP_BANDLIM
#if !CONFIG_EMBEDDED
#define CONFIG_ICMP_BANDLIM 250
#else /* CONFIG_EMBEDDED */
#define CONFIG_ICMP_BANDLIM 50
#endif /* CONFIG_EMBEDDED */
#endif /* CONFIG_ICMP_BANDLIM */

/*
 * ICMP error-response bandwidth limiting sysctl.  If not enabled, sysctl
 *      variable content is -1 and read-only.
 *
 * badport_bandlim() compares its per-category packet counters against
 * this value; a value <= 0 disables the limiting there.
 */

static int      icmplim = CONFIG_ICMP_BANDLIM;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW | CTLFLAG_LOCKED,
	&icmplim, 0, "");

#else /* ICMP_BANDLIM */

/* Limiting compiled out: expose a fixed, read-only -1. */
static int      icmplim = -1;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD | CTLFLAG_LOCKED,
	&icmplim, 0, "");

#endif /* ICMP_BANDLIM */
169
/*
 * ICMP broadcast echo sysctl
 *
 * When zero, icmp_input() ignores ICMP_ECHO and ICMP_TSTAMP requests that
 * arrived flagged M_MCAST or M_BCAST.  Enabled (1) by default.
 */

static int	icmpbmcastecho = 1;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW | CTLFLAG_LOCKED,
	&icmpbmcastecho, 0, "");


#if ICMPPRINTFS
/* Run-time switch for the debugging printf()s compiled in under ICMPPRINTFS. */
int	icmpprintfs = 0;
#endif

/* Forward declarations of the file-local reply helpers defined below. */
static void	icmp_reflect(struct mbuf *);
static void	icmp_send(struct mbuf *, struct mbuf *);

extern	struct protosw inetsw[];
187
188/*
189 * Generate an error packet of type error
190 * in response to bad packet ip.
191 */
192void
193icmp_error(
194	struct mbuf *n,
195	int type,
196	int code,
197	n_long dest,
198	u_int32_t nextmtu)
199{
200	struct ip *oip = mtod(n, struct ip *), *nip;
201	unsigned oiplen;
202	struct icmp *icp;
203	struct mbuf *m;
204	unsigned icmplen;
205
206	/* Expect 32-bit aligned data pointer on strict-align platforms */
207	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(n);
208
209	oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
210
211#if ICMPPRINTFS
212	if (icmpprintfs)
213		printf("icmp_error(%p, %x, %d)\n", oip, type, code);
214#endif
215	if (type != ICMP_REDIRECT)
216		icmpstat.icps_error++;
217	/*
218	 * Don't send error if not the first fragment of message.
219	 * Don't error if the old packet protocol was ICMP
220	 * error message, only known informational types.
221	 */
222	if (oip->ip_off &~ (IP_MF|IP_DF))
223		goto freeit;
224	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
225	  n->m_len >= oiplen + ICMP_MINLEN &&
226	  !ICMP_INFOTYPE(((struct icmp *)(void *)((caddr_t)oip + oiplen))->
227	  icmp_type)) {
228		icmpstat.icps_oldicmp++;
229		goto freeit;
230	}
231	/* Don't send error in response to a multicast or broadcast packet */
232	if (n->m_flags & (M_BCAST|M_MCAST))
233		goto freeit;
234	/*
235	 * First, formulate icmp message
236	 */
237	m = m_gethdr(M_DONTWAIT, MT_HEADER);	/* MAC-OK */
238	if (m == NULL)
239		goto freeit;
240
241        if (n->m_flags & M_SKIP_FIREWALL) {
242		/* set M_SKIP_FIREWALL to skip firewall check, since we're called from firewall */
243		m->m_flags |= M_SKIP_FIREWALL;
244	}
245
246#if CONFIG_MACF_NET
247	mac_mbuf_label_associate_netlayer(n, m);
248#endif
249	icmplen = min(oiplen + 8, oip->ip_len);
250	if (icmplen < sizeof(struct ip)) {
251		printf("icmp_error: bad length\n");
252		m_free(m);
253		goto freeit;
254	}
255	m->m_len = icmplen + ICMP_MINLEN;
256	MH_ALIGN(m, m->m_len);
257	icp = mtod(m, struct icmp *);
258	if ((u_int)type > ICMP_MAXTYPE)
259		panic("icmp_error");
260	icmpstat.icps_outhist[type]++;
261	icp->icmp_type = type;
262	if (type == ICMP_REDIRECT)
263		icp->icmp_gwaddr.s_addr = dest;
264	else {
265		icp->icmp_void = 0;
266		/*
267		 * The following assignments assume an overlay with the
268		 * zeroed icmp_void field.
269		 */
270		if (type == ICMP_PARAMPROB) {
271			icp->icmp_pptr = code;
272			code = 0;
273		} else if (type == ICMP_UNREACH &&
274		    code == ICMP_UNREACH_NEEDFRAG && nextmtu != 0) {
275			icp->icmp_nextmtu = htons(nextmtu);
276		}
277	}
278
279	icp->icmp_code = code;
280	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
281	nip = &icp->icmp_ip;
282
283	/*
284	 * Convert fields to network representation.
285	 */
286#if BYTE_ORDER != BIG_ENDIAN
287	HTONS(nip->ip_len);
288	HTONS(nip->ip_off);
289#endif
290	/*
291	 * Now, copy old ip header (without options)
292	 * in front of icmp message.
293	 */
294	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
295		panic("icmp len");
296	m->m_data -= sizeof(struct ip);
297	m->m_len += sizeof(struct ip);
298	m->m_pkthdr.len = m->m_len;
299	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
300	nip = mtod(m, struct ip *);
301	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
302	nip->ip_len = m->m_len;
303	nip->ip_vhl = IP_VHL_BORING;
304	nip->ip_p = IPPROTO_ICMP;
305	nip->ip_tos = 0;
306	icmp_reflect(m);
307
308freeit:
309	m_freem(n);
310}
311
/*
 * Scratch AF_INET socket addresses shared by the routines in this file.
 * Only sin_addr is rewritten before each use: icmp_input() fills all three,
 * icmp_reflect() fills icmpdst.
 * NOTE(review): these are file-scope and no locking is visible around
 * their use here -- confirm how callers are serialized.
 */
static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET,
										0 , { 0 }, { 0,0,0,0,0,0,0,0 } };
static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET,
										0 , { 0 }, { 0,0,0,0,0,0,0,0 } };
static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET,
										0 , { 0 }, { 0,0,0,0,0,0,0,0 } };
318
319/*
320 * Process a received ICMP message.
321 */
322void
323icmp_input(struct mbuf *m, int hlen)
324{
325	struct icmp *icp;
326	struct ip *ip = mtod(m, struct ip *);
327	int icmplen;
328	int i;
329	struct in_ifaddr *ia;
330	void (*ctlfunc)(int, struct sockaddr *, void *);
331	int code;
332
333	/* Expect 32-bit aligned data pointer on strict-align platforms */
334	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
335
336	icmplen = ip->ip_len;
337
338	/*
339	 * Locate icmp structure in mbuf, and check
340	 * that not corrupted and of at least minimum length.
341	 */
342#if ICMPPRINTFS
343	if (icmpprintfs) {
344		char buf[MAX_IPv4_STR_LEN];
345		char ipv4str[MAX_IPv4_STR_LEN];
346
347		printf("icmp_input from %s to %s, len %d\n",
348		       inet_ntop(AF_INET, &ip->ip_src, buf, sizeof(buf)),
349		       inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str)),
350		       icmplen);
351	}
352#endif
353	if (icmplen < ICMP_MINLEN) {
354		icmpstat.icps_tooshort++;
355		goto freeit;
356	}
357	i = hlen + min(icmplen, ICMP_ADVLENMIN);
358	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
359		icmpstat.icps_tooshort++;
360		return;
361	}
362	ip = mtod(m, struct ip *);
363	m->m_len -= hlen;
364	m->m_data += hlen;
365	icp = mtod(m, struct icmp *);
366	if (in_cksum(m, icmplen)) {
367		icmpstat.icps_checksum++;
368		goto freeit;
369	}
370	m->m_len += hlen;
371	m->m_data -= hlen;
372
373#if ICMPPRINTFS
374	if (icmpprintfs)
375		printf("icmp_input, type %d code %d\n", icp->icmp_type,
376		    icp->icmp_code);
377#endif
378
379	/*
380	 * Message type specific processing.
381	 */
382	if (icp->icmp_type > ICMP_MAXTYPE)
383		goto raw;
384	icmpstat.icps_inhist[icp->icmp_type]++;
385	code = icp->icmp_code;
386	switch (icp->icmp_type) {
387
388	case ICMP_UNREACH:
389		switch (code) {
390			case ICMP_UNREACH_NET:
391			case ICMP_UNREACH_HOST:
392			case ICMP_UNREACH_SRCFAIL:
393			case ICMP_UNREACH_NET_UNKNOWN:
394			case ICMP_UNREACH_HOST_UNKNOWN:
395			case ICMP_UNREACH_ISOLATED:
396			case ICMP_UNREACH_TOSNET:
397			case ICMP_UNREACH_TOSHOST:
398			case ICMP_UNREACH_HOST_PRECEDENCE:
399			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
400				code = PRC_UNREACH_NET;
401				break;
402
403			case ICMP_UNREACH_NEEDFRAG:
404				code = PRC_MSGSIZE;
405				break;
406
407			/*
408			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
409			 * Treat subcodes 2,3 as immediate RST
410			 */
411			case ICMP_UNREACH_PROTOCOL:
412			case ICMP_UNREACH_PORT:
413				code = PRC_UNREACH_PORT;
414				break;
415
416			case ICMP_UNREACH_NET_PROHIB:
417			case ICMP_UNREACH_HOST_PROHIB:
418			case ICMP_UNREACH_FILTER_PROHIB:
419				code = PRC_UNREACH_ADMIN_PROHIB;
420				break;
421
422			default:
423				goto badcode;
424		}
425		goto deliver;
426
427	case ICMP_TIMXCEED:
428		if (code > 1)
429			goto badcode;
430		code += PRC_TIMXCEED_INTRANS;
431		goto deliver;
432
433	case ICMP_PARAMPROB:
434		if (code > 1)
435			goto badcode;
436		code = PRC_PARAMPROB;
437		goto deliver;
438
439	case ICMP_SOURCEQUENCH:
440		if (code)
441			goto badcode;
442		code = PRC_QUENCH;
443	deliver:
444		/*
445		 * Problem with datagram; advise higher level routines.
446		 */
447		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
448		    IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
449			icmpstat.icps_badlen++;
450			goto freeit;
451		}
452
453#if BYTE_ORDER != BIG_ENDIAN
454		NTOHS(icp->icmp_ip.ip_len);
455#endif
456
457		/* Discard ICMP's in response to multicast packets */
458		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
459			goto badcode;
460#if ICMPPRINTFS
461		if (icmpprintfs)
462			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
463#endif
464		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
465
466		/*
467		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
468		 * notification to TCP layer.
469		 */
470		ctlfunc = ip_protox[icp->icmp_ip.ip_p]->pr_ctlinput;
471		if (ctlfunc)
472			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
473				   (void *)&icp->icmp_ip);
474		break;
475
476	badcode:
477		icmpstat.icps_badcode++;
478		break;
479
480	case ICMP_ECHO:
481		if (!icmpbmcastecho
482		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
483			icmpstat.icps_bmcastecho++;
484			break;
485		}
486		icp->icmp_type = ICMP_ECHOREPLY;
487#if ICMP_BANDLIM
488		if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
489			goto freeit;
490		else
491#endif
492			goto reflect;
493
494	case ICMP_TSTAMP:
495
496		if (icmptimestamp == 0)
497			break;
498
499		if (!icmpbmcastecho
500		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
501			icmpstat.icps_bmcasttstamp++;
502			break;
503		}
504		if (icmplen < ICMP_TSLEN) {
505			icmpstat.icps_badlen++;
506			break;
507		}
508		icp->icmp_type = ICMP_TSTAMPREPLY;
509		icp->icmp_rtime = iptime();
510		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
511#if ICMP_BANDLIM
512		if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
513			goto freeit;
514		else
515#endif
516			goto reflect;
517
518	case ICMP_MASKREQ:
519		if (icmpmaskrepl == 0)
520			break;
521		/*
522		 * We are not able to respond with all ones broadcast
523		 * unless we receive it over a point-to-point interface.
524		 */
525		if (icmplen < ICMP_MASKLEN)
526			break;
527		switch (ip->ip_dst.s_addr) {
528
529		case INADDR_BROADCAST:
530		case INADDR_ANY:
531			icmpdst.sin_addr = ip->ip_src;
532			break;
533
534		default:
535			icmpdst.sin_addr = ip->ip_dst;
536		}
537		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
538			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
539		if (ia == 0)
540			break;
541		IFA_LOCK(&ia->ia_ifa);
542		if (ia->ia_ifp == 0) {
543			IFA_UNLOCK(&ia->ia_ifa);
544			IFA_REMREF(&ia->ia_ifa);
545			ia = NULL;
546			break;
547		}
548		icp->icmp_type = ICMP_MASKREPLY;
549		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
550		if (ip->ip_src.s_addr == 0) {
551			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
552			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
553			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
554			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
555		}
556		IFA_UNLOCK(&ia->ia_ifa);
557		IFA_REMREF(&ia->ia_ifa);
558reflect:
559		ip->ip_len += hlen;	/* since ip_input deducts this */
560		icmpstat.icps_reflect++;
561		icmpstat.icps_outhist[icp->icmp_type]++;
562		icmp_reflect(m);
563		return;
564
565	case ICMP_REDIRECT:
566		if (log_redirect) {
567			u_int32_t src, dst, gw;
568
569			src = ntohl(ip->ip_src.s_addr);
570			dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
571			gw = ntohl(icp->icmp_gwaddr.s_addr);
572			printf("icmp redirect from %d.%d.%d.%d: "
573			       "%d.%d.%d.%d => %d.%d.%d.%d\n",
574			       (int)(src >> 24), (int)((src >> 16) & 0xff),
575			       (int)((src >> 8) & 0xff), (int)(src & 0xff),
576			       (int)(dst >> 24), (int)((dst >> 16) & 0xff),
577			       (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
578			       (int)(gw >> 24), (int)((gw >> 16) & 0xff),
579			       (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
580		}
581		if (drop_redirect)
582			break;
583		if (code > 3)
584			goto badcode;
585		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
586		    IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
587			icmpstat.icps_badlen++;
588			break;
589		}
590		/*
591		 * Short circuit routing redirects to force
592		 * immediate change in the kernel's routing
593		 * tables.  The message is also handed to anyone
594		 * listening on a raw socket (e.g. the routing
595		 * daemon for use in updating its tables).
596		 */
597		icmpgw.sin_addr = ip->ip_src;
598		icmpdst.sin_addr = icp->icmp_gwaddr;
599#if	ICMPPRINTFS
600		if (icmpprintfs) {
601			char buf[MAX_IPv4_STR_LEN];
602
603			printf("redirect dst %s to %s\n",
604			       inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, buf, sizeof(buf)),
605			       inet_ntop(AF_INET, &icp->icmp_gwaddr, ipv4str,
606			       			 sizeof(ipv4str)));
607		}
608#endif
609		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
610		rtredirect(m->m_pkthdr.rcvif, (struct sockaddr *)&icmpsrc,
611		  (struct sockaddr *)&icmpdst, NULL, RTF_GATEWAY | RTF_HOST,
612		  (struct sockaddr *)&icmpgw, NULL);
613		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
614#if IPSEC
615		key_sa_routechange((struct sockaddr *)&icmpsrc);
616#endif
617		break;
618
619	/*
620	 * No kernel processing for the following;
621	 * just fall through to send to raw listener.
622	 */
623	case ICMP_ECHOREPLY:
624	case ICMP_ROUTERADVERT:
625	case ICMP_ROUTERSOLICIT:
626	case ICMP_TSTAMPREPLY:
627	case ICMP_IREQREPLY:
628	case ICMP_MASKREPLY:
629	default:
630		break;
631	}
632
633raw:
634	rip_input(m, hlen);
635	return;
636
637freeit:
638	m_freem(m);
639}
640
641/*
642 * Reflect the ip packet back to the source
643 */
644static void
645icmp_reflect(struct mbuf *m)
646{
647	struct ip *ip = mtod(m, struct ip *);
648	struct in_ifaddr *ia;
649	struct in_addr t;
650	struct mbuf *opts = NULL;
651	int optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip);
652
653	if (!in_canforward(ip->ip_src) &&
654	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
655	     (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
656		m_freem(m);	/* Bad return address */
657		goto done;	/* Ip_output() will check for broadcast */
658	}
659	t = ip->ip_dst;
660	ip->ip_dst = ip->ip_src;
661	/*
662	 * If the incoming packet was addressed directly to us,
663	 * use dst as the src for the reply.  Otherwise (broadcast
664	 * or anonymous), use the address which corresponds
665	 * to the incoming interface.
666	 */
667	lck_rw_lock_shared(in_ifaddr_rwlock);
668	TAILQ_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) {
669		IFA_LOCK(&ia->ia_ifa);
670		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) {
671			IFA_ADDREF_LOCKED(&ia->ia_ifa);
672			IFA_UNLOCK(&ia->ia_ifa);
673			goto match;
674		}
675		IFA_UNLOCK(&ia->ia_ifa);
676	}
677	/*
678	 * Slow path; check for broadcast addresses.  Find a source
679	 * IP address to use when replying to the broadcast request;
680	 * let IP handle the source interface selection work.
681	 */
682	for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) {
683		IFA_LOCK(&ia->ia_ifa);
684		if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) &&
685		    t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr) {
686			IFA_ADDREF_LOCKED(&ia->ia_ifa);
687			IFA_UNLOCK(&ia->ia_ifa);
688			break;
689		}
690		IFA_UNLOCK(&ia->ia_ifa);
691	}
692match:
693	lck_rw_done(in_ifaddr_rwlock);
694	icmpdst.sin_addr = t;
695	if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif)
696		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
697			(struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
698	/*
699	 * The following happens if the packet was not addressed to us,
700	 * and was received on an interface with no IP address.
701	 */
702	if (ia == (struct in_ifaddr *)0) {
703		lck_rw_lock_shared(in_ifaddr_rwlock);
704		ia = in_ifaddrhead.tqh_first;
705		if (ia == (struct in_ifaddr *)0) {/* no address yet, bail out */
706			lck_rw_done(in_ifaddr_rwlock);
707			m_freem(m);
708			goto done;
709		}
710		IFA_ADDREF(&ia->ia_ifa);
711		lck_rw_done(in_ifaddr_rwlock);
712	}
713#if CONFIG_MACF_NET
714	mac_netinet_icmp_reply(m);
715#endif
716	IFA_LOCK_SPIN(&ia->ia_ifa);
717	t = IA_SIN(ia)->sin_addr;
718	IFA_UNLOCK(&ia->ia_ifa);
719	ip->ip_src = t;
720	ip->ip_ttl = ip_defttl;
721	IFA_REMREF(&ia->ia_ifa);
722	ia = NULL;
723
724	if (optlen > 0) {
725		u_char *cp;
726		int opt, cnt;
727		u_int len;
728
729		/*
730		 * Retrieve any source routing from the incoming packet;
731		 * add on any record-route or timestamp options.
732		 */
733		cp = (u_char *) (ip + 1);
734		if ((opts = ip_srcroute()) == 0 &&
735		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {	/* MAC-OK */
736			opts->m_len = sizeof(struct in_addr);
737			mtod(opts, struct in_addr *)->s_addr = 0;
738		}
739		if (opts) {
740#if ICMPPRINTFS
741		    if (icmpprintfs)
742			    printf("icmp_reflect optlen %d rt %d => ",
743				optlen, opts->m_len);
744#endif
745		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
746			    opt = cp[IPOPT_OPTVAL];
747			    if (opt == IPOPT_EOL)
748				    break;
749			    if (opt == IPOPT_NOP)
750				    len = 1;
751			    else {
752				    if (cnt < IPOPT_OLEN + sizeof(*cp))
753					    break;
754				    len = cp[IPOPT_OLEN];
755				    if (len < IPOPT_OLEN + sizeof(*cp) ||
756				        len > cnt)
757					    break;
758			    }
759			    /*
760			     * Should check for overflow, but it "can't happen"
761			     */
762			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
763				opt == IPOPT_SECURITY) {
764				    bcopy((caddr_t)cp,
765					mtod(opts, caddr_t) + opts->m_len, len);
766				    opts->m_len += len;
767			    }
768		    }
769		    /* Terminate & pad, if necessary */
770		    cnt = opts->m_len % 4;
771		    if (cnt) {
772			    for (; cnt < 4; cnt++) {
773				    *(mtod(opts, caddr_t) + opts->m_len) =
774					IPOPT_EOL;
775				    opts->m_len++;
776			    }
777		    }
778#if ICMPPRINTFS
779		    if (icmpprintfs)
780			    printf("%d\n", opts->m_len);
781#endif
782		}
783		/*
784		 * Now strip out original options by copying rest of first
785		 * mbuf's data back, and adjust the IP length.
786		 */
787		ip->ip_len -= optlen;
788		ip->ip_vhl = IP_VHL_BORING;
789		m->m_len -= optlen;
790		if (m->m_flags & M_PKTHDR)
791			m->m_pkthdr.len -= optlen;
792		optlen += sizeof(struct ip);
793		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
794			 (unsigned)(m->m_len - sizeof(struct ip)));
795	}
796	m->m_flags &= ~(M_BCAST|M_MCAST);
797	icmp_send(m, opts);
798done:
799	if (opts)
800		(void)m_free(opts);
801}
802
803/*
804 * Send an icmp packet back to the ip level,
805 * after supplying a checksum.
806 */
807static void
808icmp_send(struct mbuf *m, struct mbuf *opts)
809{
810	struct ip *ip = mtod(m, struct ip *);
811	int hlen;
812	struct icmp *icp;
813	struct route ro;
814	struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 },
815	    IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR };
816
817	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) {
818		ipoa.ipoa_boundif = m->m_pkthdr.rcvif->if_index;
819		ipoa.ipoa_flags |= IPOAF_BOUND_IF;
820	}
821
822	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
823	m->m_data += hlen;
824	m->m_len -= hlen;
825	icp = mtod(m, struct icmp *);
826	icp->icmp_cksum = 0;
827	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
828	m->m_data -= hlen;
829	m->m_len += hlen;
830	m->m_pkthdr.rcvif = NULL;
831	m->m_pkthdr.csum_data = 0;
832	m->m_pkthdr.csum_flags = 0;
833#if ICMPPRINTFS
834	if (icmpprintfs) {
835		char buf[MAX_IPv4_STR_LEN];
836		char ipv4str[MAX_IPv4_STR_LEN];
837
838		printf("icmp_send dst %s src %s\n",
839		       inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)),
840		       inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str)));
841	}
842#endif
843	bzero(&ro, sizeof ro);
844	(void) ip_output(m, opts, &ro, IP_OUTARGS, NULL, &ipoa);
845	if (ro.ro_rt)
846		rtfree(ro.ro_rt);
847}
848
849n_time
850iptime(void)
851{
852	struct timeval atv;
853	u_int32_t t;
854
855	microtime(&atv);
856	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
857	return (htonl(t));
858}
859
860#if 1
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no such plateau: MTU is at or above the largest
 * entry and a larger one is requested, MTU is at or below the smallest
 * entry and a smaller one is requested, or MTU is negative.
 */
int
ip_next_mtu(int mtu, int dir)
{
	/* Descending plateau values; the trailing 0 is a sentinel. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int nentries = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/*
	 * A negative MTU matches no entry, not even the 0 sentinel, so the
	 * search below would run off the end of the table.  Reject it here.
	 */
	if (mtu < 0)
		return 0;

	/* Find the first (largest) plateau that does not exceed mtu. */
	for (i = 0; i < nentries; i++) {
		if (mtu >= mtutab[i])
			break;
	}
	/* mtu >= 0 always matches the sentinel, hence i < nentries here. */

	if (dir < 0) {
		/* Next larger plateau, if any. */
		return (i == 0) ? 0 : mtutab[i - 1];
	}

	/* Next smaller plateau, if any. */
	if (mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	return mtutab[i + 1];
}
896#endif
897
898#if ICMP_BANDLIM
899
900/*
901 * badport_bandlim() - check for ICMP bandwidth limit
902 *
903 *	Return 0 if it is ok to send an ICMP error response, -1 if we have
904 *	hit our bandwidth limit and it is not ok.
905 *
906 *	If icmplim is <= 0, the feature is disabled and 0 is returned.
907 *
908 *	For now we separate the TCP and UDP subsystems w/ different 'which'
909 *	values.  We may eventually remove this separation (and simplify the
910 *	code further).
911 *
912 *	Note that the printing of the error message is delayed so we can
913 *	properly print the icmp error rate that the system was trying to do
914 *	(i.e. 22000/100 pps, etc...).  This can cause long delays in printing
915 *	the 'final' error, but it doesn't make sense to solve the printing
916 *	delay with more complex code.
917 */
918
919int
920badport_bandlim(int which)
921{
922	static struct timeval lticks[BANDLIM_MAX + 1];
923	static int lpackets[BANDLIM_MAX + 1];
924	struct timeval time;
925	int secs;
926
927	const char *bandlimittype[] = {
928		"Limiting icmp unreach response",
929		"Limiting icmp ping response",
930		"Limiting icmp tstamp response",
931		"Limiting closed port RST response",
932		"Limiting open port RST response"
933		};
934
935	/*
936	 * Return ok status if feature disabled or argument out of
937	 * ranage.
938	 */
939
940	if (icmplim <= 0 || which > BANDLIM_MAX || which < 0)
941		return(0);
942
943	getmicrouptime(&time);
944
945 	secs = time.tv_sec - lticks[which].tv_sec ;
946
947	/*
948	 * reset stats when cumulative delta exceeds one second.
949	 */
950
951	if ((secs > 1) || (secs == 1 && (lticks[which].tv_usec > time.tv_usec))) {
952		if (lpackets[which] > icmplim) {
953			printf("%s from %d to %d packets per second\n",
954				bandlimittype[which],
955				lpackets[which],
956				icmplim
957			);
958		}
959		lticks[which].tv_sec = time.tv_sec;
960		lticks[which].tv_usec = time.tv_usec;
961		lpackets[which] = 0;
962	}
963
964	/*
965	 * bump packet count
966	 */
967
968	if (++lpackets[which] > icmplim) {
969		return(-1);
970	}
971	return(0);
972}
973
974#endif
975
976#if __APPLE__
977
978/*
979 * Non-privileged ICMP socket operations
980 * - send ICMP echo request
981 * - all ICMP
982 * - limited socket options
983 */
984
985#include <netinet/ip_icmp.h>
986#include <netinet/in_pcb.h>
987
/* Raw IP state defined outside this file and reused by the ICMP datagram socket. */
extern struct domain inetdomain;
extern u_int32_t rip_sendspace;
extern u_int32_t rip_recvspace;
extern struct inpcbinfo ripcbinfo;

/* Raw IP user-request handlers reused unchanged in icmp_dgram_usrreqs below. */
int rip_abort(struct socket *);
int rip_bind(struct socket *, struct sockaddr *, struct proc *);
int rip_connect(struct socket *, struct sockaddr *, struct proc *);
int rip_detach(struct socket *);
int rip_disconnect(struct socket *);
int rip_shutdown(struct socket *);

/* ICMP datagram socket entry points defined later in this file. */
__private_extern__ int icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p);
__private_extern__ int icmp_dgram_attach(struct socket *so, int proto, struct proc *p);
__private_extern__ int icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt);

/*
 * User-request table for ICMP datagram sockets (positional initializer).
 * Attach and send use the icmp_dgram_* routines from this file; the other
 * slots reuse rip_*, in_* and generic socket handlers or the pru_*_notsupp
 * stubs.
 */
__private_extern__ struct pr_usrreqs icmp_dgram_usrreqs = {
        rip_abort, pru_accept_notsupp, icmp_dgram_attach, rip_bind, rip_connect,
        pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
        pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
        pru_rcvoob_notsupp, icmp_dgram_send, pru_sense_null, rip_shutdown,
        in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
};
1011
1012/* Like rip_attach but without root privilege enforcement */
1013__private_extern__ int
1014icmp_dgram_attach(struct socket *so, __unused int proto, struct proc *p)
1015{
1016        struct inpcb *inp;
1017        int error;
1018
1019        inp = sotoinpcb(so);
1020        if (inp)
1021                panic("icmp_dgram_attach");
1022
1023        error = soreserve(so, rip_sendspace, rip_recvspace);
1024        if (error)
1025                return error;
1026        error = in_pcballoc(so, &ripcbinfo, p);
1027        if (error)
1028                return error;
1029        inp = (struct inpcb *)so->so_pcb;
1030        inp->inp_vflag |= INP_IPV4;
1031        inp->inp_ip_p = IPPROTO_ICMP;
1032        inp->inp_ip_ttl = ip_defttl;
1033        return 0;
1034}
1035
1036/*
1037 * Raw IP socket option processing.
1038 */
1039__private_extern__ int
1040icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
1041{
1042	int	error;
1043
1044	if (sopt->sopt_level != IPPROTO_IP)
1045		return (EINVAL);
1046
1047	switch (sopt->sopt_name) {
1048		case IP_OPTIONS:
1049		case IP_HDRINCL:
1050		case IP_TOS:
1051		case IP_TTL:
1052		case IP_RECVOPTS:
1053		case IP_RECVRETOPTS:
1054		case IP_RECVDSTADDR:
1055		case IP_RETOPTS:
1056		case IP_MULTICAST_IF:
1057		case IP_MULTICAST_IFINDEX:
1058		case IP_MULTICAST_TTL:
1059		case IP_MULTICAST_LOOP:
1060		case IP_ADD_MEMBERSHIP:
1061		case IP_DROP_MEMBERSHIP:
1062		case IP_MULTICAST_VIF:
1063		case IP_PORTRANGE:
1064		case IP_RECVIF:
1065		case IP_IPSEC_POLICY:
1066		case IP_STRIPHDR:
1067		case IP_RECVTTL:
1068		case IP_BOUND_IF:
1069#if CONFIG_FORCE_OUT_IFP
1070                case IP_FORCE_OUT_IFP:
1071#endif
1072		case IP_NO_IFT_CELLULAR:
1073			error = rip_ctloutput(so, sopt);
1074			break;
1075
1076		default:
1077			error = EINVAL;
1078			break;
1079	}
1080
1081	return (error);
1082}
1083
1084__private_extern__ int
1085icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
1086         struct mbuf *control, struct proc *p)
1087{
1088	struct ip *ip;
1089	struct inpcb *inp = sotoinpcb(so);
1090	int hlen;
1091	struct icmp *icp;
1092        struct in_ifaddr *ia = NULL;
1093	int icmplen;
1094
1095	if ((inp->inp_flags & INP_HDRINCL) != 0) {
1096		/* Expect 32-bit aligned data pointer on strict-align platforms */
1097		MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
1098		/*
1099		 * This is not raw IP, we liberal only for fields TOS, id and TTL
1100		 */
1101		ip = mtod(m, struct ip *);
1102
1103		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1104		/* Some sanity checks */
1105		if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
1106			goto bad;
1107		}
1108		/* Only IPv4 */
1109		if (IP_VHL_V(ip->ip_vhl) != 4)
1110			goto bad;
1111		if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len)
1112			goto bad;
1113		/* Bogus fragments can tie up peer resources */
1114		if ((ip->ip_off & ~IP_DF) !=  0)
1115			goto bad;
1116		/* Allow only ICMP even for user provided IP header */
1117		if (ip->ip_p != IPPROTO_ICMP)
1118			goto bad;
1119		/* To prevent spoofing, specified source address must be one of ours */
1120		if (ip->ip_src.s_addr != INADDR_ANY) {
1121			socket_unlock(so, 0);
1122			lck_rw_lock_shared(in_ifaddr_rwlock);
1123			if (TAILQ_EMPTY(&in_ifaddrhead)) {
1124				lck_rw_done(in_ifaddr_rwlock);
1125				socket_lock(so, 0);
1126				goto bad;
1127			}
1128			TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr),
1129			    ia_hash) {
1130				IFA_LOCK(&ia->ia_ifa);
1131				if (IA_SIN(ia)->sin_addr.s_addr ==
1132				    ip->ip_src.s_addr) {
1133					IFA_UNLOCK(&ia->ia_ifa);
1134					lck_rw_done(in_ifaddr_rwlock);
1135					socket_lock(so, 0);
1136					goto ours;
1137				}
1138				IFA_UNLOCK(&ia->ia_ifa);
1139			}
1140			lck_rw_done(in_ifaddr_rwlock);
1141			socket_lock(so, 0);
1142			goto bad;
1143		}
1144ours:
1145		/* Do not trust we got a valid checksum */
1146		ip->ip_sum = 0;
1147
1148		icp = (struct icmp *)(void *)(((char *)m->m_data) + hlen);
1149		icmplen = m->m_pkthdr.len - hlen;
1150	} else {
1151		if ((icmplen = m->m_pkthdr.len) < ICMP_MINLEN) {
1152			goto bad;
1153		}
1154		icp = mtod(m, struct icmp *);
1155	}
1156	/*
1157	 * Allow only to send request types with code 0
1158	 */
1159	if (icp->icmp_code != 0)
1160		goto bad;
1161	switch (icp->icmp_type) {
1162		case ICMP_ECHO:
1163			break;
1164		case ICMP_TSTAMP:
1165			if (icmplen != 20)
1166				goto bad;
1167			break;
1168		case ICMP_MASKREQ:
1169			if (icmplen != 12)
1170				goto bad;
1171			break;
1172		default:
1173			goto bad;
1174	}
1175	return rip_send(so, flags, m, nam, control, p);
1176bad:
1177	m_freem(m);
1178	return EINVAL;
1179}
1180
1181#endif /* __APPLE__ */
1182