/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <libkern/OSAtomic.h>
#include <kern/zalloc.h>

#include <pexpert/pexpert.h>

#include <net/if.h>
#include <net/route.h>

#define _IP_VHL
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_mroute.h>

#if INET6
#include <netinet6/in6_pcb.h>
#endif /* INET6 */

#include <netinet/ip_fw.h>

#if IPSEC
#include <netinet6/ipsec.h>
#endif /* IPSEC */

#if DUMMYNET
#include <netinet/ip_dummynet.h>
#endif

#if CONFIG_MACF_NET
#include <security/mac_framework.h>
#endif /* CONFIG_MACF_NET */

int load_ipfw(void);
int rip_detach(struct socket *);
int rip_abort(struct socket *);
int rip_disconnect(struct socket *);
int rip_bind(struct socket *, struct sockaddr *, struct proc *);
int rip_connect(struct socket *, struct sockaddr *, struct proc *);
int rip_shutdown(struct socket *);

#if IPSEC
extern int ipsec_bypass;
#endif

struct	inpcbhead ripcb;
struct	inpcbinfo ripcbinfo;

/* control hooks for ipfw and dummynet */
#if IPFIREWALL
ip_fw_ctl_t *ip_fw_ctl_ptr;
#endif /* IPFIREWALL */
#if DUMMYNET
ip_dn_ctl_t *ip_dn_ctl_ptr;
#endif /* DUMMYNET */

/*
 * Nominal space allocated to a raw ip socket.
 */
#define	RIPSNDQ		8192
#define	RIPRCVQ		8192

/*
 * Raw interface to IP protocol.
 */

/*
 * Initialize raw connection block queue.
 */
void
rip_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int rip_initialized = 0;
	struct inpcbinfo *pcbinfo;

	VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);

	if (rip_initialized)
		return;
	rip_initialized = 1;

	LIST_INIT(&ripcb);
	ripcbinfo.ipi_listhead = &ripcb;
	/*
	 * XXX We don't use the hash list for raw IP, but it's easier
	 * to allocate a one entry hash list than it is to check all
	 * over the place for ipi_hashbase == NULL.
	 */
	ripcbinfo.ipi_hashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_hashmask);
	ripcbinfo.ipi_porthashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_porthashmask);

	ripcbinfo.ipi_zone = zinit(sizeof(struct inpcb),
	    (4096 * sizeof(struct inpcb)), 4096, "ripzone");

	pcbinfo = &ripcbinfo;
	/*
	 * Allocate the lock group attribute and group for the raw IP PCB
	 * mutexes.
	 */
	pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init();
	pcbinfo->ipi_lock_grp = lck_grp_alloc_init("ripcb", pcbinfo->ipi_lock_grp_attr);

	/*
	 * Allocate the lock attribute for the raw IP PCB mutexes.
	 */
	pcbinfo->ipi_lock_attr = lck_attr_alloc_init();
	if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp,
	    pcbinfo->ipi_lock_attr)) == NULL) {
		panic("%s: unable to allocate PCB lock\n", __func__);
		/* NOTREACHED */
	}

	in_pcbinfo_attach(&ripcbinfo);
}

static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET, 0, {0}, {0,0,0,0,0,0,0,0,} };
/*
 * Set up generic address and protocol structures for the raw_input
 * routine, then pass them along with the mbuf chain.
 */
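/*
 * A copy of the packet is delivered to every raw PCB whose protocol,
 * local address and foreign address (when set) match the received
 * header; the last matching PCB receives the original mbuf chain.
 */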
void
rip_input(struct mbuf *m, int iphlen)
{
	struct ip *ip = mtod(m, struct ip *);
	struct inpcb *inp;
	struct inpcb *last = 0;
	struct mbuf *opts = 0;
	int skipit = 0, ret = 0;
	struct ifnet *ifp = m->m_pkthdr.rcvif;

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ripsrc.sin_addr = ip->ip_src;
	lck_rw_lock_shared(ripcbinfo.ipi_lock);
	LIST_FOREACH(inp, &ripcb, inp_list) {
#if INET6
		if ((inp->inp_vflag & INP_IPV4) == 0)
			continue;
#endif
		if (inp->inp_ip_p && (inp->inp_ip_p != ip->ip_p))
			continue;
		if (inp->inp_laddr.s_addr &&
		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
			continue;
		if (inp->inp_faddr.s_addr &&
		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
			continue;

		if (inp_restricted(inp, ifp))
			continue;

		if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
		    (inp->inp_flags & INP_NO_IFT_CELLULAR))
			continue;

		if (last) {
			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);

			skipit = 0;
#if IPSEC
			/* check AH/ESP integrity. */
			if (ipsec_bypass == 0 && n) {
				if (ipsec4_in_reject_so(n, last->inp_socket)) {
					m_freem(n);
					IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
					/* do not inject data to pcb */
					skipit = 1;
				}
			}
#endif /* IPSEC */
#if CONFIG_MACF_NET
			if (n && skipit == 0) {
				if (mac_inpcb_check_deliver(last, n, AF_INET,
				    SOCK_RAW) != 0) {
					m_freem(n);
					skipit = 1;
				}
			}
#endif
			if (n && skipit == 0) {
				int error = 0;
				if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
				    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
				    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
					ret = ip_savecontrol(last, &opts, ip, n);
					if (ret != 0) {
						m_freem(n);
						m_freem(opts);
						last = inp;
						continue;
					}
				}
				if (last->inp_flags & INP_STRIPHDR) {
					n->m_len -= iphlen;
					n->m_pkthdr.len -= iphlen;
					n->m_data += iphlen;
				}
				so_recv_data_stat(last->inp_socket, m, 0);
				if (sbappendaddr(&last->inp_socket->so_rcv,
				    (struct sockaddr *)&ripsrc, n,
				    opts, &error) != 0) {
					sorwakeup(last->inp_socket);
				} else {
					if (error) {
						/* should notify about lost packet */
						kprintf("rip_input can't append to socket\n");
					}
				}
				opts = 0;
			}
		}
		last = inp;
	}

	skipit = 0;
#if IPSEC
	/* check AH/ESP integrity. */
	if (ipsec_bypass == 0 && last) {
		if (ipsec4_in_reject_so(m, last->inp_socket)) {
			m_freem(m);
			IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
			OSAddAtomic(1, &ipstat.ips_delivered);
			/* do not inject data to pcb */
			skipit = 1;
		}
	}
#endif /* IPSEC */
#if CONFIG_MACF_NET
	if (last && skipit == 0) {
		if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0) {
			skipit = 1;
			m_freem(m);
		}
	}
#endif
	if (skipit == 0) {
		if (last) {
			if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
			    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
			    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
				ret = ip_savecontrol(last, &opts, ip, m);
				if (ret != 0) {
					m_freem(m);
					m_freem(opts);
					goto unlock;
				}
			}
			if (last->inp_flags & INP_STRIPHDR) {
				m->m_len -= iphlen;
				m->m_pkthdr.len -= iphlen;
				m->m_data += iphlen;
			}
			so_recv_data_stat(last->inp_socket, m, 0);
			if (sbappendaddr(&last->inp_socket->so_rcv,
			    (struct sockaddr *)&ripsrc, m, opts, NULL) != 0) {
				sorwakeup(last->inp_socket);
			} else {
				kprintf("rip_input(2) can't append to socket\n");
			}
		} else {
			m_freem(m);
			OSAddAtomic(1, &ipstat.ips_noproto);
			OSAddAtomic(-1, &ipstat.ips_delivered);
		}
	}
unlock:
	/*
	 * Keep the list locked because socket filter may force the socket lock
	 * to be released when calling sbappendaddr() -- see rdar://7627704
	 */
	lck_rw_done(ripcbinfo.ipi_lock);
}

/*
 * Generate IP header and pass packet to ip_output.
 * Tack on options the user may have set up with a control call.
 */
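/*
 * For illustration only (not part of the kernel build): a minimal,
 * hedged userspace sketch of how this path is typically exercised.
 * Without IP_HDRINCL the code below prepends the IP header; with it,
 * the caller supplies the complete header.  Names and addresses here
 * are purely illustrative.
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
 *	struct sockaddr_in dst = { .sin_len = sizeof (dst),
 *	    .sin_family = AF_INET };
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	sendto(s, buf, len, 0, (struct sockaddr *)&dst, sizeof (dst));
 */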
int
rip_output(
	struct mbuf *m,
	struct socket *so,
	u_int32_t dst,
	struct mbuf *control)
{
	struct ip *ip;
	struct inpcb *inp = sotoinpcb(so);
	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
	struct ip_out_args ipoa =
	    { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 };
	struct ip_moptions *imo;
	int error = 0;
	mbuf_svc_class_t msc = MBUF_SC_UNSPEC;

	if (control != NULL) {
		msc = mbuf_service_class_from_control(control);

		m_freem(control);
		control = NULL;
	}

	if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) {
		if (m != NULL)
			m_freem(m);
		VERIFY(control == NULL);
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	}

	flags |= IP_OUTARGS;
	/* If socket was bound to an ifindex, tell ip_output about it */
	if (inp->inp_flags & INP_BOUND_IF) {
		ipoa.ipoa_boundif = inp->inp_boundifp->if_index;
		ipoa.ipoa_flags |= IPOAF_BOUND_IF;
	}
	if (inp->inp_flags & INP_NO_IFT_CELLULAR)
		ipoa.ipoa_flags |= IPOAF_NO_CELLULAR;

	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);

	/*
	 * If the user handed us a complete IP packet, use it.
	 * Otherwise, allocate an mbuf for a header and fill it in.
	 */
	if ((inp->inp_flags & INP_HDRINCL) == 0) {
		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
			m_freem(m);
			return (EMSGSIZE);
		}
		M_PREPEND(m, sizeof(struct ip), M_WAIT);
		if (m == NULL)
			return ENOBUFS;
		ip = mtod(m, struct ip *);
		ip->ip_tos = inp->inp_ip_tos;
		ip->ip_off = 0;
		ip->ip_p = inp->inp_ip_p;
		ip->ip_len = m->m_pkthdr.len;
		ip->ip_src = inp->inp_laddr;
		ip->ip_dst.s_addr = dst;
		ip->ip_ttl = inp->inp_ip_ttl;
	} else {
		if (m->m_pkthdr.len > IP_MAXPACKET) {
			m_freem(m);
			return (EMSGSIZE);
		}
		ip = mtod(m, struct ip *);
		/*
		 * Don't allow both user specified and setsockopt options,
		 * and don't allow packet length sizes that will crash.
		 */
		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
		     && inp->inp_options)
		    || (ip->ip_len > m->m_pkthdr.len)
		    || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
			m_freem(m);
			return EINVAL;
		}
		if (ip->ip_id == 0)
			ip->ip_id = ip_randomid();
		/* XXX prevent ip_output from overwriting header fields */
		flags |= IP_RAWOUTPUT;
		OSAddAtomic(1, &ipstat.ips_rawout);
	}

	if (inp->inp_laddr.s_addr != INADDR_ANY)
		ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;

#if IPSEC
	if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) {
		m_freem(m);
		return ENOBUFS;
	}
#endif /* IPSEC */

	if (ROUTE_UNUSABLE(&inp->inp_route))
		ROUTE_RELEASE(&inp->inp_route);

	set_packet_service_class(m, so, msc, 0);
	m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB;
	m->m_pkthdr.pkt_flowid = inp->inp_flowhash;
	m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC |
	    PKTF_FLOW_RAWSOCK);
	m->m_pkthdr.pkt_proto = inp->inp_ip_p;

#if CONFIG_MACF_NET
	mac_mbuf_label_associate_inpcb(inp, m);
#endif

	imo = inp->inp_moptions;
	if (imo != NULL)
		IMO_ADDREF(imo);
	/*
	 * The domain lock is held across ip_output, so it is okay
	 * to pass the PCB cached route pointer directly to IP and
	 * the modules beneath it.
	 */
	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
	    imo, &ipoa);

	if (imo != NULL)
		IMO_REMREF(imo);

	if (inp->inp_route.ro_rt != NULL) {
		struct rtentry *rt = inp->inp_route.ro_rt;
		struct ifnet *outif;

		if ((rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) ||
		    inp->inp_socket == NULL ||
		    !(inp->inp_socket->so_state & SS_ISCONNECTED)) {
			rt = NULL;	/* unusable */
		}
		/*
		 * Always discard the cached route for unconnected
		 * socket or if it is a multicast route.
		 */
		if (rt == NULL)
			ROUTE_RELEASE(&inp->inp_route);

		/*
		 * If this is a connected socket and the destination
		 * route is unicast, update outif with that of the
		 * route interface used by IP.
		 */
		if (rt != NULL && (outif = rt->rt_ifp) != inp->inp_last_outifp)
			inp->inp_last_outifp = outif;
	} else {
		ROUTE_RELEASE(&inp->inp_route);
	}

	/*
	 * If output interface was cellular, and this socket is denied
	 * access to it, generate an event.
	 */
	if (error != 0 && (ipoa.ipoa_retflags & IPOARF_IFDENIED) &&
	    (inp->inp_flags & INP_NO_IFT_CELLULAR))
		soevent(so, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED));

	return (error);
}

#if IPFIREWALL
int
load_ipfw(void)
{
	kern_return_t	err;

	ipfw_init();

#if DUMMYNET
	if (!DUMMYNET_LOADED)
		ip_dn_init();
#endif /* DUMMYNET */
	err = 0;

	return err == 0 && ip_fw_ctl_ptr == NULL ? -1 : err;
}
#endif /* IPFIREWALL */

/*
 * Raw IP socket option processing.
 */
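/*
 * Options handled here include IP_HDRINCL and IP_STRIPHDR, the ipfw,
 * dummynet and multicast-routing control points when configured, and
 * <SOL_SOCKET,SO_FLUSH>; everything else falls through to ip_ctloutput.
 * A hedged userspace sketch of toggling header inclusion:
 *
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IP, IP_HDRINCL, &on, sizeof (on));
 */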
int
rip_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct	inpcb *inp = sotoinpcb(so);
	int	error, optval;

	/* Allow <SOL_SOCKET,SO_FLUSH> at this level */
	if (sopt->sopt_level != IPPROTO_IP &&
	    !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH))
		return (EINVAL);

	error = 0;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			optval = inp->inp_flags & INP_HDRINCL;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case IP_STRIPHDR:
			optval = inp->inp_flags & INP_STRIPHDR;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_GET:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_GET:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_GET:
			if (!DUMMYNET_LOADED)
				ip_dn_init();
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* DUMMYNET */

#if MROUTING
		case MRT_INIT:
		case MRT_DONE:
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
		case MRT_VERSION:
		case MRT_ASSERT:
			error = ip_mrouter_get(so, sopt);
			break;
#endif /* MROUTING */

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;

	case SOPT_SET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_HDRINCL;
			else
				inp->inp_flags &= ~INP_HDRINCL;
			break;

		case IP_STRIPHDR:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_STRIPHDR;
			else
				inp->inp_flags &= ~INP_STRIPHDR;
			break;

#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_DEL:
		case IP_FW_FLUSH:
		case IP_FW_ZERO:
		case IP_FW_RESETLOG:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_DEL:
		case IP_OLD_FW_FLUSH:
		case IP_OLD_FW_ZERO:
		case IP_OLD_FW_RESETLOG:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_CONFIGURE:
		case IP_DUMMYNET_DEL:
		case IP_DUMMYNET_FLUSH:
			if (!DUMMYNET_LOADED)
				ip_dn_init();
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif

#if MROUTING
		case IP_RSVP_ON:
			error = ip_rsvp_init(so);
			break;

		case IP_RSVP_OFF:
			error = ip_rsvp_done();
			break;

			/* XXX - should be combined */
		case IP_RSVP_VIF_ON:
			error = ip_rsvp_vif_init(so, sopt);
			break;

		case IP_RSVP_VIF_OFF:
			error = ip_rsvp_vif_done(so, sopt);
			break;

		case MRT_INIT:
		case MRT_DONE:
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
		case MRT_VERSION:
		case MRT_ASSERT:
			error = ip_mrouter_set(so, sopt);
			break;
#endif /* MROUTING */

		case SO_FLUSH:
			if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
			    sizeof (optval))) != 0)
				break;

			error = inp_flush(inp, optval);
			break;

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;
	}

	return (error);
}

/*
 * This function exists solely to receive the PRC_IFDOWN messages which
 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
 * and calls in_ifadown() to remove all routes corresponding to that address.
 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
 * interface routes.
 */
void
rip_ctlinput(
	int cmd,
	struct sockaddr *sa,
	__unused void *vip)
{
	struct in_ifaddr *ia;
	struct ifnet *ifp;
	int err;
	int flags, done = 0;

	switch (cmd) {
	case PRC_IFDOWN:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		     ia = ia->ia_link.tqe_next) {
			IFA_LOCK(&ia->ia_ifa);
			if (ia->ia_ifa.ifa_addr == sa &&
			    (ia->ia_flags & IFA_ROUTE)) {
				done = 1;
				IFA_ADDREF_LOCKED(&ia->ia_ifa);
				IFA_UNLOCK(&ia->ia_ifa);
				lck_rw_done(in_ifaddr_rwlock);
				lck_mtx_lock(rnh_lock);
				/*
				 * in_ifscrub kills the interface route.
				 */
				in_ifscrub(ia->ia_ifp, ia, 1);
				/*
				 * in_ifadown gets rid of all the rest of
				 * the routes.  This is not quite the right
				 * thing to do, but at least if we are running
				 * a routing process they will come back.
				 */
				in_ifadown(&ia->ia_ifa, 1);
				lck_mtx_unlock(rnh_lock);
				IFA_REMREF(&ia->ia_ifa);
				break;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		}
		if (!done)
			lck_rw_done(in_ifaddr_rwlock);
		break;

	case PRC_IFUP:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		     ia = ia->ia_link.tqe_next) {
			IFA_LOCK(&ia->ia_ifa);
			if (ia->ia_ifa.ifa_addr == sa) {
				/* keep it locked */
				break;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		}
		if (ia == NULL || (ia->ia_flags & IFA_ROUTE) ||
		    (ia->ia_ifa.ifa_debug & IFD_NOTREADY)) {
			if (ia != NULL)
				IFA_UNLOCK(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			return;
		}
		IFA_ADDREF_LOCKED(&ia->ia_ifa);
		IFA_UNLOCK(&ia->ia_ifa);
		lck_rw_done(in_ifaddr_rwlock);

		flags = RTF_UP;
		ifp = ia->ia_ifa.ifa_ifp;

		if ((ifp->if_flags & IFF_LOOPBACK)
		    || (ifp->if_flags & IFF_POINTOPOINT))
			flags |= RTF_HOST;

		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
		if (err == 0) {
			IFA_LOCK_SPIN(&ia->ia_ifa);
			ia->ia_flags |= IFA_ROUTE;
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		break;
	}
}

u_int32_t	rip_sendspace = RIPSNDQ;
u_int32_t	rip_recvspace = RIPRCVQ;

SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rip_recvspace, 0, "Maximum incoming raw IP datagram size");
SYSCTL_UINT(_net_inet_raw, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &ripcbinfo.ipi_count, 0, "Number of active PCBs");

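/*
 * Attach a raw IP socket: creation is restricted to privileged sockets,
 * send/receive buffer space is reserved, and an IPv4 inpcb is allocated
 * with the requested protocol and the default TTL.
 */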
static int
rip_attach(struct socket *so, int proto, struct proc *p)
{
	struct inpcb *inp;
	int error;

	inp = sotoinpcb(so);
	if (inp)
		panic("rip_attach");
	if ((so->so_state & SS_PRIV) == 0)
		return (EPERM);

	error = soreserve(so, rip_sendspace, rip_recvspace);
	if (error)
		return error;
	error = in_pcballoc(so, &ripcbinfo, p);
	if (error)
		return error;
	inp = (struct inpcb *)so->so_pcb;
	inp->inp_vflag |= INP_IPV4;
	inp->inp_ip_p = proto;
	inp->inp_ip_ttl = ip_defttl;
	return 0;
}

__private_extern__ int
rip_detach(struct socket *so)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	if (inp == 0)
		panic("rip_detach");
#if MROUTING
	if (so == ip_mrouter)
		ip_mrouter_done();
	ip_rsvp_force_done(so);
	if (so == ip_rsvpd)
		ip_rsvp_done();
#endif /* MROUTING */
	in_pcbdetach(inp);
	return 0;
}

__private_extern__ int
rip_abort(struct socket *so)
{
	soisdisconnected(so);
	return rip_detach(so);
}

__private_extern__ int
rip_disconnect(struct socket *so)
{
	if ((so->so_state & SS_ISCONNECTED) == 0)
		return ENOTCONN;
	return rip_abort(so);
}

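/*
 * Bind a raw IP socket to a local address.  The address must be assigned
 * to some interface (unless it is INADDR_ANY); the matching interface is
 * opportunistically recorded as the last output interface.
 */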
__private_extern__ int
rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in sin;
	struct ifaddr *ifa = NULL;
	struct ifnet *outif = NULL;

	if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT))
		return (inp == NULL ? EINVAL : EPROTOTYPE);

	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);

	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

	if (TAILQ_EMPTY(&ifnet_head) ||
	    (sin.sin_family != AF_INET && sin.sin_family != AF_IMPLINK) ||
	    (sin.sin_addr.s_addr && (ifa = ifa_ifwithaddr(SA(&sin))) == 0)) {
		return (EADDRNOTAVAIL);
	} else if (ifa) {
		/*
		 * Opportunistically determine the outbound
		 * interface that may be used; this may not
		 * hold true if we end up using a route
		 * going over a different interface, e.g.
		 * when sending to a local address.  This
		 * will get updated again after sending.
		 */
		IFA_LOCK(ifa);
		outif = ifa->ifa_ifp;
		IFA_UNLOCK(ifa);
		IFA_REMREF(ifa);
	}
	inp->inp_laddr = sin.sin_addr;
	inp->inp_last_outifp = outif;
	return (0);
}

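/*
 * "Connect" a raw IP socket by recording the foreign address; no packets
 * are exchanged, but subsequent sends may omit a destination.
 */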
__private_extern__ int
rip_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam;

	if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT))
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	if (nam->sa_len != sizeof(*addr))
		return EINVAL;
	if (TAILQ_EMPTY(&ifnet_head))
		return EADDRNOTAVAIL;
	if ((addr->sin_family != AF_INET) &&
	    (addr->sin_family != AF_IMPLINK))
		return EAFNOSUPPORT;
	inp->inp_faddr = addr->sin_addr;
	soisconnected(so);

	return 0;
}

__private_extern__ int
rip_shutdown(struct socket *so)
{
	socantsendmore(so);
	return 0;
}

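/*
 * Send routine: a connected socket uses its recorded foreign address and
 * rejects an explicit destination (EISCONN); an unconnected socket
 * requires one (ENOTCONN).  The mbuf is then handed to rip_output.
 */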
__private_extern__ int
rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct proc *p)
{
#pragma unused(flags, p)
	struct inpcb *inp = sotoinpcb(so);
	u_int32_t dst;
	int error = 0;

	if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) {
		error = (inp == NULL ? EINVAL : EPROTOTYPE);
		goto bad;
	}

	if (so->so_state & SS_ISCONNECTED) {
		if (nam != NULL) {
			error = EISCONN;
			goto bad;
		}
		dst = inp->inp_faddr.s_addr;
	} else {
		if (nam == NULL) {
			error = ENOTCONN;
			goto bad;
		}
		dst = ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr;
	}
	return (rip_output(m, so, dst, control));

bad:
	VERIFY(error != 0);

	if (m != NULL)
		m_freem(m);
	if (control != NULL)
		m_freem(control);

	return (error);
}

/*
 * Note: rip_unlock is used by several protocols in place of the generic
 * socket_unlock; it handles the socket deallocation on the last reference.
 */
int
rip_unlock(struct socket *so, int refcount, void *debug)
{
	void *lr_saved;
	struct inpcb *inp = sotoinpcb(so);

	if (debug == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = debug;

	if (refcount) {
		if (so->so_usecount <= 0) {
			panic("rip_unlock: bad refcount so=%p val=%x lrh= %s\n",
			    so, so->so_usecount, solockhistory_nr(so));
			/* NOTREACHED */
		}
		so->so_usecount--;
		if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) {
			/* cleanup after last reference */
			lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
			lck_rw_lock_exclusive(ripcbinfo.ipi_lock);
			if (inp->inp_state != INPCB_STATE_DEAD) {
#if INET6
				if (SOCK_CHECK_DOM(so, PF_INET6))
					in6_pcbdetach(inp);
				else
#endif /* INET6 */
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp);
			lck_rw_done(ripcbinfo.ipi_lock);
			return (0);
		}
	}
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
	lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
	return (0);
}

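/*
 * Sysctl handler exporting the list of raw IP PCBs as xinpcb structures,
 * bracketed by xinpgen records so callers can detect changes made while
 * the list was being copied out.
 */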
static int
rip_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.ipi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xinpcb);
		lck_rw_done(ripcbinfo.ipi_lock);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return error;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return 0;
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return ENOMEM;
	}

	for (inp = ripcbinfo.ipi_listhead->lh_first, i = 0; inp && i < n;
	     inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			/* XXX should avoid extra copy */
			inpcb_to_compat(inp, &xi.xi_inp);
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.ipi_lock);
	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");


static int
rip_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.ipi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xinpcb64);
		lck_rw_done(ripcbinfo.ipi_lock);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return error;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return 0;
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return ENOMEM;
	}

	for (inp = ripcbinfo.ipi_listhead->lh_first, i = 0; inp && i < n;
	     inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb64 xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			inpcb_to_xinpcb64(inp, &xi);
			if (inp->inp_socket)
				sotoxsocket64(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.ipi_lock);
	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	    rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets");


static int
rip_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	error = get_pcblist_n(IPPROTO_IP, req, &ripcbinfo);

	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	    rip_pcblist_n, "S,xinpcb_n", "List of active raw IP sockets");

struct pr_usrreqs rip_usrreqs = {
	.pru_abort =		rip_abort,
	.pru_attach =		rip_attach,
	.pru_bind =		rip_bind,
	.pru_connect =		rip_connect,
	.pru_control =		in_control,
	.pru_detach =		rip_detach,
	.pru_disconnect =	rip_disconnect,
	.pru_peeraddr =		in_getpeeraddr,
	.pru_send =		rip_send,
	.pru_shutdown =		rip_shutdown,
	.pru_sockaddr =		in_getsockaddr,
	.pru_sosend =		sosend,
	.pru_soreceive =	soreceive,
};
/* DSEP Review Done pl-20051213-v02 @3253 */