raw_ip.c revision 128019
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
30 * $FreeBSD: head/sys/netinet/raw_ip.c 128019 2004-04-07 20:46:16Z imp $
31 */
32
33#include "opt_inet6.h"
34#include "opt_ipsec.h"
35#include "opt_mac.h"
36#include "opt_random_ip_id.h"
37
38#include <sys/param.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/mac.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/proc.h>
45#include <sys/protosw.h>
46#include <sys/signalvar.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/sx.h>
50#include <sys/sysctl.h>
51#include <sys/systm.h>
52
53#include <vm/uma.h>
54
55#include <net/if.h>
56#include <net/route.h>
57
58#include <netinet/in.h>
59#include <netinet/in_systm.h>
60#include <netinet/in_pcb.h>
61#include <netinet/in_var.h>
62#include <netinet/ip.h>
63#include <netinet/ip_var.h>
64#include <netinet/ip_mroute.h>
65
66#include <netinet/ip_fw.h>
67#include <netinet/ip_dummynet.h>
68
69#ifdef FAST_IPSEC
70#include <netipsec/ipsec.h>
71#endif /*FAST_IPSEC*/
72
73#ifdef IPSEC
74#include <netinet6/ipsec.h>
75#endif /*IPSEC*/
76
77struct	inpcbhead ripcb;
78struct	inpcbinfo ripcbinfo;
79
80/* control hooks for ipfw and dummynet */
81ip_fw_ctl_t *ip_fw_ctl_ptr;
82ip_dn_ctl_t *ip_dn_ctl_ptr;
83
84/*
85 * hooks for multicast routing. They all default to NULL,
86 * so leave them not initialized and rely on BSS being set to 0.
87 */
88
89/* The socket used to communicate with the multicast routing daemon.  */
90struct socket  *ip_mrouter;
91
92/* The various mrouter and rsvp functions */
93int (*ip_mrouter_set)(struct socket *, struct sockopt *);
94int (*ip_mrouter_get)(struct socket *, struct sockopt *);
95int (*ip_mrouter_done)(void);
96int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
97                   struct ip_moptions *);
98int (*mrt_ioctl)(int, caddr_t);
99int (*legal_vif_num)(int);
100u_long (*ip_mcast_src)(int);
101
102void (*rsvp_input_p)(struct mbuf *m, int off);
103int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
104void (*ip_rsvp_force_done)(struct socket *);
105
106/*
107 * Nominal space allocated to a raw ip socket.
108 */
109#define	RIPSNDQ		8192
110#define	RIPRCVQ		8192
111
112/*
113 * Raw interface to IP protocol.
114 */
115
116/*
117 * Initialize raw connection block q.
118 */
119void
120rip_init()
121{
122	INP_INFO_LOCK_INIT(&ripcbinfo, "rip");
123	LIST_INIT(&ripcb);
124	ripcbinfo.listhead = &ripcb;
125	/*
126	 * XXX We don't use the hash list for raw IP, but it's easier
127	 * to allocate a one entry hash list than it is to check all
128	 * over the place for hashbase == NULL.
129	 */
130	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
131	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
132	ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
133	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
134	uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
135}
136
137static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
138
139static int
140raw_append(struct inpcb *last, struct ip *ip, struct mbuf *n)
141{
142	int policyfail = 0;
143
144#if defined(IPSEC) || defined(FAST_IPSEC)
145	/* check AH/ESP integrity. */
146	if (ipsec4_in_reject(n, last)) {
147		policyfail = 1;
148#ifdef IPSEC
149		ipsecstat.in_polvio++;
150#endif /*IPSEC*/
151		/* do not inject data to pcb */
152	}
153#endif /*IPSEC || FAST_IPSEC*/
154#ifdef MAC
155	if (!policyfail && mac_check_inpcb_deliver(last, n) != 0)
156		policyfail = 1;
157#endif
158	if (!policyfail) {
159		struct mbuf *opts = NULL;
160
161		if ((last->inp_flags & INP_CONTROLOPTS) ||
162		    (last->inp_socket->so_options & SO_TIMESTAMP))
163			ip_savecontrol(last, &opts, ip, n);
164		if (sbappendaddr(&last->inp_socket->so_rcv,
165		    (struct sockaddr *)&ripsrc, n, opts) == 0) {
166			/* should notify about lost packet */
167			m_freem(n);
168			if (opts)
169				m_freem(opts);
170		} else
171			sorwakeup(last->inp_socket);
172	} else
173		m_freem(n);
174	return policyfail;
175}
176
177/*
178 * Setup generic address and protocol structures
179 * for raw_input routine, then pass them along with
180 * mbuf chain.
181 */
182void
183rip_input(struct mbuf *m, int off)
184{
185	struct ip *ip = mtod(m, struct ip *);
186	int proto = ip->ip_p;
187	struct inpcb *inp, *last;
188
189	INP_INFO_RLOCK(&ripcbinfo);
190	ripsrc.sin_addr = ip->ip_src;
191	last = NULL;
192	LIST_FOREACH(inp, &ripcb, inp_list) {
193		INP_LOCK(inp);
194		if (inp->inp_ip_p && inp->inp_ip_p != proto) {
195	docontinue:
196			INP_UNLOCK(inp);
197			continue;
198		}
199#ifdef INET6
200		if ((inp->inp_vflag & INP_IPV4) == 0)
201			goto docontinue;
202#endif
203		if (inp->inp_laddr.s_addr &&
204                    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
205			goto docontinue;
206		if (inp->inp_faddr.s_addr &&
207                    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
208			goto docontinue;
209		if (last) {
210			struct mbuf *n;
211
212			n = m_copy(m, 0, (int)M_COPYALL);
213			if (n != NULL)
214				(void) raw_append(last, ip, n);
215			/* XXX count dropped packet */
216			INP_UNLOCK(last);
217		}
218		last = inp;
219	}
220	if (last != NULL) {
221		if (raw_append(last, ip, m) != 0)
222			ipstat.ips_delivered--;
223		INP_UNLOCK(last);
224	} else {
225		m_freem(m);
226		ipstat.ips_noproto++;
227		ipstat.ips_delivered--;
228	}
229	INP_INFO_RUNLOCK(&ripcbinfo);
230}
231
232/*
233 * Generate IP header and pass packet to ip_output.
234 * Tack on options user may have setup with control call.
235 */
236int
237rip_output(struct mbuf *m, struct socket *so, u_long dst)
238{
239	struct ip *ip;
240	struct inpcb *inp = sotoinpcb(so);
241	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
242
243#ifdef MAC
244	mac_create_mbuf_from_socket(so, m);
245#endif
246
247	/*
248	 * If the user handed us a complete IP packet, use it.
249	 * Otherwise, allocate an mbuf for a header and fill it in.
250	 */
251	if ((inp->inp_flags & INP_HDRINCL) == 0) {
252		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
253			m_freem(m);
254			return(EMSGSIZE);
255		}
256		M_PREPEND(m, sizeof(struct ip), M_TRYWAIT);
257		if (m == NULL)
258			return(ENOBUFS);
259		ip = mtod(m, struct ip *);
260		ip->ip_tos = inp->inp_ip_tos;
261		ip->ip_off = 0;
262		ip->ip_p = inp->inp_ip_p;
263		ip->ip_len = m->m_pkthdr.len;
264		ip->ip_src = inp->inp_laddr;
265		ip->ip_dst.s_addr = dst;
266		ip->ip_ttl = inp->inp_ip_ttl;
267	} else {
268		if (m->m_pkthdr.len > IP_MAXPACKET) {
269			m_freem(m);
270			return(EMSGSIZE);
271		}
272		ip = mtod(m, struct ip *);
273		/* don't allow both user specified and setsockopt options,
274		   and don't allow packet length sizes that will crash */
275		if (((ip->ip_hl != (sizeof (*ip) >> 2))
276		     && inp->inp_options)
277		    || (ip->ip_len > m->m_pkthdr.len)
278		    || (ip->ip_len < (ip->ip_hl << 2))) {
279			m_freem(m);
280			return EINVAL;
281		}
282		if (ip->ip_id == 0)
283#ifdef RANDOM_IP_ID
284			ip->ip_id = ip_randomid();
285#else
286			ip->ip_id = htons(ip_id++);
287#endif
288		/* XXX prevent ip_output from overwriting header fields */
289		flags |= IP_RAWOUTPUT;
290		ipstat.ips_rawout++;
291	}
292
293	if (inp->inp_flags & INP_ONESBCAST)
294		flags |= IP_SENDONES;
295
296	return (ip_output(m, inp->inp_options, NULL, flags,
297			  inp->inp_moptions, inp));
298}
299
300/*
301 * Raw IP socket option processing.
302 *
303 * Note that access to all of the IP administrative functions here is
304 * implicitly protected by suser() as gaining access to a raw socket
305 * requires either that the thread pass a suser() check, or that it be
306 * passed a raw socket by another thread that has passed a suser() check.
307 * If FreeBSD moves to a more fine-grained access control mechanism,
308 * additional checks will need to be placed here if the raw IP attachment
309 * check is not equivilent the the check required for these
310 * administrative operations; in some cases, these checks are already
311 * present.
312 */
313int
314rip_ctloutput(struct socket *so, struct sockopt *sopt)
315{
316	struct	inpcb *inp = sotoinpcb(so);
317	int	error, optval;
318
319	if (sopt->sopt_level != IPPROTO_IP)
320		return (EINVAL);
321
322	error = 0;
323
324	switch (sopt->sopt_dir) {
325	case SOPT_GET:
326		switch (sopt->sopt_name) {
327		case IP_HDRINCL:
328			optval = inp->inp_flags & INP_HDRINCL;
329			error = sooptcopyout(sopt, &optval, sizeof optval);
330			break;
331
332		case IP_FW_ADD:	/* ADD actually returns the body... */
333		case IP_FW_GET:
334			if (IPFW_LOADED)
335				error = ip_fw_ctl_ptr(sopt);
336			else
337				error = ENOPROTOOPT;
338			break;
339
340		case IP_DUMMYNET_GET:
341			if (DUMMYNET_LOADED)
342				error = ip_dn_ctl_ptr(sopt);
343			else
344				error = ENOPROTOOPT;
345			break ;
346
347		case MRT_INIT:
348		case MRT_DONE:
349		case MRT_ADD_VIF:
350		case MRT_DEL_VIF:
351		case MRT_ADD_MFC:
352		case MRT_DEL_MFC:
353		case MRT_VERSION:
354		case MRT_ASSERT:
355		case MRT_API_SUPPORT:
356		case MRT_API_CONFIG:
357		case MRT_ADD_BW_UPCALL:
358		case MRT_DEL_BW_UPCALL:
359			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
360				EOPNOTSUPP;
361			break;
362
363		default:
364			error = ip_ctloutput(so, sopt);
365			break;
366		}
367		break;
368
369	case SOPT_SET:
370		switch (sopt->sopt_name) {
371		case IP_HDRINCL:
372			error = sooptcopyin(sopt, &optval, sizeof optval,
373					    sizeof optval);
374			if (error)
375				break;
376			if (optval)
377				inp->inp_flags |= INP_HDRINCL;
378			else
379				inp->inp_flags &= ~INP_HDRINCL;
380			break;
381
382		case IP_FW_ADD:
383		case IP_FW_DEL:
384		case IP_FW_FLUSH:
385		case IP_FW_ZERO:
386		case IP_FW_RESETLOG:
387			if (IPFW_LOADED)
388				error = ip_fw_ctl_ptr(sopt);
389			else
390				error = ENOPROTOOPT;
391			break;
392
393		case IP_DUMMYNET_CONFIGURE:
394		case IP_DUMMYNET_DEL:
395		case IP_DUMMYNET_FLUSH:
396			if (DUMMYNET_LOADED)
397				error = ip_dn_ctl_ptr(sopt);
398			else
399				error = ENOPROTOOPT ;
400			break ;
401
402		case IP_RSVP_ON:
403			error = ip_rsvp_init(so);
404			break;
405
406		case IP_RSVP_OFF:
407			error = ip_rsvp_done();
408			break;
409
410		case IP_RSVP_VIF_ON:
411		case IP_RSVP_VIF_OFF:
412			error = ip_rsvp_vif ?
413				ip_rsvp_vif(so, sopt) : EINVAL;
414			break;
415
416		case MRT_INIT:
417		case MRT_DONE:
418		case MRT_ADD_VIF:
419		case MRT_DEL_VIF:
420		case MRT_ADD_MFC:
421		case MRT_DEL_MFC:
422		case MRT_VERSION:
423		case MRT_ASSERT:
424		case MRT_API_SUPPORT:
425		case MRT_API_CONFIG:
426		case MRT_ADD_BW_UPCALL:
427		case MRT_DEL_BW_UPCALL:
428			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
429					EOPNOTSUPP;
430			break;
431
432		default:
433			error = ip_ctloutput(so, sopt);
434			break;
435		}
436		break;
437	}
438
439	return (error);
440}
441
442/*
443 * This function exists solely to receive the PRC_IFDOWN messages which
444 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
445 * and calls in_ifadown() to remove all routes corresponding to that address.
446 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
447 * interface routes.
448 */
449void
450rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
451{
452	struct in_ifaddr *ia;
453	struct ifnet *ifp;
454	int err;
455	int flags;
456
457	switch (cmd) {
458	case PRC_IFDOWN:
459		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
460			if (ia->ia_ifa.ifa_addr == sa
461			    && (ia->ia_flags & IFA_ROUTE)) {
462				/*
463				 * in_ifscrub kills the interface route.
464				 */
465				in_ifscrub(ia->ia_ifp, ia);
466				/*
467				 * in_ifadown gets rid of all the rest of
468				 * the routes.  This is not quite the right
469				 * thing to do, but at least if we are running
470				 * a routing process they will come back.
471				 */
472				in_ifadown(&ia->ia_ifa, 0);
473				break;
474			}
475		}
476		break;
477
478	case PRC_IFUP:
479		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
480			if (ia->ia_ifa.ifa_addr == sa)
481				break;
482		}
483		if (ia == 0 || (ia->ia_flags & IFA_ROUTE))
484			return;
485		flags = RTF_UP;
486		ifp = ia->ia_ifa.ifa_ifp;
487
488		if ((ifp->if_flags & IFF_LOOPBACK)
489		    || (ifp->if_flags & IFF_POINTOPOINT))
490			flags |= RTF_HOST;
491
492		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
493		if (err == 0)
494			ia->ia_flags |= IFA_ROUTE;
495		break;
496	}
497}
498
499u_long	rip_sendspace = RIPSNDQ;
500u_long	rip_recvspace = RIPRCVQ;
501
502SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
503    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
504SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
505    &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
506
507static int
508rip_attach(struct socket *so, int proto, struct thread *td)
509{
510	struct inpcb *inp;
511	int error;
512
513	/* XXX why not lower? */
514	INP_INFO_WLOCK(&ripcbinfo);
515	inp = sotoinpcb(so);
516	if (inp) {
517		/* XXX counter, printf */
518		INP_INFO_WUNLOCK(&ripcbinfo);
519		return EINVAL;
520	}
521	if (td && (error = suser(td)) != 0) {
522		INP_INFO_WUNLOCK(&ripcbinfo);
523		return error;
524	}
525	if (proto >= IPPROTO_MAX || proto < 0) {
526		INP_INFO_WUNLOCK(&ripcbinfo);
527		return EPROTONOSUPPORT;
528	}
529
530	error = soreserve(so, rip_sendspace, rip_recvspace);
531	if (error) {
532		INP_INFO_WUNLOCK(&ripcbinfo);
533		return error;
534	}
535	error = in_pcballoc(so, &ripcbinfo, "rawinp");
536	if (error) {
537		INP_INFO_WUNLOCK(&ripcbinfo);
538		return error;
539	}
540	inp = (struct inpcb *)so->so_pcb;
541	INP_LOCK(inp);
542	INP_INFO_WUNLOCK(&ripcbinfo);
543	inp->inp_vflag |= INP_IPV4;
544	inp->inp_ip_p = proto;
545	inp->inp_ip_ttl = ip_defttl;
546	INP_UNLOCK(inp);
547	return 0;
548}
549
550static void
551rip_pcbdetach(struct socket *so, struct inpcb *inp)
552{
553	INP_INFO_WLOCK_ASSERT(&ripcbinfo);
554	INP_LOCK_ASSERT(inp);
555
556	if (so == ip_mrouter && ip_mrouter_done)
557		ip_mrouter_done();
558	if (ip_rsvp_force_done)
559		ip_rsvp_force_done(so);
560	if (so == ip_rsvpd)
561		ip_rsvp_done();
562	in_pcbdetach(inp);
563}
564
565static int
566rip_detach(struct socket *so)
567{
568	struct inpcb *inp;
569
570	INP_INFO_WLOCK(&ripcbinfo);
571	inp = sotoinpcb(so);
572	if (inp == 0) {
573		/* XXX counter, printf */
574		INP_INFO_WUNLOCK(&ripcbinfo);
575		return EINVAL;
576	}
577	INP_LOCK(inp);
578	rip_pcbdetach(so, inp);
579	INP_INFO_WUNLOCK(&ripcbinfo);
580	return 0;
581}
582
583static int
584rip_abort(struct socket *so)
585{
586	struct inpcb *inp;
587
588	INP_INFO_WLOCK(&ripcbinfo);
589	inp = sotoinpcb(so);
590	if (inp == 0) {
591		INP_INFO_WUNLOCK(&ripcbinfo);
592		return EINVAL;	/* ??? possible? panic instead? */
593	}
594	INP_LOCK(inp);
595	soisdisconnected(so);
596	if (so->so_state & SS_NOFDREF)
597		rip_pcbdetach(so, inp);
598	else
599		INP_UNLOCK(inp);
600	INP_INFO_WUNLOCK(&ripcbinfo);
601	return 0;
602}
603
604static int
605rip_disconnect(struct socket *so)
606{
607	if ((so->so_state & SS_ISCONNECTED) == 0)
608		return ENOTCONN;
609	return rip_abort(so);
610}
611
612static int
613rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
614{
615	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
616	struct inpcb *inp;
617
618	if (nam->sa_len != sizeof(*addr))
619		return EINVAL;
620
621	if (TAILQ_EMPTY(&ifnet) ||
622	    (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
623	    (addr->sin_addr.s_addr &&
624	     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
625		return EADDRNOTAVAIL;
626
627	INP_INFO_WLOCK(&ripcbinfo);
628	inp = sotoinpcb(so);
629	if (inp == 0) {
630		INP_INFO_WUNLOCK(&ripcbinfo);
631		return EINVAL;
632	}
633	INP_LOCK(inp);
634	inp->inp_laddr = addr->sin_addr;
635	INP_UNLOCK(inp);
636	INP_INFO_WUNLOCK(&ripcbinfo);
637	return 0;
638}
639
640static int
641rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
642{
643	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
644	struct inpcb *inp;
645
646	if (nam->sa_len != sizeof(*addr))
647		return EINVAL;
648	if (TAILQ_EMPTY(&ifnet))
649		return EADDRNOTAVAIL;
650	if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
651		return EAFNOSUPPORT;
652
653	INP_INFO_WLOCK(&ripcbinfo);
654	inp = sotoinpcb(so);
655	if (inp == 0) {
656		INP_INFO_WUNLOCK(&ripcbinfo);
657		return EINVAL;
658	}
659	INP_LOCK(inp);
660	inp->inp_faddr = addr->sin_addr;
661	soisconnected(so);
662	INP_UNLOCK(inp);
663	INP_INFO_WUNLOCK(&ripcbinfo);
664	return 0;
665}
666
667static int
668rip_shutdown(struct socket *so)
669{
670	struct inpcb *inp;
671
672	INP_INFO_RLOCK(&ripcbinfo);
673	inp = sotoinpcb(so);
674	if (inp == 0) {
675		INP_INFO_RUNLOCK(&ripcbinfo);
676		return EINVAL;
677	}
678	INP_LOCK(inp);
679	INP_INFO_RUNLOCK(&ripcbinfo);
680	socantsendmore(so);
681	INP_UNLOCK(inp);
682	return 0;
683}
684
685static int
686rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
687	 struct mbuf *control, struct thread *td)
688{
689	struct inpcb *inp;
690	u_long dst;
691	int ret;
692
693	INP_INFO_WLOCK(&ripcbinfo);
694	inp = sotoinpcb(so);
695	if (so->so_state & SS_ISCONNECTED) {
696		if (nam) {
697			INP_INFO_WUNLOCK(&ripcbinfo);
698			m_freem(m);
699			return EISCONN;
700		}
701		dst = inp->inp_faddr.s_addr;
702	} else {
703		if (nam == NULL) {
704			INP_INFO_WUNLOCK(&ripcbinfo);
705			m_freem(m);
706			return ENOTCONN;
707		}
708		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
709	}
710	INP_LOCK(inp);
711	ret = rip_output(m, so, dst);
712	INP_UNLOCK(inp);
713	INP_INFO_WUNLOCK(&ripcbinfo);
714	return ret;
715}
716
717static int
718rip_pcblist(SYSCTL_HANDLER_ARGS)
719{
720	int error, i, n;
721	struct inpcb *inp, **inp_list;
722	inp_gen_t gencnt;
723	struct xinpgen xig;
724
725	/*
726	 * The process of preparing the TCB list is too time-consuming and
727	 * resource-intensive to repeat twice on every request.
728	 */
729	if (req->oldptr == 0) {
730		n = ripcbinfo.ipi_count;
731		req->oldidx = 2 * (sizeof xig)
732			+ (n + n/8) * sizeof(struct xinpcb);
733		return 0;
734	}
735
736	if (req->newptr != 0)
737		return EPERM;
738
739	/*
740	 * OK, now we're committed to doing something.
741	 */
742	INP_INFO_RLOCK(&ripcbinfo);
743	gencnt = ripcbinfo.ipi_gencnt;
744	n = ripcbinfo.ipi_count;
745	INP_INFO_RUNLOCK(&ripcbinfo);
746
747	xig.xig_len = sizeof xig;
748	xig.xig_count = n;
749	xig.xig_gen = gencnt;
750	xig.xig_sogen = so_gencnt;
751	error = SYSCTL_OUT(req, &xig, sizeof xig);
752	if (error)
753		return error;
754
755	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
756	if (inp_list == 0)
757		return ENOMEM;
758
759	INP_INFO_RLOCK(&ripcbinfo);
760	for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n;
761	     inp = LIST_NEXT(inp, inp_list)) {
762		INP_LOCK(inp);
763		if (inp->inp_gencnt <= gencnt &&
764		    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) {
765			/* XXX held references? */
766			inp_list[i++] = inp;
767		}
768		INP_UNLOCK(inp);
769	}
770	INP_INFO_RUNLOCK(&ripcbinfo);
771	n = i;
772
773	error = 0;
774	for (i = 0; i < n; i++) {
775		inp = inp_list[i];
776		if (inp->inp_gencnt <= gencnt) {
777			struct xinpcb xi;
778			xi.xi_len = sizeof xi;
779			/* XXX should avoid extra copy */
780			bcopy(inp, &xi.xi_inp, sizeof *inp);
781			if (inp->inp_socket)
782				sotoxsocket(inp->inp_socket, &xi.xi_socket);
783			error = SYSCTL_OUT(req, &xi, sizeof xi);
784		}
785	}
786	if (!error) {
787		/*
788		 * Give the user an updated idea of our state.
789		 * If the generation differs from what we told
790		 * her before, she knows that something happened
791		 * while we were processing this request, and it
792		 * might be necessary to retry.
793		 */
794		INP_INFO_RLOCK(&ripcbinfo);
795		xig.xig_gen = ripcbinfo.ipi_gencnt;
796		xig.xig_sogen = so_gencnt;
797		xig.xig_count = ripcbinfo.ipi_count;
798		INP_INFO_RUNLOCK(&ripcbinfo);
799		error = SYSCTL_OUT(req, &xig, sizeof xig);
800	}
801	free(inp_list, M_TEMP);
802	return error;
803}
804
805/*
806 * This is the wrapper function for in_setsockaddr.  We just pass down
807 * the pcbinfo for in_setpeeraddr to lock.
808 */
809static int
810rip_sockaddr(struct socket *so, struct sockaddr **nam)
811{
812	return (in_setsockaddr(so, nam, &ripcbinfo));
813}
814
815/*
816 * This is the wrapper function for in_setpeeraddr.  We just pass down
817 * the pcbinfo for in_setpeeraddr to lock.
818 */
819static int
820rip_peeraddr(struct socket *so, struct sockaddr **nam)
821{
822	return (in_setpeeraddr(so, nam, &ripcbinfo));
823}
824
825
826SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
827	    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
828
829struct pr_usrreqs rip_usrreqs = {
830	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
831	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
832	pru_listen_notsupp, rip_peeraddr, pru_rcvd_notsupp,
833	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
834	rip_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel
835};
836