1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1988, 1993
34 *	The Regents of the University of California.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)raw_ip.c	8.2 (Berkeley) 1/4/94
62 */
63
64#include <sys/cdefs.h>
65__FBSDID("$FreeBSD$");
66
67#include "opt_ipsec.h"
68#include "opt_inet6.h"
69
70#include <sys/param.h>
71#include <sys/errno.h>
72#include <sys/jail.h>
73#include <sys/kernel.h>
74#include <sys/lock.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/priv.h>
78#include <sys/proc.h>
79#include <sys/protosw.h>
80#include <sys/signalvar.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/sx.h>
84#include <sys/syslog.h>
85
86#include <net/if.h>
87#include <net/if_var.h>
88#include <net/if_types.h>
89#include <net/route.h>
90#include <net/vnet.h>
91
92#include <netinet/in.h>
93#include <netinet/in_var.h>
94#include <netinet/in_systm.h>
95#include <netinet/in_pcb.h>
96
97#include <netinet/icmp6.h>
98#include <netinet/ip6.h>
99#include <netinet/ip_var.h>
100#include <netinet6/ip6protosw.h>
101#include <netinet6/ip6_mroute.h>
102#include <netinet6/in6_pcb.h>
103#include <netinet6/ip6_var.h>
104#include <netinet6/nd6.h>
105#include <netinet6/raw_ip6.h>
106#include <netinet6/scope6_var.h>
107#include <netinet6/send.h>
108
109#include <netipsec/ipsec_support.h>
110
111#include <machine/stdarg.h>
112
/*
 * Convenience casts: reinterpret a generic socket address pointer as a
 * struct sockaddr_in6 pointer, and a generic interface address pointer as a
 * struct in6_ifaddr pointer.  No validation of the underlying object is
 * performed; callers must already know the address family.
 */
#define	satosin6(sa)	((struct sockaddr_in6 *)(sa))
#define	ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
115
116/*
117 * Raw interface to IP6 protocol.
118 */
119
/*
 * Per-vnet raw socket PCB list and PCB info.  They are only declared here;
 * the definitions live elsewhere (presumably in the IPv4 raw socket code,
 * since rip6_input() has to filter the list on INP_IPV6 -- TODO confirm).
 */
VNET_DECLARE(struct inpcbhead, ripcb);
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define	V_ripcb				VNET(ripcb)
#define	V_ripcbinfo			VNET(ripcbinfo)

/* Socket buffer reservations passed to soreserve() in rip6_attach(). */
extern u_long	rip_sendspace;
extern u_long	rip_recvspace;

/* Per-vnet, per-CPU raw IPv6 statistics (updated through RIP6STAT_INC()). */
VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat);
VNET_PCPUSTAT_SYSINIT(rip6stat);

#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(rip6stat);
#endif /* VIMAGE */
134
/*
 * Hooks for multicast routing.  They all default to NULL, so they are left
 * uninitialized and rely on BSS being zeroed.  Code in this file checks the
 * hooks it calls for NULL first.
 */

/*
 * The socket used to communicate with the multicast routing daemon.
 */
VNET_DEFINE(struct socket *, ip6_mrouter);

/*
 * The various mrouter functions.
 */
int (*ip6_mrouter_set)(struct socket *, struct sockopt *);
int (*ip6_mrouter_get)(struct socket *, struct sockopt *);
int (*ip6_mrouter_done)(void);
int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
int (*mrt6_ioctl)(u_long, caddr_t);
153
154/*
155 * Setup generic address and protocol structures for raw_input routine, then
156 * pass them along with mbuf chain.
157 */
158int
159rip6_input(struct mbuf **mp, int *offp, int proto)
160{
161	struct ifnet *ifp;
162	struct mbuf *m = *mp;
163	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
164	struct inpcb *inp;
165	struct inpcb *last = NULL;
166	struct mbuf *opts = NULL;
167	struct sockaddr_in6 fromsa;
168	struct epoch_tracker et;
169
170	RIP6STAT_INC(rip6s_ipackets);
171
172	init_sin6(&fromsa, m, 0); /* general init */
173
174	ifp = m->m_pkthdr.rcvif;
175
176	INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
177	CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
178		/* XXX inp locking */
179		if ((inp->inp_vflag & INP_IPV6) == 0)
180			continue;
181		if (inp->inp_ip_p &&
182		    inp->inp_ip_p != proto)
183			continue;
184		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
185		    !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
186			continue;
187		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
188		    !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
189			continue;
190		if (last != NULL) {
191			struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
192
193#if defined(IPSEC) || defined(IPSEC_SUPPORT)
194			/*
195			 * Check AH/ESP integrity.
196			 */
197			if (IPSEC_ENABLED(ipv6)) {
198				if (n != NULL &&
199				    IPSEC_CHECK_POLICY(ipv6, n, last) != 0) {
200					m_freem(n);
201					/* Do not inject data into pcb. */
202					n = NULL;
203				}
204			}
205#endif /* IPSEC */
206			if (n) {
207				if (last->inp_flags & INP_CONTROLOPTS ||
208				    last->inp_socket->so_options & SO_TIMESTAMP)
209					ip6_savecontrol(last, n, &opts);
210				/* strip intermediate headers */
211				m_adj(n, *offp);
212				if (sbappendaddr(&last->inp_socket->so_rcv,
213						(struct sockaddr *)&fromsa,
214						 n, opts) == 0) {
215					m_freem(n);
216					if (opts)
217						m_freem(opts);
218					RIP6STAT_INC(rip6s_fullsock);
219				} else
220					sorwakeup(last->inp_socket);
221				opts = NULL;
222			}
223			INP_RUNLOCK(last);
224			last = NULL;
225		}
226		INP_RLOCK(inp);
227		if (__predict_false(inp->inp_flags2 & INP_FREED))
228			goto skip_2;
229		if (jailed_without_vnet(inp->inp_cred)) {
230			/*
231			 * Allow raw socket in jail to receive multicast;
232			 * assume process had PRIV_NETINET_RAW at attach,
233			 * and fall through into normal filter path if so.
234			 */
235			if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
236			    prison_check_ip6(inp->inp_cred,
237			    &ip6->ip6_dst) != 0)
238				goto skip_2;
239		}
240		if (inp->in6p_cksum != -1) {
241			RIP6STAT_INC(rip6s_isum);
242			if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 ||
243			    in6_cksum(m, proto, *offp,
244			    m->m_pkthdr.len - *offp)) {
245				RIP6STAT_INC(rip6s_badsum);
246				/*
247				 * Drop the received message, don't send an
248				 * ICMP6 message. Set proto to IPPROTO_NONE
249				 * to achieve that.
250				 */
251				proto = IPPROTO_NONE;
252				goto skip_2;
253			}
254		}
255		/*
256		 * If this raw socket has multicast state, and we
257		 * have received a multicast, check if this socket
258		 * should receive it, as multicast filtering is now
259		 * the responsibility of the transport layer.
260		 */
261		if (inp->in6p_moptions &&
262		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
263			/*
264			 * If the incoming datagram is for MLD, allow it
265			 * through unconditionally to the raw socket.
266			 *
267			 * Use the M_RTALERT_MLD flag to check for MLD
268			 * traffic without having to inspect the mbuf chain
269			 * more deeply, as all MLDv1/v2 host messages MUST
270			 * contain the Router Alert option.
271			 *
272			 * In the case of MLDv1, we may not have explicitly
273			 * joined the group, and may have set IFF_ALLMULTI
274			 * on the interface. im6o_mc_filter() may discard
275			 * control traffic we actually need to see.
276			 *
277			 * Userland multicast routing daemons should continue
278			 * filter the control traffic appropriately.
279			 */
280			int blocked;
281
282			blocked = MCAST_PASS;
283			if ((m->m_flags & M_RTALERT_MLD) == 0) {
284				struct sockaddr_in6 mcaddr;
285
286				bzero(&mcaddr, sizeof(struct sockaddr_in6));
287				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
288				mcaddr.sin6_family = AF_INET6;
289				mcaddr.sin6_addr = ip6->ip6_dst;
290
291				blocked = im6o_mc_filter(inp->in6p_moptions,
292				    ifp,
293				    (struct sockaddr *)&mcaddr,
294				    (struct sockaddr *)&fromsa);
295			}
296			if (blocked != MCAST_PASS) {
297				IP6STAT_INC(ip6s_notmember);
298				goto skip_2;
299			}
300		}
301		last = inp;
302		continue;
303skip_2:
304		INP_RUNLOCK(inp);
305	}
306	INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
307#if defined(IPSEC) || defined(IPSEC_SUPPORT)
308	/*
309	 * Check AH/ESP integrity.
310	 */
311	if (IPSEC_ENABLED(ipv6) && last != NULL &&
312	    IPSEC_CHECK_POLICY(ipv6, m, last) != 0) {
313		m_freem(m);
314		IP6STAT_DEC(ip6s_delivered);
315		/* Do not inject data into pcb. */
316		INP_RUNLOCK(last);
317	} else
318#endif /* IPSEC */
319	if (last != NULL) {
320		if (last->inp_flags & INP_CONTROLOPTS ||
321		    last->inp_socket->so_options & SO_TIMESTAMP)
322			ip6_savecontrol(last, m, &opts);
323		/* Strip intermediate headers. */
324		m_adj(m, *offp);
325		if (sbappendaddr(&last->inp_socket->so_rcv,
326		    (struct sockaddr *)&fromsa, m, opts) == 0) {
327			m_freem(m);
328			if (opts)
329				m_freem(opts);
330			RIP6STAT_INC(rip6s_fullsock);
331		} else
332			sorwakeup(last->inp_socket);
333		INP_RUNLOCK(last);
334	} else {
335		RIP6STAT_INC(rip6s_nosock);
336		if (m->m_flags & M_MCAST)
337			RIP6STAT_INC(rip6s_nosockmcast);
338		if (proto == IPPROTO_NONE)
339			m_freem(m);
340		else
341			icmp6_error(m, ICMP6_PARAM_PROB,
342			    ICMP6_PARAMPROB_NEXTHEADER,
343			    ip6_get_prevhdr(m, *offp));
344		IP6STAT_DEC(ip6s_delivered);
345	}
346	return (IPPROTO_DONE);
347}
348
349void
350rip6_ctlinput(int cmd, struct sockaddr *sa, void *d)
351{
352	struct ip6ctlparam *ip6cp = NULL;
353	const struct sockaddr_in6 *sa6_src = NULL;
354	void *cmdarg;
355	struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange;
356
357	if (sa->sa_family != AF_INET6 ||
358	    sa->sa_len != sizeof(struct sockaddr_in6))
359		return;
360
361	if ((unsigned)cmd >= PRC_NCMDS)
362		return;
363	if (PRC_IS_REDIRECT(cmd))
364		notify = in6_rtchange, d = NULL;
365	else if (cmd == PRC_HOSTDEAD)
366		d = NULL;
367	else if (inet6ctlerrmap[cmd] == 0)
368		return;
369
370	/*
371	 * If the parameter is from icmp6, decode it.
372	 */
373	if (d != NULL) {
374		ip6cp = (struct ip6ctlparam *)d;
375		cmdarg = ip6cp->ip6c_cmdarg;
376		sa6_src = ip6cp->ip6c_src;
377	} else {
378		cmdarg = NULL;
379		sa6_src = &sa6_any;
380	}
381
382	(void) in6_pcbnotify(&V_ripcbinfo, sa, 0,
383	    (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
384}
385
386/*
387 * Generate IPv6 header and pass packet to ip6_output.  Tack on options user
388 * may have setup with control call.
389 */
390int
391rip6_output(struct mbuf *m, struct socket *so, ...)
392{
393	struct mbuf *control;
394	struct m_tag *mtag;
395	struct sockaddr_in6 *dstsock;
396	struct ip6_hdr *ip6;
397	struct inpcb *inp;
398	u_int	plen = m->m_pkthdr.len;
399	int error = 0;
400	struct ip6_pktopts opt, *optp;
401	struct ifnet *oifp = NULL;
402	int type = 0, code = 0;		/* for ICMPv6 output statistics only */
403	int scope_ambiguous = 0;
404	int use_defzone = 0;
405	int hlim = 0;
406	struct in6_addr in6a;
407	va_list ap;
408
409	va_start(ap, so);
410	dstsock = va_arg(ap, struct sockaddr_in6 *);
411	control = va_arg(ap, struct mbuf *);
412	va_end(ap);
413
414	inp = sotoinpcb(so);
415	INP_WLOCK(inp);
416
417	if (control != NULL) {
418		if ((error = ip6_setpktopts(control, &opt,
419		    inp->in6p_outputopts, so->so_cred,
420		    so->so_proto->pr_protocol)) != 0) {
421			goto bad;
422		}
423		optp = &opt;
424	} else
425		optp = inp->in6p_outputopts;
426
427	/*
428	 * Check and convert scope zone ID into internal form.
429	 *
430	 * XXX: we may still need to determine the zone later.
431	 */
432	if (!(so->so_state & SS_ISCONNECTED)) {
433		if (!optp || !optp->ip6po_pktinfo ||
434		    !optp->ip6po_pktinfo->ipi6_ifindex)
435			use_defzone = V_ip6_use_defzone;
436		if (dstsock->sin6_scope_id == 0 && !use_defzone)
437			scope_ambiguous = 1;
438		if ((error = sa6_embedscope(dstsock, use_defzone)) != 0)
439			goto bad;
440	}
441
442	/*
443	 * For an ICMPv6 packet, we should know its type and code to update
444	 * statistics.
445	 */
446	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
447		struct icmp6_hdr *icmp6;
448		if (m->m_len < sizeof(struct icmp6_hdr) &&
449		    (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) {
450			error = ENOBUFS;
451			goto bad;
452		}
453		icmp6 = mtod(m, struct icmp6_hdr *);
454		type = icmp6->icmp6_type;
455		code = icmp6->icmp6_code;
456	}
457
458	M_PREPEND(m, sizeof(*ip6), M_NOWAIT);
459	if (m == NULL) {
460		error = ENOBUFS;
461		goto bad;
462	}
463	ip6 = mtod(m, struct ip6_hdr *);
464
465	/*
466	 * Source address selection.
467	 */
468	error = in6_selectsrc_socket(dstsock, optp, inp, so->so_cred,
469	    scope_ambiguous, &in6a, &hlim);
470
471	if (error)
472		goto bad;
473	error = prison_check_ip6(inp->inp_cred, &in6a);
474	if (error != 0)
475		goto bad;
476	ip6->ip6_src = in6a;
477
478	ip6->ip6_dst = dstsock->sin6_addr;
479
480	/*
481	 * Fill in the rest of the IPv6 header fields.
482	 */
483	ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
484	    (inp->inp_flow & IPV6_FLOWINFO_MASK);
485	ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
486	    (IPV6_VERSION & IPV6_VERSION_MASK);
487
488	/*
489	 * ip6_plen will be filled in ip6_output, so not fill it here.
490	 */
491	ip6->ip6_nxt = inp->inp_ip_p;
492	ip6->ip6_hlim = hlim;
493
494	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
495	    inp->in6p_cksum != -1) {
496		struct mbuf *n;
497		int off;
498		u_int16_t *p;
499
500		/* Compute checksum. */
501		if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
502			off = offsetof(struct icmp6_hdr, icmp6_cksum);
503		else
504			off = inp->in6p_cksum;
505		if (plen < off + 2) {
506			error = EINVAL;
507			goto bad;
508		}
509		off += sizeof(struct ip6_hdr);
510
511		n = m;
512		while (n && n->m_len <= off) {
513			off -= n->m_len;
514			n = n->m_next;
515		}
516		if (!n)
517			goto bad;
518		p = (u_int16_t *)(mtod(n, caddr_t) + off);
519		*p = 0;
520		*p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
521	}
522
523	/*
524	 * Send RA/RS messages to user land for protection, before sending
525	 * them to rtadvd/rtsol.
526	 */
527	if ((send_sendso_input_hook != NULL) &&
528	    so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
529		switch (type) {
530		case ND_ROUTER_ADVERT:
531		case ND_ROUTER_SOLICIT:
532			mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
533				sizeof(unsigned short), M_NOWAIT);
534			if (mtag == NULL)
535				goto bad;
536			m_tag_prepend(m, mtag);
537		}
538	}
539
540	error = ip6_output(m, optp, NULL, 0, inp->in6p_moptions, &oifp, inp);
541	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
542		if (oifp)
543			icmp6_ifoutstat_inc(oifp, type, code);
544		ICMP6STAT_INC(icp6s_outhist[type]);
545	} else
546		RIP6STAT_INC(rip6s_opackets);
547
548	goto freectl;
549
550 bad:
551	if (m)
552		m_freem(m);
553
554 freectl:
555	if (control != NULL) {
556		ip6_clearpktopts(&opt, -1);
557		m_freem(control);
558	}
559	INP_WUNLOCK(inp);
560	return (error);
561}
562
563/*
564 * Raw IPv6 socket option processing.
565 */
566int
567rip6_ctloutput(struct socket *so, struct sockopt *sopt)
568{
569	struct inpcb *inp;
570	int error;
571
572	if (sopt->sopt_level == IPPROTO_ICMPV6)
573		/*
574		 * XXX: is it better to call icmp6_ctloutput() directly
575		 * from protosw?
576		 */
577		return (icmp6_ctloutput(so, sopt));
578	else if (sopt->sopt_level != IPPROTO_IPV6) {
579		if (sopt->sopt_level == SOL_SOCKET &&
580		    sopt->sopt_name == SO_SETFIB) {
581			inp = sotoinpcb(so);
582			INP_WLOCK(inp);
583			inp->inp_inc.inc_fibnum = so->so_fibnum;
584			INP_WUNLOCK(inp);
585			return (0);
586		}
587		return (EINVAL);
588	}
589
590	error = 0;
591
592	switch (sopt->sopt_dir) {
593	case SOPT_GET:
594		switch (sopt->sopt_name) {
595		case MRT6_INIT:
596		case MRT6_DONE:
597		case MRT6_ADD_MIF:
598		case MRT6_DEL_MIF:
599		case MRT6_ADD_MFC:
600		case MRT6_DEL_MFC:
601		case MRT6_PIM:
602			error = ip6_mrouter_get ?  ip6_mrouter_get(so, sopt) :
603			    EOPNOTSUPP;
604			break;
605		case IPV6_CHECKSUM:
606			error = ip6_raw_ctloutput(so, sopt);
607			break;
608		default:
609			error = ip6_ctloutput(so, sopt);
610			break;
611		}
612		break;
613
614	case SOPT_SET:
615		switch (sopt->sopt_name) {
616		case MRT6_INIT:
617		case MRT6_DONE:
618		case MRT6_ADD_MIF:
619		case MRT6_DEL_MIF:
620		case MRT6_ADD_MFC:
621		case MRT6_DEL_MFC:
622		case MRT6_PIM:
623			error = ip6_mrouter_set ?  ip6_mrouter_set(so, sopt) :
624			    EOPNOTSUPP;
625			break;
626		case IPV6_CHECKSUM:
627			error = ip6_raw_ctloutput(so, sopt);
628			break;
629		default:
630			error = ip6_ctloutput(so, sopt);
631			break;
632		}
633		break;
634	}
635
636	return (error);
637}
638
639static int
640rip6_attach(struct socket *so, int proto, struct thread *td)
641{
642	struct inpcb *inp;
643	struct icmp6_filter *filter;
644	int error;
645
646	inp = sotoinpcb(so);
647	KASSERT(inp == NULL, ("rip6_attach: inp != NULL"));
648
649	error = priv_check(td, PRIV_NETINET_RAW);
650	if (error)
651		return (error);
652	error = soreserve(so, rip_sendspace, rip_recvspace);
653	if (error)
654		return (error);
655	filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
656	if (filter == NULL)
657		return (ENOMEM);
658	INP_INFO_WLOCK(&V_ripcbinfo);
659	error = in_pcballoc(so, &V_ripcbinfo);
660	if (error) {
661		INP_INFO_WUNLOCK(&V_ripcbinfo);
662		free(filter, M_PCB);
663		return (error);
664	}
665	inp = (struct inpcb *)so->so_pcb;
666	INP_INFO_WUNLOCK(&V_ripcbinfo);
667	inp->inp_vflag |= INP_IPV6;
668	inp->inp_ip_p = (long)proto;
669	inp->in6p_hops = -1;	/* use kernel default */
670	inp->in6p_cksum = -1;
671	inp->in6p_icmp6filt = filter;
672	ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt);
673	INP_WUNLOCK(inp);
674	return (0);
675}
676
677static void
678rip6_detach(struct socket *so)
679{
680	struct inpcb *inp;
681
682	inp = sotoinpcb(so);
683	KASSERT(inp != NULL, ("rip6_detach: inp == NULL"));
684
685	if (so == V_ip6_mrouter && ip6_mrouter_done)
686		ip6_mrouter_done();
687	/* xxx: RSVP */
688	INP_INFO_WLOCK(&V_ripcbinfo);
689	INP_WLOCK(inp);
690	free(inp->in6p_icmp6filt, M_PCB);
691	in_pcbdetach(inp);
692	in_pcbfree(inp);
693	INP_INFO_WUNLOCK(&V_ripcbinfo);
694}
695
696/* XXXRW: This can't ever be called. */
697static void
698rip6_abort(struct socket *so)
699{
700	struct inpcb *inp;
701
702	inp = sotoinpcb(so);
703	KASSERT(inp != NULL, ("rip6_abort: inp == NULL"));
704
705	soisdisconnected(so);
706}
707
708static void
709rip6_close(struct socket *so)
710{
711	struct inpcb *inp;
712
713	inp = sotoinpcb(so);
714	KASSERT(inp != NULL, ("rip6_close: inp == NULL"));
715
716	soisdisconnected(so);
717}
718
719static int
720rip6_disconnect(struct socket *so)
721{
722	struct inpcb *inp;
723
724	inp = sotoinpcb(so);
725	KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL"));
726
727	if ((so->so_state & SS_ISCONNECTED) == 0)
728		return (ENOTCONN);
729	inp->in6p_faddr = in6addr_any;
730	rip6_abort(so);
731	return (0);
732}
733
734static int
735rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
736{
737	struct inpcb *inp;
738	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
739	struct ifaddr *ifa = NULL;
740	int error = 0;
741
742	inp = sotoinpcb(so);
743	KASSERT(inp != NULL, ("rip6_bind: inp == NULL"));
744
745	if (nam->sa_len != sizeof(*addr))
746		return (EINVAL);
747	if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0)
748		return (error);
749	if (CK_STAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6)
750		return (EADDRNOTAVAIL);
751	if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
752		return (error);
753
754	NET_EPOCH_ENTER();
755	if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) &&
756	    (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) {
757		NET_EPOCH_EXIT();
758		return (EADDRNOTAVAIL);
759	}
760	if (ifa != NULL &&
761	    ((struct in6_ifaddr *)ifa)->ia6_flags &
762	    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
763	     IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
764		NET_EPOCH_EXIT();
765		return (EADDRNOTAVAIL);
766	}
767	NET_EPOCH_EXIT();
768	INP_INFO_WLOCK(&V_ripcbinfo);
769	INP_WLOCK(inp);
770	inp->in6p_laddr = addr->sin6_addr;
771	INP_WUNLOCK(inp);
772	INP_INFO_WUNLOCK(&V_ripcbinfo);
773	return (0);
774}
775
776static int
777rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
778{
779	struct inpcb *inp;
780	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
781	struct in6_addr in6a;
782	int error = 0, scope_ambiguous = 0;
783
784	inp = sotoinpcb(so);
785	KASSERT(inp != NULL, ("rip6_connect: inp == NULL"));
786
787	if (nam->sa_len != sizeof(*addr))
788		return (EINVAL);
789	if (CK_STAILQ_EMPTY(&V_ifnet))
790		return (EADDRNOTAVAIL);
791	if (addr->sin6_family != AF_INET6)
792		return (EAFNOSUPPORT);
793
794	/*
795	 * Application should provide a proper zone ID or the use of default
796	 * zone IDs should be enabled.  Unfortunately, some applications do
797	 * not behave as it should, so we need a workaround.  Even if an
798	 * appropriate ID is not determined, we'll see if we can determine
799	 * the outgoing interface.  If we can, determine the zone ID based on
800	 * the interface below.
801	 */
802	if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone)
803		scope_ambiguous = 1;
804	if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
805		return (error);
806
807	INP_INFO_WLOCK(&V_ripcbinfo);
808	INP_WLOCK(inp);
809	/* Source address selection. XXX: need pcblookup? */
810	error = in6_selectsrc_socket(addr, inp->in6p_outputopts,
811	    inp, so->so_cred, scope_ambiguous, &in6a, NULL);
812	if (error) {
813		INP_WUNLOCK(inp);
814		INP_INFO_WUNLOCK(&V_ripcbinfo);
815		return (error);
816	}
817
818	inp->in6p_faddr = addr->sin6_addr;
819	inp->in6p_laddr = in6a;
820	soisconnected(so);
821	INP_WUNLOCK(inp);
822	INP_INFO_WUNLOCK(&V_ripcbinfo);
823	return (0);
824}
825
826static int
827rip6_shutdown(struct socket *so)
828{
829	struct inpcb *inp;
830
831	inp = sotoinpcb(so);
832	KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL"));
833
834	INP_WLOCK(inp);
835	socantsendmore(so);
836	INP_WUNLOCK(inp);
837	return (0);
838}
839
840static int
841rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
842    struct mbuf *control, struct thread *td)
843{
844	struct inpcb *inp;
845	struct sockaddr_in6 tmp;
846	struct sockaddr_in6 *dst;
847	int ret;
848
849	inp = sotoinpcb(so);
850	KASSERT(inp != NULL, ("rip6_send: inp == NULL"));
851
852	/* Always copy sockaddr to avoid overwrites. */
853	/* Unlocked read. */
854	if (so->so_state & SS_ISCONNECTED) {
855		if (nam) {
856			m_freem(m);
857			return (EISCONN);
858		}
859		/* XXX */
860		bzero(&tmp, sizeof(tmp));
861		tmp.sin6_family = AF_INET6;
862		tmp.sin6_len = sizeof(struct sockaddr_in6);
863		INP_RLOCK(inp);
864		bcopy(&inp->in6p_faddr, &tmp.sin6_addr,
865		    sizeof(struct in6_addr));
866		INP_RUNLOCK(inp);
867		dst = &tmp;
868	} else {
869		if (nam == NULL) {
870			m_freem(m);
871			return (ENOTCONN);
872		}
873		if (nam->sa_len != sizeof(struct sockaddr_in6)) {
874			m_freem(m);
875			return (EINVAL);
876		}
877		tmp = *(struct sockaddr_in6 *)nam;
878		dst = &tmp;
879
880		if (dst->sin6_family == AF_UNSPEC) {
881			/*
882			 * XXX: we allow this case for backward
883			 * compatibility to buggy applications that
884			 * rely on old (and wrong) kernel behavior.
885			 */
886			log(LOG_INFO, "rip6 SEND: address family is "
887			    "unspec. Assume AF_INET6\n");
888			dst->sin6_family = AF_INET6;
889		} else if (dst->sin6_family != AF_INET6) {
890			m_freem(m);
891			return(EAFNOSUPPORT);
892		}
893	}
894	ret = rip6_output(m, so, dst, control);
895	return (ret);
896}
897
898struct pr_usrreqs rip6_usrreqs = {
899	.pru_abort =		rip6_abort,
900	.pru_attach =		rip6_attach,
901	.pru_bind =		rip6_bind,
902	.pru_connect =		rip6_connect,
903	.pru_control =		in6_control,
904	.pru_detach =		rip6_detach,
905	.pru_disconnect =	rip6_disconnect,
906	.pru_peeraddr =		in6_getpeeraddr,
907	.pru_send =		rip6_send,
908	.pru_shutdown =		rip6_shutdown,
909	.pru_sockaddr =		in6_getsockaddr,
910	.pru_close =		rip6_close,
911};
912