udp6_usrreq.c revision 225044
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * Copyright (c) 2010-2011 Juniper Networks, Inc.
4 * All rights reserved.
5 *
6 * Portions of this software were developed by Robert N. M. Watson under
7 * contract to Juniper Networks, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the project nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	$KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $
34 *	$KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $
35 */
36
37/*-
38 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
39 *	The Regents of the University of California.
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 4. Neither the name of the University nor the names of its contributors
51 *    may be used to endorse or promote products derived from this software
52 *    without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
67 */
68
69#include <sys/cdefs.h>
70__FBSDID("$FreeBSD: head/sys/netinet6/udp6_usrreq.c 225044 2011-08-20 17:05:11Z bz $");
71
72#include "opt_inet.h"
73#include "opt_inet6.h"
74#include "opt_ipfw.h"
75#include "opt_ipsec.h"
76
77#include <sys/param.h>
78#include <sys/jail.h>
79#include <sys/kernel.h>
80#include <sys/lock.h>
81#include <sys/mbuf.h>
82#include <sys/priv.h>
83#include <sys/proc.h>
84#include <sys/protosw.h>
85#include <sys/signalvar.h>
86#include <sys/socket.h>
87#include <sys/socketvar.h>
88#include <sys/sx.h>
89#include <sys/sysctl.h>
90#include <sys/syslog.h>
91#include <sys/systm.h>
92
93#include <net/if.h>
94#include <net/if_types.h>
95#include <net/route.h>
96
97#include <netinet/in.h>
98#include <netinet/in_pcb.h>
99#include <netinet/in_systm.h>
100#include <netinet/in_var.h>
101#include <netinet/ip.h>
102#include <netinet/ip_icmp.h>
103#include <netinet/ip6.h>
104#include <netinet/icmp_var.h>
105#include <netinet/icmp6.h>
106#include <netinet/ip_var.h>
107#include <netinet/udp.h>
108#include <netinet/udp_var.h>
109
110#include <netinet6/ip6protosw.h>
111#include <netinet6/ip6_var.h>
112#include <netinet6/in6_pcb.h>
113#include <netinet6/udp6_var.h>
114#include <netinet6/scope6_var.h>
115
116#ifdef IPSEC
117#include <netipsec/ipsec.h>
118#include <netipsec/ipsec6.h>
119#endif /* IPSEC */
120
121#include <security/mac/mac_framework.h>
122
123/*
124 * UDP protocol implementation.
125 * Per RFC 768, August, 1980.
126 */
127
/*
 * Protocol switch table.  This file indexes it with
 * ip_protox[IPPROTO_UDP] to reach the IPv4 UDP user-request handlers.
 */
128extern struct protosw	inetsw[];
/* Forward declaration; the definition appears further down in this file. */
129static void		udp6_detach(struct socket *so);
130
131static void
132udp6_append(struct inpcb *inp, struct mbuf *n, int off,
133    struct sockaddr_in6 *fromsa)
134{
135	struct socket *so;
136	struct mbuf *opts;
137
138	INP_LOCK_ASSERT(inp);
139
140#ifdef IPSEC
141	/* Check AH/ESP integrity. */
142	if (ipsec6_in_reject(n, inp)) {
143		m_freem(n);
144		V_ipsec6stat.in_polvio++;
145		return;
146	}
147#endif /* IPSEC */
148#ifdef MAC
149	if (mac_inpcb_check_deliver(inp, n) != 0) {
150		m_freem(n);
151		return;
152	}
153#endif
154	opts = NULL;
155	if (inp->inp_flags & INP_CONTROLOPTS ||
156	    inp->inp_socket->so_options & SO_TIMESTAMP)
157		ip6_savecontrol(inp, n, &opts);
158	m_adj(n, off + sizeof(struct udphdr));
159
160	so = inp->inp_socket;
161	SOCKBUF_LOCK(&so->so_rcv);
162	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n,
163	    opts) == 0) {
164		SOCKBUF_UNLOCK(&so->so_rcv);
165		m_freem(n);
166		if (opts)
167			m_freem(opts);
168		UDPSTAT_INC(udps_fullsock);
169	} else
170		sorwakeup_locked(so);
171}
172
/*
 * Input routine for UDP carried over IPv6.
 *
 *	mp	in: the received packet chain.
 *	offp	in: offset of the UDP header from the start of the packet.
 *	proto	not referenced by this function.
 *
 * The datagram is dropped when the faith-prefix hook claims the
 * destination, when the destination port is 0, when the UDP length field
 * disagrees with the length implied by the IPv6 header, when the checksum
 * field is 0, or when checksum verification fails.
 *
 * Multicast destinations are fanned out: the global pcb list is walked
 * under the pcbinfo read lock, every earlier match receives a copy and the
 * final match receives the original chain.  Any other destination is
 * resolved with a single pcb lookup; if that fails, an ICMPv6 port
 * unreachable error is generated unless suppressed by the M_MCAST flag,
 * V_udp_blackhole, or the rate limiter.
 *
 * Every visible return statement yields IPPROTO_DONE.
 */
173int
174udp6_input(struct mbuf **mp, int *offp, int proto)
175{
176	struct mbuf *m = *mp;
177	struct ifnet *ifp;
178	struct ip6_hdr *ip6;
179	struct udphdr *uh;
180	struct inpcb *inp;
181	struct udpcb *up;
182	int off = *offp;
183	int plen, ulen;
184	struct sockaddr_in6 fromsa;
185#ifdef IPFIREWALL_FORWARD
186	struct m_tag *fwd_tag;
187#endif
188
189	ifp = m->m_pkthdr.rcvif;
190	ip6 = mtod(m, struct ip6_hdr *);
191
192	if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
193		/* XXX send icmp6 host/port unreach? */
194		m_freem(m);
195		return (IPPROTO_DONE);
196	}
197
198#ifndef PULLDOWN_TEST
199	IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);
	/* Reload ip6 after the check; presumably it may rearrange the chain. */
200	ip6 = mtod(m, struct ip6_hdr *);
201	uh = (struct udphdr *)((caddr_t)ip6 + off);
202#else
203	IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh));
204	if (!uh)
205		return (IPPROTO_DONE);
206#endif
207
208	UDPSTAT_INC(udps_ipackets);
209
210	/*
211	 * Destination port of 0 is illegal, based on RFC768.
212	 */
213	if (uh->uh_dport == 0)
214		goto badunlocked;
215
	/*
	 * plen: bytes from the UDP header to the end of the packet, as
	 * implied by the IPv6 payload length and the UDP header offset.
	 * ulen: the length the UDP header itself claims.  They must agree.
	 */
216	plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
217	ulen = ntohs((u_short)uh->uh_ulen);
218
219	if (plen != ulen) {
220		UDPSTAT_INC(udps_badlen);
221		goto badunlocked;
222	}
223
224	/*
225	 * Checksum extended UDP header and data.
226	 */
	/* A zero checksum field is rejected outright rather than skipped. */
227	if (uh->uh_sum == 0) {
228		UDPSTAT_INC(udps_nosum);
229		goto badunlocked;
230	}
231	if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
232		UDPSTAT_INC(udps_badsum);
233		goto badunlocked;
234	}
235
236	/*
237	 * Construct sockaddr format source address.
238	 */
239	init_sin6(&fromsa, m);
240	fromsa.sin6_port = uh->uh_sport;
241
242	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
243		struct inpcb *last;
244		struct ip6_moptions *imo;
245
246		INP_INFO_RLOCK(&V_udbinfo);
247		/*
248		 * In the event that laddr should be set to the link-local
249		 * address (this happens in RIPng), the multicast address
250		 * specified in the received packet will not match laddr.  To
251		 * handle this situation, matching is relaxed if the
252		 * receiving interface is the same as one specified in the
253		 * socket and if the destination multicast address matches
254		 * one of the multicast groups specified in the socket.
255		 */
256
257		/*
258		 * KAME note: traditionally we dropped udpiphdr from mbuf
259		 * here.  We need udphdr for IPsec processing so we do that
260		 * later.
261		 */
		/*
		 * 'last' trails the scan by one match so that the final
		 * matching pcb can be handed the original chain instead of
		 * a copy.
		 */
262		last = NULL;
263		LIST_FOREACH(inp, &V_udb, inp_list) {
264			if ((inp->inp_vflag & INP_IPV6) == 0)
265				continue;
266			if (inp->inp_lport != uh->uh_dport)
267				continue;
268			if (inp->inp_fport != 0 &&
269			    inp->inp_fport != uh->uh_sport)
270				continue;
271			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
272				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
273							&ip6->ip6_dst))
274					continue;
275			}
276			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
277				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
278							&ip6->ip6_src) ||
279				    inp->inp_fport != uh->uh_sport)
280					continue;
281			}
282
283			/*
284			 * XXXRW: Because we weren't holding either the inpcb
285			 * or the hash lock when we checked for a match
286			 * before, we should probably recheck now that the
287			 * inpcb lock is (supposed to be) held.
288			 */
289
290			/*
291			 * Handle socket delivery policy for any-source
292			 * and source-specific multicast. [RFC3678]
293			 */
294			imo = inp->in6p_moptions;
295			if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
296				struct sockaddr_in6	 mcaddr;
297				int			 blocked;
298
299				INP_RLOCK(inp);
300
301				bzero(&mcaddr, sizeof(struct sockaddr_in6));
302				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
303				mcaddr.sin6_family = AF_INET6;
304				mcaddr.sin6_addr = ip6->ip6_dst;
305
306				blocked = im6o_mc_filter(imo, ifp,
307					(struct sockaddr *)&mcaddr,
308					(struct sockaddr *)&fromsa);
309				if (blocked != MCAST_PASS) {
310					if (blocked == MCAST_NOTGMEMBER)
311						IP6STAT_INC(ip6s_notmember);
312					if (blocked == MCAST_NOTSMEMBER ||
313					    blocked == MCAST_MUTED)
314						UDPSTAT_INC(udps_filtermcast);
315					INP_RUNLOCK(inp); /* XXX */
316					continue;
317				}
318
319				INP_RUNLOCK(inp);
320			}
			/*
			 * A previous match is pending: give it a private
			 * copy.  If the copy cannot be made, that pcb simply
			 * misses this datagram.
			 */
321			if (last != NULL) {
322				struct mbuf *n;
323
324				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
325					INP_RLOCK(last);
326					up = intoudpcb(last);
327					if (up->u_tun_func == NULL) {
328						udp6_append(last, n, off, &fromsa);
329					} else {
330						/*
331						 * Engage the tunneling
332						 * protocol we will have to
333						 * leave the info_lock up,
334						 * since we are hunting
335						 * through multiple UDP's.
336						 *
337						 */
338						(*up->u_tun_func)(n, off, last);
339					}
340					INP_RUNLOCK(last);
341				}
342			}
343			last = inp;
344			/*
345			 * Don't look for additional matches if this one does
346			 * not have either the SO_REUSEPORT or SO_REUSEADDR
347			 * socket options set.  This heuristic avoids
348			 * searching through all pcbs in the common case of a
349			 * non-shared port.  It assumes that an application
350			 * will never clear these options after setting them.
351			 */
352			if ((last->inp_socket->so_options &
353			     (SO_REUSEPORT|SO_REUSEADDR)) == 0)
354				break;
355		}
356
357		if (last == NULL) {
358			/*
359			 * No matching pcb found; discard datagram.  (No need
360			 * to send an ICMP Port Unreachable for a broadcast
361			 * or multicast datagram.)
362			 */
363			UDPSTAT_INC(udps_noport);
364			UDPSTAT_INC(udps_noportmcast);
365			goto badheadlocked;
366		}
		/*
		 * Lock the final match before dropping the pcbinfo lock, then
		 * deliver the original chain to it.
		 */
367		INP_RLOCK(last);
368		INP_INFO_RUNLOCK(&V_udbinfo);
369		up = intoudpcb(last);
370		if (up->u_tun_func == NULL) {
371			udp6_append(last, m, off, &fromsa);
372		} else {
373			/*
374			 * Engage the tunneling protocol.
375			 */
376			(*up->u_tun_func)(m, off, last);
377		}
378		INP_RUNLOCK(last);
379		return (IPPROTO_DONE);
380	}
381	/*
382	 * Locate pcb for datagram.
383	 */
384#ifdef IPFIREWALL_FORWARD
385	/*
386	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
387	 */
388	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
389	if (fwd_tag != NULL) {
390		struct sockaddr_in6 *next_hop6;
391
		/* The forwarding address is stored right after the tag header. */
392		next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
393
394		/*
395		 * Transparently forwarded. Pretend to be the destination.
396		 * Already got one like this?
397		 */
398		inp = in6_pcblookup_mbuf(&V_udbinfo,
399		    &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
400		    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
401		if (!inp) {
402			/*
403			 * It's new.  Try to find the ambushing socket.
404			 * Because we've rewritten the destination address,
405			 * any hardware-generated hash is ignored.
406			 */
407			inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src,
408			    uh->uh_sport, &next_hop6->sin6_addr,
409			    next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
410			    uh->uh_dport, INPLOOKUP_WILDCARD |
411			    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
412		}
413		/* Remove the tag from the packet. We don't need it anymore. */
414		m_tag_delete(m, fwd_tag);
415	} else
416#endif /* IPFIREWALL_FORWARD */
417		inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src,
418		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
419		    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
420		    m->m_pkthdr.rcvif, m);
421	if (inp == NULL) {
		/* Nobody is listening: log, count, and possibly answer. */
422		if (udp_log_in_vain) {
423			char ip6bufs[INET6_ADDRSTRLEN];
424			char ip6bufd[INET6_ADDRSTRLEN];
425
426			log(LOG_INFO,
427			    "Connection attempt to UDP [%s]:%d from [%s]:%d\n",
428			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
429			    ntohs(uh->uh_dport),
430			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
431			    ntohs(uh->uh_sport));
432		}
433		UDPSTAT_INC(udps_noport);
434		if (m->m_flags & M_MCAST) {
435			printf("UDP6: M_MCAST is set in a unicast packet.\n");
436			UDPSTAT_INC(udps_noportmcast);
437			goto badunlocked;
438		}
439		if (V_udp_blackhole)
440			goto badunlocked;
441		if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
442			goto badunlocked;
		/* m is not freed here; icmp6_error() presumably takes it over. */
443		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
444		return (IPPROTO_DONE);
445	}
	/* The lookup was asked for INPLOOKUP_RLOCKPCB, so inp is read-locked. */
446	INP_RLOCK_ASSERT(inp);
447	up = intoudpcb(inp);
448	if (up->u_tun_func == NULL) {
449		udp6_append(inp, m, off, &fromsa);
450	} else {
451		/*
452		 * Engage the tunneling protocol.
453		 */
454
455		(*up->u_tun_func)(m, off, inp);
456	}
457	INP_RUNLOCK(inp);
458	return (IPPROTO_DONE);
459
	/* Drop paths: release the pcbinfo lock if held, then free the packet. */
460badheadlocked:
461	INP_INFO_RUNLOCK(&V_udbinfo);
462badunlocked:
463	if (m)
464		m_freem(m);
465	return (IPPROTO_DONE);
466}
467
468void
469udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
470{
471	struct udphdr uh;
472	struct ip6_hdr *ip6;
473	struct mbuf *m;
474	int off = 0;
475	struct ip6ctlparam *ip6cp = NULL;
476	const struct sockaddr_in6 *sa6_src = NULL;
477	void *cmdarg;
478	struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
479	struct udp_portonly {
480		u_int16_t uh_sport;
481		u_int16_t uh_dport;
482	} *uhp;
483
484	if (sa->sa_family != AF_INET6 ||
485	    sa->sa_len != sizeof(struct sockaddr_in6))
486		return;
487
488	if ((unsigned)cmd >= PRC_NCMDS)
489		return;
490	if (PRC_IS_REDIRECT(cmd))
491		notify = in6_rtchange, d = NULL;
492	else if (cmd == PRC_HOSTDEAD)
493		d = NULL;
494	else if (inet6ctlerrmap[cmd] == 0)
495		return;
496
497	/* if the parameter is from icmp6, decode it. */
498	if (d != NULL) {
499		ip6cp = (struct ip6ctlparam *)d;
500		m = ip6cp->ip6c_m;
501		ip6 = ip6cp->ip6c_ip6;
502		off = ip6cp->ip6c_off;
503		cmdarg = ip6cp->ip6c_cmdarg;
504		sa6_src = ip6cp->ip6c_src;
505	} else {
506		m = NULL;
507		ip6 = NULL;
508		cmdarg = NULL;
509		sa6_src = &sa6_any;
510	}
511
512	if (ip6) {
513		/*
514		 * XXX: We assume that when IPV6 is non NULL,
515		 * M and OFF are valid.
516		 */
517
518		/* Check if we can safely examine src and dst ports. */
519		if (m->m_pkthdr.len < off + sizeof(*uhp))
520			return;
521
522		bzero(&uh, sizeof(uh));
523		m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
524
525		(void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport,
526		    (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
527		    cmdarg, notify);
528	} else
529		(void) in6_pcbnotify(&V_udbinfo, sa, 0,
530		    (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
531}
532
533static int
534udp6_getcred(SYSCTL_HANDLER_ARGS)
535{
536	struct xucred xuc;
537	struct sockaddr_in6 addrs[2];
538	struct inpcb *inp;
539	int error;
540
541	error = priv_check(req->td, PRIV_NETINET_GETCRED);
542	if (error)
543		return (error);
544
545	if (req->newlen != sizeof(addrs))
546		return (EINVAL);
547	if (req->oldlen != sizeof(struct xucred))
548		return (EINVAL);
549	error = SYSCTL_IN(req, addrs, sizeof(addrs));
550	if (error)
551		return (error);
552	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
553	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
554		return (error);
555	}
556	inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
557	    addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
558	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
559	if (inp != NULL) {
560		INP_RLOCK_ASSERT(inp);
561		if (inp->inp_socket == NULL)
562			error = ENOENT;
563		if (error == 0)
564			error = cr_canseesocket(req->td->td_ucred,
565			    inp->inp_socket);
566		if (error == 0)
567			cru2x(inp->inp_cred, &xuc);
568		INP_RUNLOCK(inp);
569	} else
570		error = ENOENT;
571	if (error == 0)
572		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
573	return (error);
574}
575
576SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
577    0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
578
579static int
580udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
581    struct mbuf *control, struct thread *td)
582{
583	u_int32_t ulen = m->m_pkthdr.len;
584	u_int32_t plen = sizeof(struct udphdr) + ulen;
585	struct ip6_hdr *ip6;
586	struct udphdr *udp6;
587	struct in6_addr *laddr, *faddr, in6a;
588	struct sockaddr_in6 *sin6 = NULL;
589	struct ifnet *oifp = NULL;
590	int scope_ambiguous = 0;
591	u_short fport;
592	int error = 0;
593	struct ip6_pktopts *optp, opt;
594	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
595	int flags;
596	struct sockaddr_in6 tmp;
597
598	INP_WLOCK_ASSERT(inp);
599	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
600
601	if (addr6) {
602		/* addr6 has been validated in udp6_send(). */
603		sin6 = (struct sockaddr_in6 *)addr6;
604
605		/* protect *sin6 from overwrites */
606		tmp = *sin6;
607		sin6 = &tmp;
608
609		/*
610		 * Application should provide a proper zone ID or the use of
611		 * default zone IDs should be enabled.  Unfortunately, some
612		 * applications do not behave as it should, so we need a
613		 * workaround.  Even if an appropriate ID is not determined,
614		 * we'll see if we can determine the outgoing interface.  If we
615		 * can, determine the zone ID based on the interface below.
616		 */
617		if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
618			scope_ambiguous = 1;
619		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
620			return (error);
621	}
622
623	if (control) {
624		if ((error = ip6_setpktopts(control, &opt,
625		    inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0)
626			goto release;
627		optp = &opt;
628	} else
629		optp = inp->in6p_outputopts;
630
631	if (sin6) {
632		faddr = &sin6->sin6_addr;
633
634		/*
635		 * IPv4 version of udp_output calls in_pcbconnect in this case,
636		 * which needs splnet and affects performance.
637		 * Since we saw no essential reason for calling in_pcbconnect,
638		 * we get rid of such kind of logic, and call in6_selectsrc
639		 * and in6_pcbsetport in order to fill in the local address
640		 * and the local port.
641		 */
642		if (sin6->sin6_port == 0) {
643			error = EADDRNOTAVAIL;
644			goto release;
645		}
646
647		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
648			/* how about ::ffff:0.0.0.0 case? */
649			error = EISCONN;
650			goto release;
651		}
652
653		fport = sin6->sin6_port; /* allow 0 port */
654
655		if (IN6_IS_ADDR_V4MAPPED(faddr)) {
656			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
657				/*
658				 * I believe we should explicitly discard the
659				 * packet when mapped addresses are disabled,
660				 * rather than send the packet as an IPv6 one.
661				 * If we chose the latter approach, the packet
662				 * might be sent out on the wire based on the
663				 * default route, the situation which we'd
664				 * probably want to avoid.
665				 * (20010421 jinmei@kame.net)
666				 */
667				error = EINVAL;
668				goto release;
669			}
670			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
671			    !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
672				/*
673				 * when remote addr is an IPv4-mapped address,
674				 * local addr should not be an IPv6 address,
675				 * since you cannot determine how to map IPv6
676				 * source address to IPv4.
677				 */
678				error = EINVAL;
679				goto release;
680			}
681
682			af = AF_INET;
683		}
684
685		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
686			error = in6_selectsrc(sin6, optp, inp, NULL,
687			    td->td_ucred, &oifp, &in6a);
688			if (error)
689				goto release;
690			if (oifp && scope_ambiguous &&
691			    (error = in6_setscope(&sin6->sin6_addr,
692			    oifp, NULL))) {
693				goto release;
694			}
695			laddr = &in6a;
696		} else
697			laddr = &inp->in6p_laddr;	/* XXX */
698		if (laddr == NULL) {
699			if (error == 0)
700				error = EADDRNOTAVAIL;
701			goto release;
702		}
703		if (inp->inp_lport == 0 &&
704		    (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) {
705			/* Undo an address bind that may have occurred. */
706			inp->in6p_laddr = in6addr_any;
707			goto release;
708		}
709	} else {
710		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
711			error = ENOTCONN;
712			goto release;
713		}
714		if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
715			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
716				/*
717				 * XXX: this case would happen when the
718				 * application sets the V6ONLY flag after
719				 * connecting the foreign address.
720				 * Such applications should be fixed,
721				 * so we bark here.
722				 */
723				log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
724				    "option was set for a connected socket\n");
725				error = EINVAL;
726				goto release;
727			} else
728				af = AF_INET;
729		}
730		laddr = &inp->in6p_laddr;
731		faddr = &inp->in6p_faddr;
732		fport = inp->inp_fport;
733	}
734
735	if (af == AF_INET)
736		hlen = sizeof(struct ip);
737
738	/*
739	 * Calculate data length and get a mbuf
740	 * for UDP and IP6 headers.
741	 */
742	M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
743	if (m == 0) {
744		error = ENOBUFS;
745		goto release;
746	}
747
748	/*
749	 * Stuff checksum and output datagram.
750	 */
751	udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
752	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
753	udp6->uh_dport = fport;
754	if (plen <= 0xffff)
755		udp6->uh_ulen = htons((u_short)plen);
756	else
757		udp6->uh_ulen = 0;
758	udp6->uh_sum = 0;
759
760	switch (af) {
761	case AF_INET6:
762		ip6 = mtod(m, struct ip6_hdr *);
763		ip6->ip6_flow	= inp->inp_flow & IPV6_FLOWINFO_MASK;
764		ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
765		ip6->ip6_vfc	|= IPV6_VERSION;
766#if 0				/* ip6_plen will be filled in ip6_output. */
767		ip6->ip6_plen	= htons((u_short)plen);
768#endif
769		ip6->ip6_nxt	= IPPROTO_UDP;
770		ip6->ip6_hlim	= in6_selecthlim(inp, NULL);
771		ip6->ip6_src	= *laddr;
772		ip6->ip6_dst	= *faddr;
773
774		if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP,
775				sizeof(struct ip6_hdr), plen)) == 0) {
776			udp6->uh_sum = 0xffff;
777		}
778
779		flags = 0;
780
781		UDPSTAT_INC(udps_opackets);
782		error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
783		    NULL, inp);
784		break;
785	case AF_INET:
786		error = EAFNOSUPPORT;
787		goto release;
788	}
789	goto releaseopt;
790
791release:
792	m_freem(m);
793
794releaseopt:
795	if (control) {
796		ip6_clearpktopts(&opt, -1);
797		m_freem(control);
798	}
799	return (error);
800}
801
802static void
803udp6_abort(struct socket *so)
804{
805	struct inpcb *inp;
806
807	inp = sotoinpcb(so);
808	KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
809
810#ifdef INET
811	if (inp->inp_vflag & INP_IPV4) {
812		struct pr_usrreqs *pru;
813
814		pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
815		(*pru->pru_abort)(so);
816		return;
817	}
818#endif
819
820	INP_WLOCK(inp);
821	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
822		INP_HASH_WLOCK(&V_udbinfo);
823		in6_pcbdisconnect(inp);
824		inp->in6p_laddr = in6addr_any;
825		INP_HASH_WUNLOCK(&V_udbinfo);
826		soisdisconnected(so);
827	}
828	INP_WUNLOCK(inp);
829}
830
831static int
832udp6_attach(struct socket *so, int proto, struct thread *td)
833{
834	struct inpcb *inp;
835	int error;
836
837	inp = sotoinpcb(so);
838	KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
839
840	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
841		error = soreserve(so, udp_sendspace, udp_recvspace);
842		if (error)
843			return (error);
844	}
845	INP_INFO_WLOCK(&V_udbinfo);
846	error = in_pcballoc(so, &V_udbinfo);
847	if (error) {
848		INP_INFO_WUNLOCK(&V_udbinfo);
849		return (error);
850	}
851	inp = (struct inpcb *)so->so_pcb;
852	inp->inp_vflag |= INP_IPV6;
853	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
854		inp->inp_vflag |= INP_IPV4;
855	inp->in6p_hops = -1;	/* use kernel default */
856	inp->in6p_cksum = -1;	/* just to be sure */
857	/*
858	 * XXX: ugly!!
859	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
860	 * because the socket may be bound to an IPv6 wildcard address,
861	 * which may match an IPv4-mapped IPv6 address.
862	 */
863	inp->inp_ip_ttl = V_ip_defttl;
864
865	error = udp_newudpcb(inp);
866	if (error) {
867		in_pcbdetach(inp);
868		in_pcbfree(inp);
869		INP_INFO_WUNLOCK(&V_udbinfo);
870		return (error);
871	}
872	INP_WUNLOCK(inp);
873	INP_INFO_WUNLOCK(&V_udbinfo);
874	return (0);
875}
876
877static int
878udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
879{
880	struct inpcb *inp;
881	int error;
882
883	inp = sotoinpcb(so);
884	KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
885
886	INP_WLOCK(inp);
887	INP_HASH_WLOCK(&V_udbinfo);
888	inp->inp_vflag &= ~INP_IPV4;
889	inp->inp_vflag |= INP_IPV6;
890	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
891		struct sockaddr_in6 *sin6_p;
892
893		sin6_p = (struct sockaddr_in6 *)nam;
894
895		if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr))
896			inp->inp_vflag |= INP_IPV4;
897#ifdef INET
898		else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
899			struct sockaddr_in sin;
900
901			in6_sin6_2_sin(&sin, sin6_p);
902			inp->inp_vflag |= INP_IPV4;
903			inp->inp_vflag &= ~INP_IPV6;
904			error = in_pcbbind(inp, (struct sockaddr *)&sin,
905			    td->td_ucred);
906			goto out;
907		}
908#endif
909	}
910
911	error = in6_pcbbind(inp, nam, td->td_ucred);
912#ifdef INET
913out:
914#endif
915	INP_HASH_WUNLOCK(&V_udbinfo);
916	INP_WUNLOCK(inp);
917	return (error);
918}
919
920static void
921udp6_close(struct socket *so)
922{
923	struct inpcb *inp;
924
925	inp = sotoinpcb(so);
926	KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
927
928#ifdef INET
929	if (inp->inp_vflag & INP_IPV4) {
930		struct pr_usrreqs *pru;
931
932		pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
933		(*pru->pru_disconnect)(so);
934		return;
935	}
936#endif
937	INP_WLOCK(inp);
938	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
939		INP_HASH_WLOCK(&V_udbinfo);
940		in6_pcbdisconnect(inp);
941		inp->in6p_laddr = in6addr_any;
942		INP_HASH_WUNLOCK(&V_udbinfo);
943		soisdisconnected(so);
944	}
945	INP_WUNLOCK(inp);
946}
947
948static int
949udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
950{
951	struct inpcb *inp;
952	struct sockaddr_in6 *sin6;
953	int error;
954
955	inp = sotoinpcb(so);
956	sin6 = (struct sockaddr_in6 *)nam;
957	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
958
959	/*
960	 * XXXRW: Need to clarify locking of v4/v6 flags.
961	 */
962	INP_WLOCK(inp);
963#ifdef INET
964	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
965		struct sockaddr_in sin;
966
967		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
968			error = EINVAL;
969			goto out;
970		}
971		if (inp->inp_faddr.s_addr != INADDR_ANY) {
972			error = EISCONN;
973			goto out;
974		}
975		in6_sin6_2_sin(&sin, sin6);
976		inp->inp_vflag |= INP_IPV4;
977		inp->inp_vflag &= ~INP_IPV6;
978		error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
979		if (error != 0)
980			goto out;
981		INP_HASH_WLOCK(&V_udbinfo);
982		error = in_pcbconnect(inp, (struct sockaddr *)&sin,
983		    td->td_ucred);
984		INP_HASH_WUNLOCK(&V_udbinfo);
985		if (error == 0)
986			soisconnected(so);
987		goto out;
988	}
989#endif
990	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
991		error = EISCONN;
992		goto out;
993	}
994	inp->inp_vflag &= ~INP_IPV4;
995	inp->inp_vflag |= INP_IPV6;
996	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
997	if (error != 0)
998		goto out;
999	INP_HASH_WLOCK(&V_udbinfo);
1000	error = in6_pcbconnect(inp, nam, td->td_ucred);
1001	INP_HASH_WUNLOCK(&V_udbinfo);
1002	if (error == 0)
1003		soisconnected(so);
1004out:
1005	INP_WUNLOCK(inp);
1006	return (error);
1007}
1008
1009static void
1010udp6_detach(struct socket *so)
1011{
1012	struct inpcb *inp;
1013	struct udpcb *up;
1014
1015	inp = sotoinpcb(so);
1016	KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
1017
1018	INP_INFO_WLOCK(&V_udbinfo);
1019	INP_WLOCK(inp);
1020	up = intoudpcb(inp);
1021	KASSERT(up != NULL, ("%s: up == NULL", __func__));
1022	in_pcbdetach(inp);
1023	in_pcbfree(inp);
1024	INP_INFO_WUNLOCK(&V_udbinfo);
1025	udp_discardcb(up);
1026}
1027
1028static int
1029udp6_disconnect(struct socket *so)
1030{
1031	struct inpcb *inp;
1032	int error;
1033
1034	inp = sotoinpcb(so);
1035	KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
1036
1037#ifdef INET
1038	if (inp->inp_vflag & INP_IPV4) {
1039		struct pr_usrreqs *pru;
1040
1041		pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
1042		(void)(*pru->pru_disconnect)(so);
1043		return (0);
1044	}
1045#endif
1046
1047	INP_WLOCK(inp);
1048
1049	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
1050		error = ENOTCONN;
1051		goto out;
1052	}
1053
1054	INP_HASH_WLOCK(&V_udbinfo);
1055	in6_pcbdisconnect(inp);
1056	inp->in6p_laddr = in6addr_any;
1057	INP_HASH_WUNLOCK(&V_udbinfo);
1058	SOCK_LOCK(so);
1059	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
1060	SOCK_UNLOCK(so);
1061out:
1062	INP_WUNLOCK(inp);
1063	return (0);
1064}
1065
1066static int
1067udp6_send(struct socket *so, int flags, struct mbuf *m,
1068    struct sockaddr *addr, struct mbuf *control, struct thread *td)
1069{
1070	struct inpcb *inp;
1071	int error = 0;
1072
1073	inp = sotoinpcb(so);
1074	KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
1075
1076	INP_WLOCK(inp);
1077	if (addr) {
1078		if (addr->sa_len != sizeof(struct sockaddr_in6)) {
1079			error = EINVAL;
1080			goto bad;
1081		}
1082		if (addr->sa_family != AF_INET6) {
1083			error = EAFNOSUPPORT;
1084			goto bad;
1085		}
1086	}
1087
1088#ifdef INET
1089	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
1090		int hasv4addr;
1091		struct sockaddr_in6 *sin6 = 0;
1092
1093		if (addr == 0)
1094			hasv4addr = (inp->inp_vflag & INP_IPV4);
1095		else {
1096			sin6 = (struct sockaddr_in6 *)addr;
1097			hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
1098			    ? 1 : 0;
1099		}
1100		if (hasv4addr) {
1101			struct pr_usrreqs *pru;
1102
1103			/*
1104			 * XXXRW: We release UDP-layer locks before calling
1105			 * udp_send() in order to avoid recursion.  However,
1106			 * this does mean there is a short window where inp's
1107			 * fields are unstable.  Could this lead to a
1108			 * potential race in which the factors causing us to
1109			 * select the UDPv4 output routine are invalidated?
1110			 */
1111			INP_WUNLOCK(inp);
1112			if (sin6)
1113				in6_sin6_2_sin_in_sock(addr);
1114			pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
1115			/* addr will just be freed in sendit(). */
1116			return ((*pru->pru_send)(so, flags, m, addr, control,
1117			    td));
1118		}
1119	}
1120#endif
1121#ifdef MAC
1122	mac_inpcb_create_mbuf(inp, m);
1123#endif
1124	INP_HASH_WLOCK(&V_udbinfo);
1125	error = udp6_output(inp, m, addr, control, td);
1126	INP_HASH_WUNLOCK(&V_udbinfo);
1127#ifdef INET
1128#endif
1129	INP_WUNLOCK(inp);
1130	return (error);
1131
1132bad:
1133	INP_WUNLOCK(inp);
1134	m_freem(m);
1135	return (error);
1136}
1137
1138struct pr_usrreqs udp6_usrreqs = {
1139	.pru_abort =		udp6_abort,
1140	.pru_attach =		udp6_attach,
1141	.pru_bind =		udp6_bind,
1142	.pru_connect =		udp6_connect,
1143	.pru_control =		in6_control,
1144	.pru_detach =		udp6_detach,
1145	.pru_disconnect =	udp6_disconnect,
1146	.pru_peeraddr =		in6_mapped_peeraddr,
1147	.pru_send =		udp6_send,
1148	.pru_shutdown =		udp_shutdown,
1149	.pru_sockaddr =		in6_mapped_sockaddr,
1150	.pru_soreceive =	soreceive_dgram,
1151	.pru_sosend =		sosend_dgram,
1152	.pru_sosetlabel =	in_pcbsosetlabel,
1153	.pru_close =		udp6_close
1154};
1155