udp6_usrreq.c revision 1.151
1/* $NetBSD: udp6_usrreq.c,v 1.151 2022/10/28 05:18:39 ozaki-r Exp $ */
2/* $KAME: udp6_usrreq.c,v 1.86 2001/05/27 17:33:00 itojun Exp $ */
3/* $KAME: udp6_output.c,v 1.43 2001/10/15 09:19:52 itojun Exp $ */
4
5/*
6 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the project nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34/*
35 * Copyright (c) 1982, 1986, 1989, 1993
36 *	The Regents of the University of California.  All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)udp_var.h	8.1 (Berkeley) 6/10/93
63 */
64
65#include <sys/cdefs.h>
66__KERNEL_RCSID(0, "$NetBSD: udp6_usrreq.c,v 1.151 2022/10/28 05:18:39 ozaki-r Exp $");
67
68#ifdef _KERNEL_OPT
69#include "opt_inet.h"
70#include "opt_inet_csum.h"
71#include "opt_ipsec.h"
72#include "opt_net_mpsafe.h"
73#endif
74
75#include <sys/param.h>
76#include <sys/mbuf.h>
77#include <sys/protosw.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
80#include <sys/systm.h>
81#include <sys/proc.h>
82#include <sys/syslog.h>
83#include <sys/domain.h>
84#include <sys/sysctl.h>
85
86#include <net/if.h>
87#include <net/if_types.h>
88
89#include <netinet/in.h>
90#include <netinet/in_var.h>
91#include <netinet/in_systm.h>
92#include <netinet/in_offload.h>
93#include <netinet/ip.h>
94#include <netinet/ip_var.h>
95#include <netinet/in_pcb.h>
96#include <netinet/udp.h>
97#include <netinet/udp_var.h>
98#include <netinet/udp_private.h>
99
100#include <netinet/ip6.h>
101#include <netinet/icmp6.h>
102#include <netinet6/ip6_var.h>
103#include <netinet6/ip6_private.h>
104#include <netinet6/in6_pcb.h>
105#include <netinet6/udp6_var.h>
106#include <netinet6/udp6_private.h>
107#include <netinet6/ip6protosw.h>
108#include <netinet6/scope6_var.h>
109
110#ifdef IPSEC
111#include <netipsec/ipsec.h>
112#include <netipsec/esp.h>
113#ifdef INET6
114#include <netipsec/ipsec6.h>
115#endif
116#endif
117
118#include "faith.h"
119#if defined(NFAITH) && NFAITH > 0
120#include <net/if_faith.h>
121#endif
122
123/*
124 * UDP protocol implementation.
125 * Per RFC 768, August, 1980.
126 */
127
128extern struct inpcbtable udbtable;
129
130percpu_t *udp6stat_percpu;
131
132/* UDP on IP6 parameters */
133static int udp6_sendspace = 9216;	/* really max datagram size */
134static int udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6));
135					/* 40 1K datagrams */
136
137static void udp6_notify(struct inpcb *, int);
138static void sysctl_net_inet6_udp6_setup(struct sysctllog **);
139#ifdef IPSEC
140static int udp6_espinudp(struct mbuf **, int);
141#endif
142
/*
 * Optional event counters describing how received UDP6 checksums were
 * verified.  When UDP_CSUM_COUNTERS is not defined the increment macro
 * expands to nothing.
 */
#ifdef UDP_CSUM_COUNTERS
#include <sys/device.h>

/* hardware reported a bad checksum */
struct evcnt udp6_hwcsum_bad =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "hwcsum bad");
EVCNT_ATTACH_STATIC(udp6_hwcsum_bad);

/* hardware reported a good checksum */
struct evcnt udp6_hwcsum_ok =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "hwcsum ok");
EVCNT_ATTACH_STATIC(udp6_hwcsum_ok);

/* hardware supplied checksum data */
struct evcnt udp6_hwcsum_data =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "hwcsum data");
EVCNT_ATTACH_STATIC(udp6_hwcsum_data);

/* checksum computed in software */
struct evcnt udp6_swcsum =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "udp6", "swcsum");
EVCNT_ATTACH_STATIC(udp6_swcsum);

#define	UDP_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++
#else
#define	UDP_CSUM_COUNTER_INCR(ev)	/* nothing */
#endif
163
164void
165udp6_init(void)
166{
167	sysctl_net_inet6_udp6_setup(NULL);
168	udp6stat_percpu = percpu_alloc(sizeof(uint64_t) * UDP6_NSTATS);
169
170	udp_init_common();
171}
172
173/*
174 * Notify a udp user of an asynchronous error;
175 * just wake up so that he can collect error status.
176 */
177static	void
178udp6_notify(struct inpcb *inp, int errno)
179{
180	inp->inp_socket->so_error = errno;
181	sorwakeup(inp->inp_socket);
182	sowwakeup(inp->inp_socket);
183}
184
185void *
186udp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
187{
188	struct udphdr uh;
189	struct ip6_hdr *ip6;
190	const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa;
191	struct mbuf *m;
192	int off;
193	void *cmdarg;
194	struct ip6ctlparam *ip6cp = NULL;
195	const struct sockaddr_in6 *sa6_src = NULL;
196	void (*notify)(struct inpcb *, int) = udp6_notify;
197	struct udp_portonly {
198		u_int16_t uh_sport;
199		u_int16_t uh_dport;
200	} *uhp;
201
202	if (sa->sa_family != AF_INET6 ||
203	    sa->sa_len != sizeof(struct sockaddr_in6))
204		return NULL;
205
206	if ((unsigned)cmd >= PRC_NCMDS)
207		return NULL;
208	if (PRC_IS_REDIRECT(cmd))
209		notify = in6_rtchange, d = NULL;
210	else if (cmd == PRC_HOSTDEAD)
211		d = NULL;
212	else if (cmd == PRC_MSGSIZE) {
213		/* special code is present, see below */
214		notify = in6_rtchange;
215	}
216	else if (inet6ctlerrmap[cmd] == 0)
217		return NULL;
218
219	/* if the parameter is from icmp6, decode it. */
220	if (d != NULL) {
221		ip6cp = (struct ip6ctlparam *)d;
222		m = ip6cp->ip6c_m;
223		ip6 = ip6cp->ip6c_ip6;
224		off = ip6cp->ip6c_off;
225		cmdarg = ip6cp->ip6c_cmdarg;
226		sa6_src = ip6cp->ip6c_src;
227	} else {
228		m = NULL;
229		ip6 = NULL;
230		cmdarg = NULL;
231		sa6_src = &sa6_any;
232		off = 0;
233	}
234
235	if (ip6) {
236		/* check if we can safely examine src and dst ports */
237		if (m->m_pkthdr.len < off + sizeof(*uhp)) {
238			if (cmd == PRC_MSGSIZE)
239				icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
240			return NULL;
241		}
242
243		memset(&uh, 0, sizeof(uh));
244		m_copydata(m, off, sizeof(*uhp), (void *)&uh);
245
246		if (cmd == PRC_MSGSIZE) {
247			int valid = 0;
248
249			/*
250			 * Check to see if we have a valid UDP socket
251			 * corresponding to the address in the ICMPv6 message
252			 * payload.
253			 */
254			if (in6_pcblookup_connect(&udbtable, &sa6->sin6_addr,
255			    uh.uh_dport, (const struct in6_addr *)&sa6_src->sin6_addr,
256			    uh.uh_sport, 0, 0))
257				valid++;
258#if 0
259			/*
260			 * As the use of sendto(2) is fairly popular,
261			 * we may want to allow non-connected pcb too.
262			 * But it could be too weak against attacks...
263			 * We should at least check if the local address (= s)
264			 * is really ours.
265			 */
266			else if (in6_pcblookup_bind(&udbtable, &sa6->sin6_addr,
267			    uh.uh_dport, 0))
268				valid++;
269#endif
270
271			/*
272			 * Depending on the value of "valid" and routing table
273			 * size (mtudisc_{hi,lo}wat), we will:
274			 * - recalculate the new MTU and create the
275			 *   corresponding routing entry, or
276			 * - ignore the MTU change notification.
277			 */
278			icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
279
280			/*
281			 * regardless of if we called
282			 * icmp6_mtudisc_update(), we need to call
283			 * in6_pcbnotify(), to notify path MTU change
284			 * to the userland (RFC3542), because some
285			 * unconnected sockets may share the same
286			 * destination and want to know the path MTU.
287			 */
288		}
289
290		(void)in6_pcbnotify(&udbtable, sa, uh.uh_dport,
291		    sin6tocsa(sa6_src), uh.uh_sport, cmd, cmdarg,
292		    notify);
293	} else {
294		(void)in6_pcbnotify(&udbtable, sa, 0,
295		    sin6tocsa(sa6_src), 0, cmd, cmdarg, notify);
296	}
297	return NULL;
298}
299
300int
301udp6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
302{
303	int s;
304	int error = 0;
305	struct inpcb *inp;
306	int family;
307	int optval;
308
309	family = so->so_proto->pr_domain->dom_family;
310
311	s = splsoftnet();
312	switch (family) {
313#ifdef INET
314	case PF_INET:
315		if (sopt->sopt_level != IPPROTO_UDP) {
316			error = ip_ctloutput(op, so, sopt);
317			goto end;
318		}
319		break;
320#endif
321#ifdef INET6
322	case PF_INET6:
323		if (sopt->sopt_level != IPPROTO_UDP) {
324			error = ip6_ctloutput(op, so, sopt);
325			goto end;
326		}
327		break;
328#endif
329	default:
330		error = EAFNOSUPPORT;
331		goto end;
332	}
333
334	switch (op) {
335	case PRCO_SETOPT:
336		inp = sotoinpcb(so);
337
338		switch (sopt->sopt_name) {
339		case UDP_ENCAP:
340			error = sockopt_getint(sopt, &optval);
341			if (error)
342				break;
343
344			switch(optval) {
345			case 0:
346				inp->inp_flags &= ~IN6P_ESPINUDP;
347				break;
348
349			case UDP_ENCAP_ESPINUDP:
350				inp->inp_flags |= IN6P_ESPINUDP;
351				break;
352
353			default:
354				error = EINVAL;
355				break;
356			}
357			break;
358
359		default:
360			error = ENOPROTOOPT;
361			break;
362		}
363		break;
364
365	default:
366		error = EINVAL;
367		break;
368	}
369
370end:
371	splx(s);
372	return error;
373}
374
375static void
376udp6_sendup(struct mbuf *m, int off /* offset of data portion */,
377    struct sockaddr *src, struct socket *so)
378{
379	struct mbuf *opts = NULL;
380	struct mbuf *n;
381	struct inpcb *inp;
382
383	KASSERT(so != NULL);
384	KASSERT(so->so_proto->pr_domain->dom_family == AF_INET6);
385	inp = sotoinpcb(so);
386	KASSERT(inp != NULL);
387
388#if defined(IPSEC)
389	if (ipsec_used && ipsec_in_reject(m, inp)) {
390		if ((n = m_copypacket(m, M_DONTWAIT)) != NULL)
391			icmp6_error(n, ICMP6_DST_UNREACH,
392			    ICMP6_DST_UNREACH_ADMIN, 0);
393		return;
394	}
395#endif
396
397	if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) {
398		if (inp->inp_flags & IN6P_CONTROLOPTS ||
399		    SOOPT_TIMESTAMP(inp->inp_socket->so_options)) {
400			struct ip6_hdr *ip6 = mtod(n, struct ip6_hdr *);
401			ip6_savecontrol(inp, &opts, ip6, n);
402		}
403
404		m_adj(n, off);
405		if (sbappendaddr(&so->so_rcv, src, n, opts) == 0) {
406			m_freem(n);
407			if (opts)
408				m_freem(opts);
409			UDP6_STATINC(UDP6_STAT_FULLSOCK);
410			soroverflow(so);
411		} else
412			sorwakeup(so);
413	}
414}
415
416int
417udp6_realinput(int af, struct sockaddr_in6 *src, struct sockaddr_in6 *dst,
418    struct mbuf **mp, int off)
419{
420	u_int16_t sport, dport;
421	int rcvcnt;
422	struct in6_addr src6, *dst6;
423	const struct in_addr *dst4;
424	struct inpcb *inp;
425	struct mbuf *m = *mp;
426
427	rcvcnt = 0;
428	off += sizeof(struct udphdr);	/* now, offset of payload */
429
430	if (af != AF_INET && af != AF_INET6)
431		goto bad;
432	if (src->sin6_family != AF_INET6 || dst->sin6_family != AF_INET6)
433		goto bad;
434
435	src6 = src->sin6_addr;
436	if (sa6_recoverscope(src) != 0) {
437		/* XXX: should be impossible. */
438		goto bad;
439	}
440	sport = src->sin6_port;
441
442	dport = dst->sin6_port;
443	dst4 = (struct in_addr *)&dst->sin6_addr.s6_addr[12];
444	dst6 = &dst->sin6_addr;
445
446	if (IN6_IS_ADDR_MULTICAST(dst6) ||
447	    (af == AF_INET && IN_MULTICAST(dst4->s_addr))) {
448		/*
449		 * Deliver a multicast or broadcast datagram to *all* sockets
450		 * for which the local and remote addresses and ports match
451		 * those of the incoming datagram.  This allows more than
452		 * one process to receive multi/broadcasts on the same port.
453		 * (This really ought to be done for unicast datagrams as
454		 * well, but that would cause problems with existing
455		 * applications that open both address-specific sockets and
456		 * a wildcard socket listening to the same port -- they would
457		 * end up receiving duplicates of every unicast datagram.
458		 * Those applications open the multiple sockets to overcome an
459		 * inadequacy of the UDP socket interface, but for backwards
460		 * compatibility we avoid the problem here rather than
461		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
462		 */
463
464		/*
465		 * KAME note: traditionally we dropped udpiphdr from mbuf here.
466		 * we need udpiphdr for IPsec processing so we do that later.
467		 */
468		/*
469		 * Locate pcb(s) for datagram.
470		 */
471		TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
472			if (inp->inp_af != AF_INET6)
473				continue;
474
475			if (inp->inp_lport != dport)
476				continue;
477			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
478				if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6,
479				    dst6))
480					continue;
481			} else {
482				if (IN6_IS_ADDR_V4MAPPED(dst6) &&
483				    (inp->inp_flags & IN6P_IPV6_V6ONLY))
484					continue;
485			}
486			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
487				if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
488				    &src6) || inp->inp_fport != sport)
489					continue;
490			} else {
491				if (IN6_IS_ADDR_V4MAPPED(&src6) &&
492				    (inp->inp_flags & IN6P_IPV6_V6ONLY))
493					continue;
494			}
495
496			udp6_sendup(m, off, sin6tosa(src), inp->inp_socket);
497			rcvcnt++;
498
499			/*
500			 * Don't look for additional matches if this one does
501			 * not have either the SO_REUSEPORT or SO_REUSEADDR
502			 * socket options set.  This heuristic avoids searching
503			 * through all pcbs in the common case of a non-shared
504			 * port.  It assumes that an application will never
505			 * clear these options after setting them.
506			 */
507			if ((inp->inp_socket->so_options &
508			    (SO_REUSEPORT|SO_REUSEADDR)) == 0)
509				break;
510		}
511	} else {
512		/*
513		 * Locate pcb for datagram.
514		 */
515		inp = in6_pcblookup_connect(&udbtable, &src6, sport, dst6,
516					     dport, 0, 0);
517		if (inp == NULL) {
518			UDP_STATINC(UDP_STAT_PCBHASHMISS);
519			inp = in6_pcblookup_bind(&udbtable, dst6, dport, 0);
520			if (inp == NULL)
521				return rcvcnt;
522		}
523
524#ifdef IPSEC
525		/* Handle ESP over UDP */
526		if (inp->inp_flags & IN6P_ESPINUDP) {
527			switch (udp6_espinudp(mp, off)) {
528			case -1: /* Error, m was freed */
529				rcvcnt = -1;
530				goto bad;
531
532			case 1: /* ESP over UDP */
533				rcvcnt++;
534				goto bad;
535
536			case 0: /* plain UDP */
537			default: /* Unexpected */
538				/*
539				 * Normal UDP processing will take place,
540				 * m may have changed.
541				 */
542				m = *mp;
543				break;
544			}
545		}
546#endif
547
548		if (inp->inp_overudp_cb != NULL) {
549			int ret;
550			ret = inp->inp_overudp_cb(mp, off, inp->inp_socket,
551			    sin6tosa(src), inp->inp_overudp_arg);
552			switch (ret) {
553			case -1: /* Error, m was freed */
554				rcvcnt = -1;
555				goto bad;
556
557			case 1: /* Foo over UDP */
558				KASSERT(*mp == NULL);
559				rcvcnt++;
560				goto bad;
561
562			case 0: /* plain UDP */
563			default: /* Unexpected */
564				/*
565				 * Normal UDP processing will take place,
566				 * m may have changed.
567				 */
568				break;
569			}
570		}
571
572		udp6_sendup(m, off, sin6tosa(src), inp->inp_socket);
573		rcvcnt++;
574	}
575
576bad:
577	return rcvcnt;
578}
579
580int
581udp6_input_checksum(struct mbuf *m, const struct udphdr *uh, int off, int len)
582{
583
584	/*
585	 * XXX it's better to record and check if this mbuf is
586	 * already checked.
587	 */
588
589	if (__predict_false((m->m_flags & M_LOOP) && !udp_do_loopback_cksum)) {
590		goto good;
591	}
592	if (uh->uh_sum == 0) {
593		UDP6_STATINC(UDP6_STAT_NOSUM);
594		goto bad;
595	}
596
597	switch (m->m_pkthdr.csum_flags &
598	    ((m_get_rcvif_NOMPSAFE(m)->if_csum_flags_rx & M_CSUM_UDPv6) |
599	    M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) {
600	case M_CSUM_UDPv6|M_CSUM_TCP_UDP_BAD:
601		UDP_CSUM_COUNTER_INCR(&udp6_hwcsum_bad);
602		UDP6_STATINC(UDP6_STAT_BADSUM);
603		goto bad;
604
605#if 0 /* notyet */
606	case M_CSUM_UDPv6|M_CSUM_DATA:
607#endif
608
609	case M_CSUM_UDPv6:
610		/* Checksum was okay. */
611		UDP_CSUM_COUNTER_INCR(&udp6_hwcsum_ok);
612		break;
613
614	default:
615		/*
616		 * Need to compute it ourselves.  Maybe skip checksum
617		 * on loopback interfaces.
618		 */
619		UDP_CSUM_COUNTER_INCR(&udp6_swcsum);
620		if (in6_cksum(m, IPPROTO_UDP, off, len) != 0) {
621			UDP6_STATINC(UDP6_STAT_BADSUM);
622			goto bad;
623		}
624	}
625
626good:
627	return 0;
628bad:
629	return -1;
630}
631
632int
633udp6_input(struct mbuf **mp, int *offp, int proto)
634{
635	struct mbuf *m = *mp;
636	int off = *offp;
637	struct sockaddr_in6 src, dst;
638	struct ip6_hdr *ip6;
639	struct udphdr *uh;
640	u_int32_t plen, ulen;
641
642	ip6 = mtod(m, struct ip6_hdr *);
643
644#if defined(NFAITH) && 0 < NFAITH
645	if (faithprefix(&ip6->ip6_dst)) {
646		/* send icmp6 host unreach? */
647		m_freem(m);
648		return IPPROTO_DONE;
649	}
650#endif
651
652	UDP6_STATINC(UDP6_STAT_IPACKETS);
653
654	/* Check for jumbogram is done in ip6_input. We can trust pkthdr.len. */
655	plen = m->m_pkthdr.len - off;
656	IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(struct udphdr));
657	if (uh == NULL) {
658		IP6_STATINC(IP6_STAT_TOOSHORT);
659		return IPPROTO_DONE;
660	}
661
662	/*
663	 * Enforce alignment requirements that are violated in
664	 * some cases, see kern/50766 for details.
665	 */
666	if (ACCESSIBLE_POINTER(uh, struct udphdr) == 0) {
667		m = m_copyup(m, off + sizeof(struct udphdr), 0);
668		if (m == NULL) {
669			IP6_STATINC(IP6_STAT_TOOSHORT);
670			return IPPROTO_DONE;
671		}
672		ip6 = mtod(m, struct ip6_hdr *);
673		uh = (struct udphdr *)(mtod(m, char *) + off);
674	}
675	KASSERT(ACCESSIBLE_POINTER(uh, struct udphdr));
676	ulen = ntohs((u_short)uh->uh_ulen);
677
678	/*
679	 * RFC2675 section 4: jumbograms will have 0 in the UDP header field,
680	 * iff payload length > 0xffff.
681	 */
682	if (ulen == 0 && plen > 0xffff)
683		ulen = plen;
684
685	if (plen != ulen) {
686		UDP6_STATINC(UDP6_STAT_BADLEN);
687		goto bad;
688	}
689
690	/* destination port of 0 is illegal, based on RFC768. */
691	if (uh->uh_dport == 0)
692		goto bad;
693
694	/*
695	 * Checksum extended UDP header and data.  Maybe skip checksum
696	 * on loopback interfaces.
697	 */
698	if (udp6_input_checksum(m, uh, off, ulen))
699		goto bad;
700
701	/*
702	 * Construct source and dst sockaddrs.
703	 */
704	memset(&src, 0, sizeof(src));
705	src.sin6_family = AF_INET6;
706	src.sin6_len = sizeof(struct sockaddr_in6);
707	src.sin6_addr = ip6->ip6_src;
708	src.sin6_port = uh->uh_sport;
709	memset(&dst, 0, sizeof(dst));
710	dst.sin6_family = AF_INET6;
711	dst.sin6_len = sizeof(struct sockaddr_in6);
712	dst.sin6_addr = ip6->ip6_dst;
713	dst.sin6_port = uh->uh_dport;
714
715	if (udp6_realinput(AF_INET6, &src, &dst, &m, off) == 0) {
716		if (m->m_flags & M_MCAST) {
717			UDP6_STATINC(UDP6_STAT_NOPORTMCAST);
718			goto bad;
719		}
720		UDP6_STATINC(UDP6_STAT_NOPORT);
721		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
722		m = NULL;
723	}
724
725bad:
726	if (m)
727		m_freem(m);
728	return IPPROTO_DONE;
729}
730
731int
732udp6_output(struct inpcb * const inp, struct mbuf *m,
733    struct sockaddr_in6 * const addr6, struct mbuf * const control,
734    struct lwp * const l)
735{
736	u_int32_t ulen = m->m_pkthdr.len;
737	u_int32_t plen = sizeof(struct udphdr) + ulen;
738	struct ip6_hdr *ip6;
739	struct udphdr *udp6;
740	struct in6_addr _laddr, *laddr, *faddr;
741	struct in6_addr laddr_mapped; /* XXX ugly */
742	struct sockaddr_in6 *sin6 = NULL;
743	struct ifnet *oifp = NULL;
744	int scope_ambiguous = 0;
745	u_int16_t fport;
746	int error = 0;
747	struct ip6_pktopts *optp = NULL;
748	struct ip6_pktopts opt;
749	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
750#ifdef INET
751	struct ip *ip;
752	struct udpiphdr *ui;
753	int flags = 0;
754#endif
755	struct sockaddr_in6 tmp;
756
757	if (addr6) {
758		sin6 = addr6;
759		if (sin6->sin6_len != sizeof(*sin6)) {
760			error = EINVAL;
761			goto release;
762		}
763		if (sin6->sin6_family != AF_INET6) {
764			error = EAFNOSUPPORT;
765			goto release;
766		}
767
768		/* protect *sin6 from overwrites */
769		tmp = *sin6;
770		sin6 = &tmp;
771
772		/*
773		 * Application should provide a proper zone ID or the use of
774		 * default zone IDs should be enabled.  Unfortunately, some
775		 * applications do not behave as it should, so we need a
776		 * workaround.  Even if an appropriate ID is not determined,
777		 * we'll see if we can determine the outgoing interface.  If we
778		 * can, determine the zone ID based on the interface below.
779		 */
780		if (sin6->sin6_scope_id == 0 && !ip6_use_defzone)
781			scope_ambiguous = 1;
782		if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
783			goto release;
784	}
785
786	if (control) {
787		if (__predict_false(l == NULL)) {
788			panic("%s: control but no lwp", __func__);
789		}
790		if ((error = ip6_setpktopts(control, &opt,
791		    inp->inp_outputopts6, l->l_cred, IPPROTO_UDP)) != 0)
792			goto release;
793		optp = &opt;
794	} else
795		optp = inp->inp_outputopts6;
796
797
798	if (sin6) {
799		/*
800		 * Slightly different than v4 version in that we call
801		 * in6_selectsrc and in6_pcbsetport to fill in the local
802		 * address and port rather than in_pcbconnect. in_pcbconnect
803		 * sets inp_faddr which causes EISCONN below to be hit on
804		 * subsequent sendto.
805		 */
806		if (sin6->sin6_port == 0) {
807			error = EADDRNOTAVAIL;
808			goto release;
809		}
810
811		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
812			/* how about ::ffff:0.0.0.0 case? */
813			error = EISCONN;
814			goto release;
815		}
816
817		faddr = &sin6->sin6_addr;
818		fport = sin6->sin6_port; /* allow 0 port */
819
820		if (IN6_IS_ADDR_V4MAPPED(faddr)) {
821			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
822				/*
823				 * I believe we should explicitly discard the
824				 * packet when mapped addresses are disabled,
825				 * rather than send the packet as an IPv6 one.
826				 * If we chose the latter approach, the packet
827				 * might be sent out on the wire based on the
828				 * default route, the situation which we'd
829				 * probably want to avoid.
830				 * (20010421 jinmei@kame.net)
831				 */
832				error = EINVAL;
833				goto release;
834			}
835			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) &&
836			    !IN6_IS_ADDR_V4MAPPED(&inp->inp_laddr6)) {
837				/*
838				 * when remote addr is an IPv4-mapped address,
839				 * local addr should not be an IPv6 address,
840				 * since you cannot determine how to map IPv6
841				 * source address to IPv4.
842				 */
843				error = EINVAL;
844				goto release;
845			}
846
847			af = AF_INET;
848		}
849
850		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
851			struct psref psref;
852			int bound = curlwp_bind();
853
854			error = in6_selectsrc(sin6, optp,
855			    inp->inp_moptions6,
856			    &inp->inp_route,
857			    &inp->inp_laddr6, &oifp, &psref, &_laddr);
858			if (error)
859				laddr = NULL;
860			else
861				laddr = &_laddr;
862			if (oifp && scope_ambiguous &&
863			    (error = in6_setscope(&sin6->sin6_addr,
864			    oifp, NULL))) {
865				if_put(oifp, &psref);
866				curlwp_bindx(bound);
867				goto release;
868			}
869			if_put(oifp, &psref);
870			curlwp_bindx(bound);
871		} else {
872			/*
873			 * XXX: freebsd[34] does not have in_selectsrc, but
874			 * we can omit the whole part because freebsd4 calls
875			 * udp_output() directly in this case, and thus we'll
876			 * never see this path.
877			 */
878			if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
879				struct sockaddr_in sin_dst;
880				struct in_addr ina;
881				struct in_ifaddr *ia4;
882				struct psref _psref;
883				int bound;
884
885				memcpy(&ina, &faddr->s6_addr[12], sizeof(ina));
886				sockaddr_in_init(&sin_dst, &ina, 0);
887				bound = curlwp_bind();
888				ia4 = in_selectsrc(&sin_dst, &inp->inp_route,
889				    inp->inp_socket->so_options, NULL,
890				    &error, &_psref);
891				if (ia4 == NULL) {
892					curlwp_bindx(bound);
893					if (error == 0)
894						error = EADDRNOTAVAIL;
895					goto release;
896				}
897				memset(&laddr_mapped, 0, sizeof(laddr_mapped));
898				laddr_mapped.s6_addr16[5] = 0xffff; /* ugly */
899				memcpy(&laddr_mapped.s6_addr[12],
900				      &IA_SIN(ia4)->sin_addr,
901				      sizeof(IA_SIN(ia4)->sin_addr));
902				ia4_release(ia4, &_psref);
903				curlwp_bindx(bound);
904				laddr = &laddr_mapped;
905			} else
906			{
907				laddr = &inp->inp_laddr6;	/* XXX */
908			}
909		}
910		if (laddr == NULL) {
911			if (error == 0)
912				error = EADDRNOTAVAIL;
913			goto release;
914		}
915		if (inp->inp_lport == 0) {
916			/*
917			 * Craft a sockaddr_in6 for the local endpoint. Use the
918			 * "any" as a base, set the address, and recover the
919			 * scope.
920			 */
921			struct sockaddr_in6 lsin6 =
922			    *((const struct sockaddr_in6 *)inp->inp_socket->so_proto->pr_domain->dom_sa_any);
923			lsin6.sin6_addr = *laddr;
924			error = sa6_recoverscope(&lsin6);
925			if (error)
926				goto release;
927
928			error = in6_pcbsetport(&lsin6, inp, l);
929
930			if (error) {
931				inp->inp_laddr6 = in6addr_any;
932				goto release;
933			}
934		}
935	} else {
936		if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
937			error = ENOTCONN;
938			goto release;
939		}
940		if (IN6_IS_ADDR_V4MAPPED(&inp->inp_faddr6)) {
941			if ((inp->inp_flags & IN6P_IPV6_V6ONLY))
942			{
943				/*
944				 * XXX: this case would happen when the
945				 * application sets the V6ONLY flag after
946				 * connecting the foreign address.
947				 * Such applications should be fixed,
948				 * so we bark here.
949				 */
950				log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
951				    "option was set for a connected socket\n");
952				error = EINVAL;
953				goto release;
954			} else
955				af = AF_INET;
956		}
957		laddr = &inp->inp_laddr6;
958		faddr = &inp->inp_faddr6;
959		fport = inp->inp_fport;
960	}
961
962	if (af == AF_INET)
963		hlen = sizeof(struct ip);
964
965	/*
966	 * Calculate data length and get a mbuf
967	 * for UDP and IP6 headers.
968	 */
969	M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
970	if (m == NULL) {
971		error = ENOBUFS;
972		goto release;
973	}
974
975	/*
976	 * Stuff checksum and output datagram.
977	 */
978	udp6 = (struct udphdr *)(mtod(m, char *) + hlen);
979	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
980	udp6->uh_dport = fport;
981	if (plen <= 0xffff)
982		udp6->uh_ulen = htons((u_int16_t)plen);
983	else
984		udp6->uh_ulen = 0;
985	udp6->uh_sum = 0;
986
987	switch (af) {
988	case AF_INET6:
989		ip6 = mtod(m, struct ip6_hdr *);
990		ip6->ip6_flow	= inp->inp_flowinfo & IPV6_FLOWINFO_MASK;
991		ip6->ip6_vfc 	&= ~IPV6_VERSION_MASK;
992		ip6->ip6_vfc 	|= IPV6_VERSION;
993#if 0		/* ip6_plen will be filled in ip6_output. */
994		ip6->ip6_plen	= htons((u_int16_t)plen);
995#endif
996		ip6->ip6_nxt	= IPPROTO_UDP;
997		ip6->ip6_hlim	= in6_selecthlim_rt(inp);
998		ip6->ip6_src	= *laddr;
999		ip6->ip6_dst	= *faddr;
1000
1001		udp6->uh_sum = in6_cksum_phdr(laddr, faddr,
1002		    htonl(plen), htonl(IPPROTO_UDP));
1003		m->m_pkthdr.csum_flags = M_CSUM_UDPv6;
1004		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1005
1006		UDP6_STATINC(UDP6_STAT_OPACKETS);
1007		error = ip6_output(m, optp, &inp->inp_route, 0,
1008		    inp->inp_moptions6, inp, NULL);
1009		break;
1010	case AF_INET:
1011#ifdef INET
1012		/* can't transmit jumbogram over IPv4 */
1013		if (plen > 0xffff) {
1014			error = EMSGSIZE;
1015			goto release;
1016		}
1017
1018		ip = mtod(m, struct ip *);
1019		ui = (struct udpiphdr *)ip;
1020		memset(ui->ui_x1, 0, sizeof(ui->ui_x1));
1021		ui->ui_pr = IPPROTO_UDP;
1022		ui->ui_len = htons(plen);
1023		memcpy(&ui->ui_src, &laddr->s6_addr[12], sizeof(ui->ui_src));
1024		ui->ui_ulen = ui->ui_len;
1025
1026		flags = (inp->inp_socket->so_options &
1027			 (SO_DONTROUTE | SO_BROADCAST));
1028		memcpy(&ui->ui_dst, &faddr->s6_addr[12], sizeof(ui->ui_dst));
1029
1030		udp6->uh_sum = in_cksum(m, hlen + plen);
1031		if (udp6->uh_sum == 0)
1032			udp6->uh_sum = 0xffff;
1033
1034		ip->ip_len = htons(hlen + plen);
1035		ip->ip_ttl = in6_selecthlim(inp, NULL); /* XXX */
1036		ip->ip_tos = 0;	/* XXX */
1037
1038		UDP_STATINC(UDP_STAT_OPACKETS);
1039		error = ip_output(m, NULL, &inp->inp_route, flags /* XXX */,
1040		    inp->inp_moptions, NULL);
1041		break;
1042#else
1043		error = EAFNOSUPPORT;
1044		goto release;
1045#endif
1046	}
1047	goto releaseopt;
1048
1049release:
1050	m_freem(m);
1051
1052releaseopt:
1053	if (control) {
1054		if (optp == &opt)
1055			ip6_clearpktopts(&opt, -1);
1056		m_freem(control);
1057	}
1058	return (error);
1059}
1060
1061static int
1062udp6_attach(struct socket *so, int proto)
1063{
1064	struct inpcb *inp;
1065	int s, error;
1066
1067	KASSERT(sotoinpcb(so) == NULL);
1068	sosetlock(so);
1069
1070	error = soreserve(so, udp6_sendspace, udp6_recvspace);
1071	if (error) {
1072		return error;
1073	}
1074
1075	/*
1076	 * MAPPED_ADDR implementation spec:
1077	 *  Always attach for IPv6, and only when necessary for IPv4.
1078	 */
1079	s = splsoftnet();
1080	error = in_pcballoc(so, &udbtable);
1081	splx(s);
1082	if (error) {
1083		return error;
1084	}
1085
1086	inp = sotoinpcb(so);
1087	inp->inp_cksum6 = -1;	/* just to be sure */
1088
1089	KASSERT(solocked(so));
1090	return 0;
1091}
1092
1093static void
1094udp6_detach(struct socket *so)
1095{
1096	struct inpcb *inp = sotoinpcb(so);
1097	int s;
1098
1099	KASSERT(solocked(so));
1100	KASSERT(inp != NULL);
1101
1102	s = splsoftnet();
1103	in_pcbdetach(inp);
1104	splx(s);
1105}
1106
1107static int
1108udp6_accept(struct socket *so, struct sockaddr *nam)
1109{
1110	KASSERT(solocked(so));
1111
1112	return EOPNOTSUPP;
1113}
1114
1115static int
1116udp6_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
1117{
1118	struct inpcb *inp = sotoinpcb(so);
1119	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
1120	int error = 0;
1121	int s;
1122
1123	KASSERT(solocked(so));
1124	KASSERT(inp != NULL);
1125
1126	s = splsoftnet();
1127	error = in6_pcbbind(inp, sin6, l);
1128	splx(s);
1129	return error;
1130}
1131
/*
 * udp6_listen: pr_listen handler.
 *
 * Not supported by this protocol: always returns EOPNOTSUPP.  The
 * socket must be locked.
 */
static int
udp6_listen(struct socket *so, struct lwp *l)
{
	const int rv = EOPNOTSUPP;

	KASSERT(solocked(so));

	return rv;
}
1139
1140static int
1141udp6_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
1142{
1143	struct inpcb *inp = sotoinpcb(so);
1144	int error = 0;
1145	int s;
1146
1147	KASSERT(solocked(so));
1148	KASSERT(inp != NULL);
1149
1150	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1151		return EISCONN;
1152	s = splsoftnet();
1153	error = in6_pcbconnect(inp, (struct sockaddr_in6 *)nam, l);
1154	splx(s);
1155	if (error == 0)
1156		soisconnected(so);
1157
1158	return error;
1159}
1160
/*
 * udp6_connect2: pr_connect2 handler.
 *
 * Not supported by this protocol: always returns EOPNOTSUPP.  The
 * first socket must be locked.
 */
static int
udp6_connect2(struct socket *so, struct socket *so2)
{
	const int rv = EOPNOTSUPP;

	KASSERT(solocked(so));

	return rv;
}
1168
1169static int
1170udp6_disconnect(struct socket *so)
1171{
1172	struct inpcb *inp = sotoinpcb(so);
1173	int s;
1174
1175	KASSERT(solocked(so));
1176	KASSERT(inp != NULL);
1177
1178	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1179		return ENOTCONN;
1180
1181	s = splsoftnet();
1182	in6_pcbdisconnect(inp);
1183	memset((void *)&inp->inp_laddr6, 0, sizeof(inp->inp_laddr6));
1184	splx(s);
1185
1186	so->so_state &= ~SS_ISCONNECTED;	/* XXX */
1187	in6_pcbstate(inp, INP_BOUND);		/* XXX */
1188	return 0;
1189}
1190
/*
 * udp6_shutdown: pr_shutdown handler.
 *
 * Calls socantsendmore() on the socket at splsoftnet.  Always
 * returns 0.
 */
static int
udp6_shutdown(struct socket *so)
{
	int spl = splsoftnet();

	socantsendmore(so);
	splx(spl);

	return 0;
}
1202
1203static int
1204udp6_abort(struct socket *so)
1205{
1206	int s;
1207
1208	KASSERT(solocked(so));
1209	KASSERT(sotoinpcb(so) != NULL);
1210
1211	s = splsoftnet();
1212	soisdisconnected(so);
1213	in_pcbdetach(sotoinpcb(so));
1214	splx(s);
1215
1216	return 0;
1217}
1218
1219static int
1220udp6_ioctl(struct socket *so, u_long cmd, void *addr6, struct ifnet *ifp)
1221{
1222	/*
1223	 * MAPPED_ADDR implementation info:
1224	 *  Mapped addr support for PRU_CONTROL is not necessary.
1225	 *  Because typical user of PRU_CONTROL is such as ifconfig,
1226	 *  and they don't associate any addr to their socket.  Then
1227	 *  socket family is only hint about the PRU_CONTROL'ed address
1228	 *  family, especially when getting addrs from kernel.
1229	 *  So AF_INET socket need to be used to control AF_INET addrs,
1230	 *  and AF_INET6 socket for AF_INET6 addrs.
1231	 */
1232	return in6_control(so, cmd, addr6, ifp);
1233}
1234
/*
 * udp6_stat: pr_stat handler.
 *
 * Leaves *ub untouched and returns 0.  The socket must be locked.
 */
static int
udp6_stat(struct socket *so, struct stat *ub)
{
	const int rv = 0;

	KASSERT(solocked(so));

	/* stat: don't bother with a blocksize */
	return rv;
}
1243
1244static int
1245udp6_peeraddr(struct socket *so, struct sockaddr *nam)
1246{
1247	KASSERT(solocked(so));
1248	KASSERT(sotoinpcb(so) != NULL);
1249	KASSERT(nam != NULL);
1250
1251	in6_setpeeraddr(sotoinpcb(so), (struct sockaddr_in6 *)nam);
1252	return 0;
1253}
1254
1255static int
1256udp6_sockaddr(struct socket *so, struct sockaddr *nam)
1257{
1258	KASSERT(solocked(so));
1259	KASSERT(sotoinpcb(so) != NULL);
1260	KASSERT(nam != NULL);
1261
1262	in6_setsockaddr(sotoinpcb(so), (struct sockaddr_in6 *)nam);
1263	return 0;
1264}
1265
/*
 * udp6_rcvd: pr_rcvd handler.
 *
 * Not supported by this protocol: always returns EOPNOTSUPP.  The
 * socket must be locked.
 */
static int
udp6_rcvd(struct socket *so, int flags, struct lwp *l)
{
	const int rv = EOPNOTSUPP;

	KASSERT(solocked(so));

	return rv;
}
1273
/*
 * udp6_recvoob: pr_recvoob handler.
 *
 * Not supported by this protocol: always returns EOPNOTSUPP and does
 * not touch m.  The socket must be locked.
 */
static int
udp6_recvoob(struct socket *so, struct mbuf *m, int flags)
{
	const int rv = EOPNOTSUPP;

	KASSERT(solocked(so));

	return rv;
}
1281
1282static int
1283udp6_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
1284    struct mbuf *control, struct lwp *l)
1285{
1286	struct inpcb *inp = sotoinpcb(so);
1287	int error = 0;
1288	int s;
1289
1290	KASSERT(solocked(so));
1291	KASSERT(inp != NULL);
1292	KASSERT(m != NULL);
1293
1294	s = splsoftnet();
1295	error = udp6_output(inp, m, (struct sockaddr_in6 *)nam, control, l);
1296	splx(s);
1297
1298	return error;
1299}
1300
/*
 * udp6_sendoob: pr_sendoob handler.
 *
 * Not supported by this protocol.  Both the data and the control mbuf
 * chains are freed here before EOPNOTSUPP is returned.  The socket
 * must be locked.
 */
static int
udp6_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{
	const int rv = EOPNOTSUPP;

	KASSERT(solocked(so));

	/* This function frees both chains on behalf of the caller. */
	m_freem(m);
	m_freem(control);

	return rv;
}
1311
1312static int
1313udp6_purgeif(struct socket *so, struct ifnet *ifp)
1314{
1315
1316	mutex_enter(softnet_lock);
1317	in6_pcbpurgeif0(&udbtable, ifp);
1318#ifdef NET_MPSAFE
1319	mutex_exit(softnet_lock);
1320#endif
1321	in6_purgeif(ifp);
1322#ifdef NET_MPSAFE
1323	mutex_enter(softnet_lock);
1324#endif
1325	in6_pcbpurgeif(&udbtable, ifp);
1326	mutex_exit(softnet_lock);
1327
1328	return 0;
1329}
1330
1331static int
1332sysctl_net_inet6_udp6_stats(SYSCTLFN_ARGS)
1333{
1334
1335	return (NETSTAT_SYSCTL(udp6stat_percpu, UDP6_NSTATS));
1336}
1337
1338static void
1339sysctl_net_inet6_udp6_setup(struct sysctllog **clog)
1340{
1341
1342	sysctl_createv(clog, 0, NULL, NULL,
1343		       CTLFLAG_PERMANENT,
1344		       CTLTYPE_NODE, "inet6", NULL,
1345		       NULL, 0, NULL, 0,
1346		       CTL_NET, PF_INET6, CTL_EOL);
1347	sysctl_createv(clog, 0, NULL, NULL,
1348		       CTLFLAG_PERMANENT,
1349		       CTLTYPE_NODE, "udp6",
1350		       SYSCTL_DESCR("UDPv6 related settings"),
1351		       NULL, 0, NULL, 0,
1352		       CTL_NET, PF_INET6, IPPROTO_UDP, CTL_EOL);
1353
1354	sysctl_createv(clog, 0, NULL, NULL,
1355		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1356		       CTLTYPE_INT, "sendspace",
1357		       SYSCTL_DESCR("Default UDP send buffer size"),
1358		       NULL, 0, &udp6_sendspace, 0,
1359		       CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_SENDSPACE,
1360		       CTL_EOL);
1361	sysctl_createv(clog, 0, NULL, NULL,
1362		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1363		       CTLTYPE_INT, "recvspace",
1364		       SYSCTL_DESCR("Default UDP receive buffer size"),
1365		       NULL, 0, &udp6_recvspace, 0,
1366		       CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_RECVSPACE,
1367		       CTL_EOL);
1368	sysctl_createv(clog, 0, NULL, NULL,
1369		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1370		       CTLTYPE_INT, "do_loopback_cksum",
1371		       SYSCTL_DESCR("Perform UDP checksum on loopback"),
1372		       NULL, 0, &udp_do_loopback_cksum, 0,
1373		       CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_LOOPBACKCKSUM,
1374		       CTL_EOL);
1375	sysctl_createv(clog, 0, NULL, NULL,
1376		       CTLFLAG_PERMANENT,
1377		       CTLTYPE_STRUCT, "pcblist",
1378		       SYSCTL_DESCR("UDP protocol control block list"),
1379		       sysctl_inpcblist, 0, &udbtable, 0,
1380		       CTL_NET, PF_INET6, IPPROTO_UDP, CTL_CREATE,
1381		       CTL_EOL);
1382	sysctl_createv(clog, 0, NULL, NULL,
1383		       CTLFLAG_PERMANENT,
1384		       CTLTYPE_STRUCT, "stats",
1385		       SYSCTL_DESCR("UDPv6 statistics"),
1386		       sysctl_net_inet6_udp6_stats, 0, NULL, 0,
1387		       CTL_NET, PF_INET6, IPPROTO_UDP, UDP6CTL_STATS,
1388		       CTL_EOL);
1389}
1390
1391void
1392udp6_statinc(u_int stat)
1393{
1394
1395	KASSERT(stat < UDP6_NSTATS);
1396	UDP6_STATINC(stat);
1397}
1398
#ifdef IPSEC
/*
 * udp6_espinudp: handle a possible ESP-in-UDP packet.
 *
 * mp points at the mbuf chain; off is the offset of the UDP payload
 * (the IPv6 header(s) plus the UDP header precede it).  *mp may be
 * replaced when the chain has to be pulled up.
 *
 * Returns:
 *     1 if the packet was processed (*mp is set to NULL)
 *     0 if normal UDP processing should take place (*mp stays valid,
 *       but may have changed)
 *    -1 if an error occurred and m was freed (*mp is set to NULL)
 */
static int
udp6_espinudp(struct mbuf **mp, int off)
{
	const size_t skip = sizeof(struct udphdr);
	size_t len;
	void *data;
	size_t minlen;
	int ip6hdrlen;
	struct ip6_hdr *ip6;
	struct m_tag *tag;
	struct udphdr *udphdr;
	u_int16_t sport, dport;
	u_int16_t *ports;
	struct mbuf *m = *mp;
	uint32_t *marker;

	/*
	 * Collapse the mbuf chain if the first mbuf is too short
	 * The longest case is: UDP + non ESP marker + ESP
	 */
	minlen = off + sizeof(u_int64_t) + sizeof(struct esp);
	if (minlen > m->m_pkthdr.len)
		minlen = m->m_pkthdr.len;

	if (m->m_len < minlen) {
		/* On failure *mp becomes NULL, matching the -1 contract. */
		if ((*mp = m_pullup(m, minlen)) == NULL) {
			return -1;
		}
		m = *mp;
	}

	len = m->m_len - off;
	data = mtod(m, char *) + off;

	/* Ignore keepalive packets */
	if ((len == 1) && (*(unsigned char *)data == 0xff)) {
		m_freem(m);
		*mp = NULL; /* avoid any further processing by caller ... */
		return 1;
	}

	/* Handle Non-ESP marker (32bit). If zero, then IKE. */
	marker = (uint32_t *)data;
	if (len <= sizeof(uint32_t))
		return 0;
	if (marker[0] == 0)
		return 0;

	/*
	 * Get the UDP ports. They are handled in network
	 * order everywhere in IPSEC_NAT_T code.
	 */
	udphdr = (struct udphdr *)((char *)data - skip);
	sport = udphdr->uh_sport;
	dport = udphdr->uh_dport;

	/*
	 * Remove the UDP header (and possibly the non ESP marker)
	 * IPv6 header length is ip6hdrlen
	 * Before:
	 *   <---- off --->
	 *   +-----+------+-----+
	 *   | IP6 |  UDP | ESP |
	 *   +-----+------+-----+
	 *         <-skip->
	 * After:
	 *          +-----+-----+
	 *          | IP6 | ESP |
	 *          +-----+-----+
	 *   <-skip->
	 */
	ip6hdrlen = off - sizeof(struct udphdr);
	/* Slide the IPv6 header forward over the UDP header, then trim. */
	memmove(mtod(m, char *) + skip, mtod(m, void *), ip6hdrlen);
	m_adj(m, skip);

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - skip);
	ip6->ip6_nxt = IPPROTO_ESP;

	/*
	 * We have modified the packet - it is now ESP, so we should not
	 * return to UDP processing ...
	 *
	 * Add a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember
	 * the source UDP port. This is required if we want
	 * to select the right SPD for multiple hosts behind
	 * same NAT
	 */
	if ((tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
	    sizeof(sport) + sizeof(dport), M_DONTWAIT)) == NULL) {
		m_freem(m);
		/*
		 * Do not leave the caller holding a pointer to the freed
		 * chain; every other freeing path here clears *mp as well.
		 */
		*mp = NULL;
		return -1;
	}
	/* The tag payload follows the m_tag header: sport, then dport. */
	ports = (u_int16_t *)(tag + 1);
	ports[0] = sport;
	ports[1] = dport;
	m_tag_prepend(m, tag);

	if (ipsec_used)
		ipsec6_common_input(&m, &ip6hdrlen, IPPROTO_ESP);
	else
		m_freem(m);

	/* We handled it, it shouldn't be handled by UDP */
	*mp = NULL; /* avoid free by caller ... */
	return 1;
}
#endif /* IPSEC */
1512
1513PR_WRAP_USRREQS(udp6)
1514#define	udp6_attach	udp6_attach_wrapper
1515#define	udp6_detach	udp6_detach_wrapper
1516#define	udp6_accept	udp6_accept_wrapper
1517#define	udp6_bind	udp6_bind_wrapper
1518#define	udp6_listen	udp6_listen_wrapper
1519#define	udp6_connect	udp6_connect_wrapper
1520#define	udp6_connect2	udp6_connect2_wrapper
1521#define	udp6_disconnect	udp6_disconnect_wrapper
1522#define	udp6_shutdown	udp6_shutdown_wrapper
1523#define	udp6_abort	udp6_abort_wrapper
1524#define	udp6_ioctl	udp6_ioctl_wrapper
1525#define	udp6_stat	udp6_stat_wrapper
1526#define	udp6_peeraddr	udp6_peeraddr_wrapper
1527#define	udp6_sockaddr	udp6_sockaddr_wrapper
1528#define	udp6_rcvd	udp6_rcvd_wrapper
1529#define	udp6_recvoob	udp6_recvoob_wrapper
1530#define	udp6_send	udp6_send_wrapper
1531#define	udp6_sendoob	udp6_sendoob_wrapper
1532#define	udp6_purgeif	udp6_purgeif_wrapper
1533
1534const struct pr_usrreqs udp6_usrreqs = {
1535	.pr_attach	= udp6_attach,
1536	.pr_detach	= udp6_detach,
1537	.pr_accept	= udp6_accept,
1538	.pr_bind	= udp6_bind,
1539	.pr_listen	= udp6_listen,
1540	.pr_connect	= udp6_connect,
1541	.pr_connect2	= udp6_connect2,
1542	.pr_disconnect	= udp6_disconnect,
1543	.pr_shutdown	= udp6_shutdown,
1544	.pr_abort	= udp6_abort,
1545	.pr_ioctl	= udp6_ioctl,
1546	.pr_stat	= udp6_stat,
1547	.pr_peeraddr	= udp6_peeraddr,
1548	.pr_sockaddr	= udp6_sockaddr,
1549	.pr_rcvd	= udp6_rcvd,
1550	.pr_recvoob	= udp6_recvoob,
1551	.pr_send	= udp6_send,
1552	.pr_sendoob	= udp6_sendoob,
1553	.pr_purgeif	= udp6_purgeif,
1554};
1555