icmp6.c revision 1.252
1/*	$NetBSD: icmp6.c,v 1.252 2022/08/29 09:14:02 knakahara Exp $	*/
2/*	$KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
62 */
63
64#include <sys/cdefs.h>
65__KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.252 2022/08/29 09:14:02 knakahara Exp $");
66
67#ifdef _KERNEL_OPT
68#include "opt_compat_netbsd.h"
69#include "opt_inet.h"
70#include "opt_ipsec.h"
71#endif
72
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/kmem.h>
76#include <sys/mbuf.h>
77#include <sys/protosw.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
80#include <sys/time.h>
81#include <sys/kernel.h>
82#include <sys/syslog.h>
83#include <sys/domain.h>
84#include <sys/sysctl.h>
85
86#include <net/if.h>
87#include <net/route.h>
88#include <net/if_dl.h>
89#include <net/if_types.h>
90#include <net/nd.h>
91
92#include <netinet/in.h>
93#include <netinet/in_var.h>
94#include <netinet/ip6.h>
95#include <netinet/wqinput.h>
96#include <netinet6/ip6_var.h>
97#include <netinet6/ip6_private.h>
98#include <netinet/icmp6.h>
99#include <netinet6/icmp6_private.h>
100#include <netinet6/mld6_var.h>
101#include <netinet6/in6_pcb.h>
102#include <netinet6/in6_ifattach.h>
103#include <netinet6/ip6protosw.h>
104#include <netinet6/nd6.h>
105#include <netinet6/scope6_var.h>
106
107#ifdef IPSEC
108#include <netipsec/ipsec.h>
109#include <netipsec/ipsec6.h>
110#include <netipsec/key.h>
111#endif
112
113#include "faith.h"
114#if defined(NFAITH) && 0 < NFAITH
115#include <net/if_faith.h>
116#endif
117
/*
 * Ensure that non packed structures are the desired size.
 * Each line is a compile-time assertion on sizeof() of one structure.
 */
/* Structures with the icmp6_ prefix. */
__CTASSERT(sizeof(struct icmp6_hdr) == 8);
__CTASSERT(sizeof(struct icmp6_nodeinfo) == 16);
__CTASSERT(sizeof(struct icmp6_namelookup) == 20);
__CTASSERT(sizeof(struct icmp6_router_renum) == 16);

/* Structures with the nd_ prefix (messages first, then nd_opt_* options). */
__CTASSERT(sizeof(struct nd_router_solicit) == 8);
__CTASSERT(sizeof(struct nd_router_advert) == 16);
__CTASSERT(sizeof(struct nd_neighbor_solicit) == 24);
__CTASSERT(sizeof(struct nd_neighbor_advert) == 24);
__CTASSERT(sizeof(struct nd_redirect) == 40);
__CTASSERT(sizeof(struct nd_opt_hdr) == 2);
__CTASSERT(sizeof(struct nd_opt_route_info) == 8);
__CTASSERT(sizeof(struct nd_opt_prefix_info) == 32);
__CTASSERT(sizeof(struct nd_opt_rd_hdr) == 8);
__CTASSERT(sizeof(struct nd_opt_mtu) == 8);
/* The nonce option is a 2-byte header followed by ND_OPT_NONCE_LEN bytes. */
__CTASSERT(sizeof(struct nd_opt_nonce) == 2 + ND_OPT_NONCE_LEN);
__CTASSERT(sizeof(struct nd_opt_rdnss) == 8);
__CTASSERT(sizeof(struct nd_opt_dnssl) == 8);

/* Remaining structures: mld_hdr, ni_reply_fqdn and the rr_* family. */
__CTASSERT(sizeof(struct mld_hdr) == 24);
__CTASSERT(sizeof(struct ni_reply_fqdn) == 8);
__CTASSERT(sizeof(struct rr_pco_match) == 24);
__CTASSERT(sizeof(struct rr_pco_use) == 32);
__CTASSERT(sizeof(struct rr_result) == 24);
143
extern struct domain inet6domain;

/* Per-CPU array of ICMP6_NSTATS 64-bit counters; allocated in icmp6_init(). */
percpu_t *icmp6stat_percpu;

extern struct inpcbtable raw6cbtable;
/*
 * NOTE(review): the three variables below look like the state used by
 * icmp6_ratelimit(), whose body is outside this part of the file - confirm.
 */
extern int icmp6errppslim;
static int icmp6errpps_count = 0;
static struct timeval icmp6errppslim_last;
/*
 * Tested in _icmp6_input() for ICMP6_WRUREQUEST: zero disables handling,
 * and the WRU form additionally requires bits 0 and 2 (mask 5) to be set.
 */
extern int icmp6_nodeinfo;

bool icmp6_dynamic_rt_msg = false;

/*
 * List of callbacks to notify when Path MTU changes are made.
 * Entries are added by icmp6_mtudisc_callback_register() with icmp6_mtx held.
 */
struct icmp6_mtudisc_callback {
	LIST_ENTRY(icmp6_mtudisc_callback) mc_list;	/* list linkage */
	void (*mc_func)(struct in6_addr *);		/* function to call */
};

LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
    LIST_HEAD_INITIALIZER(&icmp6_mtudisc_callbacks);

/* Route timer queue created in icmp6_init() with pmtu_expire as its timeout. */
static struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
extern int pmtu_expire;

/* XXX do these values make any sense? */
static int icmp6_mtudisc_hiwat = 1280;
static int icmp6_mtudisc_lowat = 256;

/*
 * keep track of # of redirect routes.
 * The queue is created in icmp6_init() with icmp6_redirtimeout as its timeout.
 */
static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;

/* XXX experimental, turned off */
static int icmp6_redirect_hiwat = -1;
static int icmp6_redirect_lowat = -1;

/*
 * Protects the path MTU discovery and redirect state: it is held while the
 * two timer queues are created and while the callback list is walked or
 * modified.
 */
static kmutex_t icmp6_mtx __cacheline_aligned;

static bool icmp6_reflect_pmtu = false;

/* Forward declarations of the file-local helpers. */
static void icmp6_errcount(u_int, int, int);
static int icmp6_rip6_input(struct mbuf **, int);
static void icmp6_reflect(struct mbuf *, size_t);
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
static const char *icmp6_redirect_diag(char *, size_t, struct in6_addr *,
    struct in6_addr *, struct in6_addr *);
static void icmp6_redirect_input(struct mbuf *, int);
static struct mbuf *ni6_input(struct mbuf *, int);
static struct mbuf *ni6_nametodns(const char *, int, int);
static int ni6_dnsmatch(const char *, int, const char *, int);
static int ni6_addrs(struct icmp6_nodeinfo *, struct ifnet **, char *,
    struct psref *);
static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
    struct ifnet *, int);
static int icmp6_notify_error(struct mbuf *, int, int, int);
static struct rtentry *icmp6_mtudisc_clone(struct sockaddr *);
static void icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
static void icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
static void sysctl_net_inet6_icmp6_setup(struct sysctllog **);

/*
 * workqueue-based pr_input: icmp6_input() queues each packet on
 * icmp6_wqinput, which was created with _icmp6_input() as its handler.
 */
static struct wqinput *icmp6_wqinput;
static void _icmp6_input(struct mbuf *m, int off, int proto);
211
/*
 * Initialize the ICMPv6 layer: set up the sysctl nodes and MLD, initialize
 * icmp6_mtx, create the two route timer queues, allocate the per-CPU
 * statistics and create the input workqueue.
 */
void
icmp6_init(void)
{

	sysctl_net_inet6_icmp6_setup(NULL);
	mld_init();

	mutex_init(&icmp6_mtx, MUTEX_DEFAULT, IPL_NONE);
	/* Create both timer queues with icmp6_mtx held. */
	mutex_enter(&icmp6_mtx);
	icmp6_mtudisc_timeout_q = rt_timer_queue_create(pmtu_expire);
	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
	mutex_exit(&icmp6_mtx);

	/* One uint64_t counter per statistic, ICMP6_NSTATS in total. */
	icmp6stat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP6_NSTATS);

	/* Packets queued by icmp6_input() are processed by _icmp6_input(). */
	icmp6_wqinput = wqinput_create("icmp6", _icmp6_input);
}
229
230static void
231icmp6_errcount(u_int base, int type, int code)
232{
233	switch (type) {
234	case ICMP6_DST_UNREACH:
235		switch (code) {
236		case ICMP6_DST_UNREACH_NOROUTE:
237			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_NOROUTE);
238			return;
239		case ICMP6_DST_UNREACH_ADMIN:
240			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_ADMIN);
241			return;
242		case ICMP6_DST_UNREACH_BEYONDSCOPE:
243			ICMP6_STATINC(base +
244				      ICMP6_ERRSTAT_DST_UNREACH_BEYONDSCOPE);
245			return;
246		case ICMP6_DST_UNREACH_ADDR:
247			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_ADDR);
248			return;
249		case ICMP6_DST_UNREACH_NOPORT:
250			ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_NOPORT);
251			return;
252		}
253		break;
254	case ICMP6_PACKET_TOO_BIG:
255		ICMP6_STATINC(base + ICMP6_ERRSTAT_PACKET_TOO_BIG);
256		return;
257	case ICMP6_TIME_EXCEEDED:
258		switch (code) {
259		case ICMP6_TIME_EXCEED_TRANSIT:
260			ICMP6_STATINC(base + ICMP6_ERRSTAT_TIME_EXCEED_TRANSIT);
261			return;
262		case ICMP6_TIME_EXCEED_REASSEMBLY:
263			ICMP6_STATINC(base +
264				      ICMP6_ERRSTAT_TIME_EXCEED_REASSEMBLY);
265			return;
266		}
267		break;
268	case ICMP6_PARAM_PROB:
269		switch (code) {
270		case ICMP6_PARAMPROB_HEADER:
271			ICMP6_STATINC(base + ICMP6_ERRSTAT_PARAMPROB_HEADER);
272			return;
273		case ICMP6_PARAMPROB_NEXTHEADER:
274			ICMP6_STATINC(base +
275				      ICMP6_ERRSTAT_PARAMPROB_NEXTHEADER);
276			return;
277		case ICMP6_PARAMPROB_OPTION:
278			ICMP6_STATINC(base + ICMP6_ERRSTAT_PARAMPROB_OPTION);
279			return;
280		}
281		break;
282	case ND_REDIRECT:
283		ICMP6_STATINC(base + ICMP6_ERRSTAT_REDIRECT);
284		return;
285	}
286	ICMP6_STATINC(base + ICMP6_ERRSTAT_UNKNOWN);
287}
288
289/*
290 * Register a Path MTU Discovery callback.
291 */
292void
293icmp6_mtudisc_callback_register(void (*func)(struct in6_addr *))
294{
295	struct icmp6_mtudisc_callback *mc, *new;
296
297	new = kmem_alloc(sizeof(*mc), KM_SLEEP);
298
299	mutex_enter(&icmp6_mtx);
300	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
301	     mc = LIST_NEXT(mc, mc_list)) {
302		if (mc->mc_func == func) {
303			mutex_exit(&icmp6_mtx);
304			kmem_free(new, sizeof(*mc));
305			return;
306		}
307	}
308
309	new->mc_func = func;
310	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, new, mc_list);
311	mutex_exit(&icmp6_mtx);
312}
313
314/*
315 * A wrapper function for icmp6_error() necessary when the erroneous packet
316 * may not contain enough scope zone information.
317 */
318void
319icmp6_error2(struct mbuf *m, int type, int code, int param,
320	struct ifnet *ifp, struct in6_addr *src)
321{
322	struct ip6_hdr *ip6;
323
324	KASSERT(ifp != NULL);
325
326	if (m->m_len < sizeof(struct ip6_hdr)) {
327		m = m_pullup(m, sizeof(struct ip6_hdr));
328		if (m == NULL)
329			return;
330	}
331
332	ip6 = mtod(m, struct ip6_hdr *);
333
334	if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
335		goto out;
336	if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
337		goto out;
338
339	*src = ip6->ip6_src;
340	icmp6_error(m, type, code, param);
341	return;
342
343out:
344	m_freem(m);
345}
346
/*
 * Generate an error packet of type error in response to bad IP6 packet.
 *
 * m     - the offending IPv6 packet.  It is reused as the body of the error
 *         message, so the caller must not touch it after the call: every
 *         path either frees it, passes it to icmp6_reflect(), or returns
 *         after a helper (m_pullup, IP6_EXTHDR_GET, M_PREPEND) failed.
 *         NOTE(review): those helpers are assumed to release the chain on
 *         failure - confirm against mbuf(9).
 * type  - ICMPv6 type of the message to send.
 * code  - ICMPv6 code of the message to send.
 * param - written to the icmp6_pptr field in network byte order.
 *
 * The statistics ICMP6_STAT_ERROR and the per-type/code output histogram
 * are updated before any of the suppression checks below run, so they count
 * attempts rather than messages actually sent.
 */
void
icmp6_error(struct mbuf *m, int type, int code, int param)
{
	struct ip6_hdr *oip6, *nip6;
	struct icmp6_hdr *icmp6;
	u_int preplen;
	int off;
	int nxt;

	ICMP6_STATINC(ICMP6_STAT_ERROR);

	/* count per-type-code statistics */
	icmp6_errcount(ICMP6_STAT_OUTERRHIST, type, code);

	/* Never answer a packet that carries the M_DECRYPTED flag. */
	if (m->m_flags & M_DECRYPTED) {
		ICMP6_STATINC(ICMP6_STAT_CANTERROR);
		goto freeit;
	}

	/* The IPv6 header is read and later modified in place. */
	if (M_UNWRITABLE(m, sizeof(struct ip6_hdr)) &&
	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL)
		return;
	oip6 = mtod(m, struct ip6_hdr *);

	/*
	 * If the destination address of the erroneous packet is a multicast
	 * address, or the packet was sent using link-layer multicast,
	 * we should basically suppress sending an error (RFC 2463, Section
	 * 2.4).
	 * We have two exceptions (the item e.2 in that section):
	 * - the Packet Too Big message can be sent for path MTU discovery.
	 * - the Parameter Problem Message that can be allowed an icmp6 error
	 *   in the option type field.  This check has been done in
	 *   ip6_unknown_opt(), so we can just check the type and code.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST) ||
	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
	    (type != ICMP6_PACKET_TOO_BIG &&
	     (type != ICMP6_PARAM_PROB ||
	      code != ICMP6_PARAMPROB_OPTION)))
		goto freeit;

	/*
	 * RFC 2463, 2.4 (e.5): source address check.
	 * XXX: the case of anycast source?
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
		goto freeit;

	/*
	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
	 * don't do it.
	 */
	nxt = -1;
	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icp;

		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
			sizeof(*icp));
		if (icp == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			return;
		}
		/* Types below ICMP6_ECHO_REQUEST are treated as errors. */
		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
		    icp->icmp6_type == ND_REDIRECT) {
			/*
			 * ICMPv6 error
			 * Special case: for redirect (which is
			 * informational) we must not send icmp6 error.
			 */
			ICMP6_STATINC(ICMP6_STAT_CANTERROR);
			goto freeit;
		} else {
			/* ICMPv6 informational - send the error */
		}
	} else {
		/* non-ICMPv6 - send the error */
	}

	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */

	/* Finally, do rate limitation check. */
	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
		ICMP6_STATINC(ICMP6_STAT_TOOFREQ);
		goto freeit;
	}

	/*
	 * OK, ICMP6 can be generated.
	 */

	/*
	 * Cap the quoted packet at ICMPV6_PLD_MAXLEN bytes.  The count passed
	 * to m_adj() is zero or negative here.
	 * NOTE(review): a negative count presumably trims from the tail -
	 * confirm against mbuf(9).
	 */
	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);

	/* Make room in front for the new IPv6 and ICMPv6 headers. */
	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
	M_PREPEND(m, preplen, M_DONTWAIT);
	if (m && M_UNWRITABLE(m, preplen))
		m = m_pullup(m, preplen);
	if (m == NULL) {
		nd6log(LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__);
		return;
	}

	/*
	 * The new header starts with the offending packet's addresses,
	 * unswapped; the packet is handed to icmp6_reflect() below.
	 */
	nip6 = mtod(m, struct ip6_hdr *);
	nip6->ip6_src  = oip6->ip6_src;
	nip6->ip6_dst  = oip6->ip6_dst;

	/* The quoted header gets its addresses passed to in6_clearscope(). */
	in6_clearscope(&oip6->ip6_src);
	in6_clearscope(&oip6->ip6_dst);

	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
	icmp6->icmp6_type = type;
	icmp6->icmp6_code = code;
	icmp6->icmp6_pptr = htonl((u_int32_t)param);

	/*
	 * icmp6_reflect() is designed to be in the input path.
	 * icmp6_error() can be called from both input and output path,
	 * and if we are in output path rcvif could contain bogus value.
	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
	 * information in ip header (nip6).
	 */
	m_reset_rcvif(m);

	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);

	/* header order: IPv6 - ICMPv6 */
	icmp6_reflect(m, sizeof(struct ip6_hdr));

	return;

freeit:
	/*
	 * If we can't tell whether or not we can generate ICMP6, free it.
	 */
	m_freem(m);
}
489
/*
 * Process a received ICMP6 message.
 *
 * This is the handler registered with the icmp6 workqueue in icmp6_init().
 *
 * m     - the received packet, starting at the IPv6 header.
 * off   - offset of the ICMPv6 header from the start of the packet.
 * proto - not used by this function.
 *
 * After length, alignment and checksum validation the message is dispatched
 * on its type.  Unless a handler consumed or freed the packet, it is finally
 * handed to icmp6_rip6_input() for delivery to sockets; that includes
 * packets that were merely counted as having a bad code or bad length.
 */
static void
_icmp6_input(struct mbuf *m, int off, int proto)
{
	struct mbuf *n;
	struct ip6_hdr *ip6, *nip6;
	struct icmp6_hdr *icmp6, *nicmp6;
	int icmp6len = m->m_pkthdr.len - off;
	int code, sum;
	struct ifnet *rcvif;
	struct psref psref;
	char ip6buf[INET6_ADDRSTRLEN], ip6buf2[INET6_ADDRSTRLEN];

	/* Hold a reference on the receiving interface until we are done. */
	rcvif = m_get_rcvif_psref(m, &psref);
	if (__predict_false(rcvif == NULL))
		goto freeit;

	/* Size of a WRU reply without the name: IPv6 + ICMPv6 + 4 bytes. */
#define ICMP6_MAXLEN (sizeof(*nip6) + sizeof(*nicmp6) + 4)
	KASSERT(ICMP6_MAXLEN < MCLBYTES);
	icmp6_ifstat_inc(rcvif, ifs6_in_msg);

	/*
	 * Locate icmp6 structure in mbuf, and check
	 * that not corrupted and of at least minimum length
	 */

	if (icmp6len < sizeof(struct icmp6_hdr)) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		icmp6_ifstat_inc(rcvif, ifs6_in_error);
		goto freeit;
	}

	if (m->m_len < sizeof(struct ip6_hdr)) {
		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			icmp6_ifstat_inc(rcvif, ifs6_in_error);
			goto freeit;
		}
	}

	ip6 = mtod(m, struct ip6_hdr *);
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
	if (icmp6 == NULL) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		icmp6_ifstat_inc(rcvif, ifs6_in_error);
		goto freeit;
	}

	/*
	 * Enforce alignment requirements that are violated in
	 * some cases, see kern/50766 for details.
	 */
	if (ACCESSIBLE_POINTER(icmp6, struct ip6_hdr) == 0) {
		m = m_copyup(m, off + sizeof(struct icmp6_hdr), 0);
		if (m == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			icmp6_ifstat_inc(rcvif, ifs6_in_error);
			goto freeit;
		}
		/* The data moved: recompute both header pointers. */
		ip6 = mtod(m, struct ip6_hdr *);
		icmp6 = (struct icmp6_hdr *)(mtod(m, char *) + off);
	}
	KASSERT(ACCESSIBLE_POINTER(icmp6, struct ip6_hdr));

	/*
	 * calculate the checksum
	 */
	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
		nd6log(LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n",
		    icmp6->icmp6_type, sum, IN6_PRINT(ip6buf, &ip6->ip6_src));
		ICMP6_STATINC(ICMP6_STAT_CHECKSUM);
		icmp6_ifstat_inc(rcvif, ifs6_in_error);
		goto freeit;
	}

#if defined(NFAITH) && 0 < NFAITH
	if (faithprefix(&ip6->ip6_dst)) {
		/*
		 * Deliver very specific ICMP6 type only.
		 * This is important to deliver TOOBIG.  Otherwise PMTUD
		 * will not work.
		 */
		switch (icmp6->icmp6_type) {
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
			break;
		default:
			goto freeit;
		}
	}
#endif

	code = icmp6->icmp6_code;
	ICMP6_STATINC(ICMP6_STAT_INHIST + icmp6->icmp6_type);

	/*
	 * Dispatch on the message type.  For the error types, "code" is
	 * rewritten from the ICMPv6 code to a PRC_* value before jumping to
	 * the "deliver" label, which lives inside the default case below.
	 */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		icmp6_ifstat_inc(rcvif, ifs6_in_dstunreach);
		switch (code) {
		case ICMP6_DST_UNREACH_NOROUTE:
			code = PRC_UNREACH_NET;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
			icmp6_ifstat_inc(rcvif, ifs6_in_adminprohib);
			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
			break;
		case ICMP6_DST_UNREACH_ADDR:
			code = PRC_HOSTDEAD;
			break;
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
			/* I mean "source address was incorrect." */
			code = PRC_UNREACH_NET;
			break;
		case ICMP6_DST_UNREACH_NOPORT:
			code = PRC_UNREACH_PORT;
			break;
		default:
			goto badcode;
		}
		goto deliver;

	case ICMP6_PACKET_TOO_BIG:
		icmp6_ifstat_inc(rcvif, ifs6_in_pkttoobig);

		/*
		 * MTU is checked in icmp6_mtudisc.
		 */
		code = PRC_MSGSIZE;

		/*
		 * Updating the path MTU will be done after examining
		 * intermediate extension headers.
		 */
		goto deliver;

	case ICMP6_TIME_EXCEEDED:
		icmp6_ifstat_inc(rcvif, ifs6_in_timeexceed);
		switch (code) {
		case ICMP6_TIME_EXCEED_TRANSIT:
			code = PRC_TIMXCEED_INTRANS;
			break;
		case ICMP6_TIME_EXCEED_REASSEMBLY:
			code = PRC_TIMXCEED_REASS;
			break;
		default:
			goto badcode;
		}
		goto deliver;

	case ICMP6_PARAM_PROB:
		icmp6_ifstat_inc(rcvif, ifs6_in_paramprob);
		switch (code) {
		case ICMP6_PARAMPROB_NEXTHEADER:
			code = PRC_UNREACH_PROTOCOL;
			break;
		case ICMP6_PARAMPROB_HEADER:
		case ICMP6_PARAMPROB_OPTION:
			code = PRC_PARAMPROB;
			break;
		default:
			goto badcode;
		}
		goto deliver;

	case ICMP6_ECHO_REQUEST:
		icmp6_ifstat_inc(rcvif, ifs6_in_echo);
		if (code != 0)
			goto badcode;
		/*
		 * Copy mbuf to send to two data paths: userland socket(s),
		 * and to the querier (echo reply).
		 * m: a copy for socket, n: a copy for querier
		 *
		 * If the first mbuf is shared, or the first mbuf is too short,
		 * copy the first part of the data into a fresh mbuf.
		 * Otherwise, we will wrongly overwrite both copies.
		 */
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* Give up local */
			n = m;
			m = NULL;
		} else if (M_UNWRITABLE(n, off + sizeof(struct icmp6_hdr))) {
			struct mbuf *n0 = n;

			/*
			 * Prepare an internal mbuf.  m_pullup() doesn't
			 * always copy the length we specified.
			 */
			if ((n = m_dup(n0, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
				/* Give up local */
				n = m;
				m = NULL;
			}
			m_freem(n0);
		}
		IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
		    sizeof(*nicmp6));
		if (nicmp6 == NULL)
			goto freeit;
		/* Turn the request into a reply in place and send it back. */
		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
		nicmp6->icmp6_code = 0;
		if (n) {
			uint64_t *icmp6s = ICMP6_STAT_GETREF();
			icmp6s[ICMP6_STAT_REFLECT]++;
			icmp6s[ICMP6_STAT_OUTHIST + ICMP6_ECHO_REPLY]++;
			ICMP6_STAT_PUTREF();
			icmp6_reflect(n, off);
		}
		/* m is NULL when the only copy went to the querier. */
		if (!m)
			goto freeit;
		break;

	case ICMP6_ECHO_REPLY:
		icmp6_ifstat_inc(rcvif, ifs6_in_echoreply);
		if (code != 0)
			goto badcode;
		break;

	case MLD_LISTENER_QUERY:
	case MLD_LISTENER_REPORT:
		if (icmp6len < sizeof(struct mld_hdr))
			goto badlen;
		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
			icmp6_ifstat_inc(rcvif, ifs6_in_mldquery);
		else
			icmp6_ifstat_inc(rcvif, ifs6_in_mldreport);
		/*
		 * The kernel handler gets a copy (n) and the original (m) goes
		 * on to the sockets.  If no copy can be made, the original is
		 * given to the handler and m is cleared so that the m_freem()
		 * at freeit sees NULL.  The same pattern is used by the
		 * router, neighbor and redirect cases below.
		 */
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			mld_input(m, off);
			m = NULL;
			goto freeit;
		}
		mld_input(n, off);
		/* m stays. */
		break;

	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(rcvif, ifs6_in_mlddone);
		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
			goto badlen;
		break;		/* nothing to be done in kernel */

	case MLD_MTRACE_RESP:
	case MLD_MTRACE:
		/* XXX: these two are experimental.  not officially defined. */
		/* XXX: per-interface statistics? */
		break;		/* just pass it to applications */

	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
	    {
		enum { WRU, FQDN } mode;

		if (!icmp6_nodeinfo)
			break;

		/*
		 * The two request forms are told apart by length only: a
		 * message of exactly header + 4 bytes is WRU, anything at
		 * least as large as struct icmp6_nodeinfo is FQDN.
		 */
		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
			mode = WRU;
		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
			mode = FQDN;
		else
			goto badlen;

		if (mode == FQDN) {
			/* ni6_input() returns the reply to send, or NULL. */
			n = m_copypacket(m, M_DONTWAIT);
			if (n)
				n = ni6_input(n, off);
		} else {
			u_char *p;
			int maxhlen;

			/* WRU needs both bit 0 and bit 2 of icmp6_nodeinfo. */
			if ((icmp6_nodeinfo & 5) != 5)
				break;

			if (code != 0)
				goto badcode;
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n && ICMP6_MAXLEN > MHLEN) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL) {
				/* Give up remote */
				break;
			}
			m_reset_rcvif(n);
			n->m_len = 0;
			/* Room left for the host name after the fixed part. */
			maxhlen = M_TRAILINGSPACE(n) - ICMP6_MAXLEN;
			if (maxhlen < 0) {
				m_free(n);
				break;
			}
			if (maxhlen > hostnamelen)
				maxhlen = hostnamelen;
			/*
			 * Copy IPv6 and ICMPv6 only.
			 */
			nip6 = mtod(n, struct ip6_hdr *);
			memcpy(nip6, ip6, sizeof(struct ip6_hdr));
			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
			memcpy(nicmp6, icmp6, sizeof(struct icmp6_hdr));

			/* Payload: 4 zero bytes, then the (truncated) name. */
			p = (u_char *)(nicmp6 + 1);
			memset(p, 0, 4);
			memcpy(p + 4, hostname, maxhlen); /* meaningless TTL */

			m_copy_pkthdr(n, m);
			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
				sizeof(struct icmp6_hdr) + 4 + maxhlen;
			nicmp6->icmp6_type = ICMP6_WRUREPLY;
			nicmp6->icmp6_code = 0;
		}
		if (n) {
			uint64_t *icmp6s = ICMP6_STAT_GETREF();
			icmp6s[ICMP6_STAT_REFLECT]++;
			icmp6s[ICMP6_STAT_OUTHIST + ICMP6_WRUREPLY]++;
			ICMP6_STAT_PUTREF();
			icmp6_reflect(n, sizeof(struct ip6_hdr));
		}
		break;
	    }

	case ICMP6_WRUREPLY:
		if (code != 0)
			goto badcode;
		break;

	case ND_ROUTER_SOLICIT:
		icmp6_ifstat_inc(rcvif, ifs6_in_routersolicit);
		/* FALLTHROUGH */
	case ND_ROUTER_ADVERT:
		if (icmp6->icmp6_type == ND_ROUTER_ADVERT)
			icmp6_ifstat_inc(rcvif, ifs6_in_routeradvert);
		if (code != 0)
			goto badcode;
		if ((icmp6->icmp6_type == ND_ROUTER_SOLICIT &&
		    icmp6len < sizeof(struct nd_router_solicit)) ||
		    (icmp6->icmp6_type == ND_ROUTER_ADVERT &&
		    icmp6len < sizeof(struct nd_router_advert)))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_rtr_cache(m, off, icmp6len, icmp6->icmp6_type);
			m = NULL;
			goto freeit;
		}
		nd6_rtr_cache(n, off, icmp6len, icmp6->icmp6_type);
		/* m stays. */
		break;

	case ND_NEIGHBOR_SOLICIT:
		icmp6_ifstat_inc(rcvif, ifs6_in_neighborsolicit);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_neighbor_solicit))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_ns_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_ns_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_NEIGHBOR_ADVERT:
		icmp6_ifstat_inc(rcvif, ifs6_in_neighboradvert);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_neighbor_advert))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_na_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_na_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_REDIRECT:
		icmp6_ifstat_inc(rcvif, ifs6_in_redirect);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_redirect))
			goto badlen;
		if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
			/* give up local */
			icmp6_redirect_input(m, off);
			m = NULL;
			goto freeit;
		}
		icmp6_redirect_input(n, off);
		/* m stays. */
		break;

	case ICMP6_ROUTER_RENUMBERING:
		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
			goto badcode;
		if (icmp6len < sizeof(struct icmp6_router_renum))
			goto badlen;
		break;

	default:
		nd6log(LOG_DEBUG,
		    "unknown type %d(src=%s, dst=%s, ifid=%d)\n",
		    icmp6->icmp6_type,
		    IN6_PRINT(ip6buf, &ip6->ip6_src),
		    IN6_PRINT(ip6buf2, &ip6->ip6_dst),
		    rcvif ? rcvif->if_index : 0);
		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
			/* ICMPv6 error: MUST deliver it by spec... */
			code = PRC_NCMDS;
			/* deliver */
		} else {
			/* ICMPv6 informational: MUST not deliver */
			break;
		}
	/* Shared tail for every error type; "code" is a PRC_* value here. */
	deliver:
		if (icmp6_notify_error(m, off, icmp6len, code)) {
			/* In this case, m should've been freed. */
			m_put_rcvif_psref(rcvif, &psref);
			return;
		}
		break;

	/* Count the problem only; the packet still goes to the sockets. */
	badcode:
		ICMP6_STATINC(ICMP6_STAT_BADCODE);
		break;

	badlen:
		ICMP6_STATINC(ICMP6_STAT_BADLEN);
		break;
	}
	m_put_rcvif_psref(rcvif, &psref);

	/* deliver the packet to appropriate sockets */
	icmp6_rip6_input(&m, off);

	return;

freeit:
	/* m may be NULL here when it was already consumed or released. */
	m_put_rcvif_psref(rcvif, &psref);
	m_freem(m);
	return;
}
945
946int
947icmp6_input(struct mbuf **mp, int *offp, int proto)
948{
949
950	wqinput_input(icmp6_wqinput, *mp, *offp, proto);
951
952	return IPPROTO_DONE;
953}
954
955static int
956icmp6_notify_error(struct mbuf *m, int off, int icmp6len, int code)
957{
958	struct icmp6_hdr *icmp6;
959	struct ip6_hdr *eip6;
960	u_int32_t notifymtu;
961	struct sockaddr_in6 icmp6src, icmp6dst;
962
963	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
964		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
965		goto freeit;
966	}
967	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
968	    sizeof(*icmp6) + sizeof(struct ip6_hdr));
969	if (icmp6 == NULL) {
970		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
971		return (-1);
972	}
973	eip6 = (struct ip6_hdr *)(icmp6 + 1);
974
975	/* Detect the upper level protocol */
976	{
977		void *(*ctlfunc)(int, const struct sockaddr *, void *);
978		u_int8_t nxt = eip6->ip6_nxt;
979		int eoff = off + sizeof(struct icmp6_hdr) +
980			sizeof(struct ip6_hdr);
981		struct ip6ctlparam ip6cp;
982		struct in6_addr *finaldst = NULL;
983		int icmp6type = icmp6->icmp6_type;
984		struct ip6_frag *fh;
985		struct ip6_rthdr *rth;
986		struct ifnet *rcvif;
987		int s;
988
989		while (1) { /* XXX: should avoid infinite loop explicitly? */
990			struct ip6_ext *eh;
991
992			switch (nxt) {
993			case IPPROTO_HOPOPTS:
994			case IPPROTO_DSTOPTS:
995			case IPPROTO_AH:
996				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
997				    eoff, sizeof(*eh));
998				if (eh == NULL) {
999					ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
1000					return (-1);
1001				}
1002
1003				if (nxt == IPPROTO_AH)
1004					eoff += (eh->ip6e_len + 2) << 2;
1005				else
1006					eoff += (eh->ip6e_len + 1) << 3;
1007				nxt = eh->ip6e_nxt;
1008				break;
1009			case IPPROTO_ROUTING:
1010				/* Ignore the option. */
1011				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
1012				    eoff, sizeof(*rth));
1013				if (rth == NULL) {
1014					ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
1015					return (-1);
1016				}
1017
1018				eoff += (rth->ip6r_len + 1) << 3;
1019				nxt = rth->ip6r_nxt;
1020				break;
1021			case IPPROTO_FRAGMENT:
1022				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
1023				    eoff, sizeof(*fh));
1024				if (fh == NULL) {
1025					ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
1026					return (-1);
1027				}
1028				/*
1029				 * Data after a fragment header is meaningless
1030				 * unless it is the first fragment, but
1031				 * we'll go to the notify label for path MTU
1032				 * discovery.
1033				 */
1034				if (fh->ip6f_offlg & IP6F_OFF_MASK)
1035					goto notify;
1036
1037				eoff += sizeof(struct ip6_frag);
1038				nxt = fh->ip6f_nxt;
1039				break;
1040			default:
1041				/*
1042				 * This case includes ESP and the No Next
1043				 * Header.  In such cases going to the notify
1044				 * label does not have any meaning
1045				 * (i.e. ctlfunc will be NULL), but we go
1046				 * anyway since we might have to update
1047				 * path MTU information.
1048				 */
1049				goto notify;
1050			}
1051		}
1052	  notify:
1053		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
1054		    sizeof(*icmp6) + sizeof(struct ip6_hdr));
1055		if (icmp6 == NULL) {
1056			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
1057			return (-1);
1058		}
1059
1060		/*
1061		 * retrieve parameters from the inner IPv6 header, and convert
1062		 * them into sockaddr structures.
1063		 * XXX: there is no guarantee that the source or destination
1064		 * addresses of the inner packet are in the same scope zone as
1065		 * the addresses of the icmp packet.  But there is no other
1066		 * way to determine the zone.
1067		 */
1068		eip6 = (struct ip6_hdr *)(icmp6 + 1);
1069
1070		rcvif = m_get_rcvif(m, &s);
1071		if (__predict_false(rcvif == NULL))
1072			goto freeit;
1073		sockaddr_in6_init(&icmp6dst,
1074		    (finaldst == NULL) ? &eip6->ip6_dst : finaldst, 0, 0, 0);
1075		if (in6_setscope(&icmp6dst.sin6_addr, rcvif, NULL)) {
1076			m_put_rcvif(rcvif, &s);
1077			goto freeit;
1078		}
1079		sockaddr_in6_init(&icmp6src, &eip6->ip6_src, 0, 0, 0);
1080		if (in6_setscope(&icmp6src.sin6_addr, rcvif, NULL)) {
1081			m_put_rcvif(rcvif, &s);
1082			goto freeit;
1083		}
1084		m_put_rcvif(rcvif, &s);
1085
1086		icmp6src.sin6_flowinfo =
1087			(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
1088
1089		if (finaldst == NULL)
1090			finaldst = &eip6->ip6_dst;
1091		ip6cp.ip6c_m = m;
1092		ip6cp.ip6c_icmp6 = icmp6;
1093		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
1094		ip6cp.ip6c_off = eoff;
1095		ip6cp.ip6c_finaldst = finaldst;
1096		ip6cp.ip6c_src = &icmp6src;
1097		ip6cp.ip6c_nxt = nxt;
1098
1099		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
1100			notifymtu = ntohl(icmp6->icmp6_mtu);
1101			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
1102		}
1103
1104		ctlfunc = inet6sw[ip6_protox[nxt]].pr_ctlinput;
1105		if (ctlfunc) {
1106			(void)(*ctlfunc)(code, sin6tosa(&icmp6dst), &ip6cp);
1107		}
1108	}
1109	return (0);
1110
1111freeit:
1112	m_freem(m);
1113	return (-1);
1114}
1115
1116void
1117icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
1118{
1119	unsigned long rtcount;
1120	struct icmp6_mtudisc_callback *mc;
1121	struct in6_addr *dst = ip6cp->ip6c_finaldst;
1122	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
1123	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
1124	u_int mtu = ntohl(icmp6->icmp6_mtu);
1125	struct rtentry *rt = NULL;
1126	struct sockaddr_in6 sin6;
1127	struct ifnet *rcvif;
1128	int s;
1129
1130	/*
1131	 * The MTU should not be less than the minimal IPv6 MTU except for the
1132	 * hack in ip6_output/ip6_setpmtu where we always include a frag header.
1133	 * In that one case, the MTU might be less than 1280.
1134	 */
1135	if (__predict_false(mtu < IPV6_MMTU - sizeof(struct ip6_frag))) {
1136		/* is the mtu even sane? */
1137		if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8)
1138			return;
1139		if (!validated)
1140			return;
1141		mtu = IPV6_MMTU - sizeof(struct ip6_frag);
1142	}
1143
1144	/*
1145	 * allow non-validated cases if memory is plenty, to make traffic
1146	 * from non-connected pcb happy.
1147	 */
1148	mutex_enter(&icmp6_mtx);
1149	rtcount = rt_timer_count(icmp6_mtudisc_timeout_q);
1150	if (validated) {
1151		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat) {
1152			mutex_exit(&icmp6_mtx);
1153			return;
1154		} else if (0 <= icmp6_mtudisc_lowat &&
1155		    rtcount > icmp6_mtudisc_lowat) {
1156			/*
1157			 * XXX nuke a victim, install the new one.
1158			 */
1159		}
1160	} else {
1161		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat) {
1162			mutex_exit(&icmp6_mtx);
1163			return;
1164		}
1165	}
1166	mutex_exit(&icmp6_mtx);
1167
1168	memset(&sin6, 0, sizeof(sin6));
1169	sin6.sin6_family = PF_INET6;
1170	sin6.sin6_len = sizeof(struct sockaddr_in6);
1171	sin6.sin6_addr = *dst;
1172	rcvif = m_get_rcvif(m, &s);
1173	if (__predict_false(rcvif == NULL))
1174		return;
1175	if (in6_setscope(&sin6.sin6_addr, rcvif, NULL)) {
1176		m_put_rcvif(rcvif, &s);
1177		return;
1178	}
1179	m_put_rcvif(rcvif, &s);
1180
1181	rt = icmp6_mtudisc_clone(sin6tosa(&sin6));
1182
1183	if (rt && (rt->rt_flags & RTF_HOST) &&
1184	    !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
1185	    (rt->rt_rmx.rmx_mtu > mtu || rt->rt_rmx.rmx_mtu == 0)) {
1186		if (mtu < rt->rt_ifp->if_mtu) {
1187			ICMP6_STATINC(ICMP6_STAT_PMTUCHG);
1188			rt->rt_rmx.rmx_mtu = mtu;
1189		}
1190	}
1191	if (rt) {
1192		rt_unref(rt);
1193	}
1194
1195	/*
1196	 * Notify protocols that the MTU for this destination
1197	 * has changed.
1198	 */
1199	mutex_enter(&icmp6_mtx);
1200	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
1201	     mc = LIST_NEXT(mc, mc_list))
1202		(*mc->mc_func)(&sin6.sin6_addr);
1203	mutex_exit(&icmp6_mtx);
1204}
1205
1206/*
1207 * Process a Node Information Query packet, based on
1208 * draft-ietf-ipngwg-icmp-name-lookups-07.
1209 *
1210 * Spec incompatibilities:
1211 * - IPv6 Subject address handling
1212 * - IPv4 Subject address handling support missing
1213 * - Proxy reply (answer even if it's not for me)
1214 * - joins NI group address at in6_ifattach() time only, does not cope
1215 *   with hostname changes by sethostname(3)
1216 */
1217static struct mbuf *
1218ni6_input(struct mbuf *m, int off)
1219{
1220	struct icmp6_nodeinfo *ni6, *nni6;
1221	struct mbuf *n = NULL;
1222	u_int16_t qtype;
1223	int subjlen;
1224	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
1225	struct ni_reply_fqdn *fqdn;
1226	int addrs;		/* for NI_QTYPE_NODEADDR */
1227	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
1228	struct sockaddr_in6 sin6; /* ip6_dst */
1229	struct in6_addr in6_subj; /* subject address */
1230	struct ip6_hdr *ip6;
1231	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
1232	char *subj = NULL;
1233	struct ifnet *rcvif;
1234	int s, ss;
1235	struct ifaddr *ifa;
1236	struct psref psref;
1237
1238	ip6 = mtod(m, struct ip6_hdr *);
1239	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
1240	if (ni6 == NULL) {
1241		/* m is already reclaimed */
1242		return NULL;
1243	}
1244	KASSERT((m->m_flags & M_PKTHDR) != 0);
1245
1246	/*
1247	 * Validate IPv6 destination address.
1248	 *
1249	 * The Responder must discard the Query without further processing
1250	 * unless it is one of the Responder's unicast or anycast addresses, or
1251	 * a link-local scope multicast address which the Responder has joined.
1252	 * [icmp-name-lookups-07, Section 4.]
1253	 */
1254	sockaddr_in6_init(&sin6, &ip6->ip6_dst, 0, 0, 0);
1255	/* XXX scopeid */
1256	ss = pserialize_read_enter();
1257	ifa = ifa_ifwithaddr(sin6tosa(&sin6));
1258	if (ifa != NULL) {
1259		; /* unicast/anycast, fine */
1260	} else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr)) {
1261		; /* link-local multicast, fine */
1262	} else {
1263		pserialize_read_exit(ss);
1264		goto bad;
1265	}
1266	pserialize_read_exit(ss);
1267
1268	/* validate query Subject field. */
1269	qtype = ntohs(ni6->ni_qtype);
1270	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
1271	switch (qtype) {
1272	case NI_QTYPE_NOOP:
1273	case NI_QTYPE_SUPTYPES:
1274		/* 07 draft */
1275		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
1276			break;
1277		/* FALLTHROUGH */
1278	case NI_QTYPE_FQDN:
1279	case NI_QTYPE_NODEADDR:
1280	case NI_QTYPE_IPV4ADDR:
1281		switch (ni6->ni_code) {
1282		case ICMP6_NI_SUBJ_IPV6:
1283#if ICMP6_NI_SUBJ_IPV6 != 0
1284		case 0:
1285#endif
1286			/*
1287			 * backward compatibility - try to accept 03 draft
1288			 * format, where no Subject is present.
1289			 */
1290			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
1291			    subjlen == 0) {
1292				oldfqdn++;
1293				break;
1294			}
1295#if ICMP6_NI_SUBJ_IPV6 != 0
1296			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
1297				goto bad;
1298#endif
1299
1300			if (subjlen != sizeof(sin6.sin6_addr))
1301				goto bad;
1302
1303			/*
1304			 * Validate Subject address.
1305			 *
1306			 * Not sure what exactly "address belongs to the node"
1307			 * means in the spec, is it just unicast, or what?
1308			 *
1309			 * At this moment we consider Subject address as
1310			 * "belong to the node" if the Subject address equals
1311			 * to the IPv6 destination address; validation for
1312			 * IPv6 destination address should have done enough
1313			 * check for us.
1314			 *
1315			 * We do not do proxy at this moment.
1316			 */
1317			/* m_pulldown instead of copy? */
1318			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
1319			    subjlen, (void *)&in6_subj);
1320			rcvif = m_get_rcvif(m, &s);
1321			if (__predict_false(rcvif == NULL))
1322				goto bad;
1323			if (in6_setscope(&in6_subj, rcvif, NULL)) {
1324				m_put_rcvif(rcvif, &s);
1325				goto bad;
1326			}
1327			m_put_rcvif(rcvif, &s);
1328
1329			subj = (char *)&in6_subj;
1330			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
1331				break;
1332
1333			/*
1334			 * XXX if we are to allow other cases, we should really
1335			 * be careful about scope here.
1336			 * basically, we should disallow queries toward IPv6
1337			 * destination X with subject Y, if scope(X) > scope(Y).
1338			 * if we allow scope(X) > scope(Y), it will result in
1339			 * information leakage across scope boundary.
1340			 */
1341			goto bad;
1342
1343		case ICMP6_NI_SUBJ_FQDN:
1344			/*
1345			 * Validate Subject name with gethostname(3).
1346			 *
1347			 * The behavior may need some debate, since:
1348			 * - we are not sure if the node has FQDN as
1349			 *   hostname (returned by gethostname(3)).
1350			 * - the code does wildcard match for truncated names.
1351			 *   however, we are not sure if we want to perform
1352			 *   wildcard match, if gethostname(3) side has
1353			 *   truncated hostname.
1354			 */
1355			n = ni6_nametodns(hostname, hostnamelen, 0);
1356			if (!n || n->m_next || n->m_len == 0)
1357				goto bad;
1358			IP6_EXTHDR_GET(subj, char *, m,
1359			    off + sizeof(struct icmp6_nodeinfo), subjlen);
1360			if (subj == NULL)
1361				goto bad;
1362			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
1363			    n->m_len)) {
1364				goto bad;
1365			}
1366			m_freem(n);
1367			n = NULL;
1368			break;
1369
1370		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
1371		default:
1372			goto bad;
1373		}
1374		break;
1375	}
1376
1377	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
1378	switch (qtype) {
1379	case NI_QTYPE_FQDN:
1380		if ((icmp6_nodeinfo & 1) == 0)
1381			goto bad;
1382		break;
1383	case NI_QTYPE_NODEADDR:
1384	case NI_QTYPE_IPV4ADDR:
1385		if ((icmp6_nodeinfo & 2) == 0)
1386			goto bad;
1387		break;
1388	}
1389
1390	/* guess reply length */
1391	switch (qtype) {
1392	case NI_QTYPE_NOOP:
1393		break;		/* no reply data */
1394	case NI_QTYPE_SUPTYPES:
1395		replylen += sizeof(u_int32_t);
1396		break;
1397	case NI_QTYPE_FQDN:
1398		/* will append an mbuf */
1399		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1400		break;
1401	case NI_QTYPE_NODEADDR:
1402		addrs = ni6_addrs(ni6, &ifp, subj, &psref);
1403		replylen += addrs *
1404		    (sizeof(struct in6_addr) + sizeof(u_int32_t));
1405		if (replylen > MCLBYTES)
1406			replylen = MCLBYTES; /* XXX: will truncate pkt later */
1407		break;
1408	case NI_QTYPE_IPV4ADDR:
1409		/* unsupported - should respond with unknown Qtype? */
1410		goto bad;
1411	default:
1412		/*
1413		 * XXX: We must return a reply with the ICMP6 code
1414		 * `unknown Qtype' in this case.  However we regard the case
1415		 * as an FQDN query for backward compatibility.
1416		 * Older versions set a random value to this field,
1417		 * so it rarely varies in the defined qtypes.
1418		 * But the mechanism is not reliable...
1419		 * maybe we should obsolete older versions.
1420		 */
1421		qtype = NI_QTYPE_FQDN;
1422		/* will append an mbuf */
1423		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1424		oldfqdn++;
1425		break;
1426	}
1427
1428	/* allocate an mbuf to reply. */
1429	MGETHDR(n, M_DONTWAIT, m->m_type);
1430	if (n == NULL) {
1431		goto bad;
1432	}
1433	m_move_pkthdr(n, m);
1434	if (replylen > MHLEN) {
1435		if (replylen > MCLBYTES) {
1436			/*
1437			 * XXX: should we try to allocate more? But MCLBYTES
1438			 * is probably much larger than IPV6_MMTU...
1439			 */
1440			goto bad;
1441		}
1442		MCLGET(n, M_DONTWAIT);
1443		if ((n->m_flags & M_EXT) == 0) {
1444			goto bad;
1445		}
1446	}
1447	n->m_pkthdr.len = n->m_len = replylen;
1448
1449	/* copy mbuf header and IPv6 + Node Information base headers */
1450	bcopy(mtod(m, void *), mtod(n, void *), sizeof(struct ip6_hdr));
1451	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
1452	bcopy((void *)ni6, (void *)nni6, sizeof(struct icmp6_nodeinfo));
1453
1454	/* qtype dependent procedure */
1455	switch (qtype) {
1456	case NI_QTYPE_NOOP:
1457		nni6->ni_code = ICMP6_NI_SUCCESS;
1458		nni6->ni_flags = 0;
1459		break;
1460	case NI_QTYPE_SUPTYPES:
1461	{
1462		u_int32_t v;
1463		nni6->ni_code = ICMP6_NI_SUCCESS;
1464		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
1465		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
1466		v = (u_int32_t)htonl(0x0000000f);
1467		memcpy(nni6 + 1, &v, sizeof(u_int32_t));
1468		break;
1469	}
1470	case NI_QTYPE_FQDN:
1471		nni6->ni_code = ICMP6_NI_SUCCESS;
1472		fqdn = (struct ni_reply_fqdn *)(mtod(n, char *) +
1473		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
1474		nni6->ni_flags = 0; /* XXX: meaningless TTL */
1475		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
1476		/*
1477		 * XXX do we really have FQDN in variable "hostname"?
1478		 */
1479		n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn);
1480		if (n->m_next == NULL)
1481			goto bad;
1482		/* XXX we assume that n->m_next is not a chain */
1483		if (n->m_next->m_next != NULL)
1484			goto bad;
1485		n->m_pkthdr.len += n->m_next->m_len;
1486		break;
1487	case NI_QTYPE_NODEADDR:
1488	{
1489		int lenlim, copied;
1490
1491		nni6->ni_code = ICMP6_NI_SUCCESS;
1492		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
1493		    sizeof(struct icmp6_nodeinfo);
1494		lenlim = M_TRAILINGSPACE(n);
1495		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
1496		if_put(ifp, &psref);
1497		ifp = NULL;
1498		/* update mbuf length */
1499		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
1500		    sizeof(struct icmp6_nodeinfo) + copied;
1501		break;
1502	}
1503	default:
1504		panic("%s: impossible", __func__);
1505		break;
1506	}
1507
1508	nni6->ni_type = ICMP6_NI_REPLY;
1509	m_freem(m);
1510	return n;
1511
1512bad:
1513	if_put(ifp, &psref);
1514	m_freem(m);
1515	if (n)
1516		m_freem(n);
1517	return NULL;
1518}
1519
/*
 * Local character-class helpers covering only the ASCII letters 'A'-'Z',
 * 'a'-'z' and the digits '0'-'9'.
 * NOTE: every macro may evaluate its argument more than once, so callers
 * must not pass an expression with side effects (such as *p++).
 */
1520#define isupper(x) ('A' <= (x) && (x) <= 'Z')
1521#define isalpha(x) (('A' <= (x) && (x) <= 'Z') || ('a' <= (x) && (x) <= 'z'))
1522#define isalnum(x) (isalpha(x) || ('0' <= (x) && (x) <= '9'))
1523#define tolower(x) (isupper(x) ? (x) + 'a' - 'A' : (x))
1524
1525/*
1526 * make a mbuf with DNS-encoded string.  no compression support.
1527 *
1528 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
1529 * treated as truncated name (two \0 at the end).  this is a wild guess.
1530 *
1531 * old - return pascal string if non-zero
1532 */
1533static struct mbuf *
1534ni6_nametodns(const char *name, int namelen, int old)
1535{
1536	struct mbuf *m;
1537	char *cp, *ep;
1538	const char *p, *q;
1539	int i, len, nterm;
1540
1541	if (old)
1542		len = namelen + 1;
1543	else
1544		len = MCLBYTES;
1545
1546	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
1547	MGET(m, M_DONTWAIT, MT_DATA);
1548	if (m && len > MLEN) {
1549		MCLGET(m, M_DONTWAIT);
1550		if ((m->m_flags & M_EXT) == 0)
1551			goto fail;
1552	}
1553	if (!m)
1554		goto fail;
1555	m->m_next = NULL;
1556
1557	if (old) {
1558		m->m_len = len;
1559		*mtod(m, char *) = namelen;
1560		memcpy(mtod(m, char *) + 1, name, namelen);
1561		return m;
1562	} else {
1563		m->m_len = 0;
1564		cp = mtod(m, char *);
1565		ep = mtod(m, char *) + M_TRAILINGSPACE(m);
1566
1567		/* if not certain about my name, return empty buffer */
1568		if (namelen == 0)
1569			return m;
1570
1571		/*
1572		 * guess if it looks like shortened hostname, or FQDN.
1573		 * shortened hostname needs two trailing "\0".
1574		 */
1575		i = 0;
1576		for (p = name; p < name + namelen; p++) {
1577			if (*p == '.')
1578				i++;
1579		}
1580		if (i < 2)
1581			nterm = 2;
1582		else
1583			nterm = 1;
1584
1585		p = name;
1586		while (cp < ep && p < name + namelen) {
1587			i = 0;
1588			for (q = p; q < name + namelen && *q && *q != '.'; q++)
1589				i++;
1590			/* result does not fit into mbuf */
1591			if (cp + i + 1 >= ep)
1592				goto fail;
1593			/*
1594			 * DNS label length restriction, RFC1035 page 8.
1595			 * "i == 0" case is included here to avoid returning
1596			 * 0-length label on "foo..bar".
1597			 */
1598			if (i <= 0 || i >= 64)
1599				goto fail;
1600			*cp++ = i;
1601			if (!isalpha(p[0]) || !isalnum(p[i - 1]))
1602				goto fail;
1603			while (i > 0) {
1604				if (!isalnum(*p) && *p != '-')
1605					goto fail;
1606				if (isupper(*p)) {
1607					*cp++ = tolower(*p);
1608					p++;
1609				} else
1610					*cp++ = *p++;
1611				i--;
1612			}
1613			p = q;
1614			if (p < name + namelen && *p == '.')
1615				p++;
1616		}
1617		/* termination */
1618		if (cp + nterm >= ep)
1619			goto fail;
1620		while (nterm-- > 0)
1621			*cp++ = '\0';
1622		m->m_len = cp - mtod(m, char *);
1623		return m;
1624	}
1625
1626	panic("should not reach here");
1627	/* NOTREACHED */
1628
1629fail:
1630	if (m)
1631		m_freem(m);
1632	return NULL;
1633}
1634
/*
 * Compare two DNS labels of `len' octets each, ignoring the case of the
 * ASCII letters.  Returns 1 if they are equal, 0 otherwise.
 */
static int
ni6_dnslabel_equal(const char *a, const char *b, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		unsigned char ca = (unsigned char)a[i];
		unsigned char cb = (unsigned char)b[i];

		if (ca >= 'A' && ca <= 'Z')
			ca += 'a' - 'A';
		if (cb >= 'A' && cb <= 'Z')
			cb += 'a' - 'A';
		if (ca != cb)
			return 0;
	}
	return 1;
}

/*
 * check if two DNS-encoded string matches.  takes care of truncated
 * form (with \0\0 at the end).  no compression support.
 * Labels are compared case-independently.
 *
 * a, alen - first encoded name and its length in octets
 * b, blen - second encoded name and its length in octets
 *
 * Returns 1 on a match, 0 otherwise.  Byte-identical inputs of equal
 * length always match.  Otherwise both names must be at least two octets
 * long and end in a \0 octet; a name in truncated form matches any name
 * that starts with the same labels.
 */
static int
ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
{
	const char *a0, *b0;
	int l;

	/* a negative length can never describe a valid name */
	if (alen < 0 || blen < 0)
		return 0;

	/* simplest case - need validation? */
	if (alen == blen && memcmp(a, b, alen) == 0)
		return 1;

	a0 = a;
	b0 = b;

	/* termination is mandatory */
	if (alen < 2 || blen < 2)
		return 0;
	if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
		return 0;
	/* leave the final \0 out of the label walk */
	alen--;
	blen--;

	while (a - a0 < alen && b - b0 < blen) {
		/* a length octet with the top bit set is not a plain label */
		if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
			return 0;
		/* we don't support compression yet */
		if (a[0] >= 64 || b[0] >= 64)
			return 0;

		/* truncated case: \0 right before the final terminator */
		if (a[0] == 0 && a - a0 == alen - 1)
			return 1;
		if (b[0] == 0 && b - b0 == blen - 1)
			return 1;
		/* an empty label anywhere else is malformed */
		if (a[0] == 0 || b[0] == 0)
			return 0;

		if (a[0] != b[0])
			return 0;
		l = a[0];
		/* the label must lie completely inside both names */
		if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
			return 0;
		if (!ni6_dnslabel_equal(a + 1, b + 1, l))
			return 0;

		a += 1 + l;
		b += 1 + l;
	}

	/* both names must have been consumed completely */
	return (a - a0 == alen && b - b0 == blen);
}
1696
1697/*
1698 * calculate the number of addresses to be returned in the node info reply.
1699 */
1700static int
1701ni6_addrs(struct icmp6_nodeinfo *ni6, struct ifnet **ifpp, char *subj,
1702    struct psref *psref)
1703{
1704	struct ifnet *ifp;
1705	struct in6_ifaddr *ia6;
1706	struct ifaddr *ifa;
1707	struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */
1708	int addrs = 0, addrsofif, iffound = 0;
1709	int niflags = ni6->ni_flags;
1710	int s;
1711
1712	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
1713		switch (ni6->ni_code) {
1714		case ICMP6_NI_SUBJ_IPV6:
1715			if (subj == NULL) /* must be impossible... */
1716				return 0;
1717			subj_ip6 = (struct sockaddr_in6 *)subj;
1718			break;
1719		default:
1720			/*
1721			 * XXX: we only support IPv6 subject address for
1722			 * this Qtype.
1723			 */
1724			return 0;
1725		}
1726	}
1727
1728	s = pserialize_read_enter();
1729	IFNET_READER_FOREACH(ifp) {
1730		addrsofif = 0;
1731		IFADDR_READER_FOREACH(ifa, ifp) {
1732			if (ifa->ifa_addr->sa_family != AF_INET6)
1733				continue;
1734			ia6 = (struct in6_ifaddr *)ifa;
1735
1736			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
1737			    IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr,
1738			     &ia6->ia_addr.sin6_addr))
1739				iffound = 1;
1740
1741			/*
1742			 * IPv4-mapped addresses can only be returned by a
1743			 * Node Information proxy, since they represent
1744			 * addresses of IPv4-only nodes, which perforce do
1745			 * not implement this protocol.
1746			 * [icmp-name-lookups-07, Section 5.4]
1747			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
1748			 * this function at this moment.
1749			 */
1750
1751			/* What do we have to do about ::1? */
1752			switch (in6_addrscope(&ia6->ia_addr.sin6_addr)) {
1753			case IPV6_ADDR_SCOPE_LINKLOCAL:
1754				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1755					continue;
1756				break;
1757			case IPV6_ADDR_SCOPE_SITELOCAL:
1758				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1759					continue;
1760				break;
1761			case IPV6_ADDR_SCOPE_GLOBAL:
1762				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1763					continue;
1764				break;
1765			default:
1766				continue;
1767			}
1768
1769			/*
1770			 * check if anycast is okay.
1771			 * XXX: just experimental.  not in the spec.
1772			 */
1773			if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1774			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1775				continue; /* we need only unicast addresses */
1776
1777			addrsofif++; /* count the address */
1778		}
1779		if (iffound) {
1780			if_acquire(ifp, psref);
1781			pserialize_read_exit(s);
1782			*ifpp = ifp;
1783			return addrsofif;
1784		}
1785
1786		addrs += addrsofif;
1787	}
1788	pserialize_read_exit(s);
1789
1790	return addrs;
1791}
1792
1793static int
1794ni6_store_addrs(struct icmp6_nodeinfo *ni6,
1795	struct icmp6_nodeinfo *nni6, struct ifnet *ifp0,
1796	int resid)
1797{
1798	struct ifnet *ifp;
1799	struct in6_ifaddr *ia6;
1800	struct ifaddr *ifa;
1801	struct ifnet *ifp_dep = NULL;
1802	int copied = 0, allow_deprecated = 0;
1803	u_char *cp = (u_char *)(nni6 + 1);
1804	int niflags = ni6->ni_flags;
1805	u_int32_t ltime;
1806	int s;
1807
1808	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
1809		return 0;	/* needless to copy */
1810
1811	s = pserialize_read_enter();
1812	ifp = ifp0 ? ifp0 : IFNET_READER_FIRST();
1813again:
1814
1815	for (; ifp; ifp = IFNET_READER_NEXT(ifp))
1816	{
1817		IFADDR_READER_FOREACH(ifa, ifp) {
1818			if (ifa->ifa_addr->sa_family != AF_INET6)
1819				continue;
1820			ia6 = (struct in6_ifaddr *)ifa;
1821
1822			if ((ia6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
1823			    allow_deprecated == 0) {
1824				/*
1825				 * prefererred address should be put before
1826				 * deprecated addresses.
1827				 */
1828
1829				/* record the interface for later search */
1830				if (ifp_dep == NULL)
1831					ifp_dep = ifp;
1832
1833				continue;
1834			}
1835			else if ((ia6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
1836				 allow_deprecated != 0)
1837				continue; /* we now collect deprecated addrs */
1838
1839			/* What do we have to do about ::1? */
1840			switch (in6_addrscope(&ia6->ia_addr.sin6_addr)) {
1841			case IPV6_ADDR_SCOPE_LINKLOCAL:
1842				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1843					continue;
1844				break;
1845			case IPV6_ADDR_SCOPE_SITELOCAL:
1846				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1847					continue;
1848				break;
1849			case IPV6_ADDR_SCOPE_GLOBAL:
1850				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1851					continue;
1852				break;
1853			default:
1854				continue;
1855			}
1856
1857			/*
1858			 * check if anycast is okay.
1859			 * XXX: just experimental.  not in the spec.
1860			 */
1861			if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1862			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1863				continue;
1864
1865			/* now we can copy the address */
1866			if (resid < sizeof(struct in6_addr) +
1867			    sizeof(u_int32_t)) {
1868				/*
1869				 * We give up much more copy.
1870				 * Set the truncate flag and return.
1871				 */
1872				nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
1873				goto out;
1874			}
1875
1876			/*
1877			 * Set the TTL of the address.
1878			 * The TTL value should be one of the following
1879			 * according to the specification:
1880			 *
1881			 * 1. The remaining lifetime of a DHCP lease on the
1882			 *    address, or
1883			 * 2. The remaining Valid Lifetime of a prefix from
1884			 *    which the address was derived through Stateless
1885			 *    Autoconfiguration.
1886			 *
1887			 * Note that we currently do not support stateful
1888			 * address configuration by DHCPv6, so the former
1889			 * case can't happen.
1890			 *
1891			 * TTL must be 2^31 > TTL >= 0.
1892			 */
1893			if (ia6->ia6_lifetime.ia6t_expire == 0)
1894				ltime = ND6_INFINITE_LIFETIME;
1895			else {
1896				if (ia6->ia6_lifetime.ia6t_expire >
1897				    time_uptime)
1898					ltime = ia6->ia6_lifetime.ia6t_expire -
1899					    time_uptime;
1900				else
1901					ltime = 0;
1902			}
1903			if (ltime > 0x7fffffff)
1904				ltime = 0x7fffffff;
1905			ltime = htonl(ltime);
1906
1907			memcpy(cp, &ltime, sizeof(u_int32_t));
1908			cp += sizeof(u_int32_t);
1909
1910			/* copy the address itself */
1911			bcopy(&ia6->ia_addr.sin6_addr, cp,
1912			      sizeof(struct in6_addr));
1913			in6_clearscope((struct in6_addr *)cp); /* XXX */
1914			cp += sizeof(struct in6_addr);
1915
1916			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
1917			copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
1918		}
1919		if (ifp0)	/* we need search only on the specified IF */
1920			break;
1921	}
1922
1923	if (allow_deprecated == 0 && ifp_dep != NULL) {
1924		ifp = ifp_dep;
1925		allow_deprecated = 1;
1926
1927		goto again;
1928	}
1929out:
1930	pserialize_read_exit(s);
1931	return copied;
1932}
1933
1934/*
1935 * XXX almost dup'ed code with rip6_input.
1936 */
1937static int
1938icmp6_rip6_input(struct mbuf **mp, int off)
1939{
1940	struct mbuf *m = *mp;
1941	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1942	struct inpcb_hdr *inph;
1943	struct in6pcb *in6p;
1944	struct in6pcb *last = NULL;
1945	struct sockaddr_in6 rip6src;
1946	struct icmp6_hdr *icmp6;
1947	struct mbuf *n, *opts = NULL;
1948
1949	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
1950	if (icmp6 == NULL) {
1951		/* m is already reclaimed */
1952		return IPPROTO_DONE;
1953	}
1954
1955	/*
1956	 * XXX: the address may have embedded scope zone ID, which should be
1957	 * hidden from applications.
1958	 */
1959	sockaddr_in6_init(&rip6src, &ip6->ip6_src, 0, 0, 0);
1960	if (sa6_recoverscope(&rip6src)) {
1961		m_freem(m);
1962		return IPPROTO_DONE;
1963	}
1964
1965	TAILQ_FOREACH(inph, &raw6cbtable.inpt_queue, inph_queue) {
1966		in6p = (struct in6pcb *)inph;
1967		if (in6p->in6p_af != AF_INET6)
1968			continue;
1969		if (in6p->in6p_ip6.ip6_nxt != IPPROTO_ICMPV6)
1970			continue;
1971		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
1972		    !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
1973			continue;
1974		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
1975		    !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
1976			continue;
1977		if (in6p->in6p_icmp6filt &&
1978		    ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
1979		    in6p->in6p_icmp6filt))
1980			continue;
1981
1982		if (last == NULL) {
1983			;
1984		}
1985#ifdef IPSEC
1986		else if (ipsec_used && ipsec_in_reject(m, last)) {
1987			/* do not inject data into pcb */
1988		}
1989#endif
1990		else if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) {
1991			if (last->in6p_flags & IN6P_CONTROLOPTS)
1992				ip6_savecontrol(last, &opts, ip6, n);
1993			/* strip intermediate headers */
1994			m_adj(n, off);
1995			if (sbappendaddr(&last->in6p_socket->so_rcv,
1996			    sin6tosa(&rip6src), n, opts) == 0) {
1997				soroverflow(last->in6p_socket);
1998				m_freem(n);
1999				if (opts)
2000					m_freem(opts);
2001			} else {
2002				sorwakeup(last->in6p_socket);
2003			}
2004			opts = NULL;
2005		}
2006
2007		last = in6p;
2008	}
2009
2010#ifdef IPSEC
2011	if (ipsec_used && last && ipsec_in_reject(m, last)) {
2012		m_freem(m);
2013		IP6_STATDEC(IP6_STAT_DELIVERED);
2014		/* do not inject data into pcb */
2015	} else
2016#endif
2017	if (last) {
2018		if (last->in6p_flags & IN6P_CONTROLOPTS)
2019			ip6_savecontrol(last, &opts, ip6, m);
2020		/* strip intermediate headers */
2021		m_adj(m, off);
2022		if (sbappendaddr(&last->in6p_socket->so_rcv,
2023		    sin6tosa(&rip6src), m, opts) == 0) {
2024			soroverflow(last->in6p_socket);
2025			m_freem(m);
2026			if (opts)
2027				m_freem(opts);
2028		} else {
2029			sorwakeup(last->in6p_socket);
2030		}
2031	} else {
2032		m_freem(m);
2033		IP6_STATDEC(IP6_STAT_DELIVERED);
2034	}
2035	return IPPROTO_DONE;
2036}
2037
2038/*
2039 * Reflect the ip6 packet back to the source.
2040 * OFF points to the icmp6 header, counted from the top of the mbuf.
2041 *
2042 * Note: RFC 1885 required that an echo reply should be truncated if it
2043 * did not fit in with (return) path MTU, and KAME code supported the
2044 * behavior.  However, as a clarification after the RFC, this limitation
2045 * was removed in a revised version of the spec, RFC 2463.  We had kept the
2046 * old behavior, with a (non-default) ifdef block, while the new version of
2047 * the spec was an internet-draft status, and even after the new RFC was
2048 * published.  But it would rather make sense to clean the obsoleted part
2049 * up, and to make the code simpler at this stage.
2050 */
2051static void
2052icmp6_reflect(struct mbuf *m, size_t off)
2053{
2054	struct ip6_hdr *ip6;
2055	struct icmp6_hdr *icmp6;
2056	const struct in6_ifaddr *ia;
2057	const struct ip6aux *ip6a;
2058	int plen;
2059	int type, code;
2060	struct ifnet *outif = NULL;
2061	struct in6_addr origdst;
2062	struct ifnet *rcvif;
2063	int s;
2064	bool ip6_src_filled = false;
2065	int flags;
2066
2067	/* too short to reflect */
2068	if (off < sizeof(struct ip6_hdr)) {
2069		nd6log(LOG_DEBUG,
2070		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
2071		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
2072		    __FILE__, __LINE__);
2073		goto bad;
2074	}
2075
2076	/*
2077	 * If there are extra headers between IPv6 and ICMPv6, strip
2078	 * off that header first.
2079	 */
2080	CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
2081	if (off > sizeof(struct ip6_hdr)) {
2082		size_t l;
2083		struct ip6_hdr nip6;
2084
2085		l = off - sizeof(struct ip6_hdr);
2086		m_copydata(m, 0, sizeof(nip6), (void *)&nip6);
2087		m_adj(m, l);
2088		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2089		if (m->m_len < l) {
2090			if ((m = m_pullup(m, l)) == NULL)
2091				return;
2092		}
2093		memcpy(mtod(m, void *), (void *)&nip6, sizeof(nip6));
2094	} else {
2095		size_t l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2096		if (m->m_len < l) {
2097			if ((m = m_pullup(m, l)) == NULL)
2098				return;
2099		}
2100	}
2101
2102	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
2103	ip6 = mtod(m, struct ip6_hdr *);
2104	ip6->ip6_nxt = IPPROTO_ICMPV6;
2105	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
2106	type = icmp6->icmp6_type; /* keep type for statistics */
2107	code = icmp6->icmp6_code; /* ditto. */
2108
2109	origdst = ip6->ip6_dst;
2110	/*
2111	 * ip6_input() drops a packet if its src is multicast.
2112	 * So, the src is never multicast.
2113	 */
2114	ip6->ip6_dst = ip6->ip6_src;
2115
2116	/*
2117	 * If the incoming packet was addressed directly to us (i.e. unicast),
2118	 * use dst as the src for the reply.
2119	 * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
2120	 * (for example) when we encounter an error while forwarding procedure
2121	 * destined to a duplicated address of ours.
2122	 * Note that ip6_getdstifaddr() may fail if we are in an error handling
2123	 * procedure of an outgoing packet of our own, in which case we need
2124	 * to search in the ifaddr list.
2125	 */
2126	if (IN6_IS_ADDR_MULTICAST(&origdst)) {
2127		;
2128	} else if ((ip6a = ip6_getdstifaddr(m)) != NULL) {
2129		if ((ip6a->ip6a_flags &
2130		     (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
2131			ip6->ip6_src = ip6a->ip6a_src;
2132			ip6_src_filled = true;
2133		}
2134	} else {
2135		union {
2136			struct sockaddr_in6 sin6;
2137			struct sockaddr sa;
2138		} u;
2139		int _s;
2140		struct ifaddr *ifa;
2141
2142		sockaddr_in6_init(&u.sin6, &origdst, 0, 0, 0);
2143
2144		_s = pserialize_read_enter();
2145		ifa = ifa_ifwithaddr(&u.sa);
2146
2147		if (ifa != NULL) {
2148			ia = ifatoia6(ifa);
2149			if ((ia->ia6_flags &
2150				 (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
2151				ip6->ip6_src = ia->ia_addr.sin6_addr;
2152				ip6_src_filled = true;
2153			}
2154		}
2155		pserialize_read_exit(_s);
2156	}
2157
2158	if (!ip6_src_filled) {
2159		int e;
2160		struct sockaddr_in6 sin6;
2161		struct route ro;
2162
2163		/*
2164		 * This case matches to multicasts, our anycast, or unicasts
2165		 * that we do not own.  Select a source address based on the
2166		 * source address of the erroneous packet.
2167		 */
2168		/* zone ID should be embedded */
2169		sockaddr_in6_init(&sin6, &ip6->ip6_dst, 0, 0, 0);
2170
2171		memset(&ro, 0, sizeof(ro));
2172		e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, NULL, NULL,
2173		    &ip6->ip6_src);
2174		rtcache_free(&ro);
2175		if (e != 0) {
2176			char ip6buf[INET6_ADDRSTRLEN];
2177			nd6log(LOG_DEBUG,
2178			    "source can't be determined: "
2179			    "dst=%s, error=%d\n",
2180			    IN6_PRINT(ip6buf, &sin6.sin6_addr), e);
2181			goto bad;
2182		}
2183	}
2184
2185	ip6->ip6_flow = 0;
2186	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2187	ip6->ip6_vfc |= IPV6_VERSION;
2188	ip6->ip6_nxt = IPPROTO_ICMPV6;
2189	rcvif = m_get_rcvif(m, &s);
2190	if (rcvif) {
2191		/* XXX: This may not be the outgoing interface */
2192		ip6->ip6_hlim = ND_IFINFO(rcvif)->chlim;
2193	} else {
2194		ip6->ip6_hlim = ip6_defhlim;
2195	}
2196	m_put_rcvif(rcvif, &s);
2197
2198	m->m_pkthdr.csum_flags = 0;
2199	icmp6->icmp6_cksum = 0;
2200	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
2201	    sizeof(struct ip6_hdr), plen);
2202
2203	/*
2204	 * XXX option handling
2205	 */
2206
2207	m->m_flags &= ~(M_BCAST|M_MCAST);
2208
2209	/*
2210	 * Note for icmp6_reflect_pmtu == false
2211	 * To avoid a "too big" situation at an intermediate router
2212	 * and the path MTU discovery process, specify the IPV6_MINMTU flag.
2213	 * Note that only echo and node information replies are affected,
2214	 * since the length of ICMP6 errors is limited to the minimum MTU.
2215	 */
2216	flags = icmp6_reflect_pmtu ? 0 : IPV6_MINMTU;
2217	if (ip6_output(m, NULL, NULL, flags, NULL, NULL, &outif) != 0 &&
2218	    outif)
2219		icmp6_ifstat_inc(outif, ifs6_out_error);
2220	if (outif)
2221		icmp6_ifoutstat_inc(outif, type, code);
2222
2223	return;
2224
2225 bad:
2226	m_freem(m);
2227	return;
2228}
2229
2230static const char *
2231icmp6_redirect_diag(char *buf, size_t buflen, struct in6_addr *src6,
2232    struct in6_addr *dst6,  struct in6_addr *tgt6)
2233{
2234	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
2235	char ip6buft[INET6_ADDRSTRLEN];
2236
2237	snprintf(buf, buflen, "(src=%s dst=%s tgt=%s)",
2238	    IN6_PRINT(ip6bufs, src6), IN6_PRINT(ip6bufd, dst6),
2239	    IN6_PRINT(ip6buft, tgt6));
2240	return buf;
2241}
2242
2243static void
2244icmp6_redirect_input(struct mbuf *m, int off)
2245{
2246	struct ifnet *ifp;
2247	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
2248	struct nd_redirect *nd_rd;
2249	int icmp6len = m->m_pkthdr.len - off;
2250	char *lladdr = NULL;
2251	int lladdrlen = 0;
2252	struct rtentry *rt = NULL;
2253	int is_router;
2254	int is_onlink;
2255	struct in6_addr src6 = ip6->ip6_src;
2256	struct in6_addr redtgt6;
2257	struct in6_addr reddst6;
2258	union nd_opts ndopts;
2259	struct psref psref;
2260	char ip6buf[INET6_ADDRSTRLEN];
2261	char diagbuf[256];
2262
2263	ifp = m_get_rcvif_psref(m, &psref);
2264	if (ifp == NULL)
2265		goto freeit;
2266
2267	/* XXX if we are router, we don't update route by icmp6 redirect */
2268	if (ip6_forwarding)
2269		goto freeit;
2270	if (!icmp6_rediraccept)
2271		goto freeit;
2272
2273	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
2274	if (nd_rd == NULL) {
2275		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
2276		m_put_rcvif_psref(ifp, &psref);
2277		return;
2278	}
2279	redtgt6 = nd_rd->nd_rd_target;
2280	reddst6 = nd_rd->nd_rd_dst;
2281
2282	if (in6_setscope(&redtgt6, ifp, NULL) ||
2283	    in6_setscope(&reddst6, ifp, NULL)) {
2284		goto freeit;
2285	}
2286
2287	/* validation */
2288	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
2289		nd6log(LOG_ERR,
2290		    "ICMP6 redirect sent from %s rejected; "
2291		    "must be from linklocal\n", IN6_PRINT(ip6buf, &src6));
2292		goto bad;
2293	}
2294	if (ip6->ip6_hlim != 255) {
2295		nd6log(LOG_ERR,
2296		    "ICMP6 redirect sent from %s rejected; "
2297		    "hlim=%d (must be 255)\n",
2298		    IN6_PRINT(ip6buf, &src6), ip6->ip6_hlim);
2299		goto bad;
2300	}
2301
2302    {
2303	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
2304	struct sockaddr_in6 sin6;
2305	struct in6_addr *gw6;
2306
2307	sockaddr_in6_init(&sin6, &reddst6, 0, 0, 0);
2308	rt = rtalloc1(sin6tosa(&sin6), 0);
2309	if (rt) {
2310		if (rt->rt_gateway == NULL ||
2311		    rt->rt_gateway->sa_family != AF_INET6) {
2312			nd6log(LOG_ERR,
2313			    "ICMP6 redirect rejected; no route "
2314			    "with inet6 gateway found for redirect dst: %s\n",
2315			    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
2316			    &src6, &reddst6, &redtgt6));
2317			rt_unref(rt);
2318			goto bad;
2319		}
2320
2321		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
2322		if (memcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
2323			nd6log(LOG_ERR,
2324			    "ICMP6 redirect rejected; "
2325			    "not equal to gw-for-src=%s (must be same): %s\n",
2326			    IN6_PRINT(ip6buf, gw6),
2327			    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
2328			    &src6, &reddst6, &redtgt6));
2329			rt_unref(rt);
2330			goto bad;
2331		}
2332	} else {
2333		nd6log(LOG_ERR, "ICMP6 redirect rejected; "
2334		    "no route found for redirect dst: %s\n",
2335		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
2336		    &src6, &reddst6, &redtgt6));
2337		goto bad;
2338	}
2339	rt_unref(rt);
2340	rt = NULL;
2341    }
2342
2343	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
2344		nd6log(LOG_ERR, "ICMP6 redirect rejected; "
2345		    "redirect dst must be unicast: %s\n",
2346		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
2347		    &src6, &reddst6, &redtgt6));
2348		goto bad;
2349	}
2350
2351	is_router = is_onlink = 0;
2352	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2353		is_router = 1;	/* router case */
2354	if (memcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
2355		is_onlink = 1;	/* on-link destination case */
2356	if (!is_router && !is_onlink) {
2357		nd6log(LOG_ERR, "ICMP6 redirect rejected; "
2358		    "neither router case nor onlink case: %s\n",
2359		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
2360		    &src6, &reddst6, &redtgt6));
2361		goto bad;
2362	}
2363	/* validation passed */
2364
2365	icmp6len -= sizeof(*nd_rd);
2366	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
2367	if (nd6_options(&ndopts) < 0) {
2368		nd6log(LOG_INFO, "invalid ND option, rejected: %s\n",
2369		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
2370		    &src6, &reddst6, &redtgt6));
2371		/* nd6_options have incremented stats */
2372		goto freeit;
2373	}
2374
2375	if (ndopts.nd_opts_tgt_lladdr) {
2376		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
2377		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
2378	}
2379
2380	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
2381		nd6log(LOG_INFO, "lladdrlen mismatch for %s "
2382		    "(if %d, icmp6 packet %d): %s\n",
2383		    IN6_PRINT(ip6buf, &redtgt6),
2384		    ifp->if_addrlen, lladdrlen - 2,
2385		    icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
2386		    &src6, &reddst6, &redtgt6));
2387		goto bad;
2388	}
2389
2390	/* RFC 2461 8.3 */
2391	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
2392	    is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
2393
2394	m_put_rcvif_psref(ifp, &psref);
2395	ifp = NULL;
2396
2397	if (!is_onlink) {	/* better router case.  perform rtredirect. */
2398		/* perform rtredirect */
2399		struct sockaddr_in6 sdst;
2400		struct sockaddr_in6 sgw;
2401		struct sockaddr_in6 ssrc;
2402		unsigned long rtcount;
2403		struct rtentry *newrt = NULL;
2404
2405		/*
2406		 * do not install redirect route, if the number of entries
2407		 * is too much (> hiwat).  note that, the node (= host) will
2408		 * work just fine even if we do not install redirect route
2409		 * (there will be additional hops, though).
2410		 */
2411		mutex_enter(&icmp6_mtx);
2412		rtcount = rt_timer_count(icmp6_redirect_timeout_q);
2413		if (0 <= ip6_maxdynroutes && rtcount >= ip6_maxdynroutes) {
2414			mutex_exit(&icmp6_mtx);
2415			goto freeit;
2416		}
2417		if (0 <= icmp6_redirect_hiwat && rtcount > icmp6_redirect_hiwat) {
2418			mutex_exit(&icmp6_mtx);
2419			goto freeit;
2420		} else if (0 <= icmp6_redirect_lowat &&
2421		    rtcount > icmp6_redirect_lowat) {
2422			/*
2423			 * XXX nuke a victim, install the new one.
2424			 */
2425		}
2426
2427		memset(&sdst, 0, sizeof(sdst));
2428		memset(&sgw, 0, sizeof(sgw));
2429		memset(&ssrc, 0, sizeof(ssrc));
2430		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
2431		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
2432		    sizeof(struct sockaddr_in6);
2433		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
2434		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2435		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
2436		rtredirect(sin6tosa(&sdst), sin6tosa(&sgw), NULL,
2437		    RTF_GATEWAY | RTF_HOST, sin6tosa(&ssrc), &newrt);
2438
2439		if (newrt) {
2440			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
2441			    icmp6_redirect_timeout_q);
2442			rt_unref(newrt);
2443		}
2444		mutex_exit(&icmp6_mtx);
2445	}
2446	/* finally update cached route in each socket via pfctlinput */
2447	{
2448		struct sockaddr_in6 sdst;
2449
2450		sockaddr_in6_init(&sdst, &reddst6, 0, 0, 0);
2451		pfctlinput(PRC_REDIRECT_HOST, sin6tosa(&sdst));
2452#if defined(IPSEC)
2453		if (ipsec_used)
2454			key_sa_routechange(sin6tosa(&sdst));
2455#endif
2456	}
2457
2458freeit:
2459	if (ifp != NULL)
2460		m_put_rcvif_psref(ifp, &psref);
2461	m_freem(m);
2462	return;
2463
2464bad:
2465	m_put_rcvif_psref(ifp, &psref);
2466	ICMP6_STATINC(ICMP6_STAT_BADREDIRECT);
2467	m_freem(m);
2468}
2469
2470void
2471icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
2472{
2473	struct ifnet *ifp;	/* my outgoing interface */
2474	struct in6_addr *ifp_ll6;
2475	struct in6_addr *nexthop;
2476	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
2477	struct mbuf *m = NULL;	/* newly allocated one */
2478	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
2479	struct nd_redirect *nd_rd;
2480	size_t maxlen;
2481	u_char *p;
2482	struct sockaddr_in6 src_sa;
2483
2484	icmp6_errcount(ICMP6_STAT_OUTERRHIST, ND_REDIRECT, 0);
2485
2486	/* if we are not router, we don't send icmp6 redirect */
2487	if (!ip6_forwarding)
2488		goto fail;
2489
2490	/* sanity check */
2491	KASSERT(m0 != NULL);
2492	KASSERT(rt != NULL);
2493
2494	ifp = rt->rt_ifp;
2495
2496	/*
2497	 * Address check:
2498	 *  the source address must identify a neighbor, and
2499	 *  the destination address must not be a multicast address
2500	 *  [RFC 2461, sec 8.2]
2501	 */
2502	sip6 = mtod(m0, struct ip6_hdr *);
2503	sockaddr_in6_init(&src_sa, &sip6->ip6_src, 0, 0, 0);
2504	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
2505		goto fail;
2506	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
2507		goto fail;	/* what should we do here? */
2508
2509	/* rate limit */
2510	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
2511		goto fail;
2512
2513	/*
2514	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
2515	 * we almost always ask for an mbuf cluster for simplicity.
2516	 * (MHLEN < IPV6_MMTU is almost always true)
2517	 */
2518	MGETHDR(m, M_DONTWAIT, MT_HEADER);
2519	if (m && IPV6_MMTU >= MHLEN) {
2520#if IPV6_MMTU >= MCLBYTES
2521		MEXTMALLOC(m, IPV6_MMTU, M_NOWAIT);
2522#else
2523		MCLGET(m, M_DONTWAIT);
2524#endif
2525	}
2526
2527	if (!m)
2528		goto fail;
2529	m_reset_rcvif(m);
2530	m->m_len = 0;
2531	maxlen = M_TRAILINGSPACE(m);
2532	maxlen = uimin(IPV6_MMTU, maxlen);
2533
2534	/* just for safety */
2535	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct nd_redirect) +
2536	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
2537		goto fail;
2538	}
2539
2540	{
2541		/* get ip6 linklocal address for ifp(my outgoing interface). */
2542		struct in6_ifaddr *ia;
2543		int s = pserialize_read_enter();
2544		if ((ia = in6ifa_ifpforlinklocal(ifp,
2545						 IN6_IFF_NOTREADY|
2546						 IN6_IFF_ANYCAST)) == NULL) {
2547			pserialize_read_exit(s);
2548			goto fail;
2549		}
2550		ifp_ll6 = &ia->ia_addr.sin6_addr;
2551		pserialize_read_exit(s);
2552	}
2553
2554	/* get ip6 linklocal address for the router. */
2555	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
2556		struct sockaddr_in6 *sin6;
2557		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
2558		nexthop = &sin6->sin6_addr;
2559		if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
2560			nexthop = NULL;
2561	} else
2562		nexthop = NULL;
2563
2564	/* ip6 */
2565	ip6 = mtod(m, struct ip6_hdr *);
2566	ip6->ip6_flow = 0;
2567	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2568	ip6->ip6_vfc |= IPV6_VERSION;
2569	/* ip6->ip6_plen will be set later */
2570	ip6->ip6_nxt = IPPROTO_ICMPV6;
2571	ip6->ip6_hlim = 255;
2572	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
2573	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
2574	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
2575
2576	/* ND Redirect */
2577	nd_rd = (struct nd_redirect *)(ip6 + 1);
2578	nd_rd->nd_rd_type = ND_REDIRECT;
2579	nd_rd->nd_rd_code = 0;
2580	nd_rd->nd_rd_reserved = 0;
2581	if (rt->rt_flags & RTF_GATEWAY) {
2582		/*
2583		 * nd_rd->nd_rd_target must be a link-local address in
2584		 * better router cases.
2585		 */
2586		if (!nexthop)
2587			goto fail;
2588		bcopy(nexthop, &nd_rd->nd_rd_target,
2589		      sizeof(nd_rd->nd_rd_target));
2590		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2591		      sizeof(nd_rd->nd_rd_dst));
2592	} else {
2593		/* make sure redtgt == reddst */
2594		nexthop = &sip6->ip6_dst;
2595		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
2596		      sizeof(nd_rd->nd_rd_target));
2597		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2598		      sizeof(nd_rd->nd_rd_dst));
2599	}
2600
2601	p = (u_char *)(nd_rd + 1);
2602
2603	{
2604		/* target lladdr option */
2605		struct llentry *ln = NULL;
2606		int len, pad;
2607		struct nd_opt_hdr *nd_opt;
2608		char *lladdr;
2609
2610		ln = nd6_lookup(nexthop, ifp, false);
2611		if (ln == NULL)
2612			goto nolladdropt;
2613		len = sizeof(*nd_opt) + ifp->if_addrlen;
2614		len = (len + 7) & ~7;	/* round by 8 */
2615		pad = len - (sizeof(*nd_opt) + ifp->if_addrlen);
2616
2617		/* safety check */
2618		if (len + (p - (u_char *)ip6) > maxlen) {
2619			LLE_RUNLOCK(ln);
2620			goto nolladdropt;
2621		}
2622
2623		if (ln->la_flags & LLE_VALID) {
2624			nd_opt = (struct nd_opt_hdr *)p;
2625			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
2626			nd_opt->nd_opt_len = len >> 3;
2627			lladdr = (char *)(nd_opt + 1);
2628			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
2629			memset(lladdr + ifp->if_addrlen, 0, pad);
2630			p += len;
2631		}
2632		LLE_RUNLOCK(ln);
2633	}
2634nolladdropt:
2635
2636	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2637
2638	/* just to be safe */
2639	if (m0->m_flags & M_DECRYPTED)
2640		goto noredhdropt;
2641	if (p - (u_char *)ip6 > maxlen)
2642		goto noredhdropt;
2643
2644	{
2645		/* redirected header option */
2646		int len;
2647		struct nd_opt_rd_hdr *nd_opt_rh;
2648
2649		/*
2650		 * compute the maximum size for icmp6 redirect header option.
2651		 * XXX room for auth header?
2652		 */
2653		len = maxlen - (p - (u_char *)ip6);
2654		len &= ~7;
2655
2656		if (len < sizeof(*nd_opt_rh)) {
2657			goto noredhdropt;
2658		}
2659
2660		/*
2661		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
2662		 * about padding/truncate rule for the original IP packet.
2663		 * From the discussion on IPv6imp in Feb 1999,
2664		 * the consensus was:
2665		 * - "attach as much as possible" is the goal
2666		 * - pad if not aligned (original size can be guessed by
2667		 *   original ip6 header)
2668		 * Following code adds the padding if it is simple enough,
2669		 * and truncates if not.
2670		 */
2671		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
2672			/* not enough room, truncate */
2673			m_adj(m0, (len - sizeof(*nd_opt_rh)) -
2674			    m0->m_pkthdr.len);
2675		} else {
2676			/*
2677			 * enough room, truncate if not aligned.
2678			 * we don't pad here for simplicity.
2679			 */
2680			int extra;
2681
2682			extra = m0->m_pkthdr.len % 8;
2683			if (extra) {
2684				/* truncate */
2685				m_adj(m0, -extra);
2686			}
2687			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
2688		}
2689
2690		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
2691		memset(nd_opt_rh, 0, sizeof(*nd_opt_rh));
2692		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
2693		nd_opt_rh->nd_opt_rh_len = len >> 3;
2694		p += sizeof(*nd_opt_rh);
2695		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2696
2697		/* connect m0 to m */
2698		m->m_pkthdr.len += m0->m_pkthdr.len;
2699		m_cat(m, m0);
2700		m0 = NULL;
2701	}
2702noredhdropt:
2703	if (m0) {
2704		m_freem(m0);
2705		m0 = NULL;
2706	}
2707
2708	/* XXX: clear embedded link IDs in the inner header */
2709	in6_clearscope(&sip6->ip6_src);
2710	in6_clearscope(&sip6->ip6_dst);
2711	in6_clearscope(&nd_rd->nd_rd_target);
2712	in6_clearscope(&nd_rd->nd_rd_dst);
2713
2714	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
2715
2716	nd_rd->nd_rd_cksum = 0;
2717	nd_rd->nd_rd_cksum =
2718	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));
2719
2720	/* send the packet to outside... */
2721	if (ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL) != 0)
2722		icmp6_ifstat_inc(ifp, ifs6_out_error);
2723
2724	icmp6_ifstat_inc(ifp, ifs6_out_msg);
2725	icmp6_ifstat_inc(ifp, ifs6_out_redirect);
2726	ICMP6_STATINC(ICMP6_STAT_OUTHIST + ND_REDIRECT);
2727
2728	return;
2729
2730fail:
2731	if (m)
2732		m_freem(m);
2733	if (m0)
2734		m_freem(m0);
2735}
2736
2737/*
2738 * ICMPv6 socket option processing.
2739 */
2740int
2741icmp6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
2742{
2743	int error = 0;
2744	struct in6pcb *in6p = sotoin6pcb(so);
2745
2746	if (sopt->sopt_level != IPPROTO_ICMPV6)
2747		return rip6_ctloutput(op, so, sopt);
2748
2749	switch (op) {
2750	case PRCO_SETOPT:
2751		switch (sopt->sopt_name) {
2752		case ICMP6_FILTER:
2753		    {
2754			struct icmp6_filter fil;
2755
2756			error = sockopt_get(sopt, &fil, sizeof(fil));
2757			if (error)
2758				break;
2759			memcpy(in6p->in6p_icmp6filt, &fil,
2760			    sizeof(struct icmp6_filter));
2761			error = 0;
2762			break;
2763		    }
2764
2765		default:
2766			error = ENOPROTOOPT;
2767			break;
2768		}
2769		break;
2770
2771	case PRCO_GETOPT:
2772		switch (sopt->sopt_name) {
2773		case ICMP6_FILTER:
2774		    {
2775			if (in6p->in6p_icmp6filt == NULL) {
2776				error = EINVAL;
2777				break;
2778			}
2779			error = sockopt_set(sopt, in6p->in6p_icmp6filt,
2780			    sizeof(struct icmp6_filter));
2781			break;
2782		    }
2783
2784		default:
2785			error = ENOPROTOOPT;
2786			break;
2787		}
2788		break;
2789	}
2790
2791	return error;
2792}
2793
2794/*
2795 * Perform rate limit check.
2796 * Returns 0 if it is okay to send the icmp6 packet.
2797 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
2798 * limitation.
2799 *
2800 * XXX per-destination/type check necessary?
2801 */
2802static int
2803icmp6_ratelimit(
2804	const struct in6_addr *dst,	/* not used at this moment */
2805	const int type,		/* not used at this moment */
2806	const int code)		/* not used at this moment */
2807{
2808	int ret;
2809
2810	ret = 0;	/* okay to send */
2811
2812	/* PPS limit */
2813	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
2814	    icmp6errppslim)) {
2815		/* The packet is subject to rate limit */
2816		ret++;
2817	}
2818
2819	return ret;
2820}
2821
2822static struct rtentry *
2823icmp6_mtudisc_clone(struct sockaddr *dst)
2824{
2825	struct rtentry *rt;
2826	int    error;
2827
2828	rt = rtalloc1(dst, 1);
2829	if (rt == NULL)
2830		return NULL;
2831
2832	/* If we didn't get a host route, allocate one */
2833	if ((rt->rt_flags & RTF_HOST) == 0) {
2834		struct rtentry *nrt;
2835
2836		error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL,
2837		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
2838		if (error) {
2839			rt_unref(rt);
2840			return NULL;
2841		}
2842		nrt->rt_rmx = rt->rt_rmx;
2843		rt_newmsg_dynamic(RTM_ADD, nrt);
2844		rt_unref(rt);
2845		rt = nrt;
2846	}
2847
2848	mutex_enter(&icmp6_mtx);
2849	error = rt_timer_add(rt, icmp6_mtudisc_timeout,
2850			icmp6_mtudisc_timeout_q);
2851	mutex_exit(&icmp6_mtx);
2852
2853	if (error) {
2854		rt_unref(rt);
2855		return NULL;
2856	}
2857
2858	return rt;	/* caller need to call rtfree() */
2859}
2860
2861static void
2862icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
2863{
2864	struct rtentry *retrt;
2865
2866	KASSERT(rt != NULL);
2867	rt_assert_referenced(rt);
2868
2869	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
2870	    (RTF_DYNAMIC | RTF_HOST)) {
2871		rtrequest(RTM_DELETE, rt_getkey(rt),
2872		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &retrt);
2873		rt_newmsg_dynamic(RTM_DELETE, retrt);
2874		rt_unref(rt);
2875		rt_free(retrt);
2876	} else {
2877		if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
2878			rt->rt_rmx.rmx_mtu = 0;
2879	}
2880}
2881
2882static void
2883icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
2884{
2885	struct rtentry *retrt;
2886
2887	KASSERT(rt != NULL);
2888	rt_assert_referenced(rt);
2889
2890	if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) ==
2891	    (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) {
2892		rtrequest(RTM_DELETE, rt_getkey(rt),
2893		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &retrt);
2894		rt_newmsg_dynamic(RTM_DELETE, retrt);
2895		rt_unref(rt);
2896		rt_free(retrt);
2897	}
2898}
2899
#ifdef COMPAT_90
/*
 * sysctl helper for the compat net.inet6.icmp6 nd6 list nodes: refuses
 * requests that carry further name components and otherwise forwards
 * the request, keyed by the node number, to nd6_sysctl().
 */
static int
sysctl_net_inet6_icmp6_nd6(SYSCTLFN_ARGS)
{
	/* reference the arguments this helper does not otherwise use */
	(void)&name;
	(void)&l;
	(void)&oname;

	if (namelen != 0)
		return EINVAL;

	/* XXXUNCONST */
	return nd6_sysctl(rnode->sysctl_num, oldp, oldlenp,
	    __UNCONST(newp), newlen);
}
#endif
2919
2920static int
2921sysctl_net_inet6_icmp6_stats(SYSCTLFN_ARGS)
2922{
2923
2924	return (NETSTAT_SYSCTL(icmp6stat_percpu, ICMP6_NSTATS));
2925}
2926
2927static int
2928sysctl_net_inet6_icmp6_redirtimeout(SYSCTLFN_ARGS)
2929{
2930	int error, tmp;
2931	struct sysctlnode node;
2932
2933	mutex_enter(&icmp6_mtx);
2934
2935	node = *rnode;
2936	node.sysctl_data = &tmp;
2937	tmp = icmp6_redirtimeout;
2938	error = sysctl_lookup(SYSCTLFN_CALL(&node));
2939	if (error || newp == NULL)
2940		goto out;
2941	if (tmp < 0) {
2942		error = EINVAL;
2943		goto out;
2944	}
2945	icmp6_redirtimeout = tmp;
2946
2947	if (icmp6_redirect_timeout_q != NULL) {
2948		if (icmp6_redirtimeout == 0) {
2949			rt_timer_queue_destroy(icmp6_redirect_timeout_q);
2950		} else {
2951			rt_timer_queue_change(icmp6_redirect_timeout_q,
2952			    icmp6_redirtimeout);
2953		}
2954	} else if (icmp6_redirtimeout > 0) {
2955		icmp6_redirect_timeout_q =
2956		    rt_timer_queue_create(icmp6_redirtimeout);
2957	}
2958	error = 0;
2959out:
2960	mutex_exit(&icmp6_mtx);
2961	return error;
2962}
2963
2964static void
2965sysctl_net_inet6_icmp6_setup(struct sysctllog **clog)
2966{
2967
2968	sysctl_createv(clog, 0, NULL, NULL,
2969		       CTLFLAG_PERMANENT,
2970		       CTLTYPE_NODE, "inet6", NULL,
2971		       NULL, 0, NULL, 0,
2972		       CTL_NET, PF_INET6, CTL_EOL);
2973	sysctl_createv(clog, 0, NULL, NULL,
2974		       CTLFLAG_PERMANENT,
2975		       CTLTYPE_NODE, "icmp6",
2976		       SYSCTL_DESCR("ICMPv6 related settings"),
2977		       NULL, 0, NULL, 0,
2978		       CTL_NET, PF_INET6, IPPROTO_ICMPV6, CTL_EOL);
2979
2980	sysctl_createv(clog, 0, NULL, NULL,
2981		       CTLFLAG_PERMANENT,
2982		       CTLTYPE_STRUCT, "stats",
2983		       SYSCTL_DESCR("ICMPv6 transmission statistics"),
2984		       sysctl_net_inet6_icmp6_stats, 0, NULL, 0,
2985		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2986		       ICMPV6CTL_STATS, CTL_EOL);
2987	sysctl_createv(clog, 0, NULL, NULL,
2988		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2989		       CTLTYPE_INT, "rediraccept",
2990		       SYSCTL_DESCR("Accept and process redirect messages"),
2991		       NULL, 0, &icmp6_rediraccept, 0,
2992		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2993		       ICMPV6CTL_REDIRACCEPT, CTL_EOL);
2994	sysctl_createv(clog, 0, NULL, NULL,
2995		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2996		       CTLTYPE_INT, "redirtimeout",
2997		       SYSCTL_DESCR("Redirect generated route lifetime"),
2998		       sysctl_net_inet6_icmp6_redirtimeout, 0,
2999		       &icmp6_redirtimeout, 0,
3000		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3001		       ICMPV6CTL_REDIRTIMEOUT, CTL_EOL);
3002#if 0 /* obsoleted */
3003	sysctl_createv(clog, 0, NULL, NULL,
3004		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3005		       CTLTYPE_INT, "errratelimit", NULL,
3006		       NULL, 0, &icmp6_errratelimit, 0,
3007		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3008		       ICMPV6CTL_ERRRATELIMIT, CTL_EOL);
3009#endif
3010	sysctl_createv(clog, 0, NULL, NULL,
3011		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3012		       CTLTYPE_INT, "nd6_prune",
3013		       SYSCTL_DESCR("Neighbor discovery prune interval"),
3014		       NULL, 0, &nd6_prune, 0,
3015		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3016		       ICMPV6CTL_ND6_PRUNE, CTL_EOL);
3017	sysctl_createv(clog, 0, NULL, NULL,
3018		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3019		       CTLTYPE_INT, "nd6_delay",
3020		       SYSCTL_DESCR("First probe delay time"),
3021		       NULL, 0, &nd6_nd_domain.nd_delay, 0,
3022		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3023		       ICMPV6CTL_ND6_DELAY, CTL_EOL);
3024	sysctl_createv(clog, 0, NULL, NULL,
3025		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3026		       CTLTYPE_INT, "nd6_mmaxtries",
3027		       SYSCTL_DESCR("Number of multicast discovery attempts"),
3028		       NULL, 0, &nd6_nd_domain.nd_mmaxtries, 0,
3029		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3030		       ICMPV6CTL_ND6_MMAXTRIES, CTL_EOL);
3031	sysctl_createv(clog, 0, NULL, NULL,
3032		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3033		       CTLTYPE_INT, "nd6_umaxtries",
3034		       SYSCTL_DESCR("Number of unicast discovery attempts"),
3035		       NULL, 0, &nd6_nd_domain.nd_umaxtries, 0,
3036		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3037		       ICMPV6CTL_ND6_UMAXTRIES, CTL_EOL);
3038	sysctl_createv(clog, 0, NULL, NULL,
3039		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3040		       CTLTYPE_INT, "nd6_maxnudhint",
3041		       SYSCTL_DESCR("Maximum neighbor unreachable hint count"),
3042		       NULL, 0, &nd6_nd_domain.nd_maxnudhint, 0,
3043		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3044		       ICMPV6CTL_ND6_MAXNUDHINT, CTL_EOL);
3045	sysctl_createv(clog, 0, NULL, NULL,
3046		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3047		       CTLTYPE_INT, "maxqueuelen",
3048		       SYSCTL_DESCR("max packet queue len for a unresolved ND"),
3049		       NULL, 1, &nd6_nd_domain.nd_maxqueuelen, 0,
3050		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3051		       ICMPV6CTL_ND6_MAXQLEN, CTL_EOL);
3052	sysctl_createv(clog, 0, NULL, NULL,
3053		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3054		       CTLTYPE_INT, "nd6_useloopback",
3055		       SYSCTL_DESCR("Use loopback interface for local traffic"),
3056		       NULL, 0, &nd6_useloopback, 0,
3057		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3058		       ICMPV6CTL_ND6_USELOOPBACK, CTL_EOL);
3059#if 0 /* obsoleted */
3060	sysctl_createv(clog, 0, NULL, NULL,
3061		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3062		       CTLTYPE_INT, "nd6_proxyall", NULL,
3063		       NULL, 0, &nd6_proxyall, 0,
3064		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3065		       ICMPV6CTL_ND6_PROXYALL, CTL_EOL);
3066#endif
3067	sysctl_createv(clog, 0, NULL, NULL,
3068		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3069		       CTLTYPE_INT, "nodeinfo",
3070		       SYSCTL_DESCR("Respond to node information requests"),
3071		       NULL, 0, &icmp6_nodeinfo, 0,
3072		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3073		       ICMPV6CTL_NODEINFO, CTL_EOL);
3074	sysctl_createv(clog, 0, NULL, NULL,
3075		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3076		       CTLTYPE_INT, "errppslimit",
3077		       SYSCTL_DESCR("Maximum ICMP errors sent per second"),
3078		       NULL, 0, &icmp6errppslim, 0,
3079		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3080		       ICMPV6CTL_ERRPPSLIMIT, CTL_EOL);
3081	sysctl_createv(clog, 0, NULL, NULL,
3082		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3083		       CTLTYPE_INT, "mtudisc_hiwat",
3084		       SYSCTL_DESCR("Low mark on MTU Discovery route timers"),
3085		       NULL, 0, &icmp6_mtudisc_hiwat, 0,
3086		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3087		       ICMPV6CTL_MTUDISC_HIWAT, CTL_EOL);
3088	sysctl_createv(clog, 0, NULL, NULL,
3089		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3090		       CTLTYPE_INT, "mtudisc_lowat",
3091		       SYSCTL_DESCR("Low mark on MTU Discovery route timers"),
3092		       NULL, 0, &icmp6_mtudisc_lowat, 0,
3093		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3094		       ICMPV6CTL_MTUDISC_LOWAT, CTL_EOL);
3095	sysctl_createv(clog, 0, NULL, NULL,
3096		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3097		       CTLTYPE_INT, "nd6_debug",
3098		       SYSCTL_DESCR("Enable neighbor discovery debug output"),
3099		       NULL, 0, &nd6_debug, 0,
3100		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3101		       ICMPV6CTL_ND6_DEBUG, CTL_EOL);
3102#ifdef COMPAT_90
3103	sysctl_createv(clog, 0, NULL, NULL,
3104		       CTLFLAG_PERMANENT,
3105		       CTLTYPE_STRUCT, "nd6_drlist",
3106		       SYSCTL_DESCR("Default router list"),
3107		       sysctl_net_inet6_icmp6_nd6, 0, NULL, 0,
3108		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3109		       OICMPV6CTL_ND6_DRLIST, CTL_EOL);
3110	sysctl_createv(clog, 0, NULL, NULL,
3111		       CTLFLAG_PERMANENT,
3112		       CTLTYPE_STRUCT, "nd6_prlist",
3113		       SYSCTL_DESCR("Prefix list"),
3114		       sysctl_net_inet6_icmp6_nd6, 0, NULL, 0,
3115		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3116		       OICMPV6CTL_ND6_PRLIST, CTL_EOL);
3117#endif
3118	sysctl_createv(clog, 0, NULL, NULL,
3119		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3120		       CTLTYPE_BOOL, "reflect_pmtu",
3121		       SYSCTL_DESCR("Use path MTU Discovery for icmpv6 reflect"),
3122		       NULL, 0, &icmp6_reflect_pmtu, 0,
3123		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3124		       ICMPV6CTL_REFLECT_PMTU, CTL_EOL);
3125	sysctl_createv(clog, 0, NULL, NULL,
3126		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
3127		       CTLTYPE_BOOL, "dynamic_rt_msg",
3128		       SYSCTL_DESCR("Send routing message for RTF_DYNAMIC"),
3129		       NULL, 0, &icmp6_dynamic_rt_msg, 0,
3130		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
3131		       ICMPV6CTL_DYNAMIC_RT_MSG, CTL_EOL);
3132}
3133
3134void
3135icmp6_statinc(u_int stat)
3136{
3137
3138	KASSERT(stat < ICMP6_NSTATS);
3139	ICMP6_STATINC(stat);
3140}
3141