icmp6.c revision 1.106
1/*	$NetBSD: icmp6.c,v 1.106 2004/03/26 03:35:02 itojun Exp $	*/
2/*	$KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
62 */
63
64#include <sys/cdefs.h>
65__KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.106 2004/03/26 03:35:02 itojun Exp $");
66
67#include "opt_inet.h"
68#include "opt_ipsec.h"
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/malloc.h>
73#include <sys/mbuf.h>
74#include <sys/protosw.h>
75#include <sys/socket.h>
76#include <sys/socketvar.h>
77#include <sys/time.h>
78#include <sys/kernel.h>
79#include <sys/syslog.h>
80#include <sys/domain.h>
81#include <sys/sysctl.h>
82
83#include <net/if.h>
84#include <net/route.h>
85#include <net/if_dl.h>
86#include <net/if_types.h>
87
88#include <netinet/in.h>
89#include <netinet/in_var.h>
90#include <netinet/ip6.h>
91#include <netinet6/ip6_var.h>
92#include <netinet/icmp6.h>
93#include <netinet6/mld6_var.h>
94#include <netinet6/in6_pcb.h>
95#include <netinet6/nd6.h>
96#include <netinet6/in6_ifattach.h>
97#include <netinet6/ip6protosw.h>
98
99#ifdef IPSEC
100#include <netinet6/ipsec.h>
101#include <netkey/key.h>
102#endif
103
104#include "faith.h"
105#if defined(NFAITH) && 0 < NFAITH
106#include <net/if_faith.h>
107#endif
108
109#include <net/net_osdep.h>
110
111extern struct domain inet6domain;
112
113struct icmp6stat icmp6stat;
114
115extern struct inpcbtable raw6cbtable;
116extern int icmp6errppslim;
117static int icmp6errpps_count = 0;
118static struct timeval icmp6errppslim_last;
119extern int icmp6_nodeinfo;
120
121/*
122 * List of callbacks to notify when Path MTU changes are made.
123 */
124struct icmp6_mtudisc_callback {
125	LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
126	void (*mc_func) __P((struct in6_addr *));
127};
128
129LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
130    LIST_HEAD_INITIALIZER(&icmp6_mtudisc_callbacks);
131
132static struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
133extern int pmtu_expire;
134
135/* XXX do these values make any sense? */
136static int icmp6_mtudisc_hiwat = 1280;
137static int icmp6_mtudisc_lowat = 256;
138
139/*
140 * keep track of # of redirect routes.
141 */
142static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;
143
144/* XXX experimental, turned off */
145static int icmp6_redirect_hiwat = -1;
146static int icmp6_redirect_lowat = -1;
147
148static void icmp6_errcount __P((struct icmp6errstat *, int, int));
149static int icmp6_rip6_input __P((struct mbuf **, int));
150static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int));
151static const char *icmp6_redirect_diag __P((struct in6_addr *,
152	struct in6_addr *, struct in6_addr *));
153static struct mbuf *ni6_input __P((struct mbuf *, int));
154static struct mbuf *ni6_nametodns __P((const char *, int, int));
155static int ni6_dnsmatch __P((const char *, int, const char *, int));
156static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *,
157			  struct ifnet **, char *));
158static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
159				struct ifnet *, int));
160static int icmp6_notify_error __P((struct mbuf *, int, int, int));
161static struct rtentry *icmp6_mtudisc_clone __P((struct sockaddr *));
162static void icmp6_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
163static void icmp6_redirect_timeout __P((struct rtentry *, struct rttimer *));
164
/*
 * Initialize the ICMPv6 layer: run the MLD initialization and create the
 * two route timer queues kept by this file.
 */
void
icmp6_init()
{
	mld6_init();
	/* queue for path MTU discovery routes, created from pmtu_expire */
	icmp6_mtudisc_timeout_q = rt_timer_queue_create(pmtu_expire);
	/* queue for redirect routes, created from icmp6_redirtimeout */
	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
}
172
173static void
174icmp6_errcount(stat, type, code)
175	struct icmp6errstat *stat;
176	int type, code;
177{
178	switch (type) {
179	case ICMP6_DST_UNREACH:
180		switch (code) {
181		case ICMP6_DST_UNREACH_NOROUTE:
182			stat->icp6errs_dst_unreach_noroute++;
183			return;
184		case ICMP6_DST_UNREACH_ADMIN:
185			stat->icp6errs_dst_unreach_admin++;
186			return;
187		case ICMP6_DST_UNREACH_BEYONDSCOPE:
188			stat->icp6errs_dst_unreach_beyondscope++;
189			return;
190		case ICMP6_DST_UNREACH_ADDR:
191			stat->icp6errs_dst_unreach_addr++;
192			return;
193		case ICMP6_DST_UNREACH_NOPORT:
194			stat->icp6errs_dst_unreach_noport++;
195			return;
196		}
197		break;
198	case ICMP6_PACKET_TOO_BIG:
199		stat->icp6errs_packet_too_big++;
200		return;
201	case ICMP6_TIME_EXCEEDED:
202		switch (code) {
203		case ICMP6_TIME_EXCEED_TRANSIT:
204			stat->icp6errs_time_exceed_transit++;
205			return;
206		case ICMP6_TIME_EXCEED_REASSEMBLY:
207			stat->icp6errs_time_exceed_reassembly++;
208			return;
209		}
210		break;
211	case ICMP6_PARAM_PROB:
212		switch (code) {
213		case ICMP6_PARAMPROB_HEADER:
214			stat->icp6errs_paramprob_header++;
215			return;
216		case ICMP6_PARAMPROB_NEXTHEADER:
217			stat->icp6errs_paramprob_nextheader++;
218			return;
219		case ICMP6_PARAMPROB_OPTION:
220			stat->icp6errs_paramprob_option++;
221			return;
222		}
223		break;
224	case ND_REDIRECT:
225		stat->icp6errs_redirect++;
226		return;
227	}
228	stat->icp6errs_unknown++;
229}
230
231/*
232 * Register a Path MTU Discovery callback.
233 */
234void
235icmp6_mtudisc_callback_register(func)
236	void (*func) __P((struct in6_addr *));
237{
238	struct icmp6_mtudisc_callback *mc;
239
240	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
241	     mc = LIST_NEXT(mc, mc_list)) {
242		if (mc->mc_func == func)
243			return;
244	}
245
246	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
247	if (mc == NULL)
248		panic("icmp6_mtudisc_callback_register");
249
250	mc->mc_func = func;
251	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
252}
253
254/*
255 * Generate an error packet of type error in response to bad IP6 packet.
256 */
257void
258icmp6_error(m, type, code, param)
259	struct mbuf *m;
260	int type, code, param;
261{
262	struct ip6_hdr *oip6, *nip6;
263	struct icmp6_hdr *icmp6;
264	u_int preplen;
265	int off;
266	int nxt;
267
268	icmp6stat.icp6s_error++;
269
270	/* count per-type-code statistics */
271	icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code);
272
273	if (m->m_flags & M_DECRYPTED) {
274		icmp6stat.icp6s_canterror++;
275		goto freeit;
276	}
277
278	if (m->m_len < sizeof(struct ip6_hdr)) {
279		m = m_pullup(m, sizeof(struct ip6_hdr));
280		if (m == NULL)
281			return;
282	}
283	oip6 = mtod(m, struct ip6_hdr *);
284
285	/*
286	 * If the destination address of the erroneous packet is a multicast
287	 * address, or the packet was sent using link-layer multicast,
288	 * we should basically suppress sending an error (RFC 2463, Section
289	 * 2.4).
290	 * We have two exceptions (the item e.2 in that section):
291	 * - the Pakcet Too Big message can be sent for path MTU discovery.
292	 * - the Parameter Problem Message that can be allowed an icmp6 error
293	 *   in the option type field.  This check has been done in
294	 *   ip6_unknown_opt(), so we can just check the type and code.
295	 */
296	if ((m->m_flags & (M_BCAST|M_MCAST) ||
297	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
298	    (type != ICMP6_PACKET_TOO_BIG &&
299	     (type != ICMP6_PARAM_PROB ||
300	      code != ICMP6_PARAMPROB_OPTION)))
301		goto freeit;
302
303	/*
304	 * RFC 2463, 2.4 (e.5): source address check.
305	 * XXX: the case of anycast source?
306	 */
307	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
308	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
309		goto freeit;
310
311	/*
312	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
313	 * don't do it.
314	 */
315	nxt = -1;
316	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
317	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
318		struct icmp6_hdr *icp;
319
320		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
321			sizeof(*icp));
322		if (icp == NULL) {
323			icmp6stat.icp6s_tooshort++;
324			return;
325		}
326		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
327		    icp->icmp6_type == ND_REDIRECT) {
328			/*
329			 * ICMPv6 error
330			 * Special case: for redirect (which is
331			 * informational) we must not send icmp6 error.
332			 */
333			icmp6stat.icp6s_canterror++;
334			goto freeit;
335		} else {
336			/* ICMPv6 informational - send the error */
337		}
338	}
339#if 0 /* controversial */
340	else if (off >= 0 && nxt == IPPROTO_ESP) {
341		/*
342		 * It could be ICMPv6 error inside ESP.  Take a safer side,
343		 * don't respond.
344		 */
345		icmp6stat.icp6s_canterror++;
346		goto freeit;
347	}
348#endif
349	else {
350		/* non-ICMPv6 - send the error */
351	}
352
353	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
354
355	/* Finally, do rate limitation check. */
356	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
357		icmp6stat.icp6s_toofreq++;
358		goto freeit;
359	}
360
361	/*
362	 * OK, ICMP6 can be generated.
363	 */
364
365	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
366		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
367
368	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
369	M_PREPEND(m, preplen, M_DONTWAIT);
370	if (m && m->m_len < preplen)
371		m = m_pullup(m, preplen);
372	if (m == NULL) {
373		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
374		return;
375	}
376
377	nip6 = mtod(m, struct ip6_hdr *);
378	nip6->ip6_src  = oip6->ip6_src;
379	nip6->ip6_dst  = oip6->ip6_dst;
380
381	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src))
382		oip6->ip6_src.s6_addr16[1] = 0;
383	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst))
384		oip6->ip6_dst.s6_addr16[1] = 0;
385
386	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
387	icmp6->icmp6_type = type;
388	icmp6->icmp6_code = code;
389	icmp6->icmp6_pptr = htonl((u_int32_t)param);
390
391	/*
392	 * icmp6_reflect() is designed to be in the input path.
393	 * icmp6_error() can be called from both input and outut path,
394	 * and if we are in output path rcvif could contain bogus value.
395	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
396	 * information in ip header (nip6).
397	 */
398	m->m_pkthdr.rcvif = NULL;
399
400	icmp6stat.icp6s_outhist[type]++;
401	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
402
403	return;
404
405  freeit:
406	/*
407	 * If we can't tell wheter or not we can generate ICMP6, free it.
408	 */
409	m_freem(m);
410}
411
412/*
413 * Process a received ICMP6 message.
414 */
415int
416icmp6_input(mp, offp, proto)
417	struct mbuf **mp;
418	int *offp, proto;
419{
420	struct mbuf *m = *mp, *n;
421	struct ip6_hdr *ip6, *nip6;
422	struct icmp6_hdr *icmp6, *nicmp6;
423	int off = *offp;
424	int icmp6len = m->m_pkthdr.len - *offp;
425	int code, sum, noff;
426
427	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
428
429	/*
430	 * Locate icmp6 structure in mbuf, and check
431	 * that not corrupted and of at least minimum length
432	 */
433
434	ip6 = mtod(m, struct ip6_hdr *);
435	if (icmp6len < sizeof(struct icmp6_hdr)) {
436		icmp6stat.icp6s_tooshort++;
437		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
438		goto freeit;
439	}
440
441	/*
442	 * calculate the checksum
443	 */
444	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
445	if (icmp6 == NULL) {
446		icmp6stat.icp6s_tooshort++;
447		/* m is invalid */
448		/*icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);*/
449		return IPPROTO_DONE;
450	}
451	KASSERT(IP6_HDR_ALIGNED_P(icmp6));
452	code = icmp6->icmp6_code;
453
454	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
455		nd6log((LOG_ERR,
456		    "ICMP6 checksum error(%d|%x) %s\n",
457		    icmp6->icmp6_type, sum, ip6_sprintf(&ip6->ip6_src)));
458		icmp6stat.icp6s_checksum++;
459		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
460		goto freeit;
461	}
462
463#if defined(NFAITH) && 0 < NFAITH
464	if (faithprefix(&ip6->ip6_dst)) {
465		/*
466		 * Deliver very specific ICMP6 type only.
467		 * This is important to deilver TOOBIG.  Otherwise PMTUD
468		 * will not work.
469		 */
470		switch (icmp6->icmp6_type) {
471		case ICMP6_DST_UNREACH:
472		case ICMP6_PACKET_TOO_BIG:
473		case ICMP6_TIME_EXCEEDED:
474			break;
475		default:
476			goto freeit;
477		}
478	}
479#endif
480
481	icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
482
483	switch (icmp6->icmp6_type) {
484	case ICMP6_DST_UNREACH:
485		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
486		switch (code) {
487		case ICMP6_DST_UNREACH_NOROUTE:
488			code = PRC_UNREACH_NET;
489			break;
490		case ICMP6_DST_UNREACH_ADMIN:
491			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
492			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
493			break;
494		case ICMP6_DST_UNREACH_ADDR:
495			code = PRC_HOSTDEAD;
496			break;
497#ifdef COMPAT_RFC1885
498		case ICMP6_DST_UNREACH_NOTNEIGHBOR:
499			code = PRC_UNREACH_SRCFAIL;
500			break;
501#else
502		case ICMP6_DST_UNREACH_BEYONDSCOPE:
503			/* I mean "source address was incorrect." */
504			code = PRC_UNREACH_NET;
505			break;
506#endif
507		case ICMP6_DST_UNREACH_NOPORT:
508			code = PRC_UNREACH_PORT;
509			break;
510		default:
511			goto badcode;
512		}
513		goto deliver;
514
515	case ICMP6_PACKET_TOO_BIG:
516		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
517		if (code != 0)
518			goto badcode;
519
520		code = PRC_MSGSIZE;
521
522		/*
523		 * Updating the path MTU will be done after examining
524		 * intermediate extension headers.
525		 */
526		goto deliver;
527
528	case ICMP6_TIME_EXCEEDED:
529		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
530		switch (code) {
531		case ICMP6_TIME_EXCEED_TRANSIT:
532			code = PRC_TIMXCEED_INTRANS;
533			break;
534		case ICMP6_TIME_EXCEED_REASSEMBLY:
535			code = PRC_TIMXCEED_REASS;
536			break;
537		default:
538			goto badcode;
539		}
540		goto deliver;
541
542	case ICMP6_PARAM_PROB:
543		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
544		switch (code) {
545		case ICMP6_PARAMPROB_NEXTHEADER:
546			code = PRC_UNREACH_PROTOCOL;
547			break;
548		case ICMP6_PARAMPROB_HEADER:
549		case ICMP6_PARAMPROB_OPTION:
550			code = PRC_PARAMPROB;
551			break;
552		default:
553			goto badcode;
554		}
555		goto deliver;
556
557	case ICMP6_ECHO_REQUEST:
558		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
559		if (code != 0)
560			goto badcode;
561		/*
562		 * Copy mbuf to send to two data paths: userland socket(s),
563		 * and to the querier (echo reply).
564		 * m: a copy for socket, n: a copy for querier
565		 */
566		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
567			/* Give up local */
568			n = m;
569			m = NULL;
570			goto deliverecho;
571		}
572		/*
573		 * If the first mbuf is shared, or the first mbuf is too short,
574		 * copy the first part of the data into a fresh mbuf.
575		 * Otherwise, we will wrongly overwrite both copies.
576		 */
577		if ((n->m_flags & M_EXT) != 0 ||
578		    n->m_len < off + sizeof(struct icmp6_hdr)) {
579			struct mbuf *n0 = n;
580			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
581
582			/*
583			 * Prepare an internal mbuf.  m_pullup() doesn't
584			 * always copy the length we specified.
585			 */
586			if (maxlen >= MCLBYTES) {
587				/* Give up remote */
588				m_freem(n0);
589				break;
590			}
591			MGETHDR(n, M_DONTWAIT, n0->m_type);
592			if (n && maxlen >= MHLEN) {
593				MCLGET(n, M_DONTWAIT);
594				if ((n->m_flags & M_EXT) == 0) {
595					m_free(n);
596					n = NULL;
597				}
598			}
599			if (n == NULL) {
600				/* Give up local */
601				m_freem(n0);
602				n = m;
603				m = NULL;
604				goto deliverecho;
605			}
606			M_COPY_PKTHDR(n, n0);
607			/*
608			 * Copy IPv6 and ICMPv6 only.
609			 */
610			nip6 = mtod(n, struct ip6_hdr *);
611			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
612			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
613			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
614			noff = sizeof(struct ip6_hdr);
615			n->m_len = noff + sizeof(struct icmp6_hdr);
616			/*
617			 * Adjust mbuf.  ip6_plen will be adjusted in
618			 * ip6_output().
619			 * n->m_pkthdr.len == n0->m_pkthdr.len at this point.
620			 */
621			n->m_pkthdr.len += noff + sizeof(struct icmp6_hdr);
622			n->m_pkthdr.len -= (off + sizeof(struct icmp6_hdr));
623			m_adj(n0, off + sizeof(struct icmp6_hdr));
624			n->m_next = n0;
625			n0->m_flags &= ~M_PKTHDR;
626		} else {
627	 deliverecho:
628			nip6 = mtod(n, struct ip6_hdr *);
629			nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off);
630			noff = off;
631		}
632		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
633		nicmp6->icmp6_code = 0;
634		if (n) {
635			icmp6stat.icp6s_reflect++;
636			icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
637			icmp6_reflect(n, noff);
638		}
639		if (!m)
640			goto freeit;
641		break;
642
643	case ICMP6_ECHO_REPLY:
644		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
645		if (code != 0)
646			goto badcode;
647		break;
648
649	case MLD_LISTENER_QUERY:
650	case MLD_LISTENER_REPORT:
651		if (icmp6len < sizeof(struct mld_hdr))
652			goto badlen;
653		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
654			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
655		else
656			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
657		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
658			/* give up local */
659			mld6_input(m, off);
660			m = NULL;
661			goto freeit;
662		}
663		mld6_input(n, off);
664		/* m stays. */
665		break;
666
667	case MLD_LISTENER_DONE:
668		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
669		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
670			goto badlen;
671		break;		/* nothing to be done in kernel */
672
673	case MLD_MTRACE_RESP:
674	case MLD_MTRACE:
675		/* XXX: these two are experimental.  not officially defined. */
676		/* XXX: per-interface statistics? */
677		break;		/* just pass it to applications */
678
679	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
680	    {
681		enum { WRU, FQDN } mode;
682
683		if (!icmp6_nodeinfo)
684			break;
685
686		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
687			mode = WRU;
688		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
689			mode = FQDN;
690		else
691			goto badlen;
692
693		if (mode == FQDN) {
694			n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
695			if (n)
696				n = ni6_input(n, off);
697			/* XXX meaningless if n == NULL */
698			noff = sizeof(struct ip6_hdr);
699		} else {
700			u_char *p;
701			int maxlen, maxhlen;
702
703			if ((icmp6_nodeinfo & 5) != 5)
704				break;
705
706			if (code != 0)
707				goto badcode;
708			maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
709			if (maxlen >= MCLBYTES) {
710				/* Give up remote */
711				break;
712			}
713			MGETHDR(n, M_DONTWAIT, m->m_type);
714			if (n && maxlen > MHLEN) {
715				MCLGET(n, M_DONTWAIT);
716				if ((n->m_flags & M_EXT) == 0) {
717					m_free(n);
718					n = NULL;
719				}
720			}
721			if (n == NULL) {
722				/* Give up remote */
723				break;
724			}
725			n->m_pkthdr.rcvif = NULL;
726			n->m_len = 0;
727			maxhlen = M_TRAILINGSPACE(n) - maxlen;
728			if (maxhlen > hostnamelen)
729				maxhlen = hostnamelen;
730			/*
731			 * Copy IPv6 and ICMPv6 only.
732			 */
733			nip6 = mtod(n, struct ip6_hdr *);
734			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
735			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
736			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
737			p = (u_char *)(nicmp6 + 1);
738			bzero(p, 4);
739			bcopy(hostname, p + 4, maxhlen); /* meaningless TTL */
740			noff = sizeof(struct ip6_hdr);
741			M_COPY_PKTHDR(n, m); /* just for rcvif */
742			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
743				sizeof(struct icmp6_hdr) + 4 + maxhlen;
744			nicmp6->icmp6_type = ICMP6_WRUREPLY;
745			nicmp6->icmp6_code = 0;
746		}
747#undef hostnamelen
748		if (n) {
749			icmp6stat.icp6s_reflect++;
750			icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
751			icmp6_reflect(n, noff);
752		}
753		break;
754	    }
755
756	case ICMP6_WRUREPLY:
757		if (code != 0)
758			goto badcode;
759		break;
760
761	case ND_ROUTER_SOLICIT:
762		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
763		if (code != 0)
764			goto badcode;
765		if (icmp6len < sizeof(struct nd_router_solicit))
766			goto badlen;
767		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
768			/* give up local */
769			nd6_rs_input(m, off, icmp6len);
770			m = NULL;
771			goto freeit;
772		}
773		nd6_rs_input(n, off, icmp6len);
774		/* m stays. */
775		break;
776
777	case ND_ROUTER_ADVERT:
778		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
779		if (code != 0)
780			goto badcode;
781		if (icmp6len < sizeof(struct nd_router_advert))
782			goto badlen;
783		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
784			/* give up local */
785			nd6_ra_input(m, off, icmp6len);
786			m = NULL;
787			goto freeit;
788		}
789		nd6_ra_input(n, off, icmp6len);
790		/* m stays. */
791		break;
792
793	case ND_NEIGHBOR_SOLICIT:
794		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
795		if (code != 0)
796			goto badcode;
797		if (icmp6len < sizeof(struct nd_neighbor_solicit))
798			goto badlen;
799		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
800			/* give up local */
801			nd6_ns_input(m, off, icmp6len);
802			m = NULL;
803			goto freeit;
804		}
805		nd6_ns_input(n, off, icmp6len);
806		/* m stays. */
807		break;
808
809	case ND_NEIGHBOR_ADVERT:
810		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
811		if (code != 0)
812			goto badcode;
813		if (icmp6len < sizeof(struct nd_neighbor_advert))
814			goto badlen;
815		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
816			/* give up local */
817			nd6_na_input(m, off, icmp6len);
818			m = NULL;
819			goto freeit;
820		}
821		nd6_na_input(n, off, icmp6len);
822		/* m stays. */
823		break;
824
825	case ND_REDIRECT:
826		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
827		if (code != 0)
828			goto badcode;
829		if (icmp6len < sizeof(struct nd_redirect))
830			goto badlen;
831		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
832			/* give up local */
833			icmp6_redirect_input(m, off);
834			m = NULL;
835			goto freeit;
836		}
837		icmp6_redirect_input(n, off);
838		/* m stays. */
839		break;
840
841	case ICMP6_ROUTER_RENUMBERING:
842		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
843		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
844			goto badcode;
845		if (icmp6len < sizeof(struct icmp6_router_renum))
846			goto badlen;
847		break;
848
849	default:
850		nd6log((LOG_DEBUG,
851		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
852		    icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src),
853		    ip6_sprintf(&ip6->ip6_dst),
854		    m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
855		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
856			/* ICMPv6 error: MUST deliver it by spec... */
857			code = PRC_NCMDS;
858			/* deliver */
859		} else {
860			/* ICMPv6 informational: MUST not deliver */
861			break;
862		}
863	deliver:
864		if (icmp6_notify_error(m, off, icmp6len, code)) {
865			/* In this case, m should've been freed. */
866			return (IPPROTO_DONE);
867		}
868		break;
869
870	badcode:
871		icmp6stat.icp6s_badcode++;
872		break;
873
874	badlen:
875		icmp6stat.icp6s_badlen++;
876		break;
877	}
878
879	/* deliver the packet to appropriate sockets */
880	icmp6_rip6_input(&m, *offp);
881
882	return IPPROTO_DONE;
883
884 freeit:
885	m_freem(m);
886	return IPPROTO_DONE;
887}
888
889static int
890icmp6_notify_error(m, off, icmp6len, code)
891	struct mbuf *m;
892	int off, icmp6len;
893{
894	struct icmp6_hdr *icmp6;
895	struct ip6_hdr *eip6;
896	u_int32_t notifymtu;
897	struct sockaddr_in6 icmp6src, icmp6dst;
898
899	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
900		icmp6stat.icp6s_tooshort++;
901		goto freeit;
902	}
903	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
904		       sizeof(*icmp6) + sizeof(struct ip6_hdr));
905	if (icmp6 == NULL) {
906		icmp6stat.icp6s_tooshort++;
907		return (-1);
908	}
909	eip6 = (struct ip6_hdr *)(icmp6 + 1);
910
911	/* Detect the upper level protocol */
912	{
913		void (*ctlfunc) __P((int, struct sockaddr *, void *));
914		u_int8_t nxt = eip6->ip6_nxt;
915		int eoff = off + sizeof(struct icmp6_hdr) +
916			sizeof(struct ip6_hdr);
917		struct ip6ctlparam ip6cp;
918		struct in6_addr *finaldst = NULL;
919		int icmp6type = icmp6->icmp6_type;
920		struct ip6_frag *fh;
921		struct ip6_rthdr *rth;
922		struct ip6_rthdr0 *rth0;
923		int rthlen;
924
925		while (1) { /* XXX: should avoid infinite loop explicitly? */
926			struct ip6_ext *eh;
927
928			switch (nxt) {
929			case IPPROTO_HOPOPTS:
930			case IPPROTO_DSTOPTS:
931			case IPPROTO_AH:
932				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
933					       eoff, sizeof(*eh));
934				if (eh == NULL) {
935					icmp6stat.icp6s_tooshort++;
936					return (-1);
937				}
938
939				if (nxt == IPPROTO_AH)
940					eoff += (eh->ip6e_len + 2) << 2;
941				else
942					eoff += (eh->ip6e_len + 1) << 3;
943				nxt = eh->ip6e_nxt;
944				break;
945			case IPPROTO_ROUTING:
946				/*
947				 * When the erroneous packet contains a
948				 * routing header, we should examine the
949				 * header to determine the final destination.
950				 * Otherwise, we can't properly update
951				 * information that depends on the final
952				 * destination (e.g. path MTU).
953				 */
954				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
955					       eoff, sizeof(*rth));
956				if (rth == NULL) {
957					icmp6stat.icp6s_tooshort++;
958					return (-1);
959				}
960				rthlen = (rth->ip6r_len + 1) << 3;
961				/*
962				 * XXX: currently there is no
963				 * officially defined type other
964				 * than type-0.
965				 * Note that if the segment left field
966				 * is 0, all intermediate hops must
967				 * have been passed.
968				 */
969				if (rth->ip6r_segleft &&
970				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
971					int hops;
972
973					IP6_EXTHDR_GET(rth0,
974						       struct ip6_rthdr0 *, m,
975						       eoff, rthlen);
976					if (rth0 == NULL) {
977						icmp6stat.icp6s_tooshort++;
978						return (-1);
979					}
980					/* just ignore a bogus header */
981					if ((rth0->ip6r0_len % 2) == 0 &&
982					    (hops = rth0->ip6r0_len/2))
983						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
984				}
985				eoff += rthlen;
986				nxt = rth->ip6r_nxt;
987				break;
988			case IPPROTO_FRAGMENT:
989				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
990					       eoff, sizeof(*fh));
991				if (fh == NULL) {
992					icmp6stat.icp6s_tooshort++;
993					return (-1);
994				}
995				/*
996				 * Data after a fragment header is meaningless
997				 * unless it is the first fragment, but
998				 * we'll go to the notify label for path MTU
999				 * discovery.
1000				 */
1001				if (fh->ip6f_offlg & IP6F_OFF_MASK)
1002					goto notify;
1003
1004				eoff += sizeof(struct ip6_frag);
1005				nxt = fh->ip6f_nxt;
1006				break;
1007			default:
1008				/*
1009				 * This case includes ESP and the No Next
1010				 * Header.  In such cases going to the notify
1011				 * label does not have any meaning
1012				 * (i.e. ctlfunc will be NULL), but we go
1013				 * anyway since we might have to update
1014				 * path MTU information.
1015				 */
1016				goto notify;
1017			}
1018		}
1019	  notify:
1020		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
1021			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
1022		if (icmp6 == NULL) {
1023			icmp6stat.icp6s_tooshort++;
1024			return (-1);
1025		}
1026
1027		eip6 = (struct ip6_hdr *)(icmp6 + 1);
1028		bzero(&icmp6dst, sizeof(icmp6dst));
1029		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
1030		icmp6dst.sin6_family = AF_INET6;
1031		if (finaldst == NULL)
1032			icmp6dst.sin6_addr = eip6->ip6_dst;
1033		else
1034			icmp6dst.sin6_addr = *finaldst;
1035		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
1036							  &icmp6dst.sin6_addr);
1037#ifndef SCOPEDROUTING
1038		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst,
1039				   NULL, NULL)) {
1040			/* should be impossbile */
1041			nd6log((LOG_DEBUG,
1042			    "icmp6_notify_error: in6_embedscope failed\n"));
1043			goto freeit;
1044		}
1045#endif
1046
1047		/*
1048		 * retrieve parameters from the inner IPv6 header, and convert
1049		 * them into sockaddr structures.
1050		 */
1051		bzero(&icmp6src, sizeof(icmp6src));
1052		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
1053		icmp6src.sin6_family = AF_INET6;
1054		icmp6src.sin6_addr = eip6->ip6_src;
1055		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
1056							  &icmp6src.sin6_addr);
1057#ifndef SCOPEDROUTING
1058		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src,
1059				   NULL, NULL)) {
1060			/* should be impossbile */
1061			nd6log((LOG_DEBUG,
1062			    "icmp6_notify_error: in6_embedscope failed\n"));
1063			goto freeit;
1064		}
1065#endif
1066		icmp6src.sin6_flowinfo =
1067			(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
1068
1069		if (finaldst == NULL)
1070			finaldst = &eip6->ip6_dst;
1071		ip6cp.ip6c_m = m;
1072		ip6cp.ip6c_icmp6 = icmp6;
1073		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
1074		ip6cp.ip6c_off = eoff;
1075		ip6cp.ip6c_finaldst = finaldst;
1076		ip6cp.ip6c_src = &icmp6src;
1077		ip6cp.ip6c_nxt = nxt;
1078
1079		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
1080			notifymtu = ntohl(icmp6->icmp6_mtu);
1081			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
1082		}
1083
1084		ctlfunc = (void (*) __P((int, struct sockaddr *, void *)))
1085			(inet6sw[ip6_protox[nxt]].pr_ctlinput);
1086		if (ctlfunc) {
1087			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
1088					  &ip6cp);
1089		}
1090	}
1091	return (0);
1092
1093  freeit:
1094	m_freem(m);
1095	return (-1);
1096}
1097
1098void
1099icmp6_mtudisc_update(ip6cp, validated)
1100	struct ip6ctlparam *ip6cp;
1101	int validated;
1102{
1103	unsigned long rtcount;
1104	struct icmp6_mtudisc_callback *mc;
1105	struct in6_addr *dst = ip6cp->ip6c_finaldst;
1106	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
1107	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
1108	u_int mtu = ntohl(icmp6->icmp6_mtu);
1109	struct rtentry *rt = NULL;
1110	struct sockaddr_in6 sin6;
1111
1112	/*
1113	 * allow non-validated cases if memory is plenty, to make traffic
1114	 * from non-connected pcb happy.
1115	 */
1116	rtcount = rt_timer_count(icmp6_mtudisc_timeout_q);
1117	if (validated) {
1118		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat)
1119			return;
1120		else if (0 <= icmp6_mtudisc_lowat &&
1121		    rtcount > icmp6_mtudisc_lowat) {
1122			/*
1123			 * XXX nuke a victim, install the new one.
1124			 */
1125		}
1126	} else {
1127		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat)
1128			return;
1129	}
1130
1131	bzero(&sin6, sizeof(sin6));
1132	sin6.sin6_family = PF_INET6;
1133	sin6.sin6_len = sizeof(struct sockaddr_in6);
1134	sin6.sin6_addr = *dst;
1135	/* XXX normally, this won't happen */
1136	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
1137		sin6.sin6_addr.s6_addr16[1] =
1138		    htons(m->m_pkthdr.rcvif->if_index);
1139	}
1140	/* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */
1141	rt = icmp6_mtudisc_clone((struct sockaddr *)&sin6);
1142
1143	if (rt && (rt->rt_flags & RTF_HOST) &&
1144	    !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
1145	    (rt->rt_rmx.rmx_mtu > mtu || rt->rt_rmx.rmx_mtu == 0)) {
1146		if (mtu < IN6_LINKMTU(rt->rt_ifp)) {
1147			icmp6stat.icp6s_pmtuchg++;
1148			rt->rt_rmx.rmx_mtu = mtu;
1149		}
1150	}
1151	if (rt) { /* XXX: need braces to avoid conflict with else in RTFREE. */
1152		RTFREE(rt);
1153	}
1154
1155	/*
1156	 * Notify protocols that the MTU for this destination
1157	 * has changed.
1158	 */
1159	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
1160	     mc = LIST_NEXT(mc, mc_list))
1161		(*mc->mc_func)(&sin6.sin6_addr);
1162}
1163
1164/*
1165 * Process a Node Information Query packet, based on
1166 * draft-ietf-ipngwg-icmp-name-lookups-07.
1167 *
1168 * Spec incompatibilities:
1169 * - IPv6 Subject address handling
1170 * - IPv4 Subject address handling support missing
1171 * - Proxy reply (answer even if it's not for me)
1172 * - joins NI group address at in6_ifattach() time only, does not cope
1173 *   with hostname changes by sethostname(3)
1174 */
1175#ifndef offsetof		/* XXX */
1176#define	offsetof(type, member)	((size_t)(&((type *)0)->member))
1177#endif
1178static struct mbuf *
1179ni6_input(m, off)
1180	struct mbuf *m;
1181	int off;
1182{
1183	struct icmp6_nodeinfo *ni6, *nni6;
1184	struct mbuf *n = NULL;
1185	u_int16_t qtype;
1186	int subjlen;
1187	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
1188	struct ni_reply_fqdn *fqdn;
1189	int addrs;		/* for NI_QTYPE_NODEADDR */
1190	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
1191	struct sockaddr_in6 sin6; /* double meaning; ip6_dst and subjectaddr */
1192	struct ip6_hdr *ip6;
1193	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
1194	char *subj = NULL;
1195
1196	ip6 = mtod(m, struct ip6_hdr *);
1197	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
1198	if (ni6 == NULL) {
1199		/* m is already reclaimed */
1200		return NULL;
1201	}
1202
1203	/*
1204	 * Validate IPv6 destination address.
1205	 *
1206	 * The Responder must discard the Query without further processing
1207	 * unless it is one of the Responder's unicast or anycast addresses, or
1208	 * a link-local scope multicast address which the Responder has joined.
1209	 * [icmp-name-lookups-07, Section 4.]
1210	 */
1211	bzero(&sin6, sizeof(sin6));
1212	sin6.sin6_family = AF_INET6;
1213	sin6.sin6_len = sizeof(struct sockaddr_in6);
1214	bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
1215	/* XXX scopeid */
1216	if (ifa_ifwithaddr((struct sockaddr *)&sin6))
1217		; /* unicast/anycast, fine */
1218	else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr))
1219		; /* link-local multicast, fine */
1220	else
1221		goto bad;
1222
1223	/* validate query Subject field. */
1224	qtype = ntohs(ni6->ni_qtype);
1225	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
1226	switch (qtype) {
1227	case NI_QTYPE_NOOP:
1228	case NI_QTYPE_SUPTYPES:
1229		/* 07 draft */
1230		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
1231			break;
1232		/* FALLTHROUGH */
1233	case NI_QTYPE_FQDN:
1234	case NI_QTYPE_NODEADDR:
1235		switch (ni6->ni_code) {
1236		case ICMP6_NI_SUBJ_IPV6:
1237#if ICMP6_NI_SUBJ_IPV6 != 0
1238		case 0:
1239#endif
1240			/*
1241			 * backward compatibility - try to accept 03 draft
1242			 * format, where no Subject is present.
1243			 */
1244			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
1245			    subjlen == 0) {
1246				oldfqdn++;
1247				break;
1248			}
1249#if ICMP6_NI_SUBJ_IPV6 != 0
1250			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
1251				goto bad;
1252#endif
1253
1254			if (subjlen != sizeof(sin6.sin6_addr))
1255				goto bad;
1256
1257			/*
1258			 * Validate Subject address.
1259			 *
1260			 * Not sure what exactly "address belongs to the node"
1261			 * means in the spec, is it just unicast, or what?
1262			 *
1263			 * At this moment we consider Subject address as
1264			 * "belong to the node" if the Subject address equals
1265			 * to the IPv6 destination address; validation for
1266			 * IPv6 destination address should have done enough
1267			 * check for us.
1268			 *
1269			 * We do not do proxy at this moment.
1270			 */
1271			/* m_pulldown instead of copy? */
1272			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
1273			    subjlen, (caddr_t)&sin6.sin6_addr);
1274			/* XXX kame scope hack */
1275			if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) {
1276				if ((m->m_flags & M_PKTHDR) != 0 &&
1277				    m->m_pkthdr.rcvif) {
1278					sin6.sin6_addr.s6_addr16[1] =
1279					    htons(m->m_pkthdr.rcvif->if_index);
1280				}
1281			}
1282			subj = (char *)&sin6;
1283			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &sin6.sin6_addr))
1284				break;
1285
1286			/*
1287			 * XXX if we are to allow other cases, we should really
1288			 * be careful about scope here.
1289			 * basically, we should disallow queries toward IPv6
1290			 * destination X with subject Y, if scope(X) > scope(Y).
1291			 * if we allow scope(X) > scope(Y), it will result in
1292			 * information leakage across scope boundary.
1293			 */
1294			goto bad;
1295
1296		case ICMP6_NI_SUBJ_FQDN:
1297			/*
1298			 * Validate Subject name with gethostname(3).
1299			 *
1300			 * The behavior may need some debate, since:
1301			 * - we are not sure if the node has FQDN as
1302			 *   hostname (returned by gethostname(3)).
1303			 * - the code does wildcard match for truncated names.
1304			 *   however, we are not sure if we want to perform
1305			 *   wildcard match, if gethostname(3) side has
1306			 *   truncated hostname.
1307			 */
1308			n = ni6_nametodns(hostname, hostnamelen, 0);
1309			if (!n || n->m_next || n->m_len == 0)
1310				goto bad;
1311			IP6_EXTHDR_GET(subj, char *, m,
1312			    off + sizeof(struct icmp6_nodeinfo), subjlen);
1313			if (subj == NULL)
1314				goto bad;
1315			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
1316					n->m_len)) {
1317				goto bad;
1318			}
1319			m_freem(n);
1320			n = NULL;
1321			break;
1322
1323		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
1324		default:
1325			goto bad;
1326		}
1327		break;
1328	}
1329
1330	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
1331	switch (qtype) {
1332	case NI_QTYPE_FQDN:
1333		if ((icmp6_nodeinfo & 1) == 0)
1334			goto bad;
1335		break;
1336	case NI_QTYPE_NODEADDR:
1337		if ((icmp6_nodeinfo & 2) == 0)
1338			goto bad;
1339		break;
1340	}
1341
1342	/* guess reply length */
1343	switch (qtype) {
1344	case NI_QTYPE_NOOP:
1345		break;		/* no reply data */
1346	case NI_QTYPE_SUPTYPES:
1347		replylen += sizeof(u_int32_t);
1348		break;
1349	case NI_QTYPE_FQDN:
1350		/* XXX will append an mbuf */
1351		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1352		break;
1353	case NI_QTYPE_NODEADDR:
1354		addrs = ni6_addrs(ni6, m, &ifp, subj);
1355		if ((replylen += addrs * (sizeof(struct in6_addr) +
1356					  sizeof(u_int32_t))) > MCLBYTES)
1357			replylen = MCLBYTES; /* XXX: will truncate pkt later */
1358		break;
1359	default:
1360		/*
1361		 * XXX: We must return a reply with the ICMP6 code
1362		 * `unknown Qtype' in this case.  However we regard the case
1363		 * as an FQDN query for backward compatibility.
1364		 * Older versions set a random value to this field,
1365		 * so it rarely varies in the defined qtypes.
1366		 * But the mechanism is not reliable...
1367		 * maybe we should obsolete older versions.
1368		 */
1369		qtype = NI_QTYPE_FQDN;
1370		/* XXX will append an mbuf */
1371		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1372		oldfqdn++;
1373		break;
1374	}
1375
1376	/* allocate an mbuf to reply. */
1377	MGETHDR(n, M_DONTWAIT, m->m_type);
1378	if (n == NULL) {
1379		m_freem(m);
1380		return (NULL);
1381	}
1382	M_COPY_PKTHDR(n, m); /* just for rcvif */
1383	if (replylen > MHLEN) {
1384		if (replylen > MCLBYTES) {
1385			/*
1386			 * XXX: should we try to allocate more? But MCLBYTES
1387			 * is probably much larger than IPV6_MMTU...
1388			 */
1389			goto bad;
1390		}
1391		MCLGET(n, M_DONTWAIT);
1392		if ((n->m_flags & M_EXT) == 0) {
1393			goto bad;
1394		}
1395	}
1396	n->m_pkthdr.len = n->m_len = replylen;
1397
1398	/* copy mbuf header and IPv6 + Node Information base headers */
1399	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
1400	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
1401	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
1402
1403	/* qtype dependent procedure */
1404	switch (qtype) {
1405	case NI_QTYPE_NOOP:
1406		nni6->ni_code = ICMP6_NI_SUCCESS;
1407		nni6->ni_flags = 0;
1408		break;
1409	case NI_QTYPE_SUPTYPES:
1410	{
1411		u_int32_t v;
1412		nni6->ni_code = ICMP6_NI_SUCCESS;
1413		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
1414		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
1415		v = (u_int32_t)htonl(0x0000000f);
1416		bcopy(&v, nni6 + 1, sizeof(u_int32_t));
1417		break;
1418	}
1419	case NI_QTYPE_FQDN:
1420		nni6->ni_code = ICMP6_NI_SUCCESS;
1421		fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
1422						sizeof(struct ip6_hdr) +
1423						sizeof(struct icmp6_nodeinfo));
1424		nni6->ni_flags = 0; /* XXX: meaningless TTL */
1425		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
1426		/*
1427		 * XXX do we really have FQDN in variable "hostname"?
1428		 */
1429		n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn);
1430		if (n->m_next == NULL)
1431			goto bad;
1432		/* XXX we assume that n->m_next is not a chain */
1433		if (n->m_next->m_next != NULL)
1434			goto bad;
1435		n->m_pkthdr.len += n->m_next->m_len;
1436		break;
1437	case NI_QTYPE_NODEADDR:
1438	{
1439		int lenlim, copied;
1440
1441		nni6->ni_code = ICMP6_NI_SUCCESS;
1442		n->m_pkthdr.len = n->m_len =
1443		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
1444		lenlim = M_TRAILINGSPACE(n);
1445		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
1446		/* XXX: reset mbuf length */
1447		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
1448			sizeof(struct icmp6_nodeinfo) + copied;
1449		break;
1450	}
1451	default:
1452		break;		/* XXX impossible! */
1453	}
1454
1455	nni6->ni_type = ICMP6_NI_REPLY;
1456	m_freem(m);
1457	return (n);
1458
1459  bad:
1460	m_freem(m);
1461	if (n)
1462		m_freem(n);
1463	return (NULL);
1464}
1465#undef hostnamelen
1466
/*
 * Minimal ASCII-only character classification for the DNS name helpers.
 * Each macro may evaluate its argument more than once, so arguments must
 * be free of side effects.
 */
#define isupper(x) ((x) >= 'A' && (x) <= 'Z')
#define isalpha(x) (isupper(x) || ((x) >= 'a' && (x) <= 'z'))
#define isalnum(x) (isalpha(x) || ((x) >= '0' && (x) <= '9'))
#define tolower(x) (isupper(x) ? (x) + ('a' - 'A') : (x))
1471
1472/*
1473 * make a mbuf with DNS-encoded string.  no compression support.
1474 *
1475 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
1476 * treated as truncated name (two \0 at the end).  this is a wild guess.
1477 */
1478static struct mbuf *
1479ni6_nametodns(name, namelen, old)
1480	const char *name;
1481	int namelen;
1482	int old;	/* return pascal string if non-zero */
1483{
1484	struct mbuf *m;
1485	char *cp, *ep;
1486	const char *p, *q;
1487	int i, len, nterm;
1488
1489	if (old)
1490		len = namelen + 1;
1491	else
1492		len = MCLBYTES;
1493
1494	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
1495	MGET(m, M_DONTWAIT, MT_DATA);
1496	if (m && len > MLEN) {
1497		MCLGET(m, M_DONTWAIT);
1498		if ((m->m_flags & M_EXT) == 0)
1499			goto fail;
1500	}
1501	if (!m)
1502		goto fail;
1503	m->m_next = NULL;
1504
1505	if (old) {
1506		m->m_len = len;
1507		*mtod(m, char *) = namelen;
1508		bcopy(name, mtod(m, char *) + 1, namelen);
1509		return m;
1510	} else {
1511		m->m_len = 0;
1512		cp = mtod(m, char *);
1513		ep = mtod(m, char *) + M_TRAILINGSPACE(m);
1514
1515		/* if not certain about my name, return empty buffer */
1516		if (namelen == 0)
1517			return m;
1518
1519		/*
1520		 * guess if it looks like shortened hostname, or FQDN.
1521		 * shortened hostname needs two trailing "\0".
1522		 */
1523		i = 0;
1524		for (p = name; p < name + namelen; p++) {
1525			if (*p && *p == '.')
1526				i++;
1527		}
1528		if (i < 2)
1529			nterm = 2;
1530		else
1531			nterm = 1;
1532
1533		p = name;
1534		while (cp < ep && p < name + namelen) {
1535			i = 0;
1536			for (q = p; q < name + namelen && *q && *q != '.'; q++)
1537				i++;
1538			/* result does not fit into mbuf */
1539			if (cp + i + 1 >= ep)
1540				goto fail;
1541			/*
1542			 * DNS label length restriction, RFC1035 page 8.
1543			 * "i == 0" case is included here to avoid returning
1544			 * 0-length label on "foo..bar".
1545			 */
1546			if (i <= 0 || i >= 64)
1547				goto fail;
1548			*cp++ = i;
1549			if (!isalpha(p[0]) || !isalnum(p[i - 1]))
1550				goto fail;
1551			while (i > 0) {
1552				if (!isalnum(*p) && *p != '-')
1553					goto fail;
1554				if (isupper(*p)) {
1555					*cp++ = tolower(*p);
1556					p++;
1557				} else
1558					*cp++ = *p++;
1559				i--;
1560			}
1561			p = q;
1562			if (p < name + namelen && *p == '.')
1563				p++;
1564		}
1565		/* termination */
1566		if (cp + nterm >= ep)
1567			goto fail;
1568		while (nterm-- > 0)
1569			*cp++ = '\0';
1570		m->m_len = cp - mtod(m, char *);
1571		return m;
1572	}
1573
1574	panic("should not reach here");
1575	/* NOTREACHED */
1576
1577 fail:
1578	if (m)
1579		m_freem(m);
1580	return NULL;
1581}
1582
/*
 * check if two DNS-encoded string matches.  takes care of truncated
 * form (with \0\0 at the end).  no compression support.
 *
 * a, alen - first name in DNS wire format and its length in bytes.
 * b, blen - second name in DNS wire format and its length in bytes.
 *
 * Returns 1 when the names match and 0 otherwise.  Both names must end
 * with a \0 byte.  A name ending in \0\0 is a truncated name and matches
 * any name it is a label-wise prefix of.  Label contents are compared
 * without regard to ASCII letter case, as DNS name comparison requires;
 * length bytes and all other bytes must be identical.
 */
static int
ni6_dnsmatch(a, alen, b, blen)
	const char *a;
	int alen;
	const char *b;
	int blen;
{
	const char *a0, *b0;
	unsigned char ca, cb;
	int i, l;

	/* simplest case - need validation? */
	if (alen == blen && bcmp(a, b, alen) == 0)
		return 1;

	a0 = a;
	b0 = b;

	/* termination is mandatory */
	if (alen < 2 || blen < 2)
		return 0;
	if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
		return 0;
	/* from here on the lengths exclude the final terminator */
	alen--;
	blen--;

	while (a - a0 < alen && b - b0 < blen) {
		/* length bytes with the high bit set are never valid here */
		if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
			return 0;
		/* we don't support compression yet */
		if (a[0] >= 64 || b[0] >= 64)
			return 0;

		/* truncated case */
		if (a[0] == 0 && a - a0 == alen - 1)
			return 1;
		if (b[0] == 0 && b - b0 == blen - 1)
			return 1;
		/* an empty label anywhere else is malformed */
		if (a[0] == 0 || b[0] == 0)
			return 0;

		if (a[0] != b[0])
			return 0;
		l = a[0];
		/* the label must lie completely inside both buffers */
		if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
			return 0;

		/* compare the label bytes, folding ASCII upper case */
		for (i = 1; i <= l; i++) {
			ca = (unsigned char)a[i];
			cb = (unsigned char)b[i];
			if (ca >= 'A' && ca <= 'Z')
				ca += 'a' - 'A';
			if (cb >= 'A' && cb <= 'Z')
				cb += 'a' - 'A';
			if (ca != cb)
				return 0;
		}

		a += 1 + l;
		b += 1 + l;
	}

	/* a full match requires both names to be used up together */
	if (a - a0 == alen && b - b0 == blen)
		return 1;
	else
		return 0;
}
1648
1649/*
1650 * calculate the number of addresses to be returned in the node info reply.
1651 */
1652static int
1653ni6_addrs(ni6, m, ifpp, subj)
1654	struct icmp6_nodeinfo *ni6;
1655	struct mbuf *m;
1656	struct ifnet **ifpp;
1657	char *subj;
1658{
1659	struct ifnet *ifp;
1660	struct in6_ifaddr *ifa6;
1661	struct ifaddr *ifa;
1662	struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */
1663	int addrs = 0, addrsofif, iffound = 0;
1664	int niflags = ni6->ni_flags;
1665
1666	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
1667		switch (ni6->ni_code) {
1668		case ICMP6_NI_SUBJ_IPV6:
1669			if (subj == NULL) /* must be impossible... */
1670				return (0);
1671			subj_ip6 = (struct sockaddr_in6 *)subj;
1672			break;
1673		default:
1674			/*
1675			 * XXX: we only support IPv6 subject address for
1676			 * this Qtype.
1677			 */
1678			return (0);
1679		}
1680	}
1681
1682	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
1683	{
1684		addrsofif = 0;
1685		for (ifa = ifp->if_addrlist.tqh_first; ifa;
1686		     ifa = ifa->ifa_list.tqe_next)
1687		{
1688			if (ifa->ifa_addr->sa_family != AF_INET6)
1689				continue;
1690			ifa6 = (struct in6_ifaddr *)ifa;
1691
1692			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
1693			    IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr,
1694					       &ifa6->ia_addr.sin6_addr))
1695				iffound = 1;
1696
1697			/*
1698			 * IPv4-mapped addresses can only be returned by a
1699			 * Node Information proxy, since they represent
1700			 * addresses of IPv4-only nodes, which perforce do
1701			 * not implement this protocol.
1702			 * [icmp-name-lookups-07, Section 5.4]
1703			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
1704			 * this function at this moment.
1705			 */
1706
1707			/* What do we have to do about ::1? */
1708			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1709			case IPV6_ADDR_SCOPE_LINKLOCAL:
1710				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1711					continue;
1712				break;
1713			case IPV6_ADDR_SCOPE_SITELOCAL:
1714				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1715					continue;
1716				break;
1717			case IPV6_ADDR_SCOPE_GLOBAL:
1718				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1719					continue;
1720				break;
1721			default:
1722				continue;
1723			}
1724
1725			/*
1726			 * check if anycast is okay.
1727			 * XXX: just experimental.  not in the spec.
1728			 */
1729			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1730			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1731				continue; /* we need only unicast addresses */
1732
1733			addrsofif++; /* count the address */
1734		}
1735		if (iffound) {
1736			*ifpp = ifp;
1737			return (addrsofif);
1738		}
1739
1740		addrs += addrsofif;
1741	}
1742
1743	return (addrs);
1744}
1745
1746static int
1747ni6_store_addrs(ni6, nni6, ifp0, resid)
1748	struct icmp6_nodeinfo *ni6, *nni6;
1749	struct ifnet *ifp0;
1750	int resid;
1751{
1752	struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&ifnet);
1753	struct in6_ifaddr *ifa6;
1754	struct ifaddr *ifa;
1755	struct ifnet *ifp_dep = NULL;
1756	int copied = 0, allow_deprecated = 0;
1757	u_char *cp = (u_char *)(nni6 + 1);
1758	int niflags = ni6->ni_flags;
1759	u_int32_t ltime;
1760
1761	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
1762		return (0);	/* needless to copy */
1763
1764  again:
1765
1766	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list))
1767	{
1768		for (ifa = ifp->if_addrlist.tqh_first; ifa;
1769		     ifa = ifa->ifa_list.tqe_next)
1770		{
1771			if (ifa->ifa_addr->sa_family != AF_INET6)
1772				continue;
1773			ifa6 = (struct in6_ifaddr *)ifa;
1774
1775			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
1776			    allow_deprecated == 0) {
1777				/*
1778				 * prefererred address should be put before
1779				 * deprecated addresses.
1780				 */
1781
1782				/* record the interface for later search */
1783				if (ifp_dep == NULL)
1784					ifp_dep = ifp;
1785
1786				continue;
1787			}
1788			else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
1789				 allow_deprecated != 0)
1790				continue; /* we now collect deprecated addrs */
1791
1792			/* What do we have to do about ::1? */
1793			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1794			case IPV6_ADDR_SCOPE_LINKLOCAL:
1795				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1796					continue;
1797				break;
1798			case IPV6_ADDR_SCOPE_SITELOCAL:
1799				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1800					continue;
1801				break;
1802			case IPV6_ADDR_SCOPE_GLOBAL:
1803				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1804					continue;
1805				break;
1806			default:
1807				continue;
1808			}
1809
1810			/*
1811			 * check if anycast is okay.
1812			 * XXX: just experimental.  not in the spec.
1813			 */
1814			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1815			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1816				continue;
1817
1818			/* now we can copy the address */
1819			if (resid < sizeof(struct in6_addr) +
1820			    sizeof(u_int32_t)) {
1821				/*
1822				 * We give up much more copy.
1823				 * Set the truncate flag and return.
1824				 */
1825				nni6->ni_flags |=
1826					NI_NODEADDR_FLAG_TRUNCATE;
1827				return (copied);
1828			}
1829
1830			/*
1831			 * Set the TTL of the address.
1832			 * The TTL value should be one of the following
1833			 * according to the specification:
1834			 *
1835			 * 1. The remaining lifetime of a DHCP lease on the
1836			 *    address, or
1837			 * 2. The remaining Valid Lifetime of a prefix from
1838			 *    which the address was derived through Stateless
1839			 *    Autoconfiguration.
1840			 *
1841			 * Note that we currently do not support stateful
1842			 * address configuration by DHCPv6, so the former
1843			 * case can't happen.
1844			 *
1845			 * TTL must be 2^31 > TTL >= 0.
1846			 */
1847			if (ifa6->ia6_lifetime.ia6t_expire == 0)
1848				ltime = ND6_INFINITE_LIFETIME;
1849			else {
1850				if (ifa6->ia6_lifetime.ia6t_expire >
1851				    time.tv_sec)
1852					ltime = ifa6->ia6_lifetime.ia6t_expire - time.tv_sec;
1853				else
1854					ltime = 0;
1855			}
1856			if (ltime > 0x7fffffff)
1857				ltime = 0x7fffffff;
1858			ltime = htonl(ltime);
1859
1860			bcopy(&ltime, cp, sizeof(u_int32_t));
1861			cp += sizeof(u_int32_t);
1862
1863			/* copy the address itself */
1864			bcopy(&ifa6->ia_addr.sin6_addr, cp,
1865			      sizeof(struct in6_addr));
1866			/* XXX: KAME link-local hack; remove ifindex */
1867			if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
1868				((struct in6_addr *)cp)->s6_addr16[1] = 0;
1869			cp += sizeof(struct in6_addr);
1870
1871			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
1872			copied += (sizeof(struct in6_addr) +
1873				   sizeof(u_int32_t));
1874		}
1875		if (ifp0)	/* we need search only on the specified IF */
1876			break;
1877	}
1878
1879	if (allow_deprecated == 0 && ifp_dep != NULL) {
1880		ifp = ifp_dep;
1881		allow_deprecated = 1;
1882
1883		goto again;
1884	}
1885
1886	return (copied);
1887}
1888
1889/*
1890 * XXX almost dup'ed code with rip6_input.
1891 */
1892static int
1893icmp6_rip6_input(mp, off)
1894	struct	mbuf **mp;
1895	int	off;
1896{
1897	struct mbuf *m = *mp;
1898	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1899	struct inpcb_hdr *inph;
1900	struct in6pcb *in6p;
1901	struct in6pcb *last = NULL;
1902	struct sockaddr_in6 rip6src;
1903	struct icmp6_hdr *icmp6;
1904	struct mbuf *opts = NULL;
1905
1906	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
1907	if (icmp6 == NULL) {
1908		/* m is already reclaimed */
1909		return IPPROTO_DONE;
1910	}
1911
1912	bzero(&rip6src, sizeof(rip6src));
1913	rip6src.sin6_len = sizeof(struct sockaddr_in6);
1914	rip6src.sin6_family = AF_INET6;
1915	/* KAME hack: recover scopeid */
1916	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);
1917
1918	CIRCLEQ_FOREACH(inph, &raw6cbtable.inpt_queue, inph_queue) {
1919		in6p = (struct in6pcb *)inph;
1920		if (in6p->in6p_af != AF_INET6)
1921			continue;
1922		if (in6p->in6p_ip6.ip6_nxt != IPPROTO_ICMPV6)
1923			continue;
1924		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
1925		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
1926			continue;
1927		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
1928		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
1929			continue;
1930		if (in6p->in6p_icmp6filt
1931		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
1932				 in6p->in6p_icmp6filt))
1933			continue;
1934		if (last) {
1935			struct	mbuf *n;
1936			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
1937				if (last->in6p_flags & IN6P_CONTROLOPTS)
1938					ip6_savecontrol(last, &opts, ip6, n);
1939				/* strip intermediate headers */
1940				m_adj(n, off);
1941				if (sbappendaddr(&last->in6p_socket->so_rcv,
1942						 (struct sockaddr *)&rip6src,
1943						 n, opts) == 0) {
1944					/* should notify about lost packet */
1945					m_freem(n);
1946					if (opts)
1947						m_freem(opts);
1948				} else
1949					sorwakeup(last->in6p_socket);
1950				opts = NULL;
1951			}
1952		}
1953		last = in6p;
1954	}
1955	if (last) {
1956		if (last->in6p_flags & IN6P_CONTROLOPTS)
1957			ip6_savecontrol(last, &opts, ip6, m);
1958		/* strip intermediate headers */
1959		m_adj(m, off);
1960		if (sbappendaddr(&last->in6p_socket->so_rcv,
1961				(struct sockaddr *)&rip6src, m, opts) == 0) {
1962			m_freem(m);
1963			if (opts)
1964				m_freem(opts);
1965		} else
1966			sorwakeup(last->in6p_socket);
1967	} else {
1968		m_freem(m);
1969		ip6stat.ip6s_delivered--;
1970	}
1971	return IPPROTO_DONE;
1972}
1973
1974/*
1975 * Reflect the ip6 packet back to the source.
1976 * OFF points to the icmp6 header, counted from the top of the mbuf.
1977 *
1978 * Note: RFC 1885 required that an echo reply should be truncated if it
1979 * did not fit in with (return) path MTU, and KAME code supported the
1980 * behavior.  However, as a clarification after the RFC, this limitation
1981 * was removed in a revised version of the spec, RFC 2463.  We had kept the
1982 * old behavior, with a (non-default) ifdef block, while the new version of
1983 * the spec was an internet-draft status, and even after the new RFC was
1984 * published.  But it would rather make sense to clean the obsoleted part
1985 * up, and to make the code simpler at this stage.
1986 */
1987void
1988icmp6_reflect(m, off)
1989	struct	mbuf *m;
1990	size_t off;
1991{
1992	struct ip6_hdr *ip6;
1993	struct icmp6_hdr *icmp6;
1994	struct in6_ifaddr *ia;
1995	struct in6_addr t, *src = 0;
1996	int plen;
1997	int type, code;
1998	struct ifnet *outif = NULL;
1999	struct sockaddr_in6 sa6_src, sa6_dst;
2000
2001	/* too short to reflect */
2002	if (off < sizeof(struct ip6_hdr)) {
2003		nd6log((LOG_DEBUG,
2004		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
2005		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
2006		    __FILE__, __LINE__));
2007		goto bad;
2008	}
2009
2010	/*
2011	 * If there are extra headers between IPv6 and ICMPv6, strip
2012	 * off that header first.
2013	 */
2014#ifdef DIAGNOSTIC
2015	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
2016		panic("assumption failed in icmp6_reflect");
2017#endif
2018	if (off > sizeof(struct ip6_hdr)) {
2019		size_t l;
2020		struct ip6_hdr nip6;
2021
2022		l = off - sizeof(struct ip6_hdr);
2023		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
2024		m_adj(m, l);
2025		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2026		if (m->m_len < l) {
2027			if ((m = m_pullup(m, l)) == NULL)
2028				return;
2029		}
2030		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
2031	} else /* off == sizeof(struct ip6_hdr) */ {
2032		size_t l;
2033		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2034		if (m->m_len < l) {
2035			if ((m = m_pullup(m, l)) == NULL)
2036				return;
2037		}
2038	}
2039	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
2040	ip6 = mtod(m, struct ip6_hdr *);
2041	ip6->ip6_nxt = IPPROTO_ICMPV6;
2042	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
2043	type = icmp6->icmp6_type; /* keep type for statistics */
2044	code = icmp6->icmp6_code; /* ditto. */
2045
2046	t = ip6->ip6_dst;
2047	/*
2048	 * ip6_input() drops a packet if its src is multicast.
2049	 * So, the src is never multicast.
2050	 */
2051	ip6->ip6_dst = ip6->ip6_src;
2052
2053	/*
2054	 * XXX: make sure to embed scope zone information, using
2055	 * already embedded IDs or the received interface (if any).
2056	 * Note that rcvif may be NULL.
2057	 * TODO: scoped routing case (XXX).
2058	 */
2059	bzero(&sa6_src, sizeof(sa6_src));
2060	sa6_src.sin6_family = AF_INET6;
2061	sa6_src.sin6_len = sizeof(sa6_src);
2062	sa6_src.sin6_addr = ip6->ip6_dst;
2063	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
2064	in6_embedscope(&sa6_src.sin6_addr, &sa6_src, NULL, NULL);
2065	ip6->ip6_dst = sa6_src.sin6_addr;
2066
2067	bzero(&sa6_dst, sizeof(sa6_dst));
2068	sa6_dst.sin6_family = AF_INET6;
2069	sa6_dst.sin6_len = sizeof(sa6_dst);
2070	sa6_dst.sin6_addr = t;
2071	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
2072	in6_embedscope(&t, &sa6_dst, NULL, NULL);
2073
2074	/*
2075	 * If the incoming packet was addressed directly to us (i.e. unicast),
2076	 * use dst as the src for the reply.
2077	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
2078	 * (for example) when we encounter an error while forwarding procedure
2079	 * destined to a duplicated address of ours.
2080	 */
2081	for (ia = in6_ifaddr; ia; ia = ia->ia_next)
2082		if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) &&
2083		    (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
2084			src = &t;
2085			break;
2086		}
2087	if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
2088		/*
2089		 * This is the case if the dst is our link-local address
2090		 * and the sender is also ourselves.
2091		 */
2092		src = &t;
2093	}
2094
2095	if (src == 0) {
2096		int e;
2097		struct route_in6 ro;
2098
2099		/*
2100		 * This case matches to multicasts, our anycast, or unicasts
2101		 * that we do not own.  Select a source address based on the
2102		 * source address of the erroneous packet.
2103		 */
2104		bzero(&ro, sizeof(ro));
2105		src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e);
2106		if (ro.ro_rt) { /* XXX: see comments in icmp6_mtudisc_update */
2107			RTFREE(ro.ro_rt); /* XXX: we could use this */
2108		}
2109		if (src == NULL) {
2110			nd6log((LOG_DEBUG,
2111			    "icmp6_reflect: source can't be determined: "
2112			    "dst=%s, error=%d\n",
2113			    ip6_sprintf(&sa6_src.sin6_addr), e));
2114			goto bad;
2115		}
2116	}
2117
2118	ip6->ip6_src = *src;
2119
2120	ip6->ip6_flow = 0;
2121	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2122	ip6->ip6_vfc |= IPV6_VERSION;
2123	ip6->ip6_nxt = IPPROTO_ICMPV6;
2124	if (m->m_pkthdr.rcvif) {
2125		/* XXX: This may not be the outgoing interface */
2126		ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
2127	} else
2128		ip6->ip6_hlim = ip6_defhlim;
2129
2130	icmp6->icmp6_cksum = 0;
2131	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
2132					sizeof(struct ip6_hdr), plen);
2133
2134	/*
2135	 * XXX option handling
2136	 */
2137
2138	m->m_flags &= ~(M_BCAST|M_MCAST);
2139
2140	/*
2141	 * To avoid a "too big" situation at an intermediate router
2142	 * and the path MTU discovery process, specify the IPV6_MINMTU flag.
2143	 * Note that only echo and node information replies are affected,
2144	 * since the length of ICMP6 errors is limited to the minimum MTU.
2145	 */
2146	if (ip6_output(m, NULL, NULL, IPV6_MINMTU,
2147		(struct ip6_moptions *)NULL, (struct socket *)NULL, &outif) != 0
2148	    && outif)
2149		icmp6_ifstat_inc(outif, ifs6_out_error);
2150
2151	if (outif)
2152		icmp6_ifoutstat_inc(outif, type, code);
2153
2154	return;
2155
2156 bad:
2157	m_freem(m);
2158	return;
2159}
2160
/*
 * Fast timeout hook for ICMPv6.  The only work done here is to hand
 * control to the MLD fast timeout routine.
 */
void
icmp6_fasttimo()
{
	mld6_fasttimeo();
}
2167
/*
 * Render the source, destination and target addresses of a redirect as
 * one printable string for log messages.
 *
 * The string is built in a function-local static buffer, so the returned
 * pointer is only good until the next call of this function.
 */
static const char *
icmp6_redirect_diag(src6, dst6, tgt6)
	struct in6_addr *src6;
	struct in6_addr *dst6;
	struct in6_addr *tgt6;
{
	static char diag[1024];

	snprintf(diag, sizeof(diag), "(src=%s dst=%s tgt=%s)",
	    ip6_sprintf(src6), ip6_sprintf(dst6), ip6_sprintf(tgt6));

	return (diag);
}
2179
2180void
2181icmp6_redirect_input(m, off)
2182	struct mbuf *m;
2183	int off;
2184{
2185	struct ifnet *ifp = m->m_pkthdr.rcvif;
2186	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
2187	struct nd_redirect *nd_rd;
2188	int icmp6len = ntohs(ip6->ip6_plen);
2189	char *lladdr = NULL;
2190	int lladdrlen = 0;
2191	struct rtentry *rt = NULL;
2192	int is_router;
2193	int is_onlink;
2194	struct in6_addr src6 = ip6->ip6_src;
2195	struct in6_addr redtgt6;
2196	struct in6_addr reddst6;
2197	union nd_opts ndopts;
2198
2199	if (!ifp)
2200		return;
2201
2202	/* XXX if we are router, we don't update route by icmp6 redirect */
2203	if (ip6_forwarding)
2204		goto freeit;
2205	if (!icmp6_rediraccept)
2206		goto freeit;
2207
2208	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
2209	if (nd_rd == NULL) {
2210		icmp6stat.icp6s_tooshort++;
2211		return;
2212	}
2213	redtgt6 = nd_rd->nd_rd_target;
2214	reddst6 = nd_rd->nd_rd_dst;
2215
2216	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2217		redtgt6.s6_addr16[1] = htons(ifp->if_index);
2218	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
2219		reddst6.s6_addr16[1] = htons(ifp->if_index);
2220
2221	/* validation */
2222	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
2223		nd6log((LOG_ERR,
2224			"ICMP6 redirect sent from %s rejected; "
2225			"must be from linklocal\n", ip6_sprintf(&src6)));
2226		goto bad;
2227	}
2228	if (ip6->ip6_hlim != 255) {
2229		nd6log((LOG_ERR,
2230			"ICMP6 redirect sent from %s rejected; "
2231			"hlim=%d (must be 255)\n",
2232			ip6_sprintf(&src6), ip6->ip6_hlim));
2233		goto bad;
2234	}
2235    {
2236	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
2237	struct sockaddr_in6 sin6;
2238	struct in6_addr *gw6;
2239
2240	bzero(&sin6, sizeof(sin6));
2241	sin6.sin6_family = AF_INET6;
2242	sin6.sin6_len = sizeof(struct sockaddr_in6);
2243	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
2244	rt = rtalloc1((struct sockaddr *)&sin6, 0);
2245	if (rt) {
2246		if (rt->rt_gateway == NULL ||
2247		    rt->rt_gateway->sa_family != AF_INET6) {
2248			nd6log((LOG_ERR,
2249			    "ICMP6 redirect rejected; no route "
2250			    "with inet6 gateway found for redirect dst: %s\n",
2251			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2252			RTFREE(rt);
2253			goto bad;
2254		}
2255
2256		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
2257		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
2258			nd6log((LOG_ERR,
2259				"ICMP6 redirect rejected; "
2260				"not equal to gw-for-src=%s (must be same): "
2261				"%s\n",
2262				ip6_sprintf(gw6),
2263				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2264			RTFREE(rt);
2265			goto bad;
2266		}
2267	} else {
2268		nd6log((LOG_ERR,
2269			"ICMP6 redirect rejected; "
2270			"no route found for redirect dst: %s\n",
2271			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2272		goto bad;
2273	}
2274	RTFREE(rt);
2275	rt = NULL;
2276    }
2277	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
2278		nd6log((LOG_ERR,
2279			"ICMP6 redirect rejected; "
2280			"redirect dst must be unicast: %s\n",
2281			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2282		goto bad;
2283	}
2284
2285	is_router = is_onlink = 0;
2286	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2287		is_router = 1;	/* router case */
2288	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
2289		is_onlink = 1;	/* on-link destination case */
2290	if (!is_router && !is_onlink) {
2291		nd6log((LOG_ERR,
2292			"ICMP6 redirect rejected; "
2293			"neither router case nor onlink case: %s\n",
2294			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2295		goto bad;
2296	}
2297	/* validation passed */
2298
2299	icmp6len -= sizeof(*nd_rd);
2300	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
2301	if (nd6_options(&ndopts) < 0) {
2302		nd6log((LOG_INFO, "icmp6_redirect_input: "
2303			"invalid ND option, rejected: %s\n",
2304			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2305		/* nd6_options have incremented stats */
2306		goto freeit;
2307	}
2308
2309	if (ndopts.nd_opts_tgt_lladdr) {
2310		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
2311		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
2312	}
2313
2314	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
2315		nd6log((LOG_INFO,
2316			"icmp6_redirect_input: lladdrlen mismatch for %s "
2317			"(if %d, icmp6 packet %d): %s\n",
2318			ip6_sprintf(&redtgt6), ifp->if_addrlen, lladdrlen - 2,
2319			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2320		goto bad;
2321	}
2322
2323	/* RFC 2461 8.3 */
2324	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
2325			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
2326
2327	if (!is_onlink) {	/* better router case.  perform rtredirect. */
2328		/* perform rtredirect */
2329		struct sockaddr_in6 sdst;
2330		struct sockaddr_in6 sgw;
2331		struct sockaddr_in6 ssrc;
2332		unsigned long rtcount;
2333		struct rtentry *newrt = NULL;
2334
2335		/*
2336		 * do not install redirect route, if the number of entries
2337		 * is too much (> hiwat).  note that, the node (= host) will
2338		 * work just fine even if we do not install redirect route
2339		 * (there will be additional hops, though).
2340		 */
2341		rtcount = rt_timer_count(icmp6_redirect_timeout_q);
2342		if (0 <= icmp6_redirect_hiwat && rtcount > icmp6_redirect_hiwat)
2343			return;
2344		else if (0 <= icmp6_redirect_lowat &&
2345		    rtcount > icmp6_redirect_lowat) {
2346			/*
2347			 * XXX nuke a victim, install the new one.
2348			 */
2349		}
2350
2351		bzero(&sdst, sizeof(sdst));
2352		bzero(&sgw, sizeof(sgw));
2353		bzero(&ssrc, sizeof(ssrc));
2354		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
2355		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
2356			sizeof(struct sockaddr_in6);
2357		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
2358		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2359		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
2360		rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
2361			   (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
2362			   (struct sockaddr *)&ssrc,
2363			   &newrt);
2364
2365		if (newrt) {
2366			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
2367			    icmp6_redirect_timeout_q);
2368			rtfree(newrt);
2369		}
2370	}
2371	/* finally update cached route in each socket via pfctlinput */
2372	{
2373		struct sockaddr_in6 sdst;
2374
2375		bzero(&sdst, sizeof(sdst));
2376		sdst.sin6_family = AF_INET6;
2377		sdst.sin6_len = sizeof(struct sockaddr_in6);
2378		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2379		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
2380#ifdef IPSEC
2381		key_sa_routechange((struct sockaddr *)&sdst);
2382#endif
2383	}
2384
2385 freeit:
2386	m_freem(m);
2387	return;
2388
2389 bad:
2390	icmp6stat.icp6s_badredirect++;
2391	m_freem(m);
2392}
2393
2394void
2395icmp6_redirect_output(m0, rt)
2396	struct mbuf *m0;
2397	struct rtentry *rt;
2398{
2399	struct ifnet *ifp;	/* my outgoing interface */
2400	struct in6_addr *ifp_ll6;
2401	struct in6_addr *nexthop;
2402	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
2403	struct mbuf *m = NULL;	/* newly allocated one */
2404	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
2405	struct nd_redirect *nd_rd;
2406	size_t maxlen;
2407	u_char *p;
2408	struct sockaddr_in6 src_sa;
2409
2410	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
2411
2412	/* if we are not router, we don't send icmp6 redirect */
2413	if (!ip6_forwarding)
2414		goto fail;
2415
2416	/* sanity check */
2417	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
2418		goto fail;
2419
2420	/*
2421	 * Address check:
2422	 *  the source address must identify a neighbor, and
2423	 *  the destination address must not be a multicast address
2424	 *  [RFC 2461, sec 8.2]
2425	 */
2426	sip6 = mtod(m0, struct ip6_hdr *);
2427	bzero(&src_sa, sizeof(src_sa));
2428	src_sa.sin6_family = AF_INET6;
2429	src_sa.sin6_len = sizeof(src_sa);
2430	src_sa.sin6_addr = sip6->ip6_src;
2431	/* we don't currently use sin6_scope_id, but eventually use it */
2432	src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src);
2433	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
2434		goto fail;
2435	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
2436		goto fail;	/* what should we do here? */
2437
2438	/* rate limit */
2439	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
2440		goto fail;
2441
2442	/*
2443	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
2444	 * we almost always ask for an mbuf cluster for simplicity.
2445	 * (MHLEN < IPV6_MMTU is almost always true)
2446	 */
2447#if IPV6_MMTU >= MCLBYTES
2448# error assumption failed about IPV6_MMTU and MCLBYTES
2449#endif
2450	MGETHDR(m, M_DONTWAIT, MT_HEADER);
2451	if (m && IPV6_MMTU >= MHLEN)
2452		MCLGET(m, M_DONTWAIT);
2453	if (!m)
2454		goto fail;
2455	m->m_pkthdr.rcvif = NULL;
2456	m->m_len = 0;
2457	maxlen = M_TRAILINGSPACE(m);
2458	maxlen = min(IPV6_MMTU, maxlen);
2459	/* just for safety */
2460	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
2461	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
2462		goto fail;
2463	}
2464
2465	{
2466		/* get ip6 linklocal address for ifp(my outgoing interface). */
2467		struct in6_ifaddr *ia;
2468		if ((ia = in6ifa_ifpforlinklocal(ifp,
2469						 IN6_IFF_NOTREADY|
2470						 IN6_IFF_ANYCAST)) == NULL)
2471			goto fail;
2472		ifp_ll6 = &ia->ia_addr.sin6_addr;
2473	}
2474
2475	/* get ip6 linklocal address for the router. */
2476	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
2477		struct sockaddr_in6 *sin6;
2478		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
2479		nexthop = &sin6->sin6_addr;
2480		if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
2481			nexthop = NULL;
2482	} else
2483		nexthop = NULL;
2484
2485	/* ip6 */
2486	ip6 = mtod(m, struct ip6_hdr *);
2487	ip6->ip6_flow = 0;
2488	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2489	ip6->ip6_vfc |= IPV6_VERSION;
2490	/* ip6->ip6_plen will be set later */
2491	ip6->ip6_nxt = IPPROTO_ICMPV6;
2492	ip6->ip6_hlim = 255;
2493	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
2494	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
2495	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
2496
2497	/* ND Redirect */
2498	nd_rd = (struct nd_redirect *)(ip6 + 1);
2499	nd_rd->nd_rd_type = ND_REDIRECT;
2500	nd_rd->nd_rd_code = 0;
2501	nd_rd->nd_rd_reserved = 0;
2502	if (rt->rt_flags & RTF_GATEWAY) {
2503		/*
2504		 * nd_rd->nd_rd_target must be a link-local address in
2505		 * better router cases.
2506		 */
2507		if (!nexthop)
2508			goto fail;
2509		bcopy(nexthop, &nd_rd->nd_rd_target,
2510		      sizeof(nd_rd->nd_rd_target));
2511		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2512		      sizeof(nd_rd->nd_rd_dst));
2513	} else {
2514		/* make sure redtgt == reddst */
2515		nexthop = &sip6->ip6_dst;
2516		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
2517		      sizeof(nd_rd->nd_rd_target));
2518		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2519		      sizeof(nd_rd->nd_rd_dst));
2520	}
2521
2522	p = (u_char *)(nd_rd + 1);
2523
2524	{
2525		/* target lladdr option */
2526		struct rtentry *rt_nexthop = NULL;
2527		int len;
2528		struct sockaddr_dl *sdl;
2529		struct nd_opt_hdr *nd_opt;
2530		char *lladdr;
2531
2532		rt_nexthop = nd6_lookup(nexthop, 0, ifp);
2533		if (!rt_nexthop)
2534			goto nolladdropt;
2535		len = sizeof(*nd_opt) + ifp->if_addrlen;
2536		len = (len + 7) & ~7;	/* round by 8 */
2537		/* safety check */
2538		if (len + (p - (u_char *)ip6) > maxlen)
2539			goto nolladdropt;
2540		if (!(rt_nexthop->rt_flags & RTF_GATEWAY) &&
2541		    (rt_nexthop->rt_flags & RTF_LLINFO) &&
2542		    (rt_nexthop->rt_gateway->sa_family == AF_LINK) &&
2543		    (sdl = (struct sockaddr_dl *)rt_nexthop->rt_gateway) &&
2544		    sdl->sdl_alen) {
2545			nd_opt = (struct nd_opt_hdr *)p;
2546			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
2547			nd_opt->nd_opt_len = len >> 3;
2548			lladdr = (char *)(nd_opt + 1);
2549			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
2550			p += len;
2551		}
2552	}
2553  nolladdropt:;
2554
2555	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2556
2557	/* just to be safe */
2558	if (m0->m_flags & M_DECRYPTED)
2559		goto noredhdropt;
2560	if (p - (u_char *)ip6 > maxlen)
2561		goto noredhdropt;
2562
2563	{
2564		/* redirected header option */
2565		int len;
2566		struct nd_opt_rd_hdr *nd_opt_rh;
2567
2568		/*
2569		 * compute the maximum size for icmp6 redirect header option.
2570		 * XXX room for auth header?
2571		 */
2572		len = maxlen - (p - (u_char *)ip6);
2573		len &= ~7;
2574
2575		/*
2576		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
2577		 * about padding/truncate rule for the original IP packet.
2578		 * From the discussion on IPv6imp in Feb 1999,
2579		 * the consensus was:
2580		 * - "attach as much as possible" is the goal
2581		 * - pad if not aligned (original size can be guessed by
2582		 *   original ip6 header)
2583		 * Following code adds the padding if it is simple enough,
2584		 * and truncates if not.
2585		 */
2586		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
2587			/* not enough room, truncate */
2588			m_adj(m0, (len - sizeof(*nd_opt_rh)) -
2589			    m0->m_pkthdr.len);
2590		} else {
2591			/*
2592                         * enough room, truncate if not aligned.
2593			 * we don't pad here for simplicity.
2594			 */
2595			size_t extra;
2596
2597			extra = m0->m_pkthdr.len % 8;
2598			if (extra) {
2599				/* truncate */
2600				m_adj(m0, -extra);
2601			}
2602			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
2603		}
2604
2605		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
2606		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
2607		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
2608		nd_opt_rh->nd_opt_rh_len = len >> 3;
2609		p += sizeof(*nd_opt_rh);
2610		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2611
2612		/* connect m0 to m */
2613		m->m_pkthdr.len += m0->m_pkthdr.len;
2614		m_cat(m, m0);
2615		m0 = NULL;
2616	}
2617noredhdropt:
2618	if (m0) {
2619		m_freem(m0);
2620		m0 = NULL;
2621	}
2622
2623	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
2624		sip6->ip6_src.s6_addr16[1] = 0;
2625	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
2626		sip6->ip6_dst.s6_addr16[1] = 0;
2627#if 0
2628	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
2629		ip6->ip6_src.s6_addr16[1] = 0;
2630	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
2631		ip6->ip6_dst.s6_addr16[1] = 0;
2632#endif
2633	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
2634		nd_rd->nd_rd_target.s6_addr16[1] = 0;
2635	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
2636		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
2637
2638	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
2639
2640	nd_rd->nd_rd_cksum = 0;
2641	nd_rd->nd_rd_cksum
2642		= in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));
2643
2644	/* send the packet to outside... */
2645	if (ip6_output(m, NULL, NULL, 0,
2646		(struct ip6_moptions *)NULL, (struct socket *)NULL, NULL) != 0)
2647		icmp6_ifstat_inc(ifp, ifs6_out_error);
2648
2649	icmp6_ifstat_inc(ifp, ifs6_out_msg);
2650	icmp6_ifstat_inc(ifp, ifs6_out_redirect);
2651	icmp6stat.icp6s_outhist[ND_REDIRECT]++;
2652
2653	return;
2654
2655fail:
2656	if (m)
2657		m_freem(m);
2658	if (m0)
2659		m_freem(m0);
2660}
2661
2662/*
2663 * ICMPv6 socket option processing.
2664 */
2665int
2666icmp6_ctloutput(op, so, level, optname, mp)
2667	int op;
2668	struct socket *so;
2669	int level, optname;
2670	struct mbuf **mp;
2671{
2672	int error = 0;
2673	int optlen;
2674	struct in6pcb *in6p = sotoin6pcb(so);
2675	struct mbuf *m = *mp;
2676
2677	optlen = m ? m->m_len : 0;
2678
2679	if (level != IPPROTO_ICMPV6) {
2680		if (op == PRCO_SETOPT && m)
2681			(void)m_free(m);
2682		return EINVAL;
2683	}
2684
2685	switch (op) {
2686	case PRCO_SETOPT:
2687		switch (optname) {
2688		case ICMP6_FILTER:
2689		    {
2690			struct icmp6_filter *p;
2691
2692			if (optlen != sizeof(*p)) {
2693				error = EMSGSIZE;
2694				break;
2695			}
2696			p = mtod(m, struct icmp6_filter *);
2697			if (!p || !in6p->in6p_icmp6filt) {
2698				error = EINVAL;
2699				break;
2700			}
2701			bcopy(p, in6p->in6p_icmp6filt,
2702				sizeof(struct icmp6_filter));
2703			error = 0;
2704			break;
2705		    }
2706
2707		default:
2708			error = ENOPROTOOPT;
2709			break;
2710		}
2711		if (m)
2712			(void)m_freem(m);
2713		break;
2714
2715	case PRCO_GETOPT:
2716		switch (optname) {
2717		case ICMP6_FILTER:
2718		    {
2719			struct icmp6_filter *p;
2720
2721			if (!in6p->in6p_icmp6filt) {
2722				error = EINVAL;
2723				break;
2724			}
2725			*mp = m = m_get(M_WAIT, MT_SOOPTS);
2726			m->m_len = sizeof(struct icmp6_filter);
2727			p = mtod(m, struct icmp6_filter *);
2728			bcopy(in6p->in6p_icmp6filt, p,
2729				sizeof(struct icmp6_filter));
2730			error = 0;
2731			break;
2732		    }
2733
2734		default:
2735			error = ENOPROTOOPT;
2736			break;
2737		}
2738		break;
2739	}
2740
2741	return (error);
2742}
2743
2744/*
2745 * Perform rate limit check.
2746 * Returns 0 if it is okay to send the icmp6 packet.
2747 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
2748 * limitation.
2749 *
2750 * XXX per-destination/type check necessary?
2751 */
2752static int
2753icmp6_ratelimit(dst, type, code)
2754	const struct in6_addr *dst;	/* not used at this moment */
2755	const int type;			/* not used at this moment */
2756	const int code;			/* not used at this moment */
2757{
2758	int ret;
2759
2760	ret = 0;	/* okay to send */
2761
2762	/* PPS limit */
2763	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
2764	    icmp6errppslim)) {
2765		/* The packet is subject to rate limit */
2766		ret++;
2767	}
2768
2769	return ret;
2770}
2771
2772static struct rtentry *
2773icmp6_mtudisc_clone(dst)
2774	struct sockaddr *dst;
2775{
2776	struct rtentry *rt;
2777	int    error;
2778
2779	rt = rtalloc1(dst, 1);
2780	if (rt == 0)
2781		return NULL;
2782
2783	/* If we didn't get a host route, allocate one */
2784	if ((rt->rt_flags & RTF_HOST) == 0) {
2785		struct rtentry *nrt;
2786
2787		error = rtrequest((int) RTM_ADD, dst,
2788		    (struct sockaddr *) rt->rt_gateway,
2789		    (struct sockaddr *) 0,
2790		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
2791		if (error) {
2792			rtfree(rt);
2793			return NULL;
2794		}
2795		nrt->rt_rmx = rt->rt_rmx;
2796		rtfree(rt);
2797		rt = nrt;
2798	}
2799	error = rt_timer_add(rt, icmp6_mtudisc_timeout,
2800			icmp6_mtudisc_timeout_q);
2801	if (error) {
2802		rtfree(rt);
2803		return NULL;
2804	}
2805
2806	return rt;	/* caller need to call rtfree() */
2807}
2808
2809static void
2810icmp6_mtudisc_timeout(rt, r)
2811	struct rtentry *rt;
2812	struct rttimer *r;
2813{
2814	if (rt == NULL)
2815		panic("icmp6_mtudisc_timeout: bad route to timeout");
2816	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
2817	    (RTF_DYNAMIC | RTF_HOST)) {
2818		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
2819		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
2820	} else {
2821		if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
2822			rt->rt_rmx.rmx_mtu = 0;
2823	}
2824}
2825
2826static void
2827icmp6_redirect_timeout(rt, r)
2828	struct rtentry *rt;
2829	struct rttimer *r;
2830{
2831	if (rt == NULL)
2832		panic("icmp6_redirect_timeout: bad route to timeout");
2833	if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) ==
2834	    (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) {
2835		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
2836		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
2837	}
2838}
2839
2840/*
2841 * sysctl helper routine for the net.inet6.icmp6.nd6 nodes.  silly?
2842 */
2843static int
2844sysctl_net_inet6_icmp6_nd6(SYSCTLFN_ARGS)
2845{
2846
2847	if (namelen != 0)
2848		return (EINVAL);
2849
2850	return (nd6_sysctl(rnode->sysctl_num, oldp, oldlenp,
2851	    (void*)newp, newlen));
2852}
2853
2854SYSCTL_SETUP(sysctl_net_inet6_icmp6_setup,
2855	     "sysctl net.inet6.icmp6 subtree setup")
2856{
2857
2858	sysctl_createv(clog, 0, NULL, NULL,
2859		       CTLFLAG_PERMANENT,
2860		       CTLTYPE_NODE, "net", NULL,
2861		       NULL, 0, NULL, 0,
2862		       CTL_NET, CTL_EOL);
2863	sysctl_createv(clog, 0, NULL, NULL,
2864		       CTLFLAG_PERMANENT,
2865		       CTLTYPE_NODE, "inet6", NULL,
2866		       NULL, 0, NULL, 0,
2867		       CTL_NET, PF_INET6, CTL_EOL);
2868	sysctl_createv(clog, 0, NULL, NULL,
2869		       CTLFLAG_PERMANENT,
2870		       CTLTYPE_NODE, "icmp6", NULL,
2871		       NULL, 0, NULL, 0,
2872		       CTL_NET, PF_INET6, IPPROTO_ICMPV6, CTL_EOL);
2873
2874	sysctl_createv(clog, 0, NULL, NULL,
2875		       CTLFLAG_PERMANENT,
2876		       CTLTYPE_STRUCT, "stats", NULL,
2877		       NULL, 0, &icmp6stat, sizeof(icmp6stat),
2878		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2879		       ICMPV6CTL_STATS, CTL_EOL);
2880	sysctl_createv(clog, 0, NULL, NULL,
2881		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2882		       CTLTYPE_INT, "rediraccept", NULL,
2883		       NULL, 0, &icmp6_rediraccept, 0,
2884		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2885		       ICMPV6CTL_REDIRACCEPT, CTL_EOL);
2886	sysctl_createv(clog, 0, NULL, NULL,
2887		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2888		       CTLTYPE_INT, "redirtimeout", NULL,
2889		       NULL, 0, &icmp6_redirtimeout, 0,
2890		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2891		       ICMPV6CTL_REDIRTIMEOUT, CTL_EOL);
2892#if 0 /* obsoleted */
2893	sysctl_createv(clog, 0, NULL, NULL,
2894		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2895		       CTLTYPE_INT, "errratelimit", NULL,
2896		       NULL, 0, &icmp6_errratelimit, 0,
2897		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2898		       ICMPV6CTL_ERRRATELIMIT, CTL_EOL);
2899#endif
2900	sysctl_createv(clog, 0, NULL, NULL,
2901		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2902		       CTLTYPE_INT, "nd6_prune", NULL,
2903		       NULL, 0, &nd6_prune, 0,
2904		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2905		       ICMPV6CTL_ND6_PRUNE, CTL_EOL);
2906	sysctl_createv(clog, 0, NULL, NULL,
2907		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2908		       CTLTYPE_INT, "nd6_delay", NULL,
2909		       NULL, 0, &nd6_delay, 0,
2910		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2911		       ICMPV6CTL_ND6_DELAY, CTL_EOL);
2912	sysctl_createv(clog, 0, NULL, NULL,
2913		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2914		       CTLTYPE_INT, "nd6_umaxtries", NULL,
2915		       NULL, 0, &nd6_umaxtries, 0,
2916		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2917		       ICMPV6CTL_ND6_UMAXTRIES, CTL_EOL);
2918	sysctl_createv(clog, 0, NULL, NULL,
2919		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2920		       CTLTYPE_INT, "nd6_mmaxtries", NULL,
2921		       NULL, 0, &nd6_mmaxtries, 0,
2922		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2923		       ICMPV6CTL_ND6_MMAXTRIES, CTL_EOL);
2924	sysctl_createv(clog, 0, NULL, NULL,
2925		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2926		       CTLTYPE_INT, "nd6_useloopback", NULL,
2927		       NULL, 0, &nd6_useloopback, 0,
2928		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2929		       ICMPV6CTL_ND6_USELOOPBACK, CTL_EOL);
2930#if 0 /* obsoleted */
2931	sysctl_createv(clog, 0, NULL, NULL,
2932		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2933		       CTLTYPE_INT, "nd6_proxyall", NULL,
2934		       NULL, 0, &nd6_proxyall, 0,
2935		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2936		       ICMPV6CTL_ND6_PROXYALL, CTL_EOL);
2937#endif
2938	sysctl_createv(clog, 0, NULL, NULL,
2939		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2940		       CTLTYPE_INT, "nodeinfo", NULL,
2941		       NULL, 0, &icmp6_nodeinfo, 0,
2942		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2943		       ICMPV6CTL_NODEINFO, CTL_EOL);
2944	sysctl_createv(clog, 0, NULL, NULL,
2945		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2946		       CTLTYPE_INT, "errppslimit", NULL,
2947		       NULL, 0, &icmp6errppslim, 0,
2948		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2949		       ICMPV6CTL_ERRPPSLIMIT, CTL_EOL);
2950	sysctl_createv(clog, 0, NULL, NULL,
2951		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2952		       CTLTYPE_INT, "nd6_maxnudhint", NULL,
2953		       NULL, 0, &nd6_maxnudhint, 0,
2954		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2955		       ICMPV6CTL_ND6_MAXNUDHINT, CTL_EOL);
2956	sysctl_createv(clog, 0, NULL, NULL,
2957		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2958		       CTLTYPE_INT, "mtudisc_hiwat", NULL,
2959		       NULL, 0, &icmp6_mtudisc_hiwat, 0,
2960		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2961		       ICMPV6CTL_MTUDISC_HIWAT, CTL_EOL);
2962	sysctl_createv(clog, 0, NULL, NULL,
2963		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2964		       CTLTYPE_INT, "mtudisc_lowat", NULL,
2965		       NULL, 0, &icmp6_mtudisc_lowat, 0,
2966		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2967		       ICMPV6CTL_MTUDISC_LOWAT, CTL_EOL);
2968	sysctl_createv(clog, 0, NULL, NULL,
2969		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2970		       CTLTYPE_INT, "nd6_debug", NULL,
2971		       NULL, 0, &nd6_debug, 0,
2972		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2973		       ICMPV6CTL_ND6_DEBUG, CTL_EOL);
2974	sysctl_createv(clog, 0, NULL, NULL,
2975		       CTLFLAG_PERMANENT,
2976		       CTLTYPE_STRUCT, "nd6_drlist", NULL,
2977		       sysctl_net_inet6_icmp6_nd6, 0, NULL, 0,
2978		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2979		       ICMPV6CTL_ND6_DRLIST, CTL_EOL);
2980	sysctl_createv(clog, 0, NULL, NULL,
2981		       CTLFLAG_PERMANENT,
2982		       CTLTYPE_STRUCT, "nd6_prlist", NULL,
2983		       sysctl_net_inet6_icmp6_nd6, 0, NULL, 0,
2984		       CTL_NET, PF_INET6, IPPROTO_ICMPV6,
2985		       ICMPV6CTL_ND6_PRLIST, CTL_EOL);
2986}
2987