icmp6.c revision 1.96
1/*	$NetBSD: icmp6.c,v 1.96 2003/08/07 16:33:22 agc Exp $	*/
2/*	$KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
62 */
63
64#include <sys/cdefs.h>
65__KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.96 2003/08/07 16:33:22 agc Exp $");
66
67#include "opt_inet.h"
68#include "opt_ipsec.h"
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/malloc.h>
73#include <sys/mbuf.h>
74#include <sys/protosw.h>
75#include <sys/socket.h>
76#include <sys/socketvar.h>
77#include <sys/time.h>
78#include <sys/kernel.h>
79#include <sys/syslog.h>
80#include <sys/domain.h>
81#include <sys/sysctl.h>
82
83#include <net/if.h>
84#include <net/route.h>
85#include <net/if_dl.h>
86#include <net/if_types.h>
87
88#include <netinet/in.h>
89#include <netinet/in_var.h>
90#include <netinet/ip6.h>
91#include <netinet6/ip6_var.h>
92#include <netinet/icmp6.h>
93#include <netinet6/mld6_var.h>
94#include <netinet6/in6_pcb.h>
95#include <netinet6/nd6.h>
96#include <netinet6/in6_ifattach.h>
97#include <netinet6/ip6protosw.h>
98
99#ifdef IPSEC
100#include <netinet6/ipsec.h>
101#include <netkey/key.h>
102#endif
103
104#include "faith.h"
105#if defined(NFAITH) && 0 < NFAITH
106#include <net/if_faith.h>
107#endif
108
109#include <net/net_osdep.h>
110
111extern struct domain inet6domain;
112
113struct icmp6stat icmp6stat;
114
115extern struct in6pcb rawin6pcb;
116extern int icmp6errppslim;
117static int icmp6errpps_count = 0;
118static struct timeval icmp6errppslim_last;
119extern int icmp6_nodeinfo;
120
121/*
122 * List of callbacks to notify when Path MTU changes are made.
123 */
124struct icmp6_mtudisc_callback {
125	LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
126	void (*mc_func) __P((struct in6_addr *));
127};
128
129LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
130    LIST_HEAD_INITIALIZER(&icmp6_mtudisc_callbacks);
131
132static struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
133extern int pmtu_expire;
134
135/* XXX do these values make any sense? */
136static int icmp6_mtudisc_hiwat = 1280;
137static int icmp6_mtudisc_lowat = 256;
138
139/*
140 * keep track of # of redirect routes.
141 */
142static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;
143
144/* XXX experimental, turned off */
145static int icmp6_redirect_hiwat = -1;
146static int icmp6_redirect_lowat = -1;
147
148static void icmp6_errcount __P((struct icmp6errstat *, int, int));
149static int icmp6_rip6_input __P((struct mbuf **, int));
150static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int));
151static const char *icmp6_redirect_diag __P((struct in6_addr *,
152	struct in6_addr *, struct in6_addr *));
153static struct mbuf *ni6_input __P((struct mbuf *, int));
154static struct mbuf *ni6_nametodns __P((const char *, int, int));
155static int ni6_dnsmatch __P((const char *, int, const char *, int));
156static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *,
157			  struct ifnet **, char *));
158static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
159				struct ifnet *, int));
160static int icmp6_notify_error __P((struct mbuf *, int, int, int));
161static struct rtentry *icmp6_mtudisc_clone __P((struct sockaddr *));
162static void icmp6_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
163static void icmp6_redirect_timeout __P((struct rtentry *, struct rttimer *));
164
165void
166icmp6_init()
167{
168	mld6_init();
169	icmp6_mtudisc_timeout_q = rt_timer_queue_create(pmtu_expire);
170	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
171}
172
173static void
174icmp6_errcount(stat, type, code)
175	struct icmp6errstat *stat;
176	int type, code;
177{
178	switch (type) {
179	case ICMP6_DST_UNREACH:
180		switch (code) {
181		case ICMP6_DST_UNREACH_NOROUTE:
182			stat->icp6errs_dst_unreach_noroute++;
183			return;
184		case ICMP6_DST_UNREACH_ADMIN:
185			stat->icp6errs_dst_unreach_admin++;
186			return;
187		case ICMP6_DST_UNREACH_BEYONDSCOPE:
188			stat->icp6errs_dst_unreach_beyondscope++;
189			return;
190		case ICMP6_DST_UNREACH_ADDR:
191			stat->icp6errs_dst_unreach_addr++;
192			return;
193		case ICMP6_DST_UNREACH_NOPORT:
194			stat->icp6errs_dst_unreach_noport++;
195			return;
196		}
197		break;
198	case ICMP6_PACKET_TOO_BIG:
199		stat->icp6errs_packet_too_big++;
200		return;
201	case ICMP6_TIME_EXCEEDED:
202		switch (code) {
203		case ICMP6_TIME_EXCEED_TRANSIT:
204			stat->icp6errs_time_exceed_transit++;
205			return;
206		case ICMP6_TIME_EXCEED_REASSEMBLY:
207			stat->icp6errs_time_exceed_reassembly++;
208			return;
209		}
210		break;
211	case ICMP6_PARAM_PROB:
212		switch (code) {
213		case ICMP6_PARAMPROB_HEADER:
214			stat->icp6errs_paramprob_header++;
215			return;
216		case ICMP6_PARAMPROB_NEXTHEADER:
217			stat->icp6errs_paramprob_nextheader++;
218			return;
219		case ICMP6_PARAMPROB_OPTION:
220			stat->icp6errs_paramprob_option++;
221			return;
222		}
223		break;
224	case ND_REDIRECT:
225		stat->icp6errs_redirect++;
226		return;
227	}
228	stat->icp6errs_unknown++;
229}
230
231/*
232 * Register a Path MTU Discovery callback.
233 */
234void
235icmp6_mtudisc_callback_register(func)
236	void (*func) __P((struct in6_addr *));
237{
238	struct icmp6_mtudisc_callback *mc;
239
240	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
241	     mc = LIST_NEXT(mc, mc_list)) {
242		if (mc->mc_func == func)
243			return;
244	}
245
246	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
247	if (mc == NULL)
248		panic("icmp6_mtudisc_callback_register");
249
250	mc->mc_func = func;
251	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
252}
253
254/*
255 * Generate an error packet of type error in response to bad IP6 packet.
256 */
257void
258icmp6_error(m, type, code, param)
259	struct mbuf *m;
260	int type, code, param;
261{
262	struct ip6_hdr *oip6, *nip6;
263	struct icmp6_hdr *icmp6;
264	u_int preplen;
265	int off;
266	int nxt;
267
268	icmp6stat.icp6s_error++;
269
270	/* count per-type-code statistics */
271	icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code);
272
273	if (m->m_flags & M_DECRYPTED) {
274		icmp6stat.icp6s_canterror++;
275		goto freeit;
276	}
277
278	if (m->m_len < sizeof(struct ip6_hdr)) {
279		m = m_pullup(m, sizeof(struct ip6_hdr));
280		if (m == NULL)
281			return;
282	}
283	oip6 = mtod(m, struct ip6_hdr *);
284
285	/*
286	 * If the destination address of the erroneous packet is a multicast
287	 * address, or the packet was sent using link-layer multicast,
288	 * we should basically suppress sending an error (RFC 2463, Section
289	 * 2.4).
290	 * We have two exceptions (the item e.2 in that section):
291	 * - the Pakcet Too Big message can be sent for path MTU discovery.
292	 * - the Parameter Problem Message that can be allowed an icmp6 error
293	 *   in the option type field.  This check has been done in
294	 *   ip6_unknown_opt(), so we can just check the type and code.
295	 */
296	if ((m->m_flags & (M_BCAST|M_MCAST) ||
297	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
298	    (type != ICMP6_PACKET_TOO_BIG &&
299	     (type != ICMP6_PARAM_PROB ||
300	      code != ICMP6_PARAMPROB_OPTION)))
301		goto freeit;
302
303	/*
304	 * RFC 2463, 2.4 (e.5): source address check.
305	 * XXX: the case of anycast source?
306	 */
307	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
308	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
309		goto freeit;
310
311	/*
312	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
313	 * don't do it.
314	 */
315	nxt = -1;
316	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
317	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
318		struct icmp6_hdr *icp;
319
320		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
321			sizeof(*icp));
322		if (icp == NULL) {
323			icmp6stat.icp6s_tooshort++;
324			return;
325		}
326		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
327		    icp->icmp6_type == ND_REDIRECT) {
328			/*
329			 * ICMPv6 error
330			 * Special case: for redirect (which is
331			 * informational) we must not send icmp6 error.
332			 */
333			icmp6stat.icp6s_canterror++;
334			goto freeit;
335		} else {
336			/* ICMPv6 informational - send the error */
337		}
338	}
339#if 0 /* controversial */
340	else if (off >= 0 && nxt == IPPROTO_ESP) {
341		/*
342		 * It could be ICMPv6 error inside ESP.  Take a safer side,
343		 * don't respond.
344		 */
345		icmp6stat.icp6s_canterror++;
346		goto freeit;
347	}
348#endif
349	else {
350		/* non-ICMPv6 - send the error */
351	}
352
353	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
354
355	/* Finally, do rate limitation check. */
356	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
357		icmp6stat.icp6s_toofreq++;
358		goto freeit;
359	}
360
361	/*
362	 * OK, ICMP6 can be generated.
363	 */
364
365	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
366		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
367
368	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
369	M_PREPEND(m, preplen, M_DONTWAIT);
370	if (m && m->m_len < preplen)
371		m = m_pullup(m, preplen);
372	if (m == NULL) {
373		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
374		return;
375	}
376
377	nip6 = mtod(m, struct ip6_hdr *);
378	nip6->ip6_src  = oip6->ip6_src;
379	nip6->ip6_dst  = oip6->ip6_dst;
380
381	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src))
382		oip6->ip6_src.s6_addr16[1] = 0;
383	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst))
384		oip6->ip6_dst.s6_addr16[1] = 0;
385
386	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
387	icmp6->icmp6_type = type;
388	icmp6->icmp6_code = code;
389	icmp6->icmp6_pptr = htonl((u_int32_t)param);
390
391	/*
392	 * icmp6_reflect() is designed to be in the input path.
393	 * icmp6_error() can be called from both input and outut path,
394	 * and if we are in output path rcvif could contain bogus value.
395	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
396	 * information in ip header (nip6).
397	 */
398	m->m_pkthdr.rcvif = NULL;
399
400	icmp6stat.icp6s_outhist[type]++;
401	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
402
403	return;
404
405  freeit:
406	/*
407	 * If we can't tell wheter or not we can generate ICMP6, free it.
408	 */
409	m_freem(m);
410}
411
412/*
413 * Process a received ICMP6 message.
414 */
415int
416icmp6_input(mp, offp, proto)
417	struct mbuf **mp;
418	int *offp, proto;
419{
420	struct mbuf *m = *mp, *n;
421	struct ip6_hdr *ip6, *nip6;
422	struct icmp6_hdr *icmp6, *nicmp6;
423	int off = *offp;
424	int icmp6len = m->m_pkthdr.len - *offp;
425	int code, sum, noff;
426
427	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
428
429	/*
430	 * Locate icmp6 structure in mbuf, and check
431	 * that not corrupted and of at least minimum length
432	 */
433
434	ip6 = mtod(m, struct ip6_hdr *);
435	if (icmp6len < sizeof(struct icmp6_hdr)) {
436		icmp6stat.icp6s_tooshort++;
437		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
438		goto freeit;
439	}
440
441	/*
442	 * calculate the checksum
443	 */
444	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
445	if (icmp6 == NULL) {
446		icmp6stat.icp6s_tooshort++;
447		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
448		return IPPROTO_DONE;
449	}
450	KASSERT(IP6_HDR_ALIGNED_P(icmp6));
451	code = icmp6->icmp6_code;
452
453	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
454		nd6log((LOG_ERR,
455		    "ICMP6 checksum error(%d|%x) %s\n",
456		    icmp6->icmp6_type, sum, ip6_sprintf(&ip6->ip6_src)));
457		icmp6stat.icp6s_checksum++;
458		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
459		goto freeit;
460	}
461
462#if defined(NFAITH) && 0 < NFAITH
463	if (faithprefix(&ip6->ip6_dst)) {
464		/*
465		 * Deliver very specific ICMP6 type only.
466		 * This is important to deilver TOOBIG.  Otherwise PMTUD
467		 * will not work.
468		 */
469		switch (icmp6->icmp6_type) {
470		case ICMP6_DST_UNREACH:
471		case ICMP6_PACKET_TOO_BIG:
472		case ICMP6_TIME_EXCEEDED:
473			break;
474		default:
475			goto freeit;
476		}
477	}
478#endif
479
480	icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
481
482	switch (icmp6->icmp6_type) {
483	case ICMP6_DST_UNREACH:
484		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
485		switch (code) {
486		case ICMP6_DST_UNREACH_NOROUTE:
487			code = PRC_UNREACH_NET;
488			break;
489		case ICMP6_DST_UNREACH_ADMIN:
490			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
491			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
492			break;
493		case ICMP6_DST_UNREACH_ADDR:
494			code = PRC_HOSTDEAD;
495			break;
496#ifdef COMPAT_RFC1885
497		case ICMP6_DST_UNREACH_NOTNEIGHBOR:
498			code = PRC_UNREACH_SRCFAIL;
499			break;
500#else
501		case ICMP6_DST_UNREACH_BEYONDSCOPE:
502			/* I mean "source address was incorrect." */
503			code = PRC_UNREACH_NET;
504			break;
505#endif
506		case ICMP6_DST_UNREACH_NOPORT:
507			code = PRC_UNREACH_PORT;
508			break;
509		default:
510			goto badcode;
511		}
512		goto deliver;
513
514	case ICMP6_PACKET_TOO_BIG:
515		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
516		if (code != 0)
517			goto badcode;
518
519		code = PRC_MSGSIZE;
520
521		/*
522		 * Updating the path MTU will be done after examining
523		 * intermediate extension headers.
524		 */
525		goto deliver;
526
527	case ICMP6_TIME_EXCEEDED:
528		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
529		switch (code) {
530		case ICMP6_TIME_EXCEED_TRANSIT:
531			code = PRC_TIMXCEED_INTRANS;
532			break;
533		case ICMP6_TIME_EXCEED_REASSEMBLY:
534			code = PRC_TIMXCEED_REASS;
535			break;
536		default:
537			goto badcode;
538		}
539		goto deliver;
540
541	case ICMP6_PARAM_PROB:
542		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
543		switch (code) {
544		case ICMP6_PARAMPROB_NEXTHEADER:
545			code = PRC_UNREACH_PROTOCOL;
546			break;
547		case ICMP6_PARAMPROB_HEADER:
548		case ICMP6_PARAMPROB_OPTION:
549			code = PRC_PARAMPROB;
550			break;
551		default:
552			goto badcode;
553		}
554		goto deliver;
555
556	case ICMP6_ECHO_REQUEST:
557		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
558		if (code != 0)
559			goto badcode;
560		/*
561		 * Copy mbuf to send to two data paths: userland socket(s),
562		 * and to the querier (echo reply).
563		 * m: a copy for socket, n: a copy for querier
564		 */
565		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
566			/* Give up local */
567			n = m;
568			m = NULL;
569			goto deliverecho;
570		}
571		/*
572		 * If the first mbuf is shared, or the first mbuf is too short,
573		 * copy the first part of the data into a fresh mbuf.
574		 * Otherwise, we will wrongly overwrite both copies.
575		 */
576		if ((n->m_flags & M_EXT) != 0 ||
577		    n->m_len < off + sizeof(struct icmp6_hdr)) {
578			struct mbuf *n0 = n;
579			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
580
581			/*
582			 * Prepare an internal mbuf.  m_pullup() doesn't
583			 * always copy the length we specified.
584			 */
585			if (maxlen >= MCLBYTES) {
586				/* Give up remote */
587				m_freem(n0);
588				break;
589			}
590			MGETHDR(n, M_DONTWAIT, n0->m_type);
591			if (n && maxlen >= MHLEN) {
592				MCLGET(n, M_DONTWAIT);
593				if ((n->m_flags & M_EXT) == 0) {
594					m_free(n);
595					n = NULL;
596				}
597			}
598			if (n == NULL) {
599				/* Give up local */
600				m_freem(n0);
601				n = m;
602				m = NULL;
603				goto deliverecho;
604			}
605			M_COPY_PKTHDR(n, n0);
606			/*
607			 * Copy IPv6 and ICMPv6 only.
608			 */
609			nip6 = mtod(n, struct ip6_hdr *);
610			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
611			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
612			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
613			noff = sizeof(struct ip6_hdr);
614			n->m_len = noff + sizeof(struct icmp6_hdr);
615			/*
616			 * Adjust mbuf.  ip6_plen will be adjusted in
617			 * ip6_output().
618			 * n->m_pkthdr.len == n0->m_pkthdr.len at this point.
619			 */
620			n->m_pkthdr.len += noff + sizeof(struct icmp6_hdr);
621			n->m_pkthdr.len -= (off + sizeof(struct icmp6_hdr));
622			m_adj(n0, off + sizeof(struct icmp6_hdr));
623			n->m_next = n0;
624			n0->m_flags &= ~M_PKTHDR;
625		} else {
626	 deliverecho:
627			nip6 = mtod(n, struct ip6_hdr *);
628			nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off);
629			noff = off;
630		}
631		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
632		nicmp6->icmp6_code = 0;
633		if (n) {
634			icmp6stat.icp6s_reflect++;
635			icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
636			icmp6_reflect(n, noff);
637		}
638		if (!m)
639			goto freeit;
640		break;
641
642	case ICMP6_ECHO_REPLY:
643		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
644		if (code != 0)
645			goto badcode;
646		break;
647
648	case MLD_LISTENER_QUERY:
649	case MLD_LISTENER_REPORT:
650		if (icmp6len < sizeof(struct mld_hdr))
651			goto badlen;
652		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
653			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
654		else
655			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
656		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
657			/* give up local */
658			mld6_input(m, off);
659			m = NULL;
660			goto freeit;
661		}
662		mld6_input(n, off);
663		/* m stays. */
664		break;
665
666	case MLD_LISTENER_DONE:
667		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
668		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
669			goto badlen;
670		break;		/* nothing to be done in kernel */
671
672	case MLD_MTRACE_RESP:
673	case MLD_MTRACE:
674		/* XXX: these two are experimental.  not officially defined. */
675		/* XXX: per-interface statistics? */
676		break;		/* just pass it to applications */
677
678	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
679	    {
680		enum { WRU, FQDN } mode;
681
682		if (!icmp6_nodeinfo)
683			break;
684
685		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
686			mode = WRU;
687		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
688			mode = FQDN;
689		else
690			goto badlen;
691
692		if (mode == FQDN) {
693			n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
694			if (n)
695				n = ni6_input(n, off);
696			/* XXX meaningless if n == NULL */
697			noff = sizeof(struct ip6_hdr);
698		} else {
699			u_char *p;
700			int maxlen, maxhlen;
701
702			if ((icmp6_nodeinfo & 5) != 5)
703				break;
704
705			if (code != 0)
706				goto badcode;
707			maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
708			if (maxlen >= MCLBYTES) {
709				/* Give up remote */
710				break;
711			}
712			MGETHDR(n, M_DONTWAIT, m->m_type);
713			if (n && maxlen > MHLEN) {
714				MCLGET(n, M_DONTWAIT);
715				if ((n->m_flags & M_EXT) == 0) {
716					m_free(n);
717					n = NULL;
718				}
719			}
720			if (n == NULL) {
721				/* Give up remote */
722				break;
723			}
724			n->m_pkthdr.rcvif = NULL;
725			n->m_len = 0;
726			maxhlen = M_TRAILINGSPACE(n) - maxlen;
727			if (maxhlen > hostnamelen)
728				maxhlen = hostnamelen;
729			/*
730			 * Copy IPv6 and ICMPv6 only.
731			 */
732			nip6 = mtod(n, struct ip6_hdr *);
733			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
734			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
735			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
736			p = (u_char *)(nicmp6 + 1);
737			bzero(p, 4);
738			bcopy(hostname, p + 4, maxhlen); /* meaningless TTL */
739			noff = sizeof(struct ip6_hdr);
740			M_COPY_PKTHDR(n, m); /* just for rcvif */
741			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
742				sizeof(struct icmp6_hdr) + 4 + maxhlen;
743			nicmp6->icmp6_type = ICMP6_WRUREPLY;
744			nicmp6->icmp6_code = 0;
745		}
746#undef hostnamelen
747		if (n) {
748			icmp6stat.icp6s_reflect++;
749			icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
750			icmp6_reflect(n, noff);
751		}
752		break;
753	    }
754
755	case ICMP6_WRUREPLY:
756		if (code != 0)
757			goto badcode;
758		break;
759
760	case ND_ROUTER_SOLICIT:
761		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
762		if (code != 0)
763			goto badcode;
764		if (icmp6len < sizeof(struct nd_router_solicit))
765			goto badlen;
766		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
767			/* give up local */
768			nd6_rs_input(m, off, icmp6len);
769			m = NULL;
770			goto freeit;
771		}
772		nd6_rs_input(n, off, icmp6len);
773		/* m stays. */
774		break;
775
776	case ND_ROUTER_ADVERT:
777		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
778		if (code != 0)
779			goto badcode;
780		if (icmp6len < sizeof(struct nd_router_advert))
781			goto badlen;
782		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
783			/* give up local */
784			nd6_ra_input(m, off, icmp6len);
785			m = NULL;
786			goto freeit;
787		}
788		nd6_ra_input(n, off, icmp6len);
789		/* m stays. */
790		break;
791
792	case ND_NEIGHBOR_SOLICIT:
793		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
794		if (code != 0)
795			goto badcode;
796		if (icmp6len < sizeof(struct nd_neighbor_solicit))
797			goto badlen;
798		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
799			/* give up local */
800			nd6_ns_input(m, off, icmp6len);
801			m = NULL;
802			goto freeit;
803		}
804		nd6_ns_input(n, off, icmp6len);
805		/* m stays. */
806		break;
807
808	case ND_NEIGHBOR_ADVERT:
809		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
810		if (code != 0)
811			goto badcode;
812		if (icmp6len < sizeof(struct nd_neighbor_advert))
813			goto badlen;
814		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
815			/* give up local */
816			nd6_na_input(m, off, icmp6len);
817			m = NULL;
818			goto freeit;
819		}
820		nd6_na_input(n, off, icmp6len);
821		/* m stays. */
822		break;
823
824	case ND_REDIRECT:
825		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
826		if (code != 0)
827			goto badcode;
828		if (icmp6len < sizeof(struct nd_redirect))
829			goto badlen;
830		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
831			/* give up local */
832			icmp6_redirect_input(m, off);
833			m = NULL;
834			goto freeit;
835		}
836		icmp6_redirect_input(n, off);
837		/* m stays. */
838		break;
839
840	case ICMP6_ROUTER_RENUMBERING:
841		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
842		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
843			goto badcode;
844		if (icmp6len < sizeof(struct icmp6_router_renum))
845			goto badlen;
846		break;
847
848	default:
849		nd6log((LOG_DEBUG,
850		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
851		    icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src),
852		    ip6_sprintf(&ip6->ip6_dst),
853		    m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
854		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
855			/* ICMPv6 error: MUST deliver it by spec... */
856			code = PRC_NCMDS;
857			/* deliver */
858		} else {
859			/* ICMPv6 informational: MUST not deliver */
860			break;
861		}
862	deliver:
863		if (icmp6_notify_error(m, off, icmp6len, code)) {
864			/* In this case, m should've been freed. */
865			return (IPPROTO_DONE);
866		}
867		break;
868
869	badcode:
870		icmp6stat.icp6s_badcode++;
871		break;
872
873	badlen:
874		icmp6stat.icp6s_badlen++;
875		break;
876	}
877
878	/* deliver the packet to appropriate sockets */
879	icmp6_rip6_input(&m, *offp);
880
881	return IPPROTO_DONE;
882
883 freeit:
884	m_freem(m);
885	return IPPROTO_DONE;
886}
887
888static int
889icmp6_notify_error(m, off, icmp6len, code)
890	struct mbuf *m;
891	int off, icmp6len;
892{
893	struct icmp6_hdr *icmp6;
894	struct ip6_hdr *eip6;
895	u_int32_t notifymtu;
896	struct sockaddr_in6 icmp6src, icmp6dst;
897
898	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
899		icmp6stat.icp6s_tooshort++;
900		goto freeit;
901	}
902	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
903		       sizeof(*icmp6) + sizeof(struct ip6_hdr));
904	if (icmp6 == NULL) {
905		icmp6stat.icp6s_tooshort++;
906		return (-1);
907	}
908	eip6 = (struct ip6_hdr *)(icmp6 + 1);
909
910	/* Detect the upper level protocol */
911	{
912		void (*ctlfunc) __P((int, struct sockaddr *, void *));
913		u_int8_t nxt = eip6->ip6_nxt;
914		int eoff = off + sizeof(struct icmp6_hdr) +
915			sizeof(struct ip6_hdr);
916		struct ip6ctlparam ip6cp;
917		struct in6_addr *finaldst = NULL;
918		int icmp6type = icmp6->icmp6_type;
919		struct ip6_frag *fh;
920		struct ip6_rthdr *rth;
921		struct ip6_rthdr0 *rth0;
922		int rthlen;
923
924		while (1) { /* XXX: should avoid infinite loop explicitly? */
925			struct ip6_ext *eh;
926
927			switch (nxt) {
928			case IPPROTO_HOPOPTS:
929			case IPPROTO_DSTOPTS:
930			case IPPROTO_AH:
931				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
932					       eoff, sizeof(*eh));
933				if (eh == NULL) {
934					icmp6stat.icp6s_tooshort++;
935					return (-1);
936				}
937
938				if (nxt == IPPROTO_AH)
939					eoff += (eh->ip6e_len + 2) << 2;
940				else
941					eoff += (eh->ip6e_len + 1) << 3;
942				nxt = eh->ip6e_nxt;
943				break;
944			case IPPROTO_ROUTING:
945				/*
946				 * When the erroneous packet contains a
947				 * routing header, we should examine the
948				 * header to determine the final destination.
949				 * Otherwise, we can't properly update
950				 * information that depends on the final
951				 * destination (e.g. path MTU).
952				 */
953				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
954					       eoff, sizeof(*rth));
955				if (rth == NULL) {
956					icmp6stat.icp6s_tooshort++;
957					return (-1);
958				}
959				rthlen = (rth->ip6r_len + 1) << 3;
960				/*
961				 * XXX: currently there is no
962				 * officially defined type other
963				 * than type-0.
964				 * Note that if the segment left field
965				 * is 0, all intermediate hops must
966				 * have been passed.
967				 */
968				if (rth->ip6r_segleft &&
969				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
970					int hops;
971
972					IP6_EXTHDR_GET(rth0,
973						       struct ip6_rthdr0 *, m,
974						       eoff, rthlen);
975					if (rth0 == NULL) {
976						icmp6stat.icp6s_tooshort++;
977						return (-1);
978					}
979					/* just ignore a bogus header */
980					if ((rth0->ip6r0_len % 2) == 0 &&
981					    (hops = rth0->ip6r0_len/2))
982						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
983				}
984				eoff += rthlen;
985				nxt = rth->ip6r_nxt;
986				break;
987			case IPPROTO_FRAGMENT:
988				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
989					       eoff, sizeof(*fh));
990				if (fh == NULL) {
991					icmp6stat.icp6s_tooshort++;
992					return (-1);
993				}
994				/*
995				 * Data after a fragment header is meaningless
996				 * unless it is the first fragment, but
997				 * we'll go to the notify label for path MTU
998				 * discovery.
999				 */
1000				if (fh->ip6f_offlg & IP6F_OFF_MASK)
1001					goto notify;
1002
1003				eoff += sizeof(struct ip6_frag);
1004				nxt = fh->ip6f_nxt;
1005				break;
1006			default:
1007				/*
1008				 * This case includes ESP and the No Next
1009				 * Header.  In such cases going to the notify
1010				 * label does not have any meaning
1011				 * (i.e. ctlfunc will be NULL), but we go
1012				 * anyway since we might have to update
1013				 * path MTU information.
1014				 */
1015				goto notify;
1016			}
1017		}
1018	  notify:
1019		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
1020			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
1021		if (icmp6 == NULL) {
1022			icmp6stat.icp6s_tooshort++;
1023			return (-1);
1024		}
1025
1026		eip6 = (struct ip6_hdr *)(icmp6 + 1);
1027		bzero(&icmp6dst, sizeof(icmp6dst));
1028		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
1029		icmp6dst.sin6_family = AF_INET6;
1030		if (finaldst == NULL)
1031			icmp6dst.sin6_addr = eip6->ip6_dst;
1032		else
1033			icmp6dst.sin6_addr = *finaldst;
1034		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
1035							  &icmp6dst.sin6_addr);
1036#ifndef SCOPEDROUTING
1037		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst,
1038				   NULL, NULL)) {
1039			/* should be impossbile */
1040			nd6log((LOG_DEBUG,
1041			    "icmp6_notify_error: in6_embedscope failed\n"));
1042			goto freeit;
1043		}
1044#endif
1045
1046		/*
1047		 * retrieve parameters from the inner IPv6 header, and convert
1048		 * them into sockaddr structures.
1049		 */
1050		bzero(&icmp6src, sizeof(icmp6src));
1051		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
1052		icmp6src.sin6_family = AF_INET6;
1053		icmp6src.sin6_addr = eip6->ip6_src;
1054		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
1055							  &icmp6src.sin6_addr);
1056#ifndef SCOPEDROUTING
1057		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src,
1058				   NULL, NULL)) {
1059			/* should be impossbile */
1060			nd6log((LOG_DEBUG,
1061			    "icmp6_notify_error: in6_embedscope failed\n"));
1062			goto freeit;
1063		}
1064#endif
1065		icmp6src.sin6_flowinfo =
1066			(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
1067
1068		if (finaldst == NULL)
1069			finaldst = &eip6->ip6_dst;
1070		ip6cp.ip6c_m = m;
1071		ip6cp.ip6c_icmp6 = icmp6;
1072		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
1073		ip6cp.ip6c_off = eoff;
1074		ip6cp.ip6c_finaldst = finaldst;
1075		ip6cp.ip6c_src = &icmp6src;
1076		ip6cp.ip6c_nxt = nxt;
1077
1078		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
1079			notifymtu = ntohl(icmp6->icmp6_mtu);
1080			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
1081		}
1082
1083		ctlfunc = (void (*) __P((int, struct sockaddr *, void *)))
1084			(inet6sw[ip6_protox[nxt]].pr_ctlinput);
1085		if (ctlfunc) {
1086			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
1087					  &ip6cp);
1088		}
1089	}
1090	return (0);
1091
1092  freeit:
1093	m_freem(m);
1094	return (-1);
1095}
1096
1097void
1098icmp6_mtudisc_update(ip6cp, validated)
1099	struct ip6ctlparam *ip6cp;
1100	int validated;
1101{
1102	unsigned long rtcount;
1103	struct icmp6_mtudisc_callback *mc;
1104	struct in6_addr *dst = ip6cp->ip6c_finaldst;
1105	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
1106	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
1107	u_int mtu = ntohl(icmp6->icmp6_mtu);
1108	struct rtentry *rt = NULL;
1109	struct sockaddr_in6 sin6;
1110
1111	/*
1112	 * allow non-validated cases if memory is plenty, to make traffic
1113	 * from non-connected pcb happy.
1114	 */
1115	rtcount = rt_timer_count(icmp6_mtudisc_timeout_q);
1116	if (validated) {
1117		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat)
1118			return;
1119		else if (0 <= icmp6_mtudisc_lowat &&
1120		    rtcount > icmp6_mtudisc_lowat) {
1121			/*
1122			 * XXX nuke a victim, install the new one.
1123			 */
1124		}
1125	} else {
1126		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat)
1127			return;
1128	}
1129
1130	bzero(&sin6, sizeof(sin6));
1131	sin6.sin6_family = PF_INET6;
1132	sin6.sin6_len = sizeof(struct sockaddr_in6);
1133	sin6.sin6_addr = *dst;
1134	/* XXX normally, this won't happen */
1135	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
1136		sin6.sin6_addr.s6_addr16[1] =
1137		    htons(m->m_pkthdr.rcvif->if_index);
1138	}
1139	/* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */
1140	rt = icmp6_mtudisc_clone((struct sockaddr *)&sin6);
1141
1142	if (rt && (rt->rt_flags & RTF_HOST) &&
1143	    !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
1144	    (rt->rt_rmx.rmx_mtu > mtu || rt->rt_rmx.rmx_mtu == 0)) {
1145		if (mtu < IN6_LINKMTU(rt->rt_ifp)) {
1146			icmp6stat.icp6s_pmtuchg++;
1147			rt->rt_rmx.rmx_mtu = mtu;
1148		}
1149	}
1150	if (rt) { /* XXX: need braces to avoid conflict with else in RTFREE. */
1151		RTFREE(rt);
1152	}
1153
1154	/*
1155	 * Notify protocols that the MTU for this destination
1156	 * has changed.
1157	 */
1158	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
1159	     mc = LIST_NEXT(mc, mc_list))
1160		(*mc->mc_func)(&sin6.sin6_addr);
1161}
1162
1163/*
1164 * Process a Node Information Query packet, based on
1165 * draft-ietf-ipngwg-icmp-name-lookups-07.
1166 *
1167 * Spec incompatibilities:
1168 * - IPv6 Subject address handling
1169 * - IPv4 Subject address handling support missing
1170 * - Proxy reply (answer even if it's not for me)
1171 * - joins NI group address at in6_ifattach() time only, does not cope
1172 *   with hostname changes by sethostname(3)
1173 */
1174#ifndef offsetof		/* XXX */
1175#define	offsetof(type, member)	((size_t)(&((type *)0)->member))
1176#endif
1177static struct mbuf *
1178ni6_input(m, off)
1179	struct mbuf *m;
1180	int off;
1181{
1182	struct icmp6_nodeinfo *ni6, *nni6;
1183	struct mbuf *n = NULL;
1184	u_int16_t qtype;
1185	int subjlen;
1186	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
1187	struct ni_reply_fqdn *fqdn;
1188	int addrs;		/* for NI_QTYPE_NODEADDR */
1189	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
1190	struct sockaddr_in6 sin6; /* double meaning; ip6_dst and subjectaddr */
1191	struct ip6_hdr *ip6;
1192	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
1193	char *subj = NULL;
1194
1195	ip6 = mtod(m, struct ip6_hdr *);
1196	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
1197	if (ni6 == NULL) {
1198		/* m is already reclaimed */
1199		return NULL;
1200	}
1201
1202	/*
1203	 * Validate IPv6 destination address.
1204	 *
1205	 * The Responder must discard the Query without further processing
1206	 * unless it is one of the Responder's unicast or anycast addresses, or
1207	 * a link-local scope multicast address which the Responder has joined.
1208	 * [icmp-name-lookups-07, Section 4.]
1209	 */
1210	bzero(&sin6, sizeof(sin6));
1211	sin6.sin6_family = AF_INET6;
1212	sin6.sin6_len = sizeof(struct sockaddr_in6);
1213	bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
1214	/* XXX scopeid */
1215	if (ifa_ifwithaddr((struct sockaddr *)&sin6))
1216		; /* unicast/anycast, fine */
1217	else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr))
1218		; /* link-local multicast, fine */
1219	else
1220		goto bad;
1221
1222	/* validate query Subject field. */
1223	qtype = ntohs(ni6->ni_qtype);
1224	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
1225	switch (qtype) {
1226	case NI_QTYPE_NOOP:
1227	case NI_QTYPE_SUPTYPES:
1228		/* 07 draft */
1229		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
1230			break;
1231		/* FALLTHROUGH */
1232	case NI_QTYPE_FQDN:
1233	case NI_QTYPE_NODEADDR:
1234		switch (ni6->ni_code) {
1235		case ICMP6_NI_SUBJ_IPV6:
1236#if ICMP6_NI_SUBJ_IPV6 != 0
1237		case 0:
1238#endif
1239			/*
1240			 * backward compatibility - try to accept 03 draft
1241			 * format, where no Subject is present.
1242			 */
1243			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
1244			    subjlen == 0) {
1245				oldfqdn++;
1246				break;
1247			}
1248#if ICMP6_NI_SUBJ_IPV6 != 0
1249			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
1250				goto bad;
1251#endif
1252
1253			if (subjlen != sizeof(sin6.sin6_addr))
1254				goto bad;
1255
1256			/*
1257			 * Validate Subject address.
1258			 *
1259			 * Not sure what exactly "address belongs to the node"
1260			 * means in the spec, is it just unicast, or what?
1261			 *
1262			 * At this moment we consider Subject address as
1263			 * "belong to the node" if the Subject address equals
1264			 * to the IPv6 destination address; validation for
1265			 * IPv6 destination address should have done enough
1266			 * check for us.
1267			 *
1268			 * We do not do proxy at this moment.
1269			 */
1270			/* m_pulldown instead of copy? */
1271			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
1272			    subjlen, (caddr_t)&sin6.sin6_addr);
1273			/* XXX kame scope hack */
1274			if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) {
1275				if ((m->m_flags & M_PKTHDR) != 0 &&
1276				    m->m_pkthdr.rcvif) {
1277					sin6.sin6_addr.s6_addr16[1] =
1278					    htons(m->m_pkthdr.rcvif->if_index);
1279				}
1280			}
1281			subj = (char *)&sin6;
1282			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &sin6.sin6_addr))
1283				break;
1284
1285			/*
1286			 * XXX if we are to allow other cases, we should really
1287			 * be careful about scope here.
1288			 * basically, we should disallow queries toward IPv6
1289			 * destination X with subject Y, if scope(X) > scope(Y).
1290			 * if we allow scope(X) > scope(Y), it will result in
1291			 * information leakage across scope boundary.
1292			 */
1293			goto bad;
1294
1295		case ICMP6_NI_SUBJ_FQDN:
1296			/*
1297			 * Validate Subject name with gethostname(3).
1298			 *
1299			 * The behavior may need some debate, since:
1300			 * - we are not sure if the node has FQDN as
1301			 *   hostname (returned by gethostname(3)).
1302			 * - the code does wildcard match for truncated names.
1303			 *   however, we are not sure if we want to perform
1304			 *   wildcard match, if gethostname(3) side has
1305			 *   truncated hostname.
1306			 */
1307			n = ni6_nametodns(hostname, hostnamelen, 0);
1308			if (!n || n->m_next || n->m_len == 0)
1309				goto bad;
1310			IP6_EXTHDR_GET(subj, char *, m,
1311			    off + sizeof(struct icmp6_nodeinfo), subjlen);
1312			if (subj == NULL)
1313				goto bad;
1314			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
1315					n->m_len)) {
1316				goto bad;
1317			}
1318			m_freem(n);
1319			n = NULL;
1320			break;
1321
1322		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
1323		default:
1324			goto bad;
1325		}
1326		break;
1327	}
1328
1329	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
1330	switch (qtype) {
1331	case NI_QTYPE_FQDN:
1332		if ((icmp6_nodeinfo & 1) == 0)
1333			goto bad;
1334		break;
1335	case NI_QTYPE_NODEADDR:
1336		if ((icmp6_nodeinfo & 2) == 0)
1337			goto bad;
1338		break;
1339	}
1340
1341	/* guess reply length */
1342	switch (qtype) {
1343	case NI_QTYPE_NOOP:
1344		break;		/* no reply data */
1345	case NI_QTYPE_SUPTYPES:
1346		replylen += sizeof(u_int32_t);
1347		break;
1348	case NI_QTYPE_FQDN:
1349		/* XXX will append an mbuf */
1350		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1351		break;
1352	case NI_QTYPE_NODEADDR:
1353		addrs = ni6_addrs(ni6, m, &ifp, subj);
1354		if ((replylen += addrs * (sizeof(struct in6_addr) +
1355					  sizeof(u_int32_t))) > MCLBYTES)
1356			replylen = MCLBYTES; /* XXX: will truncate pkt later */
1357		break;
1358	default:
1359		/*
1360		 * XXX: We must return a reply with the ICMP6 code
1361		 * `unknown Qtype' in this case.  However we regard the case
1362		 * as an FQDN query for backward compatibility.
1363		 * Older versions set a random value to this field,
1364		 * so it rarely varies in the defined qtypes.
1365		 * But the mechanism is not reliable...
1366		 * maybe we should obsolete older versions.
1367		 */
1368		qtype = NI_QTYPE_FQDN;
1369		/* XXX will append an mbuf */
1370		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1371		oldfqdn++;
1372		break;
1373	}
1374
1375	/* allocate an mbuf to reply. */
1376	MGETHDR(n, M_DONTWAIT, m->m_type);
1377	if (n == NULL) {
1378		m_freem(m);
1379		return (NULL);
1380	}
1381	M_COPY_PKTHDR(n, m); /* just for rcvif */
1382	if (replylen > MHLEN) {
1383		if (replylen > MCLBYTES) {
1384			/*
1385			 * XXX: should we try to allocate more? But MCLBYTES
1386			 * is probably much larger than IPV6_MMTU...
1387			 */
1388			goto bad;
1389		}
1390		MCLGET(n, M_DONTWAIT);
1391		if ((n->m_flags & M_EXT) == 0) {
1392			goto bad;
1393		}
1394	}
1395	n->m_pkthdr.len = n->m_len = replylen;
1396
1397	/* copy mbuf header and IPv6 + Node Information base headers */
1398	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
1399	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
1400	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
1401
1402	/* qtype dependent procedure */
1403	switch (qtype) {
1404	case NI_QTYPE_NOOP:
1405		nni6->ni_code = ICMP6_NI_SUCCESS;
1406		nni6->ni_flags = 0;
1407		break;
1408	case NI_QTYPE_SUPTYPES:
1409	{
1410		u_int32_t v;
1411		nni6->ni_code = ICMP6_NI_SUCCESS;
1412		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
1413		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
1414		v = (u_int32_t)htonl(0x0000000f);
1415		bcopy(&v, nni6 + 1, sizeof(u_int32_t));
1416		break;
1417	}
1418	case NI_QTYPE_FQDN:
1419		nni6->ni_code = ICMP6_NI_SUCCESS;
1420		fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
1421						sizeof(struct ip6_hdr) +
1422						sizeof(struct icmp6_nodeinfo));
1423		nni6->ni_flags = 0; /* XXX: meaningless TTL */
1424		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
1425		/*
1426		 * XXX do we really have FQDN in variable "hostname"?
1427		 */
1428		n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn);
1429		if (n->m_next == NULL)
1430			goto bad;
1431		/* XXX we assume that n->m_next is not a chain */
1432		if (n->m_next->m_next != NULL)
1433			goto bad;
1434		n->m_pkthdr.len += n->m_next->m_len;
1435		break;
1436	case NI_QTYPE_NODEADDR:
1437	{
1438		int lenlim, copied;
1439
1440		nni6->ni_code = ICMP6_NI_SUCCESS;
1441		n->m_pkthdr.len = n->m_len =
1442		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
1443		lenlim = M_TRAILINGSPACE(n);
1444		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
1445		/* XXX: reset mbuf length */
1446		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
1447			sizeof(struct icmp6_nodeinfo) + copied;
1448		break;
1449	}
1450	default:
1451		break;		/* XXX impossible! */
1452	}
1453
1454	nni6->ni_type = ICMP6_NI_REPLY;
1455	m_freem(m);
1456	return (n);
1457
1458  bad:
1459	m_freem(m);
1460	if (n)
1461		m_freem(n);
1462	return (NULL);
1463}
1464#undef hostnamelen
1465
1466#define isupper(x) ('A' <= (x) && (x) <= 'Z')
1467#define isalpha(x) (('A' <= (x) && (x) <= 'Z') || ('a' <= (x) && (x) <= 'z'))
1468#define isalnum(x) (isalpha(x) || ('0' <= (x) && (x) <= '9'))
1469#define tolower(x) (isupper(x) ? (x) + 'a' - 'A' : (x))
1470
1471/*
1472 * make a mbuf with DNS-encoded string.  no compression support.
1473 *
1474 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
1475 * treated as truncated name (two \0 at the end).  this is a wild guess.
1476 */
1477static struct mbuf *
1478ni6_nametodns(name, namelen, old)
1479	const char *name;
1480	int namelen;
1481	int old;	/* return pascal string if non-zero */
1482{
1483	struct mbuf *m;
1484	char *cp, *ep;
1485	const char *p, *q;
1486	int i, len, nterm;
1487
1488	if (old)
1489		len = namelen + 1;
1490	else
1491		len = MCLBYTES;
1492
1493	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
1494	MGET(m, M_DONTWAIT, MT_DATA);
1495	if (m && len > MLEN) {
1496		MCLGET(m, M_DONTWAIT);
1497		if ((m->m_flags & M_EXT) == 0)
1498			goto fail;
1499	}
1500	if (!m)
1501		goto fail;
1502	m->m_next = NULL;
1503
1504	if (old) {
1505		m->m_len = len;
1506		*mtod(m, char *) = namelen;
1507		bcopy(name, mtod(m, char *) + 1, namelen);
1508		return m;
1509	} else {
1510		m->m_len = 0;
1511		cp = mtod(m, char *);
1512		ep = mtod(m, char *) + M_TRAILINGSPACE(m);
1513
1514		/* if not certain about my name, return empty buffer */
1515		if (namelen == 0)
1516			return m;
1517
1518		/*
1519		 * guess if it looks like shortened hostname, or FQDN.
1520		 * shortened hostname needs two trailing "\0".
1521		 */
1522		i = 0;
1523		for (p = name; p < name + namelen; p++) {
1524			if (*p && *p == '.')
1525				i++;
1526		}
1527		if (i < 2)
1528			nterm = 2;
1529		else
1530			nterm = 1;
1531
1532		p = name;
1533		while (cp < ep && p < name + namelen) {
1534			i = 0;
1535			for (q = p; q < name + namelen && *q && *q != '.'; q++)
1536				i++;
1537			/* result does not fit into mbuf */
1538			if (cp + i + 1 >= ep)
1539				goto fail;
1540			/*
1541			 * DNS label length restriction, RFC1035 page 8.
1542			 * "i == 0" case is included here to avoid returning
1543			 * 0-length label on "foo..bar".
1544			 */
1545			if (i <= 0 || i >= 64)
1546				goto fail;
1547			*cp++ = i;
1548			if (!isalpha(p[0]) || !isalnum(p[i - 1]))
1549				goto fail;
1550			while (i > 0) {
1551				if (!isalnum(*p) && *p != '-')
1552					goto fail;
1553				if (isupper(*p)) {
1554					*cp++ = tolower(*p);
1555					p++;
1556				} else
1557					*cp++ = *p++;
1558				i--;
1559			}
1560			p = q;
1561			if (p < name + namelen && *p == '.')
1562				p++;
1563		}
1564		/* termination */
1565		if (cp + nterm >= ep)
1566			goto fail;
1567		while (nterm-- > 0)
1568			*cp++ = '\0';
1569		m->m_len = cp - mtod(m, char *);
1570		return m;
1571	}
1572
1573	panic("should not reach here");
1574	/* NOTREACHED */
1575
1576 fail:
1577	if (m)
1578		m_freem(m);
1579	return NULL;
1580}
1581
/*
 * Check whether two DNS-encoded names (sequences of length-prefixed
 * labels) match.  Returns 1 on match and 0 otherwise.
 *
 * Byte-identical buffers always match.  Otherwise both names must be at
 * least two bytes long and end with a \0 byte.  A name in truncated form
 * (with \0\0 at the end) matches any name that starts with the same labels.
 * Labels are compared ignoring ASCII case, as DNS names are
 * case-insensitive.  No compression support.
 */
static int
ni6_dnsmatch(a, alen, b, blen)
	const char *a;
	int alen;
	const char *b;
	int blen;
{
	const char *a0, *b0;
	char ca, cb;
	int i, l;

	/* simplest case - need validation? */
	if (alen == blen && bcmp(a, b, alen) == 0)
		return 1;

	a0 = a;
	b0 = b;

	/* termination is mandatory */
	if (alen < 2 || blen < 2)
		return 0;
	if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
		return 0;
	/* leave the final terminator out of the label walk */
	alen--;
	blen--;

	while (a - a0 < alen && b - b0 < blen) {
		/* length bytes with the high bit set are never valid */
		if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
			return 0;
		/* we don't support compression yet */
		if (a[0] >= 64 || b[0] >= 64)
			return 0;

		/* truncated case: an empty label right before the terminator */
		if (a[0] == 0 && a - a0 == alen - 1)
			return 1;
		if (b[0] == 0 && b - b0 == blen - 1)
			return 1;
		if (a[0] == 0 || b[0] == 0)
			return 0;

		if (a[0] != b[0])
			return 0;
		l = a[0];
		if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
			return 0;

		/* compare the label contents, folding ASCII upper case */
		for (i = 1; i <= l; i++) {
			ca = a[i];
			cb = b[i];
			if ('A' <= ca && ca <= 'Z')
				ca += 'a' - 'A';
			if ('A' <= cb && cb <= 'Z')
				cb += 'a' - 'A';
			if (ca != cb)
				return 0;
		}

		a += 1 + l;
		b += 1 + l;
	}

	/* both names must have been consumed completely */
	if (a - a0 == alen && b - b0 == blen)
		return 1;
	else
		return 0;
}
1647
1648/*
1649 * calculate the number of addresses to be returned in the node info reply.
1650 */
1651static int
1652ni6_addrs(ni6, m, ifpp, subj)
1653	struct icmp6_nodeinfo *ni6;
1654	struct mbuf *m;
1655	struct ifnet **ifpp;
1656	char *subj;
1657{
1658	struct ifnet *ifp;
1659	struct in6_ifaddr *ifa6;
1660	struct ifaddr *ifa;
1661	struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */
1662	int addrs = 0, addrsofif, iffound = 0;
1663	int niflags = ni6->ni_flags;
1664
1665	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
1666		switch (ni6->ni_code) {
1667		case ICMP6_NI_SUBJ_IPV6:
1668			if (subj == NULL) /* must be impossible... */
1669				return (0);
1670			subj_ip6 = (struct sockaddr_in6 *)subj;
1671			break;
1672		default:
1673			/*
1674			 * XXX: we only support IPv6 subject address for
1675			 * this Qtype.
1676			 */
1677			return (0);
1678		}
1679	}
1680
1681	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
1682	{
1683		addrsofif = 0;
1684		for (ifa = ifp->if_addrlist.tqh_first; ifa;
1685		     ifa = ifa->ifa_list.tqe_next)
1686		{
1687			if (ifa->ifa_addr->sa_family != AF_INET6)
1688				continue;
1689			ifa6 = (struct in6_ifaddr *)ifa;
1690
1691			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
1692			    IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr,
1693					       &ifa6->ia_addr.sin6_addr))
1694				iffound = 1;
1695
1696			/*
1697			 * IPv4-mapped addresses can only be returned by a
1698			 * Node Information proxy, since they represent
1699			 * addresses of IPv4-only nodes, which perforce do
1700			 * not implement this protocol.
1701			 * [icmp-name-lookups-07, Section 5.4]
1702			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
1703			 * this function at this moment.
1704			 */
1705
1706			/* What do we have to do about ::1? */
1707			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1708			case IPV6_ADDR_SCOPE_LINKLOCAL:
1709				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1710					continue;
1711				break;
1712			case IPV6_ADDR_SCOPE_SITELOCAL:
1713				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1714					continue;
1715				break;
1716			case IPV6_ADDR_SCOPE_GLOBAL:
1717				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1718					continue;
1719				break;
1720			default:
1721				continue;
1722			}
1723
1724			/*
1725			 * check if anycast is okay.
1726			 * XXX: just experimental.  not in the spec.
1727			 */
1728			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1729			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1730				continue; /* we need only unicast addresses */
1731
1732			addrsofif++; /* count the address */
1733		}
1734		if (iffound) {
1735			*ifpp = ifp;
1736			return (addrsofif);
1737		}
1738
1739		addrs += addrsofif;
1740	}
1741
1742	return (addrs);
1743}
1744
1745static int
1746ni6_store_addrs(ni6, nni6, ifp0, resid)
1747	struct icmp6_nodeinfo *ni6, *nni6;
1748	struct ifnet *ifp0;
1749	int resid;
1750{
1751	struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&ifnet);
1752	struct in6_ifaddr *ifa6;
1753	struct ifaddr *ifa;
1754	struct ifnet *ifp_dep = NULL;
1755	int copied = 0, allow_deprecated = 0;
1756	u_char *cp = (u_char *)(nni6 + 1);
1757	int niflags = ni6->ni_flags;
1758	u_int32_t ltime;
1759
1760	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
1761		return (0);	/* needless to copy */
1762
1763  again:
1764
1765	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list))
1766	{
1767		for (ifa = ifp->if_addrlist.tqh_first; ifa;
1768		     ifa = ifa->ifa_list.tqe_next)
1769		{
1770			if (ifa->ifa_addr->sa_family != AF_INET6)
1771				continue;
1772			ifa6 = (struct in6_ifaddr *)ifa;
1773
1774			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
1775			    allow_deprecated == 0) {
1776				/*
1777				 * prefererred address should be put before
1778				 * deprecated addresses.
1779				 */
1780
1781				/* record the interface for later search */
1782				if (ifp_dep == NULL)
1783					ifp_dep = ifp;
1784
1785				continue;
1786			}
1787			else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
1788				 allow_deprecated != 0)
1789				continue; /* we now collect deprecated addrs */
1790
1791			/* What do we have to do about ::1? */
1792			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1793			case IPV6_ADDR_SCOPE_LINKLOCAL:
1794				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1795					continue;
1796				break;
1797			case IPV6_ADDR_SCOPE_SITELOCAL:
1798				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1799					continue;
1800				break;
1801			case IPV6_ADDR_SCOPE_GLOBAL:
1802				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1803					continue;
1804				break;
1805			default:
1806				continue;
1807			}
1808
1809			/*
1810			 * check if anycast is okay.
1811			 * XXX: just experimental.  not in the spec.
1812			 */
1813			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1814			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1815				continue;
1816
1817			/* now we can copy the address */
1818			if (resid < sizeof(struct in6_addr) +
1819			    sizeof(u_int32_t)) {
1820				/*
1821				 * We give up much more copy.
1822				 * Set the truncate flag and return.
1823				 */
1824				nni6->ni_flags |=
1825					NI_NODEADDR_FLAG_TRUNCATE;
1826				return (copied);
1827			}
1828
1829			/*
1830			 * Set the TTL of the address.
1831			 * The TTL value should be one of the following
1832			 * according to the specification:
1833			 *
1834			 * 1. The remaining lifetime of a DHCP lease on the
1835			 *    address, or
1836			 * 2. The remaining Valid Lifetime of a prefix from
1837			 *    which the address was derived through Stateless
1838			 *    Autoconfiguration.
1839			 *
1840			 * Note that we currently do not support stateful
1841			 * address configuration by DHCPv6, so the former
1842			 * case can't happen.
1843			 *
1844			 * TTL must be 2^31 > TTL >= 0.
1845			 */
1846			if (ifa6->ia6_lifetime.ia6t_expire == 0)
1847				ltime = ND6_INFINITE_LIFETIME;
1848			else {
1849				if (ifa6->ia6_lifetime.ia6t_expire >
1850				    time.tv_sec)
1851					ltime = ifa6->ia6_lifetime.ia6t_expire - time.tv_sec;
1852				else
1853					ltime = 0;
1854			}
1855			if (ltime > 0x7fffffff)
1856				ltime = 0x7fffffff;
1857			ltime = htonl(ltime);
1858
1859			bcopy(&ltime, cp, sizeof(u_int32_t));
1860			cp += sizeof(u_int32_t);
1861
1862			/* copy the address itself */
1863			bcopy(&ifa6->ia_addr.sin6_addr, cp,
1864			      sizeof(struct in6_addr));
1865			/* XXX: KAME link-local hack; remove ifindex */
1866			if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
1867				((struct in6_addr *)cp)->s6_addr16[1] = 0;
1868			cp += sizeof(struct in6_addr);
1869
1870			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
1871			copied += (sizeof(struct in6_addr) +
1872				   sizeof(u_int32_t));
1873		}
1874		if (ifp0)	/* we need search only on the specified IF */
1875			break;
1876	}
1877
1878	if (allow_deprecated == 0 && ifp_dep != NULL) {
1879		ifp = ifp_dep;
1880		allow_deprecated = 1;
1881
1882		goto again;
1883	}
1884
1885	return (copied);
1886}
1887
1888/*
1889 * XXX almost dup'ed code with rip6_input.
1890 */
1891static int
1892icmp6_rip6_input(mp, off)
1893	struct	mbuf **mp;
1894	int	off;
1895{
1896	struct mbuf *m = *mp;
1897	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1898	struct in6pcb *in6p;
1899	struct in6pcb *last = NULL;
1900	struct sockaddr_in6 rip6src;
1901	struct icmp6_hdr *icmp6;
1902	struct mbuf *opts = NULL;
1903
1904	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
1905	if (icmp6 == NULL) {
1906		/* m is already reclaimed */
1907		return IPPROTO_DONE;
1908	}
1909
1910	bzero(&rip6src, sizeof(rip6src));
1911	rip6src.sin6_len = sizeof(struct sockaddr_in6);
1912	rip6src.sin6_family = AF_INET6;
1913	/* KAME hack: recover scopeid */
1914	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);
1915
1916	for (in6p = rawin6pcb.in6p_next;
1917	     in6p != &rawin6pcb; in6p = in6p->in6p_next)
1918	{
1919		if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
1920			continue;
1921		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
1922		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
1923			continue;
1924		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
1925		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
1926			continue;
1927		if (in6p->in6p_icmp6filt
1928		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
1929				 in6p->in6p_icmp6filt))
1930			continue;
1931		if (last) {
1932			struct	mbuf *n;
1933			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
1934				if (last->in6p_flags & IN6P_CONTROLOPTS)
1935					ip6_savecontrol(last, &opts, ip6, n);
1936				/* strip intermediate headers */
1937				m_adj(n, off);
1938				if (sbappendaddr(&last->in6p_socket->so_rcv,
1939						 (struct sockaddr *)&rip6src,
1940						 n, opts) == 0) {
1941					/* should notify about lost packet */
1942					m_freem(n);
1943					if (opts)
1944						m_freem(opts);
1945				} else
1946					sorwakeup(last->in6p_socket);
1947				opts = NULL;
1948			}
1949		}
1950		last = in6p;
1951	}
1952	if (last) {
1953		if (last->in6p_flags & IN6P_CONTROLOPTS)
1954			ip6_savecontrol(last, &opts, ip6, m);
1955		/* strip intermediate headers */
1956		m_adj(m, off);
1957		if (sbappendaddr(&last->in6p_socket->so_rcv,
1958				(struct sockaddr *)&rip6src, m, opts) == 0) {
1959			m_freem(m);
1960			if (opts)
1961				m_freem(opts);
1962		} else
1963			sorwakeup(last->in6p_socket);
1964	} else {
1965		m_freem(m);
1966		ip6stat.ip6s_delivered--;
1967	}
1968	return IPPROTO_DONE;
1969}
1970
1971/*
1972 * Reflect the ip6 packet back to the source.
1973 * OFF points to the icmp6 header, counted from the top of the mbuf.
1974 *
1975 * Note: RFC 1885 required that an echo reply should be truncated if it
1976 * did not fit in with (return) path MTU, and KAME code supported the
1977 * behavior.  However, as a clarification after the RFC, this limitation
1978 * was removed in a revised version of the spec, RFC 2463.  We had kept the
1979 * old behavior, with a (non-default) ifdef block, while the new version of
1980 * the spec was an internet-draft status, and even after the new RFC was
1981 * published.  But it would rather make sense to clean the obsoleted part
1982 * up, and to make the code simpler at this stage.
1983 */
1984void
1985icmp6_reflect(m, off)
1986	struct	mbuf *m;
1987	size_t off;
1988{
1989	struct ip6_hdr *ip6;
1990	struct icmp6_hdr *icmp6;
1991	struct in6_ifaddr *ia;
1992	struct in6_addr t, *src = 0;
1993	int plen;
1994	int type, code;
1995	struct ifnet *outif = NULL;
1996	struct sockaddr_in6 sa6_src, sa6_dst;
1997
1998	/* too short to reflect */
1999	if (off < sizeof(struct ip6_hdr)) {
2000		nd6log((LOG_DEBUG,
2001		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
2002		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
2003		    __FILE__, __LINE__));
2004		goto bad;
2005	}
2006
2007	/*
2008	 * If there are extra headers between IPv6 and ICMPv6, strip
2009	 * off that header first.
2010	 */
2011#ifdef DIAGNOSTIC
2012	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
2013		panic("assumption failed in icmp6_reflect");
2014#endif
2015	if (off > sizeof(struct ip6_hdr)) {
2016		size_t l;
2017		struct ip6_hdr nip6;
2018
2019		l = off - sizeof(struct ip6_hdr);
2020		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
2021		m_adj(m, l);
2022		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2023		if (m->m_len < l) {
2024			if ((m = m_pullup(m, l)) == NULL)
2025				return;
2026		}
2027		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
2028	} else /* off == sizeof(struct ip6_hdr) */ {
2029		size_t l;
2030		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2031		if (m->m_len < l) {
2032			if ((m = m_pullup(m, l)) == NULL)
2033				return;
2034		}
2035	}
2036	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
2037	ip6 = mtod(m, struct ip6_hdr *);
2038	ip6->ip6_nxt = IPPROTO_ICMPV6;
2039	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
2040	type = icmp6->icmp6_type; /* keep type for statistics */
2041	code = icmp6->icmp6_code; /* ditto. */
2042
2043	t = ip6->ip6_dst;
2044	/*
2045	 * ip6_input() drops a packet if its src is multicast.
2046	 * So, the src is never multicast.
2047	 */
2048	ip6->ip6_dst = ip6->ip6_src;
2049
2050	/*
2051	 * XXX: make sure to embed scope zone information, using
2052	 * already embedded IDs or the received interface (if any).
2053	 * Note that rcvif may be NULL.
2054	 * TODO: scoped routing case (XXX).
2055	 */
2056	bzero(&sa6_src, sizeof(sa6_src));
2057	sa6_src.sin6_family = AF_INET6;
2058	sa6_src.sin6_len = sizeof(sa6_src);
2059	sa6_src.sin6_addr = ip6->ip6_dst;
2060	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
2061	in6_embedscope(&sa6_src.sin6_addr, &sa6_src, NULL, NULL);
2062	ip6->ip6_dst = sa6_src.sin6_addr;
2063
2064	bzero(&sa6_dst, sizeof(sa6_dst));
2065	sa6_dst.sin6_family = AF_INET6;
2066	sa6_dst.sin6_len = sizeof(sa6_dst);
2067	sa6_dst.sin6_addr = t;
2068	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
2069	in6_embedscope(&t, &sa6_dst, NULL, NULL);
2070
2071	/*
2072	 * If the incoming packet was addressed directly to us (i.e. unicast),
2073	 * use dst as the src for the reply.
2074	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
2075	 * (for example) when we encounter an error while forwarding procedure
2076	 * destined to a duplicated address of ours.
2077	 */
2078	for (ia = in6_ifaddr; ia; ia = ia->ia_next)
2079		if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) &&
2080		    (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
2081			src = &t;
2082			break;
2083		}
2084	if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
2085		/*
2086		 * This is the case if the dst is our link-local address
2087		 * and the sender is also ourselves.
2088		 */
2089		src = &t;
2090	}
2091
2092	if (src == 0) {
2093		int e;
2094		struct route_in6 ro;
2095
2096		/*
2097		 * This case matches to multicasts, our anycast, or unicasts
2098		 * that we do not own.  Select a source address based on the
2099		 * source address of the erroneous packet.
2100		 */
2101		bzero(&ro, sizeof(ro));
2102		src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e);
2103		if (ro.ro_rt) { /* XXX: see comments in icmp6_mtudisc_update */
2104			RTFREE(ro.ro_rt); /* XXX: we could use this */
2105		}
2106		if (src == NULL) {
2107			nd6log((LOG_DEBUG,
2108			    "icmp6_reflect: source can't be determined: "
2109			    "dst=%s, error=%d\n",
2110			    ip6_sprintf(&sa6_src.sin6_addr), e));
2111			goto bad;
2112		}
2113	}
2114
2115	ip6->ip6_src = *src;
2116
2117	ip6->ip6_flow = 0;
2118	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2119	ip6->ip6_vfc |= IPV6_VERSION;
2120	ip6->ip6_nxt = IPPROTO_ICMPV6;
2121	if (m->m_pkthdr.rcvif) {
2122		/* XXX: This may not be the outgoing interface */
2123		ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
2124	} else
2125		ip6->ip6_hlim = ip6_defhlim;
2126
2127	icmp6->icmp6_cksum = 0;
2128	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
2129					sizeof(struct ip6_hdr), plen);
2130
2131	/*
2132	 * XXX option handling
2133	 */
2134
2135	m->m_flags &= ~(M_BCAST|M_MCAST);
2136#ifdef IPSEC
2137	/* Don't lookup socket */
2138	(void)ipsec_setsocket(m, NULL);
2139#endif /* IPSEC */
2140
2141	/*
2142	 * To avoid a "too big" situation at an intermediate router
2143	 * and the path MTU discovery process, specify the IPV6_MINMTU flag.
2144	 * Note that only echo and node information replies are affected,
2145	 * since the length of ICMP6 errors is limited to the minimum MTU.
2146	 */
2147	if (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, &outif) != 0 && outif)
2148		icmp6_ifstat_inc(outif, ifs6_out_error);
2149
2150	if (outif)
2151		icmp6_ifoutstat_inc(outif, type, code);
2152
2153	return;
2154
2155 bad:
2156	m_freem(m);
2157	return;
2158}
2159
/*
 * ICMPv6 fast timeout hook.
 * The only work done here is to forward the tick to mld6_fasttimeo().
 */
void
icmp6_fasttimo()
{
	mld6_fasttimeo();
}
2166
/*
 * Format the three addresses of a redirect (sender, redirect destination,
 * redirect target) into one diagnostic string for log messages.
 *
 * The result lives in a function-static buffer, so it is overwritten by the
 * next call; callers must consume it before calling again.
 */
static const char *
icmp6_redirect_diag(src6, dst6, tgt6)
	struct in6_addr *src6;
	struct in6_addr *dst6;
	struct in6_addr *tgt6;
{
	static char diag[1024];

	snprintf(diag, sizeof(diag),
	    "(src=%s dst=%s tgt=%s)",
	    ip6_sprintf(src6),
	    ip6_sprintf(dst6),
	    ip6_sprintf(tgt6));

	return diag;
}
2178
2179void
2180icmp6_redirect_input(m, off)
2181	struct mbuf *m;
2182	int off;
2183{
2184	struct ifnet *ifp = m->m_pkthdr.rcvif;
2185	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
2186	struct nd_redirect *nd_rd;
2187	int icmp6len = ntohs(ip6->ip6_plen);
2188	char *lladdr = NULL;
2189	int lladdrlen = 0;
2190	u_char *redirhdr = NULL;
2191	int redirhdrlen = 0;
2192	struct rtentry *rt = NULL;
2193	int is_router;
2194	int is_onlink;
2195	struct in6_addr src6 = ip6->ip6_src;
2196	struct in6_addr redtgt6;
2197	struct in6_addr reddst6;
2198	union nd_opts ndopts;
2199
2200	if (!ifp)
2201		return;
2202
2203	/* XXX if we are router, we don't update route by icmp6 redirect */
2204	if (ip6_forwarding)
2205		goto freeit;
2206	if (!icmp6_rediraccept)
2207		goto freeit;
2208
2209	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
2210	if (nd_rd == NULL) {
2211		icmp6stat.icp6s_tooshort++;
2212		return;
2213	}
2214	redtgt6 = nd_rd->nd_rd_target;
2215	reddst6 = nd_rd->nd_rd_dst;
2216
2217	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2218		redtgt6.s6_addr16[1] = htons(ifp->if_index);
2219	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
2220		reddst6.s6_addr16[1] = htons(ifp->if_index);
2221
2222	/* validation */
2223	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
2224		nd6log((LOG_ERR,
2225			"ICMP6 redirect sent from %s rejected; "
2226			"must be from linklocal\n", ip6_sprintf(&src6)));
2227		goto bad;
2228	}
2229	if (ip6->ip6_hlim != 255) {
2230		nd6log((LOG_ERR,
2231			"ICMP6 redirect sent from %s rejected; "
2232			"hlim=%d (must be 255)\n",
2233			ip6_sprintf(&src6), ip6->ip6_hlim));
2234		goto bad;
2235	}
2236    {
2237	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
2238	struct sockaddr_in6 sin6;
2239	struct in6_addr *gw6;
2240
2241	bzero(&sin6, sizeof(sin6));
2242	sin6.sin6_family = AF_INET6;
2243	sin6.sin6_len = sizeof(struct sockaddr_in6);
2244	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
2245	rt = rtalloc1((struct sockaddr *)&sin6, 0);
2246	if (rt) {
2247		if (rt->rt_gateway == NULL ||
2248		    rt->rt_gateway->sa_family != AF_INET6) {
2249			nd6log((LOG_ERR,
2250			    "ICMP6 redirect rejected; no route "
2251			    "with inet6 gateway found for redirect dst: %s\n",
2252			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2253			RTFREE(rt);
2254			goto bad;
2255		}
2256
2257		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
2258		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
2259			nd6log((LOG_ERR,
2260				"ICMP6 redirect rejected; "
2261				"not equal to gw-for-src=%s (must be same): "
2262				"%s\n",
2263				ip6_sprintf(gw6),
2264				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2265			RTFREE(rt);
2266			goto bad;
2267		}
2268	} else {
2269		nd6log((LOG_ERR,
2270			"ICMP6 redirect rejected; "
2271			"no route found for redirect dst: %s\n",
2272			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2273		goto bad;
2274	}
2275	RTFREE(rt);
2276	rt = NULL;
2277    }
2278	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
2279		nd6log((LOG_ERR,
2280			"ICMP6 redirect rejected; "
2281			"redirect dst must be unicast: %s\n",
2282			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2283		goto bad;
2284	}
2285
2286	is_router = is_onlink = 0;
2287	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2288		is_router = 1;	/* router case */
2289	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
2290		is_onlink = 1;	/* on-link destination case */
2291	if (!is_router && !is_onlink) {
2292		nd6log((LOG_ERR,
2293			"ICMP6 redirect rejected; "
2294			"neither router case nor onlink case: %s\n",
2295			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2296		goto bad;
2297	}
2298	/* validation passed */
2299
2300	icmp6len -= sizeof(*nd_rd);
2301	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
2302	if (nd6_options(&ndopts) < 0) {
2303		nd6log((LOG_INFO, "icmp6_redirect_input: "
2304			"invalid ND option, rejected: %s\n",
2305			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2306		/* nd6_options have incremented stats */
2307		goto freeit;
2308	}
2309
2310	if (ndopts.nd_opts_tgt_lladdr) {
2311		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
2312		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
2313	}
2314
2315	if (ndopts.nd_opts_rh) {
2316		redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
2317		redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */
2318	}
2319
2320	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
2321		nd6log((LOG_INFO,
2322			"icmp6_redirect_input: lladdrlen mismatch for %s "
2323			"(if %d, icmp6 packet %d): %s\n",
2324			ip6_sprintf(&redtgt6), ifp->if_addrlen, lladdrlen - 2,
2325			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2326		goto bad;
2327	}
2328
2329	/* RFC 2461 8.3 */
2330	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
2331			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
2332
2333	if (!is_onlink) {	/* better router case.  perform rtredirect. */
2334		/* perform rtredirect */
2335		struct sockaddr_in6 sdst;
2336		struct sockaddr_in6 sgw;
2337		struct sockaddr_in6 ssrc;
2338		unsigned long rtcount;
2339		struct rtentry *newrt = NULL;
2340
2341		/*
2342		 * do not install redirect route, if the number of entries
2343		 * is too much (> hiwat).  note that, the node (= host) will
2344		 * work just fine even if we do not install redirect route
2345		 * (there will be additional hops, though).
2346		 */
2347		rtcount = rt_timer_count(icmp6_redirect_timeout_q);
2348		if (0 <= icmp6_redirect_hiwat && rtcount > icmp6_redirect_hiwat)
2349			return;
2350		else if (0 <= icmp6_redirect_lowat &&
2351		    rtcount > icmp6_redirect_lowat) {
2352			/*
2353			 * XXX nuke a victim, install the new one.
2354			 */
2355		}
2356
2357		bzero(&sdst, sizeof(sdst));
2358		bzero(&sgw, sizeof(sgw));
2359		bzero(&ssrc, sizeof(ssrc));
2360		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
2361		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
2362			sizeof(struct sockaddr_in6);
2363		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
2364		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2365		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
2366		rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
2367			   (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
2368			   (struct sockaddr *)&ssrc,
2369			   &newrt);
2370
2371		if (newrt) {
2372			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
2373			    icmp6_redirect_timeout_q);
2374			rtfree(newrt);
2375		}
2376	}
2377	/* finally update cached route in each socket via pfctlinput */
2378	{
2379		struct sockaddr_in6 sdst;
2380
2381		bzero(&sdst, sizeof(sdst));
2382		sdst.sin6_family = AF_INET6;
2383		sdst.sin6_len = sizeof(struct sockaddr_in6);
2384		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2385		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
2386#ifdef IPSEC
2387		key_sa_routechange((struct sockaddr *)&sdst);
2388#endif
2389	}
2390
2391 freeit:
2392	m_freem(m);
2393	return;
2394
2395 bad:
2396	icmp6stat.icp6s_badredirect++;
2397	m_freem(m);
2398}
2399
2400void
2401icmp6_redirect_output(m0, rt)
2402	struct mbuf *m0;
2403	struct rtentry *rt;
2404{
2405	struct ifnet *ifp;	/* my outgoing interface */
2406	struct in6_addr *ifp_ll6;
2407	struct in6_addr *nexthop;
2408	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
2409	struct mbuf *m = NULL;	/* newly allocated one */
2410	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
2411	struct nd_redirect *nd_rd;
2412	size_t maxlen;
2413	u_char *p;
2414	struct sockaddr_in6 src_sa;
2415
2416	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
2417
2418	/* if we are not router, we don't send icmp6 redirect */
2419	if (!ip6_forwarding)
2420		goto fail;
2421
2422	/* sanity check */
2423	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
2424		goto fail;
2425
2426	/*
2427	 * Address check:
2428	 *  the source address must identify a neighbor, and
2429	 *  the destination address must not be a multicast address
2430	 *  [RFC 2461, sec 8.2]
2431	 */
2432	sip6 = mtod(m0, struct ip6_hdr *);
2433	bzero(&src_sa, sizeof(src_sa));
2434	src_sa.sin6_family = AF_INET6;
2435	src_sa.sin6_len = sizeof(src_sa);
2436	src_sa.sin6_addr = sip6->ip6_src;
2437	/* we don't currently use sin6_scope_id, but eventually use it */
2438	src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src);
2439	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
2440		goto fail;
2441	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
2442		goto fail;	/* what should we do here? */
2443
2444	/* rate limit */
2445	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
2446		goto fail;
2447
2448	/*
2449	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
2450	 * we almost always ask for an mbuf cluster for simplicity.
2451	 * (MHLEN < IPV6_MMTU is almost always true)
2452	 */
2453#if IPV6_MMTU >= MCLBYTES
2454# error assumption failed about IPV6_MMTU and MCLBYTES
2455#endif
2456	MGETHDR(m, M_DONTWAIT, MT_HEADER);
2457	if (m && IPV6_MMTU >= MHLEN)
2458		MCLGET(m, M_DONTWAIT);
2459	if (!m)
2460		goto fail;
2461	m->m_pkthdr.rcvif = NULL;
2462	m->m_len = 0;
2463	maxlen = M_TRAILINGSPACE(m);
2464	maxlen = min(IPV6_MMTU, maxlen);
2465	/* just for safety */
2466	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
2467	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
2468		goto fail;
2469	}
2470
2471	{
2472		/* get ip6 linklocal address for ifp(my outgoing interface). */
2473		struct in6_ifaddr *ia;
2474		if ((ia = in6ifa_ifpforlinklocal(ifp,
2475						 IN6_IFF_NOTREADY|
2476						 IN6_IFF_ANYCAST)) == NULL)
2477			goto fail;
2478		ifp_ll6 = &ia->ia_addr.sin6_addr;
2479	}
2480
2481	/* get ip6 linklocal address for the router. */
2482	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
2483		struct sockaddr_in6 *sin6;
2484		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
2485		nexthop = &sin6->sin6_addr;
2486		if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
2487			nexthop = NULL;
2488	} else
2489		nexthop = NULL;
2490
2491	/* ip6 */
2492	ip6 = mtod(m, struct ip6_hdr *);
2493	ip6->ip6_flow = 0;
2494	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2495	ip6->ip6_vfc |= IPV6_VERSION;
2496	/* ip6->ip6_plen will be set later */
2497	ip6->ip6_nxt = IPPROTO_ICMPV6;
2498	ip6->ip6_hlim = 255;
2499	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
2500	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
2501	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
2502
2503	/* ND Redirect */
2504	nd_rd = (struct nd_redirect *)(ip6 + 1);
2505	nd_rd->nd_rd_type = ND_REDIRECT;
2506	nd_rd->nd_rd_code = 0;
2507	nd_rd->nd_rd_reserved = 0;
2508	if (rt->rt_flags & RTF_GATEWAY) {
2509		/*
2510		 * nd_rd->nd_rd_target must be a link-local address in
2511		 * better router cases.
2512		 */
2513		if (!nexthop)
2514			goto fail;
2515		bcopy(nexthop, &nd_rd->nd_rd_target,
2516		      sizeof(nd_rd->nd_rd_target));
2517		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2518		      sizeof(nd_rd->nd_rd_dst));
2519	} else {
2520		/* make sure redtgt == reddst */
2521		nexthop = &sip6->ip6_dst;
2522		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
2523		      sizeof(nd_rd->nd_rd_target));
2524		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2525		      sizeof(nd_rd->nd_rd_dst));
2526	}
2527
2528	p = (u_char *)(nd_rd + 1);
2529
2530	{
2531		/* target lladdr option */
2532		struct rtentry *rt_nexthop = NULL;
2533		int len;
2534		struct sockaddr_dl *sdl;
2535		struct nd_opt_hdr *nd_opt;
2536		char *lladdr;
2537
2538		rt_nexthop = nd6_lookup(nexthop, 0, ifp);
2539		if (!rt_nexthop)
2540			goto nolladdropt;
2541		len = sizeof(*nd_opt) + ifp->if_addrlen;
2542		len = (len + 7) & ~7;	/* round by 8 */
2543		/* safety check */
2544		if (len + (p - (u_char *)ip6) > maxlen)
2545			goto nolladdropt;
2546		if (!(rt_nexthop->rt_flags & RTF_GATEWAY) &&
2547		    (rt_nexthop->rt_flags & RTF_LLINFO) &&
2548		    (rt_nexthop->rt_gateway->sa_family == AF_LINK) &&
2549		    (sdl = (struct sockaddr_dl *)rt_nexthop->rt_gateway) &&
2550		    sdl->sdl_alen) {
2551			nd_opt = (struct nd_opt_hdr *)p;
2552			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
2553			nd_opt->nd_opt_len = len >> 3;
2554			lladdr = (char *)(nd_opt + 1);
2555			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
2556			p += len;
2557		}
2558	}
2559  nolladdropt:;
2560
2561	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2562
2563	/* just to be safe */
2564	if (m0->m_flags & M_DECRYPTED)
2565		goto noredhdropt;
2566	if (p - (u_char *)ip6 > maxlen)
2567		goto noredhdropt;
2568
2569	{
2570		/* redirected header option */
2571		int len;
2572		struct nd_opt_rd_hdr *nd_opt_rh;
2573
2574		/*
2575		 * compute the maximum size for icmp6 redirect header option.
2576		 * XXX room for auth header?
2577		 */
2578		len = maxlen - (p - (u_char *)ip6);
2579		len &= ~7;
2580
2581		/*
2582		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
2583		 * about padding/truncate rule for the original IP packet.
2584		 * From the discussion on IPv6imp in Feb 1999,
2585		 * the consensus was:
2586		 * - "attach as much as possible" is the goal
2587		 * - pad if not aligned (original size can be guessed by
2588		 *   original ip6 header)
2589		 * Following code adds the padding if it is simple enough,
2590		 * and truncates if not.
2591		 */
2592		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
2593			/* not enough room, truncate */
2594			m_adj(m0, (len - sizeof(*nd_opt_rh)) -
2595			    m0->m_pkthdr.len);
2596		} else {
2597			/*
2598                         * enough room, truncate if not aligned.
2599			 * we don't pad here for simplicity.
2600			 */
2601			size_t extra;
2602
2603			extra = m0->m_pkthdr.len % 8;
2604			if (extra) {
2605				/* truncate */
2606				m_adj(m0, -extra);
2607			}
2608			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
2609		}
2610
2611		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
2612		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
2613		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
2614		nd_opt_rh->nd_opt_rh_len = len >> 3;
2615		p += sizeof(*nd_opt_rh);
2616		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2617
2618		/* connect m0 to m */
2619		m->m_pkthdr.len += m0->m_pkthdr.len;
2620		m_cat(m, m0);
2621		m0 = NULL;
2622	}
2623noredhdropt:
2624	if (m0) {
2625		m_freem(m0);
2626		m0 = NULL;
2627	}
2628
2629	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
2630		sip6->ip6_src.s6_addr16[1] = 0;
2631	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
2632		sip6->ip6_dst.s6_addr16[1] = 0;
2633#if 0
2634	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
2635		ip6->ip6_src.s6_addr16[1] = 0;
2636	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
2637		ip6->ip6_dst.s6_addr16[1] = 0;
2638#endif
2639	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
2640		nd_rd->nd_rd_target.s6_addr16[1] = 0;
2641	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
2642		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
2643
2644	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
2645
2646	nd_rd->nd_rd_cksum = 0;
2647	nd_rd->nd_rd_cksum
2648		= in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));
2649
2650	/* send the packet to outside... */
2651#ifdef IPSEC
2652	/* Don't lookup socket */
2653	(void)ipsec_setsocket(m, NULL);
2654#endif /* IPSEC */
2655	if (ip6_output(m, NULL, NULL, 0, NULL, NULL) != 0)
2656		icmp6_ifstat_inc(ifp, ifs6_out_error);
2657
2658	icmp6_ifstat_inc(ifp, ifs6_out_msg);
2659	icmp6_ifstat_inc(ifp, ifs6_out_redirect);
2660	icmp6stat.icp6s_outhist[ND_REDIRECT]++;
2661
2662	return;
2663
2664fail:
2665	if (m)
2666		m_freem(m);
2667	if (m0)
2668		m_freem(m0);
2669}
2670
2671/*
2672 * ICMPv6 socket option processing.
2673 */
2674int
2675icmp6_ctloutput(op, so, level, optname, mp)
2676	int op;
2677	struct socket *so;
2678	int level, optname;
2679	struct mbuf **mp;
2680{
2681	int error = 0;
2682	int optlen;
2683	struct in6pcb *in6p = sotoin6pcb(so);
2684	struct mbuf *m = *mp;
2685
2686	optlen = m ? m->m_len : 0;
2687
2688	if (level != IPPROTO_ICMPV6) {
2689		if (op == PRCO_SETOPT && m)
2690			(void)m_free(m);
2691		return EINVAL;
2692	}
2693
2694	switch (op) {
2695	case PRCO_SETOPT:
2696		switch (optname) {
2697		case ICMP6_FILTER:
2698		    {
2699			struct icmp6_filter *p;
2700
2701			if (optlen != sizeof(*p)) {
2702				error = EMSGSIZE;
2703				break;
2704			}
2705			p = mtod(m, struct icmp6_filter *);
2706			if (!p || !in6p->in6p_icmp6filt) {
2707				error = EINVAL;
2708				break;
2709			}
2710			bcopy(p, in6p->in6p_icmp6filt,
2711				sizeof(struct icmp6_filter));
2712			error = 0;
2713			break;
2714		    }
2715
2716		default:
2717			error = ENOPROTOOPT;
2718			break;
2719		}
2720		if (m)
2721			(void)m_freem(m);
2722		break;
2723
2724	case PRCO_GETOPT:
2725		switch (optname) {
2726		case ICMP6_FILTER:
2727		    {
2728			struct icmp6_filter *p;
2729
2730			if (!in6p->in6p_icmp6filt) {
2731				error = EINVAL;
2732				break;
2733			}
2734			*mp = m = m_get(M_WAIT, MT_SOOPTS);
2735			m->m_len = sizeof(struct icmp6_filter);
2736			p = mtod(m, struct icmp6_filter *);
2737			bcopy(in6p->in6p_icmp6filt, p,
2738				sizeof(struct icmp6_filter));
2739			error = 0;
2740			break;
2741		    }
2742
2743		default:
2744			error = ENOPROTOOPT;
2745			break;
2746		}
2747		break;
2748	}
2749
2750	return (error);
2751}
2752
2753/*
2754 * Perform rate limit check.
2755 * Returns 0 if it is okay to send the icmp6 packet.
2756 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
2757 * limitation.
2758 *
2759 * XXX per-destination/type check necessary?
2760 */
2761static int
2762icmp6_ratelimit(dst, type, code)
2763	const struct in6_addr *dst;	/* not used at this moment */
2764	const int type;			/* not used at this moment */
2765	const int code;			/* not used at this moment */
2766{
2767	int ret;
2768
2769	ret = 0;	/* okay to send */
2770
2771	/* PPS limit */
2772	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
2773	    icmp6errppslim)) {
2774		/* The packet is subject to rate limit */
2775		ret++;
2776	}
2777
2778	return ret;
2779}
2780
2781static struct rtentry *
2782icmp6_mtudisc_clone(dst)
2783	struct sockaddr *dst;
2784{
2785	struct rtentry *rt;
2786	int    error;
2787
2788	rt = rtalloc1(dst, 1);
2789	if (rt == 0)
2790		return NULL;
2791
2792	/* If we didn't get a host route, allocate one */
2793	if ((rt->rt_flags & RTF_HOST) == 0) {
2794		struct rtentry *nrt;
2795
2796		error = rtrequest((int) RTM_ADD, dst,
2797		    (struct sockaddr *) rt->rt_gateway,
2798		    (struct sockaddr *) 0,
2799		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
2800		if (error) {
2801			rtfree(rt);
2802			return NULL;
2803		}
2804		nrt->rt_rmx = rt->rt_rmx;
2805		rtfree(rt);
2806		rt = nrt;
2807	}
2808	error = rt_timer_add(rt, icmp6_mtudisc_timeout,
2809			icmp6_mtudisc_timeout_q);
2810	if (error) {
2811		rtfree(rt);
2812		return NULL;
2813	}
2814
2815	return rt;	/* caller need to call rtfree() */
2816}
2817
2818static void
2819icmp6_mtudisc_timeout(rt, r)
2820	struct rtentry *rt;
2821	struct rttimer *r;
2822{
2823	if (rt == NULL)
2824		panic("icmp6_mtudisc_timeout: bad route to timeout");
2825	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
2826	    (RTF_DYNAMIC | RTF_HOST)) {
2827		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
2828		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
2829	} else {
2830		if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
2831			rt->rt_rmx.rmx_mtu = 0;
2832	}
2833}
2834
2835static void
2836icmp6_redirect_timeout(rt, r)
2837	struct rtentry *rt;
2838	struct rttimer *r;
2839{
2840	if (rt == NULL)
2841		panic("icmp6_redirect_timeout: bad route to timeout");
2842	if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) ==
2843	    (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) {
2844		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
2845		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
2846	}
2847}
2848
2849int
2850icmp6_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
2851	int *name;
2852	u_int namelen;
2853	void *oldp;
2854	size_t *oldlenp;
2855	void *newp;
2856	size_t newlen;
2857{
2858
2859	/* All sysctl names at this level are terminal. */
2860	if (namelen != 1)
2861		return ENOTDIR;
2862
2863	switch (name[0]) {
2864
2865	case ICMPV6CTL_REDIRACCEPT:
2866		return sysctl_int(oldp, oldlenp, newp, newlen,
2867				&icmp6_rediraccept);
2868	case ICMPV6CTL_REDIRTIMEOUT:
2869		return sysctl_int(oldp, oldlenp, newp, newlen,
2870				&icmp6_redirtimeout);
2871	case ICMPV6CTL_STATS:
2872		return sysctl_rdstruct(oldp, oldlenp, newp,
2873				&icmp6stat, sizeof(icmp6stat));
2874	case ICMPV6CTL_ND6_PRUNE:
2875		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_prune);
2876	case ICMPV6CTL_ND6_DELAY:
2877		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_delay);
2878	case ICMPV6CTL_ND6_UMAXTRIES:
2879		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_umaxtries);
2880	case ICMPV6CTL_ND6_MMAXTRIES:
2881		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_mmaxtries);
2882	case ICMPV6CTL_ND6_USELOOPBACK:
2883		return sysctl_int(oldp, oldlenp, newp, newlen,
2884				&nd6_useloopback);
2885	case ICMPV6CTL_NODEINFO:
2886		return sysctl_int(oldp, oldlenp, newp, newlen, &icmp6_nodeinfo);
2887	case ICMPV6CTL_ERRPPSLIMIT:
2888		return sysctl_int(oldp, oldlenp, newp, newlen, &icmp6errppslim);
2889	case ICMPV6CTL_ND6_MAXNUDHINT:
2890		return sysctl_int(oldp, oldlenp, newp, newlen,
2891				&nd6_maxnudhint);
2892	case ICMPV6CTL_MTUDISC_HIWAT:
2893		return sysctl_int(oldp, oldlenp, newp, newlen,
2894				&icmp6_mtudisc_hiwat);
2895	case ICMPV6CTL_MTUDISC_LOWAT:
2896		return sysctl_int(oldp, oldlenp, newp, newlen,
2897				&icmp6_mtudisc_lowat);
2898	case ICMPV6CTL_ND6_DEBUG:
2899		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_debug);
2900	case ICMPV6CTL_ND6_DRLIST:
2901	case ICMPV6CTL_ND6_PRLIST:
2902		return nd6_sysctl(name[0], oldp, oldlenp, newp, newlen);
2903	default:
2904		return ENOPROTOOPT;
2905	}
2906	/* NOTREACHED */
2907}
2908