icmp6.c revision 1.60
1/*	$NetBSD: icmp6.c,v 1.60 2001/03/08 00:19:03 itojun Exp $	*/
2/*	$KAME: icmp6.c,v 1.203 2001/03/08 00:17:54 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the University of
48 *	California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
66 */
67
68#include "opt_inet.h"
69#include "opt_ipsec.h"
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/protosw.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/time.h>
79#include <sys/kernel.h>
80#include <sys/syslog.h>
81#include <sys/domain.h>
82
83#include <net/if.h>
84#include <net/route.h>
85#include <net/if_dl.h>
86#include <net/if_types.h>
87
88#include <netinet/in.h>
89#include <netinet/in_var.h>
90#include <netinet/ip6.h>
91#include <netinet6/ip6_var.h>
92#include <netinet/icmp6.h>
93#include <netinet6/mld6_var.h>
94#include <netinet6/in6_pcb.h>
95#include <netinet6/nd6.h>
96#include <netinet6/in6_ifattach.h>
97#include <netinet6/ip6protosw.h>
98
99
100#ifdef IPSEC
101#include <netinet6/ipsec.h>
102#include <netkey/key.h>
103#endif
104
105#include "faith.h"
106
107#include <net/net_osdep.h>
108
109extern struct domain inet6domain;
110
/* ICMPv6 statistics; the counters are incremented throughout this file. */
111struct icmp6stat icmp6stat;
112
113extern struct in6pcb rawin6pcb;
/*
 * State for limiting the rate of outgoing ICMPv6 errors.
 * NOTE(review): presumably consumed by icmp6_ratelimit(), whose body is
 * not part of this section -- confirm there.
 */
114extern int icmp6errppslim;
115static int icmp6errpps_count = 0;
116static struct timeval icmp6errppslim_last;
/* When zero, icmp6_input() leaves ICMP6_WRUREQUEST queries unanswered. */
117extern int icmp6_nodeinfo;
118
119/*
120 * List of callbacks to notify when Path MTU changes are made.
 * Entries are added by icmp6_mtudisc_callback_register(); mc_func is the
 * registered function and takes a pointer to an IPv6 address.
121 */
122struct icmp6_mtudisc_callback {
123	LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
124	void (*mc_func) __P((struct in6_addr *));
125};
126
127LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
128    LIST_HEAD_INITIALIZER(&icmp6_mtudisc_callbacks);
129
/*
 * Route timer queue for path MTU discovery; created in icmp6_init() with
 * pmtu_expire as its argument.
 */
130static struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
131extern int pmtu_expire;
132
/*
 * Thresholds that icmp6_mtudisc_update() compares against
 * rt_timer_count(icmp6_mtudisc_timeout_q).  A negative value disables the
 * corresponding check there.
 */
133/* XXX do these values make any sense? */
134static int icmp6_mtudisc_hiwat = 1280;
135static int icmp6_mtudisc_lowat = 256;
136
137/*
138 * keep track of # of redirect routes.
 * The queue is created in icmp6_init() with icmp6_redirtimeout as its
 * argument.
139 */
140static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;
141
142/* XXX experimental, turned off */
143static int icmp6_redirect_hiwat = -1;
144static int icmp6_redirect_lowat = -1;
145
/* Forward declarations of the file-local helpers. */
146static void icmp6_errcount __P((struct icmp6errstat *, int, int));
147static int icmp6_rip6_input __P((struct mbuf **, int));
148static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int));
149static const char *icmp6_redirect_diag __P((struct in6_addr *,
150	struct in6_addr *, struct in6_addr *));
151static struct mbuf *ni6_input __P((struct mbuf *, int));
152static struct mbuf *ni6_nametodns __P((const char *, int, int));
153static int ni6_dnsmatch __P((const char *, int, const char *, int));
154static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *,
155			  struct ifnet **, char *));
156static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
157				struct ifnet *, int));
158static int icmp6_notify_error __P((struct mbuf *, int, int, int));
159static struct rtentry *icmp6_mtudisc_clone __P((struct sockaddr *));
160static void icmp6_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
161static void icmp6_redirect_timeout __P((struct rtentry *, struct rttimer *));
162
/*
 * Only present in RFC 1885 compatibility builds.
 * NOTE(review): no user of this route is visible in this section --
 * presumably icmp6_reflect(); confirm.
 */
163#ifdef COMPAT_RFC1885
164static struct route_in6 icmp6_reflect_rt;
165#endif
166
167void
168icmp6_init()
169{
170	mld6_init();
171	icmp6_mtudisc_timeout_q = rt_timer_queue_create(pmtu_expire);
172	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
173}
174
175static void
176icmp6_errcount(stat, type, code)
177	struct icmp6errstat *stat;
178	int type, code;
179{
180	switch (type) {
181	case ICMP6_DST_UNREACH:
182		switch (code) {
183		case ICMP6_DST_UNREACH_NOROUTE:
184			stat->icp6errs_dst_unreach_noroute++;
185			return;
186		case ICMP6_DST_UNREACH_ADMIN:
187			stat->icp6errs_dst_unreach_admin++;
188			return;
189		case ICMP6_DST_UNREACH_BEYONDSCOPE:
190			stat->icp6errs_dst_unreach_beyondscope++;
191			return;
192		case ICMP6_DST_UNREACH_ADDR:
193			stat->icp6errs_dst_unreach_addr++;
194			return;
195		case ICMP6_DST_UNREACH_NOPORT:
196			stat->icp6errs_dst_unreach_noport++;
197			return;
198		}
199		break;
200	case ICMP6_PACKET_TOO_BIG:
201		stat->icp6errs_packet_too_big++;
202		return;
203	case ICMP6_TIME_EXCEEDED:
204		switch (code) {
205		case ICMP6_TIME_EXCEED_TRANSIT:
206			stat->icp6errs_time_exceed_transit++;
207			return;
208		case ICMP6_TIME_EXCEED_REASSEMBLY:
209			stat->icp6errs_time_exceed_reassembly++;
210			return;
211		}
212		break;
213	case ICMP6_PARAM_PROB:
214		switch (code) {
215		case ICMP6_PARAMPROB_HEADER:
216			stat->icp6errs_paramprob_header++;
217			return;
218		case ICMP6_PARAMPROB_NEXTHEADER:
219			stat->icp6errs_paramprob_nextheader++;
220			return;
221		case ICMP6_PARAMPROB_OPTION:
222			stat->icp6errs_paramprob_option++;
223			return;
224		}
225		break;
226	case ND_REDIRECT:
227		stat->icp6errs_redirect++;
228		return;
229	}
230	stat->icp6errs_unknown++;
231}
232
233/*
234 * Register a Path MTU Discovery callback.
235 */
236void
237icmp6_mtudisc_callback_register(func)
238	void (*func) __P((struct in6_addr *));
239{
240	struct icmp6_mtudisc_callback *mc;
241
242	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
243	     mc = LIST_NEXT(mc, mc_list)) {
244		if (mc->mc_func == func)
245			return;
246	}
247
248	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
249	if (mc == NULL)
250		panic("icmp6_mtudisc_callback_register");
251
252	mc->mc_func = func;
253	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
254}
255
256/*
257 * Generate an error packet of type error in response to bad IP6 packet.
 *
 * m     - the packet that provoked the error.  This function takes it
 *         over: it is either freed (label freeit) or gets an IPv6 and an
 *         ICMPv6 header prepended and is passed to icmp6_reflect().
 * type  - ICMPv6 type of the error to generate.
 * code  - ICMPv6 code of the error to generate.
 * param - stored in the icmp6_pptr field, converted with htonl().
 *
 * No error is generated when m carries M_DECRYPTED, when the offending
 * packet was multicast/broadcast (with the exceptions noted below), when
 * its source is unspecified or multicast, when it is itself an ICMPv6
 * error or redirect, or when icmp6_ratelimit() says no.
258 */
259void
260icmp6_error(m, type, code, param)
261	struct mbuf *m;
262	int type, code, param;
263{
264	struct ip6_hdr *oip6, *nip6;
265	struct icmp6_hdr *icmp6;
266	u_int preplen;
267	int off;
268	int nxt;
269
	/* counted up front, i.e. also for errors that are suppressed below */
270	icmp6stat.icp6s_error++;
271
272	/* count per-type-code statistics */
273	icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code);
274
275	if (m->m_flags & M_DECRYPTED) {
276		icmp6stat.icp6s_canterror++;
277		goto freeit;
278	}
279
	/*
	 * Make the IPv6 header of the offending packet accessible.
	 * NOTE(review): with the empty last argument IP6_EXTHDR_CHECK
	 * presumably does a bare "return" on failure (and disposes of m) --
	 * confirm against the macro definition.
	 */
280#ifndef PULLDOWN_TEST
281	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
282#else
283	if (m->m_len < sizeof(struct ip6_hdr)) {
284		m = m_pullup(m, sizeof(struct ip6_hdr));
285		if (m == NULL)
286			return;
287	}
288#endif
289	oip6 = mtod(m, struct ip6_hdr *);
290
291	/*
292	 * Multicast destination check. For unrecognized option errors,
293	 * this check has already been done in ip6_unknown_opt(), so we can
294	 * check only for other errors.
	 * Packet Too Big is exempt from this check as well.
295	 */
296	if ((m->m_flags & (M_BCAST|M_MCAST) ||
297	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
298	    (type != ICMP6_PACKET_TOO_BIG &&
299	     (type != ICMP6_PARAM_PROB ||
300	      code != ICMP6_PARAMPROB_OPTION)))
301		goto freeit;
302
303	/* Source address check. XXX: the case of anycast source? */
304	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
305	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
306		goto freeit;
307
308	/*
309	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
310	 * don't do it.
	 * ip6_lasthdr() yields the offset and protocol (nxt) of the last
	 * header of the offending packet.
311	 */
312	nxt = -1;
313	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
314	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
315		struct icmp6_hdr *icp;
316
317#ifndef PULLDOWN_TEST
318		IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), );
319		icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
320#else
321		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
322			sizeof(*icp));
323		if (icp == NULL) {
324			icmp6stat.icp6s_tooshort++;
325			return;
326		}
327#endif
		/* types below ICMP6_ECHO_REQUEST are treated as errors */
328		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
329		    icp->icmp6_type == ND_REDIRECT) {
330			/*
331			 * ICMPv6 error
332			 * Special case: for redirect (which is
333			 * informational) we must not send icmp6 error.
334			 */
335			icmp6stat.icp6s_canterror++;
336			goto freeit;
337		} else {
338			/* ICMPv6 informational - send the error */
339		}
340	} else {
341		/* non-ICMPv6 - send the error */
342	}
343
344	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
345
346	/* Finally, do rate limitation check. */
347	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
348		icmp6stat.icp6s_toofreq++;
349		goto freeit;
350	}
351
352	/*
353	 * OK, ICMP6 can be generated.
354	 */
355
	/*
	 * Cut the offending packet down to ICMPV6_PLD_MAXLEN bytes; the
	 * count handed to m_adj() is zero or negative here.
	 */
356	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
357		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
358
	/* make room in front for the new IPv6 and ICMPv6 headers */
359	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
360	M_PREPEND(m, preplen, M_DONTWAIT);
361	if (m && m->m_len < preplen)
362		m = m_pullup(m, preplen);
363	if (m == NULL) {
364		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
365		return;
366	}
367
	/*
	 * The new header takes its addresses unchanged from the offending
	 * packet's header (oip6).
	 * NOTE(review): oip6 was obtained before M_PREPEND/m_pullup; this
	 * relies on the old header not having moved -- confirm.
	 */
368	nip6 = mtod(m, struct ip6_hdr *);
369	nip6->ip6_src  = oip6->ip6_src;
370	nip6->ip6_dst  = oip6->ip6_dst;
371
	/*
	 * In the old header, which is now part of the error payload, clear
	 * the second 16-bit word of link-local scoped addresses.
	 */
372	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src))
373		oip6->ip6_src.s6_addr16[1] = 0;
374	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst))
375		oip6->ip6_dst.s6_addr16[1] = 0;
376
	/* the ICMPv6 header sits right behind the new IPv6 header */
377	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
378	icmp6->icmp6_type = type;
379	icmp6->icmp6_code = code;
380	icmp6->icmp6_pptr = htonl((u_int32_t)param);
381
382	icmp6stat.icp6s_outhist[type]++;
383	icmp6_reflect(m, sizeof(struct ip6_hdr)); /*header order: IPv6 - ICMPv6*/
384
385	return;
386
387  freeit:
388	/*
389	 * If we can't tell whether or not we can generate ICMP6, free it.
390	 */
391	m_freem(m);
392}
393
394/*
395 * Process a received ICMP6 message.
 *
 * mp    - points at the received packet.
 * offp  - points at the offset of the ICMPv6 header within the packet.
 * proto - not referenced in this function.
 *
 * Always returns IPPROTO_DONE.  After the type-specific processing the
 * packet is handed to icmp6_rip6_input() for delivery to sockets, unless
 * one of the "goto freeit" paths (or an early return) was taken.
 *
 * Note that the labels deliver, badcode and badlen are located inside
 * the default case of the big switch and are reached with goto from the
 * other cases.
396 */
397int
398icmp6_input(mp, offp, proto)
399	struct mbuf **mp;
400	int *offp, proto;
401{
402	struct mbuf *m = *mp, *n;
403	struct ip6_hdr *ip6, *nip6;
404	struct icmp6_hdr *icmp6, *nicmp6;
405	int off = *offp;
	/* number of bytes from the start of the ICMPv6 header to the end */
406	int icmp6len = m->m_pkthdr.len - *offp;
407	int code, sum, noff;
408
409#ifndef PULLDOWN_TEST
410	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
411	/* m might change if M_LOOP. So, call mtod after this */
412#endif
413
414	/*
415	 * Locate icmp6 structure in mbuf, and check
416	 * that not corrupted and of at least minimum length
417	 */
418
419	ip6 = mtod(m, struct ip6_hdr *);
420	if (icmp6len < sizeof(struct icmp6_hdr)) {
421		icmp6stat.icp6s_tooshort++;
422		goto freeit;
423	}
424
425	/*
426	 * calculate the checksum
427	 */
428#ifndef PULLDOWN_TEST
429	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
430#else
431	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
432	if (icmp6 == NULL) {
433		icmp6stat.icp6s_tooshort++;
434		return IPPROTO_DONE;
435	}
436#endif
	/*
	 * code starts out as the ICMPv6 code; the error cases below replace
	 * it with a PRC_* value before jumping to deliver.
	 */
437	code = icmp6->icmp6_code;
438
	/* a non-zero result from in6_cksum() is treated as a bad checksum */
439	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
440		nd6log((LOG_ERR,
441		    "ICMP6 checksum error(%d|%x) %s\n",
442		    icmp6->icmp6_type, sum, ip6_sprintf(&ip6->ip6_src)));
443		icmp6stat.icp6s_checksum++;
444		goto freeit;
445	}
446
447#if defined(NFAITH) && 0 < NFAITH
448	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
449		/*
450		 * Deliver very specific ICMP6 type only.
451		 * This is important to deliver TOOBIG.  Otherwise PMTUD
452		 * will not work.
453		 */
454		switch (icmp6->icmp6_type) {
455		case ICMP6_DST_UNREACH:
456		case ICMP6_PACKET_TOO_BIG:
457		case ICMP6_TIME_EXCEEDED:
458			break;
459		default:
460			goto freeit;
461		}
462	}
463#endif
464
	/* input statistics: global per-type histogram and per-interface */
465	icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
466	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
467	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
468		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
469
470	switch (icmp6->icmp6_type) {
471
	/* error messages: translate the ICMPv6 code to PRC_*, then deliver */
472	case ICMP6_DST_UNREACH:
473		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
474		switch (code) {
475		case ICMP6_DST_UNREACH_NOROUTE:
476			code = PRC_UNREACH_NET;
477			break;
478		case ICMP6_DST_UNREACH_ADMIN:
479			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
480			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
481			break;
482		case ICMP6_DST_UNREACH_ADDR:
483			code = PRC_HOSTDEAD;
484			break;
485#ifdef COMPAT_RFC1885
486		case ICMP6_DST_UNREACH_NOTNEIGHBOR:
487			code = PRC_UNREACH_SRCFAIL;
488			break;
489#else
490		case ICMP6_DST_UNREACH_BEYONDSCOPE:
491			/* I mean "source address was incorrect." */
492			code = PRC_PARAMPROB;
493			break;
494#endif
495		case ICMP6_DST_UNREACH_NOPORT:
496			code = PRC_UNREACH_PORT;
497			break;
498		default:
499			goto badcode;
500		}
501		goto deliver;
502		break;
503
504	case ICMP6_PACKET_TOO_BIG:
505		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
506		if (code != 0)
507			goto badcode;
508
509		code = PRC_MSGSIZE;
510
511		/*
512		 * Updating the path MTU will be done after examining
513		 * intermediate extension headers.
514		 */
515		goto deliver;
516		break;
517
518	case ICMP6_TIME_EXCEEDED:
519		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
520		switch (code) {
521		case ICMP6_TIME_EXCEED_TRANSIT:
522		case ICMP6_TIME_EXCEED_REASSEMBLY:
			/* the ICMPv6 code is used as an offset from the PRC_ base */
523			code += PRC_TIMXCEED_INTRANS;
524			break;
525		default:
526			goto badcode;
527		}
528		goto deliver;
529		break;
530
531	case ICMP6_PARAM_PROB:
532		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
533		switch (code) {
534		case ICMP6_PARAMPROB_NEXTHEADER:
535			code = PRC_UNREACH_PROTOCOL;
536			break;
537		case ICMP6_PARAMPROB_HEADER:
538		case ICMP6_PARAMPROB_OPTION:
539			code = PRC_PARAMPROB;
540			break;
541		default:
542			goto badcode;
543		}
544		goto deliver;
545		break;
546
	/*
	 * Echo request: the reply is built from a copy (n) of the request;
	 * m itself still goes to icmp6_rip6_input() at the end.
	 */
547	case ICMP6_ECHO_REQUEST:
548		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
549		if (code != 0)
550			goto badcode;
551		if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
552			/* Give up remote */
553			break;
554		}
		/*
		 * If the first mbuf of the copy has external storage, or does
		 * not hold everything up to the end of the ICMPv6 header, put
		 * the IPv6 and ICMPv6 headers into a fresh header mbuf and
		 * chain the rest of the copy behind it.
		 */
555		if ((n->m_flags & M_EXT) != 0
556		 || n->m_len < off + sizeof(struct icmp6_hdr)) {
557			struct mbuf *n0 = n;
558			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
559
560			/*
561			 * Prepare an internal mbuf. m_pullup() doesn't
562			 * always copy the length we specified.
563			 */
564			if (maxlen >= MCLBYTES) {
565				/* Give up remote */
566				m_freem(n0);
567				break;
568			}
569			MGETHDR(n, M_DONTWAIT, n0->m_type);
570			if (n && maxlen >= MHLEN) {
571				MCLGET(n, M_DONTWAIT);
572				if ((n->m_flags & M_EXT) == 0) {
573					m_free(n);
574					n = NULL;
575				}
576			}
577			if (n == NULL) {
578				/* Give up remote */
579				m_freem(n0);
580				break;
581			}
582			M_COPY_PKTHDR(n, n0);
583			/*
584			 * Copy IPv6 and ICMPv6 only.
			 * Extension headers between the two, if any, are
			 * not carried over; noff becomes the size of the
			 * IPv6 header.
585			 */
586			nip6 = mtod(n, struct ip6_hdr *);
587			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
588			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
589			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
590			noff = sizeof(struct ip6_hdr);
591			n->m_pkthdr.len = n->m_len =
592				noff + sizeof(struct icmp6_hdr);
593			/*
594			 * Adjust mbuf. ip6_plen will be adjusted in
595			 * ip6_output().
			 * Everything up to and including the ICMPv6 header
			 * is dropped from the front of the copy, which then
			 * becomes the tail of the new chain.
596			 */
597			m_adj(n0, off + sizeof(struct icmp6_hdr));
598			n->m_pkthdr.len += n0->m_pkthdr.len;
599			n->m_next = n0;
600			n0->m_flags &= ~M_PKTHDR;
601		} else {
			/* the copy can be turned into the reply in place */
602			nip6 = mtod(n, struct ip6_hdr *);
603			nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off);
604			noff = off;
605		}
606		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
607		nicmp6->icmp6_code = 0;
608		if (n) {
609			icmp6stat.icp6s_reflect++;
610			icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
611			icmp6_reflect(n, noff);
612		}
613		break;
614
615	case ICMP6_ECHO_REPLY:
616		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
617		if (code != 0)
618			goto badcode;
619		break;
620
	/*
	 * MLD and neighbor discovery messages below share one pattern: the
	 * handler gets a copy (n) and m stays for icmp6_rip6_input().  If
	 * the copy cannot be made, the handler gets m itself, m is set to
	 * NULL and socket delivery is skipped via freeit.
	 * NOTE(review): this implies the handlers consume the mbuf they are
	 * given and that m_freem(NULL) is harmless -- confirm.
	 */
621	case MLD6_LISTENER_QUERY:
622	case MLD6_LISTENER_REPORT:
623		if (icmp6len < sizeof(struct mld6_hdr))
624			goto badlen;
625		if (icmp6->icmp6_type == MLD6_LISTENER_QUERY) /* XXX: ugly... */
626			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
627		else
628			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
629		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
630			/* give up local */
631			mld6_input(m, off);
632			m = NULL;
633			goto freeit;
634		}
635		mld6_input(n, off);
636		/* m stays. */
637		break;
638
639	case MLD6_LISTENER_DONE:
640		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
641		if (icmp6len < sizeof(struct mld6_hdr))	/* necessary? */
642			goto badlen;
643		break;		/* nothing to be done in kernel */
644
645	case MLD6_MTRACE_RESP:
646	case MLD6_MTRACE:
647		/* XXX: these two are experimental. not officially defined. */
648		/* XXX: per-interface statistics? */
649		break;		/* just pass it to applications */
650
	/*
	 * Node information query.  Answered only if icmp6_nodeinfo is
	 * non-zero.  A message of exactly sizeof(struct icmp6_hdr) + 4
	 * bytes is handled as WRU, one of at least
	 * sizeof(struct icmp6_nodeinfo) bytes as FQDN.
	 */
651	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
652	    {
653		enum { WRU, FQDN } mode;
654
655		if (!icmp6_nodeinfo)
656			break;
657
658		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
659			mode = WRU;
660		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
661			mode = FQDN;
662		else
663			goto badlen;
664
665		if (mode == FQDN) {
666#ifndef PULLDOWN_TEST
667			IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
668					 IPPROTO_DONE);
669#endif
			/* ni6_input() turns the copy into the reply, or NULL */
670			n = m_copy(m, 0, M_COPYALL);
671			if (n)
672				n = ni6_input(n, off);
673			/* XXX meaningless if n == NULL */
674			noff = sizeof(struct ip6_hdr);
675		} else {
676			u_char *p;
677			int maxlen, maxhlen;
678
679			if (code != 0)
680				goto badcode;
			/* fixed part of the reply: IPv6 + ICMPv6 + 4 bytes */
681			maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
682			if (maxlen >= MCLBYTES) {
683				/* Give up remote */
684				break;
685			}
686			MGETHDR(n, M_DONTWAIT, m->m_type);
687			if (n && maxlen > MHLEN) {
688				MCLGET(n, M_DONTWAIT);
689				if ((n->m_flags & M_EXT) == 0) {
690					m_free(n);
691					n = NULL;
692				}
693			}
694			if (n == NULL) {
695				/* Give up remote */
696				break;
697			}
			/*
			 * maxhlen: how much of hostname goes into the reply,
			 * limited by the space left in n behind the fixed
			 * part and by hostnamelen.
			 */
698			n->m_len = 0;
699			maxhlen = M_TRAILINGSPACE(n) - maxlen;
700			if (maxhlen > hostnamelen)
701				maxhlen = hostnamelen;
702			/*
703			 * Copy IPv6 and ICMPv6 only.
704			 */
705			nip6 = mtod(n, struct ip6_hdr *);
706			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
707			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
708			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
			/* 4 zero bytes, then the hostname */
709			p = (u_char *)(nicmp6 + 1);
710			bzero(p, 4);
711			bcopy(hostname, p + 4, maxhlen); /*meaningless TTL*/
712			noff = sizeof(struct ip6_hdr);
713			M_COPY_PKTHDR(n, m); /* just for recvif */
714			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
715				sizeof(struct icmp6_hdr) + 4 + maxhlen;
716			nicmp6->icmp6_type = ICMP6_WRUREPLY;
717			nicmp6->icmp6_code = 0;
718		}
		/* NOTE(review): no matching #define is visible in this section */
719#undef hostnamelen
720		if (n) {
721			icmp6stat.icp6s_reflect++;
722			icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
723			icmp6_reflect(n, noff);
724		}
725		break;
726	    }
727
728	case ICMP6_WRUREPLY:
729		if (code != 0)
730			goto badcode;
731		break;
732
733	case ND_ROUTER_SOLICIT:
734		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
735		if (code != 0)
736			goto badcode;
737		if (icmp6len < sizeof(struct nd_router_solicit))
738			goto badlen;
739		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
740			/* give up local */
741			nd6_rs_input(m, off, icmp6len);
742			m = NULL;
743			goto freeit;
744		}
745		nd6_rs_input(n, off, icmp6len);
746		/* m stays. */
747		break;
748
749	case ND_ROUTER_ADVERT:
750		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
751		if (code != 0)
752			goto badcode;
753		if (icmp6len < sizeof(struct nd_router_advert))
754			goto badlen;
755		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
756			/* give up local */
757			nd6_ra_input(m, off, icmp6len);
758			m = NULL;
759			goto freeit;
760		}
761		nd6_ra_input(n, off, icmp6len);
762		/* m stays. */
763		break;
764
765	case ND_NEIGHBOR_SOLICIT:
766		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
767		if (code != 0)
768			goto badcode;
769		if (icmp6len < sizeof(struct nd_neighbor_solicit))
770			goto badlen;
771		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
772			/* give up local */
773			nd6_ns_input(m, off, icmp6len);
774			m = NULL;
775			goto freeit;
776		}
777		nd6_ns_input(n, off, icmp6len);
778		/* m stays. */
779		break;
780
781	case ND_NEIGHBOR_ADVERT:
782		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
783		if (code != 0)
784			goto badcode;
785		if (icmp6len < sizeof(struct nd_neighbor_advert))
786			goto badlen;
787		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
788			/* give up local */
789			nd6_na_input(m, off, icmp6len);
790			m = NULL;
791			goto freeit;
792		}
793		nd6_na_input(n, off, icmp6len);
794		/* m stays. */
795		break;
796
797	case ND_REDIRECT:
798		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
799		if (code != 0)
800			goto badcode;
801		if (icmp6len < sizeof(struct nd_redirect))
802			goto badlen;
803		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
804			/* give up local */
805			icmp6_redirect_input(m, off);
806			m = NULL;
807			goto freeit;
808		}
809		icmp6_redirect_input(n, off);
810		/* m stays. */
811		break;
812
	/* only validated here; the message is left to the sockets */
813	case ICMP6_ROUTER_RENUMBERING:
814		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
815		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
816			goto badcode;
817		if (icmp6len < sizeof(struct icmp6_router_renum))
818			goto badlen;
819		break;
820
821	default:
822		nd6log((LOG_DEBUG,
823		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
824		    icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src),
825		    ip6_sprintf(&ip6->ip6_dst),
826		    m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
827		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
828			/* ICMPv6 error: MUST deliver it by spec... */
829			code = PRC_NCMDS;
830			/* deliver */
831		} else {
832			/* ICMPv6 informational: MUST not deliver */
833			break;
834		}
	/*
	 * Common tail of all error types (also reached by falling through
	 * from the unknown-error branch just above): notify the upper-layer
	 * protocol.  A non-zero return means m is gone.
	 */
835	deliver:
836		if (icmp6_notify_error(m, off, icmp6len, code)) {
837			/* In this case, m should've been freed. */
838			return(IPPROTO_DONE);
839		}
840		break;
841
	/* bad code/length: count it; the packet still goes to the sockets */
842	badcode:
843		icmp6stat.icp6s_badcode++;
844		break;
845
846	badlen:
847		icmp6stat.icp6s_badlen++;
848		break;
849	}
850
851	/* deliver the packet to appropriate sockets */
852	icmp6_rip6_input(&m, *offp);
853
854	return IPPROTO_DONE;
855
856 freeit:
857	m_freem(m);
858	return IPPROTO_DONE;
859}
860
861static int
862icmp6_notify_error(m, off, icmp6len, code)
863	struct mbuf *m;
864	int off, icmp6len;
865{
866	struct icmp6_hdr *icmp6;
867	struct ip6_hdr *eip6;
868	u_int32_t notifymtu;
869	struct sockaddr_in6 icmp6src, icmp6dst;
870
871	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
872		icmp6stat.icp6s_tooshort++;
873		goto freeit;
874	}
875#ifndef PULLDOWN_TEST
876	IP6_EXTHDR_CHECK(m, off,
877			 sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr),
878			 -1);
879	icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
880#else
881	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
882		       sizeof(*icmp6) + sizeof(struct ip6_hdr));
883	if (icmp6 == NULL) {
884		icmp6stat.icp6s_tooshort++;
885		return(-1);
886	}
887#endif
888	eip6 = (struct ip6_hdr *)(icmp6 + 1);
889
890	/* Detect the upper level protocol */
891	{
892		void (*ctlfunc) __P((int, struct sockaddr *, void *));
893		u_int8_t nxt = eip6->ip6_nxt;
894		int eoff = off + sizeof(struct icmp6_hdr) +
895			sizeof(struct ip6_hdr);
896		struct ip6ctlparam ip6cp;
897		struct in6_addr *finaldst = NULL;
898		int icmp6type = icmp6->icmp6_type;
899		struct ip6_frag *fh;
900		struct ip6_rthdr *rth;
901		struct ip6_rthdr0 *rth0;
902		int rthlen;
903
904		while (1) { /* XXX: should avoid inf. loop explicitly? */
905			struct ip6_ext *eh;
906
907			switch (nxt) {
908			case IPPROTO_HOPOPTS:
909			case IPPROTO_DSTOPTS:
910			case IPPROTO_AH:
911#ifndef PULLDOWN_TEST
912				IP6_EXTHDR_CHECK(m, 0, eoff +
913						 sizeof(struct ip6_ext),
914						 -1);
915				eh = (struct ip6_ext *)(mtod(m, caddr_t)
916							+ eoff);
917#else
918				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
919					       eoff, sizeof(*eh));
920				if (eh == NULL) {
921					icmp6stat.icp6s_tooshort++;
922					return(-1);
923				}
924#endif
925
926				if (nxt == IPPROTO_AH)
927					eoff += (eh->ip6e_len + 2) << 2;
928				else
929					eoff += (eh->ip6e_len + 1) << 3;
930				nxt = eh->ip6e_nxt;
931				break;
932			case IPPROTO_ROUTING:
933				/*
934				 * When the erroneous packet contains a
935				 * routing header, we should examine the
936				 * header to determine the final destination.
937				 * Otherwise, we can't properly update
938				 * information that depends on the final
939				 * destination (e.g. path MTU).
940				 */
941#ifndef PULLDOWN_TEST
942				IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth),
943						 -1);
944				rth = (struct ip6_rthdr *)(mtod(m, caddr_t)
945							   + eoff);
946#else
947				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
948					       eoff, sizeof(*rth));
949				if (rth == NULL) {
950					icmp6stat.icp6s_tooshort++;
951					return(-1);
952				}
953#endif
954				rthlen = (rth->ip6r_len + 1) << 3;
955				/*
956				 * XXX: currently there is no
957				 * officially defined type other
958				 * than type-0.
959				 * Note that if the segment left field
960				 * is 0, all intermediate hops must
961				 * have been passed.
962				 */
963				if (rth->ip6r_segleft &&
964				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
965					int hops;
966
967#ifndef PULLDOWN_TEST
968					IP6_EXTHDR_CHECK(m, 0, eoff + rthlen,
969							 -1);
970					rth0 = (struct ip6_rthdr0 *)(mtod(m, caddr_t) + eoff);
971#else
972					IP6_EXTHDR_GET(rth0,
973						       struct ip6_rthdr0 *, m,
974						       eoff, rthlen);
975					if (rth0 == NULL) {
976						icmp6stat.icp6s_tooshort++;
977						return(-1);
978					}
979#endif
980					/* just ignore a bogus header */
981					if ((rth0->ip6r0_len % 2) == 0 &&
982					    (hops = rth0->ip6r0_len/2))
983						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
984				}
985				eoff += rthlen;
986				nxt = rth->ip6r_nxt;
987				break;
988			case IPPROTO_FRAGMENT:
989#ifndef PULLDOWN_TEST
990				IP6_EXTHDR_CHECK(m, 0, eoff +
991						 sizeof(struct ip6_frag),
992						 -1);
993				fh = (struct ip6_frag *)(mtod(m, caddr_t)
994							 + eoff);
995#else
996				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
997					       eoff, sizeof(*fh));
998				if (fh == NULL) {
999					icmp6stat.icp6s_tooshort++;
1000					return(-1);
1001				}
1002#endif
1003				/*
1004				 * Data after a fragment header is meaningless
1005				 * unless it is the first fragment, but
1006				 * we'll go to the notify label for path MTU
1007				 * discovery.
1008				 */
1009				if (fh->ip6f_offlg & IP6F_OFF_MASK)
1010					goto notify;
1011
1012				eoff += sizeof(struct ip6_frag);
1013				nxt = fh->ip6f_nxt;
1014				break;
1015			default:
1016				/*
1017				 * This case includes ESP and the No Next
1018				 * Header. In such cases going to the notify
1019				 * label does not have any meaning
1020				 * (i.e. ctlfunc will be NULL), but we go
1021				 * anyway since we might have to update
1022				 * path MTU information.
1023				 */
1024				goto notify;
1025			}
1026		}
1027	  notify:
1028#ifndef PULLDOWN_TEST
1029		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
1030#else
1031		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
1032			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
1033		if (icmp6 == NULL) {
1034			icmp6stat.icp6s_tooshort++;
1035			return(-1);
1036		}
1037#endif
1038
1039		eip6 = (struct ip6_hdr *)(icmp6 + 1);
1040		bzero(&icmp6dst, sizeof(icmp6dst));
1041		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
1042		icmp6dst.sin6_family = AF_INET6;
1043		if (finaldst == NULL)
1044			icmp6dst.sin6_addr = eip6->ip6_dst;
1045		else
1046			icmp6dst.sin6_addr = *finaldst;
1047		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
1048							  &icmp6dst.sin6_addr);
1049#ifndef SCOPEDROUTING
1050		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst,
1051				   NULL, NULL)) {
1052			/* should be impossbile */
1053			nd6log((LOG_DEBUG,
1054			    "icmp6_notify_error: in6_embedscope failed\n"));
1055			goto freeit;
1056		}
1057#endif
1058
1059		/*
1060		 * retrieve parameters from the inner IPv6 header, and convert
1061		 * them into sockaddr structures.
1062		 */
1063		bzero(&icmp6src, sizeof(icmp6src));
1064		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
1065		icmp6src.sin6_family = AF_INET6;
1066		icmp6src.sin6_addr = eip6->ip6_src;
1067		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
1068							  &icmp6src.sin6_addr);
1069#ifndef SCOPEDROUTING
1070		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src,
1071				   NULL, NULL)) {
1072			/* should be impossbile */
1073			nd6log((LOG_DEBUG,
1074			    "icmp6_notify_error: in6_embedscope failed\n"));
1075			goto freeit;
1076		}
1077#endif
1078		icmp6src.sin6_flowinfo =
1079			(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
1080
1081		if (finaldst == NULL)
1082			finaldst = &eip6->ip6_dst;
1083		ip6cp.ip6c_m = m;
1084		ip6cp.ip6c_icmp6 = icmp6;
1085		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
1086		ip6cp.ip6c_off = eoff;
1087		ip6cp.ip6c_finaldst = finaldst;
1088		ip6cp.ip6c_src = &icmp6src;
1089		ip6cp.ip6c_nxt = nxt;
1090
1091		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
1092			notifymtu = ntohl(icmp6->icmp6_mtu);
1093			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
1094		}
1095
1096		ctlfunc = (void (*) __P((int, struct sockaddr *, void *)))
1097			(inet6sw[ip6_protox[nxt]].pr_ctlinput);
1098		if (ctlfunc) {
1099			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
1100					  &ip6cp);
1101		}
1102	}
1103	return(0);
1104
1105  freeit:
1106	m_freem(m);
1107	return(-1);
1108}
1109
1110void
1111icmp6_mtudisc_update(ip6cp, validated)
1112	struct ip6ctlparam *ip6cp;
1113	int validated;
1114{
1115	unsigned long rtcount;
1116	struct icmp6_mtudisc_callback *mc;
1117	struct in6_addr *dst = ip6cp->ip6c_finaldst;
1118	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
1119	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
1120	u_int mtu = ntohl(icmp6->icmp6_mtu);
1121	struct rtentry *rt = NULL;
1122	struct sockaddr_in6 sin6;
1123
1124	/*
1125	 * allow non-validated cases if memory is plenty, to make traffic
1126	 * from non-connected pcb happy.
1127	 */
1128	rtcount = rt_timer_count(icmp6_mtudisc_timeout_q);
1129	if (validated) {
1130		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat)
1131			return;
1132		else if (0 <= icmp6_mtudisc_lowat &&
1133		    rtcount > icmp6_mtudisc_lowat) {
1134			/*
1135			 * XXX nuke a victim, install the new one.
1136			 */
1137		}
1138	} else {
1139		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat)
1140			return;
1141	}
1142
1143	bzero(&sin6, sizeof(sin6));
1144	sin6.sin6_family = PF_INET6;
1145	sin6.sin6_len = sizeof(struct sockaddr_in6);
1146	sin6.sin6_addr = *dst;
1147	/* XXX normally, this won't happen */
1148	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
1149		sin6.sin6_addr.s6_addr16[1] =
1150		    htons(m->m_pkthdr.rcvif->if_index);
1151	}
1152	/* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */
1153	rt = icmp6_mtudisc_clone((struct sockaddr *)&sin6);
1154
1155	if (rt && (rt->rt_flags & RTF_HOST)
1156	    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
1157		if (mtu < IPV6_MMTU) {
1158				/* xxx */
1159			rt->rt_rmx.rmx_locks |= RTV_MTU;
1160		} else if (mtu < rt->rt_ifp->if_mtu &&
1161			   rt->rt_rmx.rmx_mtu > mtu) {
1162			icmp6stat.icp6s_pmtuchg++;
1163			rt->rt_rmx.rmx_mtu = mtu;
1164		}
1165	}
1166	if (rt)
1167		RTFREE(rt);
1168
1169	/*
1170	 * Notify protocols that the MTU for this destination
1171	 * has changed.
1172	 */
1173	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
1174	     mc = LIST_NEXT(mc, mc_list))
1175		(*mc->mc_func)(&sin6.sin6_addr);
1176}
1177
1178/*
1179 * Process a Node Information Query packet, based on
1180 * draft-ietf-ipngwg-icmp-name-lookups-07.
1181 *
1182 * Spec incompatibilities:
1183 * - IPv6 Subject address handling
1184 * - IPv4 Subject address handling support missing
1185 * - Proxy reply (answer even if it's not for me)
1186 * - joins NI group address at in6_ifattach() time only, does not cope
1187 *   with hostname changes by sethostname(3)
1188 */
1189#ifndef offsetof		/* XXX */
1190#define	offsetof(type, member)	((size_t)(&((type *)0)->member))
1191#endif
/*
 * ni6_input: check the Node Information query located `off' bytes into
 * packet `m' and build the corresponding reply.
 *
 * Returns a newly allocated reply mbuf whose IPv6 header is copied
 * unchanged from the query and whose ni_type is set to ICMP6_NI_REPLY,
 * or NULL when the query is rejected or an allocation fails.
 * The query mbuf is never handed back: it is freed here on every path
 * (or was already reclaimed when IP6_EXTHDR_GET failed).
 */
static struct mbuf *
ni6_input(m, off)
	struct mbuf *m;
	int off;
{
	struct icmp6_nodeinfo *ni6, *nni6;
	struct mbuf *n = NULL;
	u_int16_t qtype;
	int subjlen;
	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
	struct ni_reply_fqdn *fqdn;
	int addrs;		/* for NI_QTYPE_NODEADDR */
	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
	struct sockaddr_in6 sin6; /* double meaning; ip6_dst and subjectaddr */
	struct ip6_hdr *ip6;
	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
	char *subj = NULL;

	ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
	ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
#else
	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
	if (ni6 == NULL) {
		/* m is already reclaimed */
		return NULL;
	}
#endif

	/*
	 * Validate IPv6 destination address.
	 *
	 * The Responder must discard the Query without further processing
	 * unless it is one of the Responder's unicast or anycast addresses, or
	 * a link-local scope multicast address which the Responder has joined.
	 * [icmp-name-lookups-07, Section 4.]
	 */
	bzero(&sin6, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
	/* XXX scopeid */
	if (ifa_ifwithaddr((struct sockaddr *)&sin6))
		; /* unicast/anycast, fine */
	else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr))
		; /* link-local multicast, fine */
	else
		goto bad;

	/*
	 * validate query Subject field.
	 * subjlen is whatever follows the fixed Node Information header.
	 * Qtypes not listed in the switch below are not validated here;
	 * they are later answered as old-style FQDN queries.
	 */
	qtype = ntohs(ni6->ni_qtype);
	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
	switch (qtype) {
	case NI_QTYPE_NOOP:
	case NI_QTYPE_SUPTYPES:
		/* 07 draft */
		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
			break;
		/*FALLTHROUGH*/
	case NI_QTYPE_FQDN:
	case NI_QTYPE_NODEADDR:
		switch (ni6->ni_code) {
		case ICMP6_NI_SUBJ_IPV6:
#if ICMP6_NI_SUBJ_IPV6 != 0
		case 0:
#endif
			/*
			 * backward compatibility - try to accept 03 draft
			 * format, where no Subject is present.
			 */
			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
			    subjlen == 0) {
				oldfqdn++;
				break;
			}
#if ICMP6_NI_SUBJ_IPV6 != 0
			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
				goto bad;
#endif

			/* the Subject must be exactly one IPv6 address */
			if (subjlen != sizeof(sin6.sin6_addr))
				goto bad;

			/*
			 * Validate Subject address.
			 *
			 * Not sure what exactly "address belongs to the node"
			 * means in the spec, is it just unicast, or what?
			 *
			 * At this moment we consider Subject address as
			 * "belong to the node" if the Subject address equals
			 * to the IPv6 destination address; validation for
			 * IPv6 destination address should have done enough
			 * check for us.
			 *
			 * We do not do proxy at this moment.
			 */
			/* m_pulldown instead of copy? */
			/* from here on sin6 holds the Subject, not ip6_dst */
			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
			    subjlen, (caddr_t)&sin6.sin6_addr);
			/* XXX kame scope hack */
			if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) {
#ifdef FAKE_LOOPBACK_IF
				if ((m->m_flags & M_PKTHDR) != 0 &&
				    m->m_pkthdr.rcvif) {
					sin6.sin6_addr.s6_addr16[1] =
					    htons(m->m_pkthdr.rcvif->if_index);
				}
#else
				if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
					sin6.sin6_addr.s6_addr16[1] =
					    ip6->ip6_dst.s6_addr16[1];
				}
#endif
			}
			/* subj points at the whole sockaddr, see ni6_addrs() */
			subj = (char *)&sin6;
			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &sin6.sin6_addr))
				break;

			/*
			 * XXX if we are to allow other cases, we should really
			 * be careful about scope here.
			 * basically, we should disallow queries toward IPv6
			 * destination X with subject Y, if scope(X) > scope(Y).
			 * if we allow scope(X) > scope(Y), it will result in
			 * information leakage across scope boundary.
			 */
			goto bad;

		case ICMP6_NI_SUBJ_FQDN:
			/*
			 * Validate Subject name with gethostname(3).
			 *
			 * The behavior may need some debate, since:
			 * - we are not sure if the node has FQDN as
			 *   hostname (returned by gethostname(3)).
			 * - the code does wildcard match for truncated names.
			 *   however, we are not sure if we want to perform
			 *   wildcard match, if gethostname(3) side has
			 *   truncated hostname.
			 */
			/*
			 * n temporarily holds our own name in DNS format;
			 * it must be a single, non-empty mbuf to be compared.
			 */
			n = ni6_nametodns(hostname, hostnamelen, 0);
			if (!n || n->m_next || n->m_len == 0)
				goto bad;
			IP6_EXTHDR_GET(subj, char *, m,
			    off + sizeof(struct icmp6_nodeinfo), subjlen);
			if (subj == NULL)
				goto bad;
			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
					n->m_len)) {
				goto bad;
			}
			/* done with the temporary; n is reused for the reply */
			m_freem(n);
			n = NULL;
			break;

		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
		default:
			goto bad;
		}
		break;
	}

	/* guess reply length */
	switch (qtype) {
	case NI_QTYPE_NOOP:
		break;		/* no reply data */
	case NI_QTYPE_SUPTYPES:
		replylen += sizeof(u_int32_t);
		break;
	case NI_QTYPE_FQDN:
		/* XXX will append an mbuf */
		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
		break;
	case NI_QTYPE_NODEADDR:
		/* one (u_int32_t TTL, in6_addr) pair per reported address */
		addrs = ni6_addrs(ni6, m, &ifp, subj);
		if ((replylen += addrs * (sizeof(struct in6_addr) +
					  sizeof(u_int32_t))) > MCLBYTES)
			replylen = MCLBYTES; /* XXX: will truncate pkt later */
		break;
	default:
		/*
		 * XXX: We must return a reply with the ICMP6 code
		 * `unknown Qtype' in this case. However we regard the case
		 * as an FQDN query for backward compatibility.
		 * Older versions set a random value to this field,
		 * so it rarely varies in the defined qtypes.
		 * But the mechanism is not reliable...
		 * maybe we should obsolete older versions.
		 */
		qtype = NI_QTYPE_FQDN;
		/* XXX will append an mbuf */
		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
		oldfqdn++;
		break;
	}

	/* allocate an mbuf to reply. */
	MGETHDR(n, M_DONTWAIT, m->m_type);
	if (n == NULL) {
		m_freem(m);
		return(NULL);
	}
	M_COPY_PKTHDR(n, m); /* just for recvif */
	/* a reply that does not fit the header mbuf needs a cluster */
	if (replylen > MHLEN) {
		if (replylen > MCLBYTES) {
			/*
			 * XXX: should we try to allocate more? But MCLBYTES
			 * is probably much larger than IPV6_MMTU...
			 */
			goto bad;
		}
		MCLGET(n, M_DONTWAIT);
		if ((n->m_flags & M_EXT) == 0) {
			goto bad;
		}
	}
	n->m_pkthdr.len = n->m_len = replylen;

	/* copy mbuf header and IPv6 + Node Information base headers */
	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));

	/* qtype dependent procedure */
	switch (qtype) {
	case NI_QTYPE_NOOP:
		nni6->ni_code = ICMP6_NI_SUCCESS;
		nni6->ni_flags = 0;
		break;
	case NI_QTYPE_SUPTYPES:
	{
		u_int32_t v;
		nni6->ni_code = ICMP6_NI_SUCCESS;
		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
		v = (u_int32_t)htonl(0x0000000f);
		bcopy(&v, nni6 + 1, sizeof(u_int32_t));
		break;
	}
	case NI_QTYPE_FQDN:
		nni6->ni_code = ICMP6_NI_SUCCESS;
		fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
						sizeof(struct ip6_hdr) +
						sizeof(struct icmp6_nodeinfo));
		nni6->ni_flags = 0; /* XXX: meaningless TTL */
		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
		/*
		 * XXX do we really have FQDN in variable "hostname"?
		 */
		/* the encoded name travels in a second mbuf chained to n */
		n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn);
		if (n->m_next == NULL)
			goto bad;
		/* XXX we assume that n->m_next is not a chain */
		if (n->m_next->m_next != NULL)
			goto bad;
		n->m_pkthdr.len += n->m_next->m_len;
		break;
	case NI_QTYPE_NODEADDR:
	{
		int lenlim, copied;

		nni6->ni_code = ICMP6_NI_SUCCESS;
		/*
		 * shrink to the fixed headers first so that
		 * M_TRAILINGSPACE() reports the room left for addresses.
		 */
		n->m_pkthdr.len = n->m_len =
		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
		lenlim = M_TRAILINGSPACE(n);
		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
		/* XXX: reset mbuf length */
		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
			sizeof(struct icmp6_nodeinfo) + copied;
		break;
	}
	default:
		break;		/* XXX impossible! */
	}

	nni6->ni_type = ICMP6_NI_REPLY;
	m_freem(m);
	return(n);

  bad:
	/* drop the query and whatever reply/temporary mbuf is pending */
	m_freem(m);
	if (n)
		m_freem(n);
	return(NULL);
}
1478#undef hostnamelen
1479
1480/*
1481 * make a mbuf with DNS-encoded string.  no compression support.
1482 *
1483 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
1484 * treated as truncated name (two \0 at the end).  this is a wild guess.
1485 */
1486static struct mbuf *
1487ni6_nametodns(name, namelen, old)
1488	const char *name;
1489	int namelen;
1490	int old;	/* return pascal string if non-zero */
1491{
1492	struct mbuf *m;
1493	char *cp, *ep;
1494	const char *p, *q;
1495	int i, len, nterm;
1496
1497	if (old)
1498		len = namelen + 1;
1499	else
1500		len = MCLBYTES;
1501
1502	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
1503	MGET(m, M_DONTWAIT, MT_DATA);
1504	if (m && len > MLEN) {
1505		MCLGET(m, M_DONTWAIT);
1506		if ((m->m_flags & M_EXT) == 0)
1507			goto fail;
1508	}
1509	if (!m)
1510		goto fail;
1511	m->m_next = NULL;
1512
1513	if (old) {
1514		m->m_len = len;
1515		*mtod(m, char *) = namelen;
1516		bcopy(name, mtod(m, char *) + 1, namelen);
1517		return m;
1518	} else {
1519		m->m_len = 0;
1520		cp = mtod(m, char *);
1521		ep = mtod(m, char *) + M_TRAILINGSPACE(m);
1522
1523		/* if not certain about my name, return empty buffer */
1524		if (namelen == 0)
1525			return m;
1526
1527		/*
1528		 * guess if it looks like shortened hostname, or FQDN.
1529		 * shortened hostname needs two trailing "\0".
1530		 */
1531		i = 0;
1532		for (p = name; p < name + namelen; p++) {
1533			if (*p && *p == '.')
1534				i++;
1535		}
1536		if (i < 2)
1537			nterm = 2;
1538		else
1539			nterm = 1;
1540
1541		p = name;
1542		while (cp < ep && p < name + namelen) {
1543			i = 0;
1544			for (q = p; q < name + namelen && *q && *q != '.'; q++)
1545				i++;
1546			/* result does not fit into mbuf */
1547			if (cp + i + 1 >= ep)
1548				goto fail;
1549			/* DNS label length restriction, RFC1035 page 8 */
1550			if (i >= 64)
1551				goto fail;
1552			*cp++ = i;
1553			bcopy(p, cp, i);
1554			cp += i;
1555			p = q;
1556			if (p < name + namelen && *p == '.')
1557				p++;
1558		}
1559		/* termination */
1560		if (cp + nterm >= ep)
1561			goto fail;
1562		while (nterm-- > 0)
1563			*cp++ = '\0';
1564		m->m_len = cp - mtod(m, char *);
1565		return m;
1566	}
1567
1568	panic("should not reach here");
1569	/*NOTREACHED*/
1570
1571 fail:
1572	if (m)
1573		m_freem(m);
1574	return NULL;
1575}
1576
/*
 * Compare two DNS-encoded names (sequences of <length octet><label>
 * closed by a zero octet).  No compression support.
 *
 *	a, alen		first encoded name and its length in bytes
 *	b, blen		second encoded name and its length in bytes
 *
 * Returns 1 on a match, 0 otherwise.  A name in truncated form (ending
 * in \0\0) matches any name that starts with the same labels.
 * XXX upper/lowercase match (see RFC2065): labels are compared bytewise.
 */
static int
ni6_dnsmatch(a, alen, b, blen)
	const char *a;
	int alen;
	const char *b;
	int blen;
{
	int apos, bpos;		/* offset of the current label in a and b */
	int lablen;

	/* byte-for-byte identical buffers match without further parsing */
	if (alen == blen && bcmp(a, b, alen) == 0)
		return 1;

	/* each name needs at least one octet plus the mandatory terminator */
	if (alen < 2 || blen < 2)
		return 0;
	if (a[alen - 1] != '\0' || b[blen - 1] != '\0')
		return 0;

	/* from here on the lengths exclude the final terminator */
	alen--;
	blen--;

	apos = bpos = 0;
	while (apos < alen && bpos < blen) {
		unsigned char la = (unsigned char)a[apos];
		unsigned char lb = (unsigned char)b[bpos];

		/* 64 and above would be compression or garbage; unsupported */
		if (la >= 64 || lb >= 64)
			return 0;

		/*
		 * A zero octet in the last remaining position, right before
		 * the terminator, marks a truncated name: wildcard match.
		 * A zero octet anywhere else is malformed.
		 */
		if (la == 0 && apos == alen - 1)
			return 1;
		if (lb == 0 && bpos == blen - 1)
			return 1;
		if (la == 0 || lb == 0)
			return 0;

		/* labels must agree in length, fit the buffers, and be equal */
		if (la != lb)
			return 0;
		lablen = la;
		if (apos + 1 + lablen > alen || bpos + 1 + lablen > blen)
			return 0;
		if (bcmp(a + apos + 1, b + bpos + 1, lablen) != 0)
			return 0;

		apos += 1 + lablen;
		bpos += 1 + lablen;
	}

	/* a full match consumes both names completely */
	return (apos == alen && bpos == blen) ? 1 : 0;
}
1642
1643/*
1644 * calculate the number of addresses to be returned in the node info reply.
1645 */
1646static int
1647ni6_addrs(ni6, m, ifpp, subj)
1648	struct icmp6_nodeinfo *ni6;
1649	struct mbuf *m;
1650	struct ifnet **ifpp;
1651	char *subj;
1652{
1653	struct ifnet *ifp;
1654	struct in6_ifaddr *ifa6;
1655	struct ifaddr *ifa;
1656	struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */
1657	int addrs = 0, addrsofif, iffound = 0;
1658	int niflags = ni6->ni_flags;
1659
1660	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
1661		switch (ni6->ni_code) {
1662		case ICMP6_NI_SUBJ_IPV6:
1663			if (subj == NULL) /* must be impossible... */
1664				return(0);
1665			subj_ip6 = (struct sockaddr_in6 *)subj;
1666			break;
1667		default:
1668			/*
1669			 * XXX: we only support IPv6 subject address for
1670			 * this Qtype.
1671			 */
1672			return(0);
1673		}
1674	}
1675
1676	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
1677	{
1678		addrsofif = 0;
1679		for (ifa = ifp->if_addrlist.tqh_first; ifa;
1680		     ifa = ifa->ifa_list.tqe_next)
1681		{
1682			if (ifa->ifa_addr->sa_family != AF_INET6)
1683				continue;
1684			ifa6 = (struct in6_ifaddr *)ifa;
1685
1686			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
1687			    IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr,
1688					       &ifa6->ia_addr.sin6_addr))
1689				iffound = 1;
1690
1691			/*
1692			 * IPv4-mapped addresses can only be returned by a
1693			 * Node Information proxy, since they represent
1694			 * addresses of IPv4-only nodes, which perforce do
1695			 * not implement this protocol.
1696			 * [icmp-name-lookups-07, Section 5.4]
1697			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
1698			 * this function at this moment.
1699			 */
1700
1701			/* What do we have to do about ::1? */
1702			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1703			case IPV6_ADDR_SCOPE_LINKLOCAL:
1704				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1705					continue;
1706				break;
1707			case IPV6_ADDR_SCOPE_SITELOCAL:
1708				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1709					continue;
1710				break;
1711			case IPV6_ADDR_SCOPE_GLOBAL:
1712				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1713					continue;
1714				break;
1715			default:
1716				continue;
1717			}
1718
1719			/*
1720			 * check if anycast is okay.
1721			 * XXX: just experimental. not in the spec.
1722			 */
1723			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1724			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1725				continue; /* we need only unicast addresses */
1726
1727			addrsofif++; /* count the address */
1728		}
1729		if (iffound) {
1730			*ifpp = ifp;
1731			return(addrsofif);
1732		}
1733
1734		addrs += addrsofif;
1735	}
1736
1737	return(addrs);
1738}
1739
1740static int
1741ni6_store_addrs(ni6, nni6, ifp0, resid)
1742	struct icmp6_nodeinfo *ni6, *nni6;
1743	struct ifnet *ifp0;
1744	int resid;
1745{
1746	struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&ifnet);
1747	struct in6_ifaddr *ifa6;
1748	struct ifaddr *ifa;
1749	struct ifnet *ifp_dep = NULL;
1750	int copied = 0, allow_deprecated = 0;
1751	u_char *cp = (u_char *)(nni6 + 1);
1752	int niflags = ni6->ni_flags;
1753	u_int32_t ltime;
1754	long time_second = time.tv_sec;
1755
1756	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
1757		return(0);	/* needless to copy */
1758
1759  again:
1760
1761	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list))
1762	{
1763		for (ifa = ifp->if_addrlist.tqh_first; ifa;
1764		     ifa = ifa->ifa_list.tqe_next)
1765		{
1766			if (ifa->ifa_addr->sa_family != AF_INET6)
1767				continue;
1768			ifa6 = (struct in6_ifaddr *)ifa;
1769
1770			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
1771			    allow_deprecated == 0) {
1772				/*
1773				 * prefererred address should be put before
1774				 * deprecated addresses.
1775				 */
1776
1777				/* record the interface for later search */
1778				if (ifp_dep == NULL)
1779					ifp_dep = ifp;
1780
1781				continue;
1782			}
1783			else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
1784				 allow_deprecated != 0)
1785				continue; /* we now collect deprecated addrs */
1786
1787			/* What do we have to do about ::1? */
1788			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1789			case IPV6_ADDR_SCOPE_LINKLOCAL:
1790				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1791					continue;
1792				break;
1793			case IPV6_ADDR_SCOPE_SITELOCAL:
1794				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1795					continue;
1796				break;
1797			case IPV6_ADDR_SCOPE_GLOBAL:
1798				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1799					continue;
1800				break;
1801			default:
1802				continue;
1803			}
1804
1805			/*
1806			 * check if anycast is okay.
1807			 * XXX: just experimental. not in the spec.
1808			 */
1809			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1810			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1811				continue;
1812
1813			/* now we can copy the address */
1814			if (resid < sizeof(struct in6_addr) +
1815			    sizeof(u_int32_t)) {
1816				/*
1817				 * We give up much more copy.
1818				 * Set the truncate flag and return.
1819				 */
1820				nni6->ni_flags |=
1821					NI_NODEADDR_FLAG_TRUNCATE;
1822				return(copied);
1823			}
1824
1825			/*
1826			 * Set the TTL of the address.
1827			 * The TTL value should be one of the following
1828			 * according to the specification:
1829			 *
1830			 * 1. The remaining lifetime of a DHCP lease on the
1831			 *    address, or
1832			 * 2. The remaining Valid Lifetime of a prefix from
1833			 *    which the address was derived through Stateless
1834			 *    Autoconfiguration.
1835			 *
1836			 * Note that we currently do not support stateful
1837			 * address configuration by DHCPv6, so the former
1838			 * case can't happen.
1839			 */
1840			if (ifa6->ia6_lifetime.ia6t_expire == 0)
1841				ltime = ND6_INFINITE_LIFETIME;
1842			else {
1843				if (ifa6->ia6_lifetime.ia6t_expire >
1844				    time_second)
1845					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
1846				else
1847					ltime = 0;
1848			}
1849
1850			bcopy(&ltime, cp, sizeof(u_int32_t));
1851			cp += sizeof(u_int32_t);
1852
1853			/* copy the address itself */
1854			bcopy(&ifa6->ia_addr.sin6_addr, cp,
1855			      sizeof(struct in6_addr));
1856			/* XXX: KAME link-local hack; remove ifindex */
1857			if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
1858				((struct in6_addr *)cp)->s6_addr16[1] = 0;
1859			cp += sizeof(struct in6_addr);
1860
1861			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
1862			copied += (sizeof(struct in6_addr) +
1863				   sizeof(u_int32_t));
1864		}
1865		if (ifp0)	/* we need search only on the specified IF */
1866			break;
1867	}
1868
1869	if (allow_deprecated == 0 && ifp_dep != NULL) {
1870		ifp = ifp_dep;
1871		allow_deprecated = 1;
1872
1873		goto again;
1874	}
1875
1876	return(copied);
1877}
1878
1879/*
1880 * XXX almost dup'ed code with rip6_input.
1881 */
1882static int
1883icmp6_rip6_input(mp, off)
1884	struct	mbuf **mp;
1885	int	off;
1886{
1887	struct mbuf *m = *mp;
1888	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1889	struct in6pcb *in6p;
1890	struct in6pcb *last = NULL;
1891	struct sockaddr_in6 rip6src;
1892	struct icmp6_hdr *icmp6;
1893	struct mbuf *opts = NULL;
1894
1895#ifndef PULLDOWN_TEST
1896	/* this is assumed to be safe. */
1897	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
1898#else
1899	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
1900	if (icmp6 == NULL) {
1901		/* m is already reclaimed */
1902		return IPPROTO_DONE;
1903	}
1904#endif
1905
1906	bzero(&rip6src, sizeof(rip6src));
1907	rip6src.sin6_len = sizeof(struct sockaddr_in6);
1908	rip6src.sin6_family = AF_INET6;
1909	/* KAME hack: recover scopeid */
1910	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);
1911
1912	for (in6p = rawin6pcb.in6p_next;
1913	     in6p != &rawin6pcb; in6p = in6p->in6p_next)
1914	{
1915		if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
1916			continue;
1917		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
1918		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
1919			continue;
1920		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
1921		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
1922			continue;
1923		if (in6p->in6p_icmp6filt
1924		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
1925				 in6p->in6p_icmp6filt))
1926			continue;
1927		if (last) {
1928			struct	mbuf *n;
1929			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
1930				if (last->in6p_flags & IN6P_CONTROLOPTS)
1931					ip6_savecontrol(last, &opts, ip6, n);
1932				/* strip intermediate headers */
1933				m_adj(n, off);
1934				if (sbappendaddr(&last->in6p_socket->so_rcv,
1935						 (struct sockaddr *)&rip6src,
1936						 n, opts) == 0) {
1937					/* should notify about lost packet */
1938					m_freem(n);
1939					if (opts)
1940						m_freem(opts);
1941				} else
1942					sorwakeup(last->in6p_socket);
1943				opts = NULL;
1944			}
1945		}
1946		last = in6p;
1947	}
1948	if (last) {
1949		if (last->in6p_flags & IN6P_CONTROLOPTS)
1950			ip6_savecontrol(last, &opts, ip6, m);
1951		/* strip intermediate headers */
1952		m_adj(m, off);
1953		if (sbappendaddr(&last->in6p_socket->so_rcv,
1954				(struct sockaddr *)&rip6src, m, opts) == 0) {
1955			m_freem(m);
1956			if (opts)
1957				m_freem(opts);
1958		} else
1959			sorwakeup(last->in6p_socket);
1960	} else {
1961		m_freem(m);
1962		ip6stat.ip6s_delivered--;
1963	}
1964	return IPPROTO_DONE;
1965}
1966
/*
 * Reflect the ip6 packet back to the source.
 * OFF points to the icmp6 header, counted from the top of the mbuf.
 *
 * Headers between the IPv6 and ICMPv6 headers are stripped, the original
 * source becomes the destination, a source address is chosen, the ICMPv6
 * checksum is recomputed and the packet is passed to ip6_output().
 * On the `bad' path the mbuf is freed here.
 */
void
icmp6_reflect(m, off)
	struct	mbuf *m;
	size_t off;
{
	struct ip6_hdr *ip6;
	struct icmp6_hdr *icmp6;
	struct in6_ifaddr *ia;
	struct in6_addr t, *src = 0;
	int plen;
	int type, code;
	struct ifnet *outif = NULL;
	struct sockaddr_in6 sa6_src, sa6_dst;
#ifdef COMPAT_RFC1885
	int mtu = IPV6_MMTU;
	struct sockaddr_in6 *sin6 = &icmp6_reflect_rt.ro_dst;
#endif

	/* too short to reflect */
	if (off < sizeof(struct ip6_hdr)) {
		nd6log((LOG_DEBUG,
		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
		    __FILE__, __LINE__));
		goto bad;
	}

	/*
	 * If there are extra headers between IPv6 and ICMPv6, strip
	 * off that header first.
	 */
#ifdef DIAGNOSTIC
	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
		panic("assumption failed in icmp6_reflect");
#endif
	if (off > sizeof(struct ip6_hdr)) {
		size_t l;
		struct ip6_hdr nip6;

		/*
		 * Save the IPv6 header, trim the extra bytes from the front
		 * and write the saved header back on top of what remains.
		 */
		l = off - sizeof(struct ip6_hdr);
		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
		m_adj(m, l);
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			/* NOTE(review): presumably m_pullup frees m on failure */
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
	} else /* off == sizeof(struct ip6_hdr) */ {
		size_t l;
		/* make sure both headers sit in the first mbuf */
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
	}
	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
	type = icmp6->icmp6_type; /* keep type for statistics */
	code = icmp6->icmp6_code; /* ditto. */

	/* t keeps the original destination; it is a candidate source below */
	t = ip6->ip6_dst;
	/*
	 * ip6_input() drops a packet if its src is multicast.
	 * So, the src is never multicast.
	 */
	ip6->ip6_dst = ip6->ip6_src;

	/*
	 * XXX: make sure to embed scope zone information, using
	 * already embedded IDs or the received interface (if any).
	 * Note that rcvif may be NULL.
	 * TODO: scoped routing case (XXX).
	 */
	/* note: sa6_src describes the new destination (the old source) */
	bzero(&sa6_src, sizeof(sa6_src));
	sa6_src.sin6_family = AF_INET6;
	sa6_src.sin6_len = sizeof(sa6_src);
	sa6_src.sin6_addr = ip6->ip6_dst;
	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL);
	/* and sa6_dst describes the old destination held in t */
	bzero(&sa6_dst, sizeof(sa6_dst));
	sa6_dst.sin6_family = AF_INET6;
	sa6_dst.sin6_len = sizeof(sa6_dst);
	sa6_dst.sin6_addr = t;
	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
	in6_embedscope(&t, &sa6_dst, NULL, NULL);

#ifdef COMPAT_RFC1885
	/*
	 * xxx guess MTU
	 * RFC 1885 requires that echo reply should be truncated if it
	 * does not fit in with (return) path MTU, but the description was
	 * removed in the new spec.
	 */
	/* refresh the cached route when it is missing or for another dst */
	if (icmp6_reflect_rt.ro_rt == 0 ||
	    ! (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_dst))) {
		if (icmp6_reflect_rt.ro_rt) {
			RTFREE(icmp6_reflect_rt.ro_rt);
			icmp6_reflect_rt.ro_rt = 0;
		}
		bzero(sin6, sizeof(*sin6));
		sin6->sin6_family = PF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		sin6->sin6_addr = ip6->ip6_dst;

		rtalloc((struct route *)&icmp6_reflect_rt.ro_rt);
	}

	if (icmp6_reflect_rt.ro_rt == 0)
		goto bad;

	if ((icmp6_reflect_rt.ro_rt->rt_flags & RTF_HOST)
	    && mtu < icmp6_reflect_rt.ro_rt->rt_ifp->if_mtu)
		mtu = icmp6_reflect_rt.ro_rt->rt_rmx.rmx_mtu;

	/* a negative m_adj() length trims the excess from the tail */
	if (mtu < m->m_pkthdr.len) {
		plen -= (m->m_pkthdr.len - mtu);
		m_adj(m, mtu - m->m_pkthdr.len);
	}
#endif
	/*
	 * If the incoming packet was addressed directly to us(i.e. unicast),
	 * use dst as the src for the reply.
	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
	 * (for example) when we encounter an error while forwarding procedure
	 * destined to a duplicated address of ours.
	 */
	for (ia = in6_ifaddr; ia; ia = ia->ia_next)
		if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) &&
		    (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
			src = &t;
			break;
		}
	if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
		/*
		 * This is the case if the dst is our link-local address
		 * and the sender is also ourselves.
		 */
		src = &t;
	}

	if (src == 0) {
		int e;
		struct route_in6 ro;

		/*
		 * This case matches to multicasts, our anycast, or unicasts
		 * that we do not own. Select a source address based on the
		 * source address of the erroneous packet.
		 */
		bzero(&ro, sizeof(ro));
		src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e);
		if (ro.ro_rt)
			RTFREE(ro.ro_rt); /* XXX: we could use this */
		if (src == NULL) {
			nd6log((LOG_DEBUG,
			    "icmp6_reflect: source can't be determined: "
			    "dst=%s, error=%d\n",
			    ip6_sprintf(&sa6_src.sin6_addr), e));
			goto bad;
		}
	}

	ip6->ip6_src = *src;

	/* clear the flow word, then put the version number back */
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	/* without a receiving interface the hop limit is left as received */
	if (m->m_pkthdr.rcvif) {
		/* XXX: This may not be the outgoing interface */
		ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim;
	}

	/* the checksum field must be zero while the checksum is computed */
	icmp6->icmp6_cksum = 0;
	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
					sizeof(struct ip6_hdr), plen);

	/*
	 * xxx option handling
	 */

	m->m_flags &= ~(M_BCAST|M_MCAST);
#ifdef IPSEC
	/* Don't lookup socket */
	(void)ipsec_setsocket(m, NULL);
#endif /*IPSEC*/

#ifdef COMPAT_RFC1885
	ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif);
#else
	ip6_output(m, NULL, NULL, 0, NULL, &outif);
#endif
	/* account the reply against whatever interface ip6_output() reported */
	if (outif)
		icmp6_ifoutstat_inc(outif, type, code);

	return;

 bad:
	m_freem(m);
	return;
}
2175
/*
 * Fast timeout entry point for ICMPv6.
 * The only work done here is to run the MLD fast timeout routine.
 */
void
icmp6_fasttimo()
{
	mld6_fasttimeo();
	return;
}
2182
/*
 * Render the three addresses involved in a redirect as one diagnostic
 * string of the form "(src=... dst=... tgt=...)".
 *
 * The text is built in a function-local static buffer, so the returned
 * pointer is only meaningful until the next call.
 */
static const char *
icmp6_redirect_diag(src6, dst6, tgt6)
	struct in6_addr *src6;
	struct in6_addr *dst6;
	struct in6_addr *tgt6;
{
	static char diag[1024];

	snprintf(diag, sizeof(diag), "(src=%s dst=%s tgt=%s)",
	    ip6_sprintf(src6), ip6_sprintf(dst6), ip6_sprintf(tgt6));

	return diag;
}
2194
2195void
2196icmp6_redirect_input(m, off)
2197	struct mbuf *m;
2198	int off;
2199{
2200	struct ifnet *ifp = m->m_pkthdr.rcvif;
2201	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
2202	struct nd_redirect *nd_rd;
2203	int icmp6len = ntohs(ip6->ip6_plen);
2204	char *lladdr = NULL;
2205	int lladdrlen = 0;
2206	u_char *redirhdr = NULL;
2207	int redirhdrlen = 0;
2208	struct rtentry *rt = NULL;
2209	int is_router;
2210	int is_onlink;
2211	struct in6_addr src6 = ip6->ip6_src;
2212	struct in6_addr redtgt6;
2213	struct in6_addr reddst6;
2214	union nd_opts ndopts;
2215
2216	if (!m || !ifp)
2217		return;
2218
2219	/* XXX if we are router, we don't update route by icmp6 redirect */
2220	if (ip6_forwarding)
2221		goto freeit;
2222	if (!icmp6_rediraccept)
2223		goto freeit;
2224
2225#ifndef PULLDOWN_TEST
2226	IP6_EXTHDR_CHECK(m, off, icmp6len,);
2227	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
2228#else
2229	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
2230	if (nd_rd == NULL) {
2231		icmp6stat.icp6s_tooshort++;
2232		return;
2233	}
2234#endif
2235	redtgt6 = nd_rd->nd_rd_target;
2236	reddst6 = nd_rd->nd_rd_dst;
2237
2238	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2239		redtgt6.s6_addr16[1] = htons(ifp->if_index);
2240	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
2241		reddst6.s6_addr16[1] = htons(ifp->if_index);
2242
2243	/* validation */
2244	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
2245		nd6log((LOG_ERR,
2246			"ICMP6 redirect sent from %s rejected; "
2247			"must be from linklocal\n", ip6_sprintf(&src6)));
2248		goto bad;
2249	}
2250	if (ip6->ip6_hlim != 255) {
2251		nd6log((LOG_ERR,
2252			"ICMP6 redirect sent from %s rejected; "
2253			"hlim=%d (must be 255)\n",
2254			ip6_sprintf(&src6), ip6->ip6_hlim));
2255		goto bad;
2256	}
2257    {
2258	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
2259	struct sockaddr_in6 sin6;
2260	struct in6_addr *gw6;
2261
2262	bzero(&sin6, sizeof(sin6));
2263	sin6.sin6_family = AF_INET6;
2264	sin6.sin6_len = sizeof(struct sockaddr_in6);
2265	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
2266	rt = rtalloc1((struct sockaddr *)&sin6, 0);
2267	if (rt) {
2268		if (rt->rt_gateway == NULL ||
2269		    rt->rt_gateway->sa_family != AF_INET6) {
2270			nd6log((LOG_ERR,
2271			    "ICMP6 redirect rejected; no route "
2272			    "with inet6 gateway found for redirect dst: %s\n",
2273			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2274			RTFREE(rt);
2275			goto bad;
2276		}
2277
2278		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
2279		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
2280			nd6log((LOG_ERR,
2281				"ICMP6 redirect rejected; "
2282				"not equal to gw-for-src=%s (must be same): "
2283				"%s\n",
2284				ip6_sprintf(gw6),
2285				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2286			RTFREE(rt);
2287			goto bad;
2288		}
2289	} else {
2290		nd6log((LOG_ERR,
2291			"ICMP6 redirect rejected; "
2292			"no route found for redirect dst: %s\n",
2293			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2294		goto bad;
2295	}
2296	RTFREE(rt);
2297	rt = NULL;
2298    }
2299	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
2300		nd6log((LOG_ERR,
2301			"ICMP6 redirect rejected; "
2302			"redirect dst must be unicast: %s\n",
2303			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2304		goto bad;
2305	}
2306
2307	is_router = is_onlink = 0;
2308	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2309		is_router = 1;	/* router case */
2310	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
2311		is_onlink = 1;	/* on-link destination case */
2312	if (!is_router && !is_onlink) {
2313		nd6log((LOG_ERR,
2314			"ICMP6 redirect rejected; "
2315			"neither router case nor onlink case: %s\n",
2316			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2317		goto bad;
2318	}
2319	/* validation passed */
2320
2321	icmp6len -= sizeof(*nd_rd);
2322	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
2323	if (nd6_options(&ndopts) < 0) {
2324		nd6log((LOG_INFO, "icmp6_redirect_input: "
2325			"invalid ND option, rejected: %s\n",
2326			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2327		/* nd6_options have incremented stats */
2328		goto freeit;
2329	}
2330
2331	if (ndopts.nd_opts_tgt_lladdr) {
2332		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
2333		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
2334	}
2335
2336	if (ndopts.nd_opts_rh) {
2337		redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
2338		redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */
2339	}
2340
2341	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
2342		nd6log((LOG_INFO,
2343			"icmp6_redirect_input: lladdrlen mismatch for %s "
2344			"(if %d, icmp6 packet %d): %s\n",
2345			ip6_sprintf(&redtgt6), ifp->if_addrlen, lladdrlen - 2,
2346			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2347		goto bad;
2348	}
2349
2350	/* RFC 2461 8.3 */
2351	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
2352			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
2353
2354	if (!is_onlink) {	/* better router case. perform rtredirect. */
2355		/* perform rtredirect */
2356		struct sockaddr_in6 sdst;
2357		struct sockaddr_in6 sgw;
2358		struct sockaddr_in6 ssrc;
2359		unsigned long rtcount;
2360		struct rtentry *newrt = NULL;
2361
2362		/*
2363		 * do not install redirect route, if the number of entries
2364		 * is too much (> hiwat).  note that, the node (= host) will
2365		 * work just fine even if we do not install redirect route
2366		 * (there will be additional hops, though).
2367		 */
2368		rtcount = rt_timer_count(icmp6_redirect_timeout_q);
2369		if (0 <= icmp6_redirect_hiwat && rtcount > icmp6_redirect_hiwat)
2370			return;
2371		else if (0 <= icmp6_redirect_lowat &&
2372		    rtcount > icmp6_redirect_lowat) {
2373			/*
2374			 * XXX nuke a victim, install the new one.
2375			 */
2376		}
2377
2378		bzero(&sdst, sizeof(sdst));
2379		bzero(&sgw, sizeof(sgw));
2380		bzero(&ssrc, sizeof(ssrc));
2381		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
2382		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
2383			sizeof(struct sockaddr_in6);
2384		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
2385		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2386		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
2387		rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
2388			   (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
2389			   (struct sockaddr *)&ssrc,
2390			   &newrt);
2391
2392		if (newrt) {
2393			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
2394			    icmp6_redirect_timeout_q);
2395			rtfree(newrt);
2396		}
2397	}
2398	/* finally update cached route in each socket via pfctlinput */
2399    {
2400	struct sockaddr_in6 sdst;
2401
2402	bzero(&sdst, sizeof(sdst));
2403	sdst.sin6_family = AF_INET6;
2404	sdst.sin6_len = sizeof(struct sockaddr_in6);
2405	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2406	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
2407#ifdef IPSEC
2408	key_sa_routechange((struct sockaddr *)&sdst);
2409#endif
2410    }
2411
2412 freeit:
2413	m_freem(m);
2414	return;
2415
2416 bad:
2417	icmp6stat.icp6s_badredirect++;
2418	m_freem(m);
2419}
2420
2421void
2422icmp6_redirect_output(m0, rt)
2423	struct mbuf *m0;
2424	struct rtentry *rt;
2425{
2426	struct ifnet *ifp;	/* my outgoing interface */
2427	struct in6_addr *ifp_ll6;
2428	struct in6_addr *router_ll6;
2429	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
2430	struct mbuf *m = NULL;	/* newly allocated one */
2431	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
2432	struct nd_redirect *nd_rd;
2433	size_t maxlen;
2434	u_char *p;
2435	struct ifnet *outif = NULL;
2436	struct sockaddr_in6 src_sa;
2437
2438	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
2439
2440	/* if we are not router, we don't send icmp6 redirect */
2441	if (!ip6_forwarding || ip6_accept_rtadv)
2442		goto fail;
2443
2444	/* sanity check */
2445	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
2446		goto fail;
2447
2448	/*
2449	 * Address check:
2450	 *  the source address must identify a neighbor, and
2451	 *  the destination address must not be a multicast address
2452	 *  [RFC 2461, sec 8.2]
2453	 */
2454	sip6 = mtod(m0, struct ip6_hdr *);
2455	bzero(&src_sa, sizeof(src_sa));
2456	src_sa.sin6_family = AF_INET6;
2457	src_sa.sin6_len = sizeof(src_sa);
2458	src_sa.sin6_addr = sip6->ip6_src;
2459	/* we don't currently use sin6_scope_id, but eventually use it */
2460	src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src);
2461	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
2462		goto fail;
2463	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
2464		goto fail;	/* what should we do here? */
2465
2466	/* rate limit */
2467	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
2468		goto fail;
2469
2470	/*
2471	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
2472	 * we almost always ask for an mbuf cluster for simplicity.
2473	 * (MHLEN < IPV6_MMTU is almost always true)
2474	 */
2475#if IPV6_MMTU >= MCLBYTES
2476# error assumption failed about IPV6_MMTU and MCLBYTES
2477#endif
2478	MGETHDR(m, M_DONTWAIT, MT_HEADER);
2479	if (m && IPV6_MMTU >= MHLEN)
2480		MCLGET(m, M_DONTWAIT);
2481	if (!m)
2482		goto fail;
2483	m->m_len = 0;
2484	maxlen = M_TRAILINGSPACE(m);
2485	maxlen = min(IPV6_MMTU, maxlen);
2486	/* just for safety */
2487	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
2488	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
2489		goto fail;
2490	}
2491
2492	{
2493		/* get ip6 linklocal address for ifp(my outgoing interface). */
2494		struct in6_ifaddr *ia;
2495		if ((ia = in6ifa_ifpforlinklocal(ifp,
2496						 IN6_IFF_NOTREADY|
2497						 IN6_IFF_ANYCAST)) == NULL)
2498			goto fail;
2499		ifp_ll6 = &ia->ia_addr.sin6_addr;
2500	}
2501
2502	/* get ip6 linklocal address for the router. */
2503	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
2504		struct sockaddr_in6 *sin6;
2505		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
2506		router_ll6 = &sin6->sin6_addr;
2507		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
2508			router_ll6 = (struct in6_addr *)NULL;
2509	} else
2510		router_ll6 = (struct in6_addr *)NULL;
2511
2512	/* ip6 */
2513	ip6 = mtod(m, struct ip6_hdr *);
2514	ip6->ip6_flow = 0;
2515	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2516	ip6->ip6_vfc |= IPV6_VERSION;
2517	/* ip6->ip6_plen will be set later */
2518	ip6->ip6_nxt = IPPROTO_ICMPV6;
2519	ip6->ip6_hlim = 255;
2520	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
2521	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
2522	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
2523
2524	/* ND Redirect */
2525	nd_rd = (struct nd_redirect *)(ip6 + 1);
2526	nd_rd->nd_rd_type = ND_REDIRECT;
2527	nd_rd->nd_rd_code = 0;
2528	nd_rd->nd_rd_reserved = 0;
2529	if (rt->rt_flags & RTF_GATEWAY) {
2530		/*
2531		 * nd_rd->nd_rd_target must be a link-local address in
2532		 * better router cases.
2533		 */
2534		if (!router_ll6)
2535			goto fail;
2536		bcopy(router_ll6, &nd_rd->nd_rd_target,
2537		      sizeof(nd_rd->nd_rd_target));
2538		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2539		      sizeof(nd_rd->nd_rd_dst));
2540	} else {
2541		/* make sure redtgt == reddst */
2542		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
2543		      sizeof(nd_rd->nd_rd_target));
2544		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2545		      sizeof(nd_rd->nd_rd_dst));
2546	}
2547
2548	p = (u_char *)(nd_rd + 1);
2549
2550	if (!router_ll6)
2551		goto nolladdropt;
2552
2553    {
2554	/* target lladdr option */
2555	struct rtentry *rt_router = NULL;
2556	int len;
2557	struct sockaddr_dl *sdl;
2558	struct nd_opt_hdr *nd_opt;
2559	char *lladdr;
2560
2561	rt_router = nd6_lookup(router_ll6, 0, ifp);
2562	if (!rt_router)
2563		goto nolladdropt;
2564	len = sizeof(*nd_opt) + ifp->if_addrlen;
2565	len = (len + 7) & ~7;	/*round by 8*/
2566	/* safety check */
2567	if (len + (p - (u_char *)ip6) > maxlen)
2568		goto nolladdropt;
2569	if (!(rt_router->rt_flags & RTF_GATEWAY) &&
2570	    (rt_router->rt_flags & RTF_LLINFO) &&
2571	    (rt_router->rt_gateway->sa_family == AF_LINK) &&
2572	    (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
2573	    sdl->sdl_alen) {
2574		nd_opt = (struct nd_opt_hdr *)p;
2575		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
2576		nd_opt->nd_opt_len = len >> 3;
2577		lladdr = (char *)(nd_opt + 1);
2578		bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
2579		p += len;
2580	}
2581    }
2582nolladdropt:;
2583
2584	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2585
2586	/* just to be safe */
2587	if (m0->m_flags & M_DECRYPTED)
2588		goto noredhdropt;
2589	if (p - (u_char *)ip6 > maxlen)
2590		goto noredhdropt;
2591
2592    {
2593	/* redirected header option */
2594	int len;
2595	struct nd_opt_rd_hdr *nd_opt_rh;
2596
2597	/*
2598	 * compute the maximum size for icmp6 redirect header option.
2599	 * XXX room for auth header?
2600	 */
2601	len = maxlen - (p - (u_char *)ip6);
2602	len &= ~7;
2603
2604	/* This is just for simplicity. */
2605	if (m0->m_pkthdr.len != m0->m_len) {
2606		if (m0->m_next) {
2607			m_freem(m0->m_next);
2608			m0->m_next = NULL;
2609		}
2610		m0->m_pkthdr.len = m0->m_len;
2611	}
2612
2613	/*
2614	 * Redirected header option spec (RFC2461 4.6.3) talks nothing
2615	 * about padding/truncate rule for the original IP packet.
2616	 * From the discussion on IPv6imp in Feb 1999, the consensus was:
2617	 * - "attach as much as possible" is the goal
2618	 * - pad if not aligned (original size can be guessed by original
2619	 *   ip6 header)
2620	 * Following code adds the padding if it is simple enough,
2621	 * and truncates if not.
2622	 */
2623	if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
2624		panic("assumption failed in %s:%d\n", __FILE__, __LINE__);
2625
2626	if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
2627		/* not enough room, truncate */
2628		m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh);
2629	} else {
2630		/* enough room, pad or truncate */
2631		size_t extra;
2632
2633		extra = m0->m_pkthdr.len % 8;
2634		if (extra) {
2635			/* pad if easy enough, truncate if not */
2636			if (8 - extra <= M_TRAILINGSPACE(m0)) {
2637				/* pad */
2638				m0->m_len += (8 - extra);
2639				m0->m_pkthdr.len += (8 - extra);
2640			} else {
2641				/* truncate */
2642				m0->m_pkthdr.len -= extra;
2643				m0->m_len -= extra;
2644			}
2645		}
2646		len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
2647		m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh);
2648	}
2649
2650	nd_opt_rh = (struct nd_opt_rd_hdr *)p;
2651	bzero(nd_opt_rh, sizeof(*nd_opt_rh));
2652	nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
2653	nd_opt_rh->nd_opt_rh_len = len >> 3;
2654	p += sizeof(*nd_opt_rh);
2655	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2656
2657	/* connect m0 to m */
2658	m->m_next = m0;
2659	m->m_pkthdr.len = m->m_len + m0->m_len;
2660    }
2661noredhdropt:;
2662
2663	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
2664		sip6->ip6_src.s6_addr16[1] = 0;
2665	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
2666		sip6->ip6_dst.s6_addr16[1] = 0;
2667#if 0
2668	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
2669		ip6->ip6_src.s6_addr16[1] = 0;
2670	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
2671		ip6->ip6_dst.s6_addr16[1] = 0;
2672#endif
2673	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
2674		nd_rd->nd_rd_target.s6_addr16[1] = 0;
2675	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
2676		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
2677
2678	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
2679
2680	nd_rd->nd_rd_cksum = 0;
2681	nd_rd->nd_rd_cksum
2682		= in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));
2683
2684	/* send the packet to outside... */
2685#ifdef IPSEC
2686	/* Don't lookup socket */
2687	(void)ipsec_setsocket(m, NULL);
2688#endif /*IPSEC*/
2689	ip6_output(m, NULL, NULL, 0, NULL, &outif);
2690	if (outif) {
2691		icmp6_ifstat_inc(outif, ifs6_out_msg);
2692		icmp6_ifstat_inc(outif, ifs6_out_redirect);
2693	}
2694	icmp6stat.icp6s_outhist[ND_REDIRECT]++;
2695
2696	return;
2697
2698fail:
2699	if (m)
2700		m_freem(m);
2701	if (m0)
2702		m_freem(m0);
2703}
2704
2705/*
2706 * ICMPv6 socket option processing.
2707 */
2708int
2709icmp6_ctloutput(op, so, level, optname, mp)
2710	int op;
2711	struct socket *so;
2712	int level, optname;
2713	struct mbuf **mp;
2714{
2715	int error = 0;
2716	int optlen;
2717	struct in6pcb *in6p = sotoin6pcb(so);
2718	struct mbuf *m = *mp;
2719
2720	optlen = m ? m->m_len : 0;
2721
2722	if (level != IPPROTO_ICMPV6) {
2723		if (op == PRCO_SETOPT && m)
2724			(void)m_free(m);
2725		return EINVAL;
2726	}
2727
2728	switch (op) {
2729	case PRCO_SETOPT:
2730		switch (optname) {
2731		case ICMP6_FILTER:
2732		    {
2733			struct icmp6_filter *p;
2734
2735			if (optlen != sizeof(*p)) {
2736				error = EMSGSIZE;
2737				break;
2738			}
2739			p = mtod(m, struct icmp6_filter *);
2740			if (!p || !in6p->in6p_icmp6filt) {
2741				error = EINVAL;
2742				break;
2743			}
2744			bcopy(p, in6p->in6p_icmp6filt,
2745				sizeof(struct icmp6_filter));
2746			error = 0;
2747			break;
2748		    }
2749
2750		default:
2751			error = ENOPROTOOPT;
2752			break;
2753		}
2754		if (m)
2755			(void)m_freem(m);
2756		break;
2757
2758	case PRCO_GETOPT:
2759		switch (optname) {
2760		case ICMP6_FILTER:
2761		    {
2762			struct icmp6_filter *p;
2763
2764			if (!in6p->in6p_icmp6filt) {
2765				error = EINVAL;
2766				break;
2767			}
2768			*mp = m = m_get(M_WAIT, MT_SOOPTS);
2769			m->m_len = sizeof(struct icmp6_filter);
2770			p = mtod(m, struct icmp6_filter *);
2771			bcopy(in6p->in6p_icmp6filt, p,
2772				sizeof(struct icmp6_filter));
2773			error = 0;
2774			break;
2775		    }
2776
2777		default:
2778			error = ENOPROTOOPT;
2779			break;
2780		}
2781		break;
2782	}
2783
2784	return(error);
2785}
2786
2787/*
2788 * Perform rate limit check.
2789 * Returns 0 if it is okay to send the icmp6 packet.
2790 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
2791 * limitation.
2792 *
2793 * XXX per-destination/type check necessary?
2794 */
2795static int
2796icmp6_ratelimit(dst, type, code)
2797	const struct in6_addr *dst;	/* not used at this moment */
2798	const int type;			/* not used at this moment */
2799	const int code;			/* not used at this moment */
2800{
2801	int ret;
2802
2803	ret = 0;	/*okay to send*/
2804
2805	/* PPS limit */
2806	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
2807	    icmp6errppslim)) {
2808		/* The packet is subject to rate limit */
2809		ret++;
2810	}
2811
2812	return ret;
2813}
2814
2815static struct rtentry *
2816icmp6_mtudisc_clone(dst)
2817	struct sockaddr *dst;
2818{
2819	struct rtentry *rt;
2820	int    error;
2821
2822	rt = rtalloc1(dst, 1);
2823	if (rt == 0)
2824		return NULL;
2825
2826	/* If we didn't get a host route, allocate one */
2827	if ((rt->rt_flags & RTF_HOST) == 0) {
2828		struct rtentry *nrt;
2829
2830		error = rtrequest((int) RTM_ADD, dst,
2831		    (struct sockaddr *) rt->rt_gateway,
2832		    (struct sockaddr *) 0,
2833		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
2834		if (error) {
2835			rtfree(rt);
2836			return NULL;
2837		}
2838		nrt->rt_rmx = rt->rt_rmx;
2839		rtfree(rt);
2840		rt = nrt;
2841	}
2842	error = rt_timer_add(rt, icmp6_mtudisc_timeout,
2843			icmp6_mtudisc_timeout_q);
2844	if (error) {
2845		rtfree(rt);
2846		return NULL;
2847	}
2848
2849	return rt;	/* caller need to call rtfree() */
2850}
2851
2852static void
2853icmp6_mtudisc_timeout(rt, r)
2854	struct rtentry *rt;
2855	struct rttimer *r;
2856{
2857	if (rt == NULL)
2858		panic("icmp6_mtudisc_timeout: bad route to timeout");
2859	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
2860	    (RTF_DYNAMIC | RTF_HOST)) {
2861		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
2862		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
2863	} else {
2864		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
2865			rt->rt_rmx.rmx_mtu = 0;
2866		}
2867	}
2868}
2869
2870static void
2871icmp6_redirect_timeout(rt, r)
2872	struct rtentry *rt;
2873	struct rttimer *r;
2874{
2875	if (rt == NULL)
2876		panic("icmp6_redirect_timeout: bad route to timeout");
2877	if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) ==
2878	    (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) {
2879		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
2880		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
2881	}
2882}
2883
2884#include <uvm/uvm_extern.h>
2885#include <sys/sysctl.h>
2886
2887int
2888icmp6_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
2889	int *name;
2890	u_int namelen;
2891	void *oldp;
2892	size_t *oldlenp;
2893	void *newp;
2894	size_t newlen;
2895{
2896
2897	/* All sysctl names at this level are terminal. */
2898	if (namelen != 1)
2899		return ENOTDIR;
2900
2901	switch (name[0]) {
2902
2903	case ICMPV6CTL_REDIRACCEPT:
2904		return sysctl_int(oldp, oldlenp, newp, newlen,
2905				&icmp6_rediraccept);
2906	case ICMPV6CTL_REDIRTIMEOUT:
2907		return sysctl_int(oldp, oldlenp, newp, newlen,
2908				&icmp6_redirtimeout);
2909	case ICMPV6CTL_STATS:
2910		return sysctl_rdstruct(oldp, oldlenp, newp,
2911				&icmp6stat, sizeof(icmp6stat));
2912	case ICMPV6CTL_ND6_PRUNE:
2913		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_prune);
2914	case ICMPV6CTL_ND6_DELAY:
2915		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_delay);
2916	case ICMPV6CTL_ND6_UMAXTRIES:
2917		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_umaxtries);
2918	case ICMPV6CTL_ND6_MMAXTRIES:
2919		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_mmaxtries);
2920	case ICMPV6CTL_ND6_USELOOPBACK:
2921		return sysctl_int(oldp, oldlenp, newp, newlen,
2922				&nd6_useloopback);
2923	case ICMPV6CTL_NODEINFO:
2924		return sysctl_int(oldp, oldlenp, newp, newlen, &icmp6_nodeinfo);
2925	case ICMPV6CTL_ERRPPSLIMIT:
2926		return sysctl_int(oldp, oldlenp, newp, newlen, &icmp6errppslim);
2927	case ICMPV6CTL_ND6_MAXNUDHINT:
2928		return sysctl_int(oldp, oldlenp, newp, newlen,
2929				&nd6_maxnudhint);
2930	case ICMPV6CTL_MTUDISC_HIWAT:
2931		return sysctl_int(oldp, oldlenp, newp, newlen,
2932				&icmp6_mtudisc_hiwat);
2933	case ICMPV6CTL_MTUDISC_LOWAT:
2934		return sysctl_int(oldp, oldlenp, newp, newlen,
2935				&icmp6_mtudisc_lowat);
2936	case ICMPV6CTL_ND6_DEBUG:
2937		return sysctl_int(oldp, oldlenp, newp, newlen, &nd6_debug);
2938	default:
2939		return ENOPROTOOPT;
2940	}
2941	/* NOTREACHED */
2942}
2943