icmp6.c revision 185348
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1988, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
61 */
62
63#include <sys/cdefs.h>
64__FBSDID("$FreeBSD: head/sys/netinet6/icmp6.c 185348 2008-11-26 22:32:07Z zec $");
65
66#include "opt_inet.h"
67#include "opt_inet6.h"
68#include "opt_ipsec.h"
69
70#include <sys/param.h>
71#include <sys/domain.h>
72#include <sys/kernel.h>
73#include <sys/lock.h>
74#include <sys/malloc.h>
75#include <sys/mbuf.h>
76#include <sys/protosw.h>
77#include <sys/signalvar.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
80#include <sys/sx.h>
81#include <sys/syslog.h>
82#include <sys/systm.h>
83#include <sys/time.h>
84#include <sys/vimage.h>
85
86#include <net/if.h>
87#include <net/if_dl.h>
88#include <net/if_types.h>
89#include <net/route.h>
90
91#include <netinet/in.h>
92#include <netinet/in_pcb.h>
93#include <netinet/in_var.h>
94#include <netinet/ip6.h>
95#include <netinet/icmp6.h>
96#include <netinet/tcp_var.h>
97#include <netinet6/in6_ifattach.h>
98#include <netinet6/in6_pcb.h>
99#include <netinet6/ip6protosw.h>
100#include <netinet6/ip6_var.h>
101#include <netinet6/scope6_var.h>
102#include <netinet6/mld6_var.h>
103#include <netinet6/nd6.h>
104
105#ifdef IPSEC
106#include <netipsec/ipsec.h>
107#include <netipsec/key.h>
108#endif
109
/* Defined outside this file. */
extern struct domain inet6domain;

/*
 * The declarations below exist only when VIMAGE_GLOBALS is defined.
 * The code in this file refers to several of them through V_-prefixed
 * names (V_icmp6stat, V_icmp6errpps_count, V_icmp6_nodeinfo); presumably
 * those names map onto these globals in this configuration -- confirm
 * against the vimage headers.
 */
#ifdef VIMAGE_GLOBALS
/* Owned by other files; only referenced here. */
extern struct inpcbinfo ripcbinfo;
extern struct inpcbhead ripcb;
extern int icmp6errppslim;
extern int icmp6_nodeinfo;

/* ICMPv6 statistics; this is the definition, so it has external linkage. */
struct icmp6stat icmp6stat;
/* File-private state; icmp6errpps_count is reset to 0 by icmp6_init(). */
static int icmp6errpps_count;
static struct timeval icmp6errppslim_last;
#endif
122
/*
 * Prototypes for the helpers private to this file.  They are all written
 * as plain ANSI prototypes; the K&R-compatibility wrapper __P(()) that
 * some of them used to carry expands to its argument on ANSI compilers,
 * so dropping it changes nothing for the compiler.
 */
static void icmp6_errcount(struct icmp6errstat *, int, int);
static int icmp6_rip6_input(struct mbuf **, int);
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
static const char *icmp6_redirect_diag(struct in6_addr *,
	struct in6_addr *, struct in6_addr *);
static struct mbuf *ni6_input(struct mbuf *, int);
static struct mbuf *ni6_nametodns(const char *, int, int);
static int ni6_dnsmatch(const char *, int, const char *, int);
static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *,
	struct ifnet **, struct in6_addr *);
static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
	struct ifnet *, int);
static int icmp6_notify_error(struct mbuf **, int, int, int);
136
137
/*
 * Initialize the ICMPv6 module: zero V_icmp6errpps_count and then call
 * mld6_init().
 */
void
icmp6_init(void)
{
	INIT_VNET_INET6(curvnet);

	/* Presumably the counter behind the error rate limit -- confirm. */
	V_icmp6errpps_count = 0;

	mld6_init();
}
147
148static void
149icmp6_errcount(struct icmp6errstat *stat, int type, int code)
150{
151	switch (type) {
152	case ICMP6_DST_UNREACH:
153		switch (code) {
154		case ICMP6_DST_UNREACH_NOROUTE:
155			stat->icp6errs_dst_unreach_noroute++;
156			return;
157		case ICMP6_DST_UNREACH_ADMIN:
158			stat->icp6errs_dst_unreach_admin++;
159			return;
160		case ICMP6_DST_UNREACH_BEYONDSCOPE:
161			stat->icp6errs_dst_unreach_beyondscope++;
162			return;
163		case ICMP6_DST_UNREACH_ADDR:
164			stat->icp6errs_dst_unreach_addr++;
165			return;
166		case ICMP6_DST_UNREACH_NOPORT:
167			stat->icp6errs_dst_unreach_noport++;
168			return;
169		}
170		break;
171	case ICMP6_PACKET_TOO_BIG:
172		stat->icp6errs_packet_too_big++;
173		return;
174	case ICMP6_TIME_EXCEEDED:
175		switch (code) {
176		case ICMP6_TIME_EXCEED_TRANSIT:
177			stat->icp6errs_time_exceed_transit++;
178			return;
179		case ICMP6_TIME_EXCEED_REASSEMBLY:
180			stat->icp6errs_time_exceed_reassembly++;
181			return;
182		}
183		break;
184	case ICMP6_PARAM_PROB:
185		switch (code) {
186		case ICMP6_PARAMPROB_HEADER:
187			stat->icp6errs_paramprob_header++;
188			return;
189		case ICMP6_PARAMPROB_NEXTHEADER:
190			stat->icp6errs_paramprob_nextheader++;
191			return;
192		case ICMP6_PARAMPROB_OPTION:
193			stat->icp6errs_paramprob_option++;
194			return;
195		}
196		break;
197	case ND_REDIRECT:
198		stat->icp6errs_redirect++;
199		return;
200	}
201	stat->icp6errs_unknown++;
202}
203
204/*
205 * A wrapper function for icmp6_error() necessary when the erroneous packet
206 * may not contain enough scope zone information.
207 */
208void
209icmp6_error2(struct mbuf *m, int type, int code, int param,
210    struct ifnet *ifp)
211{
212	INIT_VNET_INET6(curvnet);
213	struct ip6_hdr *ip6;
214
215	if (ifp == NULL)
216		return;
217
218#ifndef PULLDOWN_TEST
219	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
220#else
221	if (m->m_len < sizeof(struct ip6_hdr)) {
222		m = m_pullup(m, sizeof(struct ip6_hdr));
223		if (m == NULL)
224			return;
225	}
226#endif
227
228	ip6 = mtod(m, struct ip6_hdr *);
229
230	if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
231		return;
232	if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
233		return;
234
235	icmp6_error(m, type, code, param);
236}
237
/*
 * Generate an error packet of type error in response to bad IP6 packet.
 *
 * m     - the offending packet, starting with its IPv6 header.
 * type  - ICMPv6 type to send; also used as index into icp6s_outhist[].
 * code  - ICMPv6 code to send.
 * param - stored, converted with htonl(), in the icmp6_pptr field.
 *
 * The icp6s_error counter and the per-type/code statistics are bumped
 * first, whether or not an error is eventually sent.  The error is
 * suppressed (and m freed) when the offending packet was multicast or
 * broadcast (with two exceptions), had an unspecified or multicast source,
 * was itself an ICMPv6 error or redirect, or when icmp6_ratelimit() says
 * so.  Otherwise the offending packet is trimmed, an IPv6 + ICMPv6 header
 * pair is prepended, and the result is handed to icmp6_reflect().
 */
void
icmp6_error(struct mbuf *m, int type, int code, int param)
{
	INIT_VNET_INET6(curvnet);
	struct ip6_hdr *oip6, *nip6;
	struct icmp6_hdr *icmp6;
	u_int preplen;
	int off;
	int nxt;

	V_icmp6stat.icp6s_error++;

	/* count per-type-code statistics */
	icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code);

#ifdef M_DECRYPTED	/*not openbsd*/
	/* Never answer a packet carrying the M_DECRYPTED flag. */
	if (m->m_flags & M_DECRYPTED) {
		V_icmp6stat.icp6s_canterror++;
		goto freeit;
	}
#endif

	/*
	 * Make the IPv6 header readable through mtod().  Both variants
	 * return from this function on failure.
	 */
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
#else
	if (m->m_len < sizeof(struct ip6_hdr)) {
		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL)
			return;
	}
#endif
	oip6 = mtod(m, struct ip6_hdr *);

	/*
	 * If the destination address of the erroneous packet is a multicast
	 * address, or the packet was sent using link-layer multicast,
	 * we should basically suppress sending an error (RFC 2463, Section
	 * 2.4).
	 * We have two exceptions (the item e.2 in that section):
	 * - the Packet Too Big message can be sent for path MTU discovery.
	 * - the Parameter Problem Message that can be allowed an icmp6 error
	 *   in the option type field.  This check has been done in
	 *   ip6_unknown_opt(), so we can just check the type and code.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST) ||
	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
	    (type != ICMP6_PACKET_TOO_BIG &&
	     (type != ICMP6_PARAM_PROB ||
	      code != ICMP6_PARAMPROB_OPTION)))
		goto freeit;

	/*
	 * RFC 2463, 2.4 (e.5): source address check.
	 * XXX: the case of anycast source?
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
		goto freeit;

	/*
	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
	 * don't do it.
	 */
	nxt = -1;
	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icp;

#ifndef PULLDOWN_TEST
		IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), );
		icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
#else
		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
			sizeof(*icp));
		if (icp == NULL) {
			V_icmp6stat.icp6s_tooshort++;
			return;
		}
#endif
		/* Types below ICMP6_ECHO_REQUEST are treated as errors. */
		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
		    icp->icmp6_type == ND_REDIRECT) {
			/*
			 * ICMPv6 error
			 * Special case: for redirect (which is
			 * informational) we must not send icmp6 error.
			 */
			V_icmp6stat.icp6s_canterror++;
			goto freeit;
		} else {
			/* ICMPv6 informational - send the error */
		}
	} else {
		/* non-ICMPv6 - send the error */
	}

	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */

	/* Finally, do rate limitation check. */
	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
		V_icmp6stat.icp6s_toofreq++;
		goto freeit;
	}

	/*
	 * OK, ICMP6 can be generated.
	 */

	/*
	 * Cut the offending packet down to ICMPV6_PLD_MAXLEN bytes.  The
	 * argument to m_adj() is zero or negative here; a negative count
	 * trims from the tail of the chain.
	 */
	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);

	/* Make room in front for the new IPv6 and ICMPv6 headers. */
	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
	M_PREPEND(m, preplen, M_DONTWAIT);
	if (m && m->m_len < preplen)
		m = m_pullup(m, preplen);
	if (m == NULL) {
		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
		return;
	}

	/*
	 * NOTE(review): oip6 was obtained before M_PREPEND()/m_pullup() and
	 * is dereferenced below.  This presumably relies on the original
	 * header bytes staying where they were -- confirm that neither call
	 * can move or release them.
	 *
	 * The addresses are copied unswapped; presumably icmp6_reflect()
	 * takes care of turning them around -- confirm.
	 */
	nip6 = mtod(m, struct ip6_hdr *);
	nip6->ip6_src  = oip6->ip6_src;
	nip6->ip6_dst  = oip6->ip6_dst;

	/* Scope is cleared in the original header, now part of the payload. */
	in6_clearscope(&oip6->ip6_src);
	in6_clearscope(&oip6->ip6_dst);

	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
	icmp6->icmp6_type = type;
	icmp6->icmp6_code = code;
	icmp6->icmp6_pptr = htonl((u_int32_t)param);

	/*
	 * icmp6_reflect() is designed to be in the input path.
	 * icmp6_error() can be called from both input and output path,
	 * and if we are in output path rcvif could contain bogus value.
	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
	 * information in ip header (nip6).
	 */
	m->m_pkthdr.rcvif = NULL;

	V_icmp6stat.icp6s_outhist[type]++;
	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */

	return;

  freeit:
	/*
	 * If we can't tell whether or not we can generate ICMP6, free it.
	 */
	m_freem(m);
}
392
/*
 * Process a received ICMP6 message.
 *
 * mp    - points at the packet; the ICMPv6 header sits *offp bytes in.
 * offp  - offset of the ICMPv6 header from the start of the packet.
 * proto - not referenced in this function.
 *
 * Every return statement yields IPPROTO_DONE.
 *
 * After the length, checksum and faith-prefix checks the message is
 * counted and dispatched on its type.  Handlers that need the packet work
 * on a copy (n) where possible, so that the original (m) can still be
 * delivered to raw sockets through icmp6_rip6_input() at the end.  The
 * paths that jump to freeit skip that delivery.
 */
int
icmp6_input(struct mbuf **mp, int *offp, int proto)
{
	INIT_VNET_INET6(curvnet);
	INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
	struct mbuf *m = *mp, *n;
	struct ip6_hdr *ip6, *nip6;
	struct icmp6_hdr *icmp6, *nicmp6;
	int off = *offp;
	int icmp6len = m->m_pkthdr.len - *offp;
	int code, sum, noff;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
	/* m might change if M_LOOP.  So, call mtod after this */
#endif

	/*
	 * Locate icmp6 structure in mbuf, and check
	 * that not corrupted and of at least minimum length
	 */

	ip6 = mtod(m, struct ip6_hdr *);
	if (icmp6len < sizeof(struct icmp6_hdr)) {
		V_icmp6stat.icp6s_tooshort++;
		goto freeit;
	}

	/*
	 * calculate the checksum
	 */
#ifndef PULLDOWN_TEST
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
	if (icmp6 == NULL) {
		V_icmp6stat.icp6s_tooshort++;
		return IPPROTO_DONE;
	}
#endif
	/* From here on "code" may be rewritten to a PRC_* value. */
	code = icmp6->icmp6_code;

	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
		nd6log((LOG_ERR,
		    "ICMP6 checksum error(%d|%x) %s\n",
		    icmp6->icmp6_type, sum,
		    ip6_sprintf(ip6bufs, &ip6->ip6_src)));
		V_icmp6stat.icp6s_checksum++;
		goto freeit;
	}

	if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
		/*
		 * Deliver very specific ICMP6 type only.
		 * This is important to deliver TOOBIG.  Otherwise PMTUD
		 * will not work.
		 */
		switch (icmp6->icmp6_type) {
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
			break;
		default:
			goto freeit;
		}
	}

	/* Input statistics: per type, per interface, and errors separately. */
	V_icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);

	/*
	 * Per-type handling.  For the error types the ICMPv6 code is mapped
	 * to a PRC_* value and control jumps to the "deliver" label, which
	 * lives inside the default case below.
	 */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
		switch (code) {
		case ICMP6_DST_UNREACH_NOROUTE:
			code = PRC_UNREACH_NET;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
			break;
		case ICMP6_DST_UNREACH_ADDR:
			code = PRC_HOSTDEAD;
			break;
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
			/* I mean "source address was incorrect." */
			code = PRC_PARAMPROB;
			break;
		case ICMP6_DST_UNREACH_NOPORT:
			code = PRC_UNREACH_PORT;
			break;
		default:
			goto badcode;
		}
		goto deliver;
		break;

	case ICMP6_PACKET_TOO_BIG:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);

		/* validation is made in icmp6_mtudisc_update */

		code = PRC_MSGSIZE;

		/*
		 * Updating the path MTU will be done after examining
		 * intermediate extension headers.
		 */
		goto deliver;
		break;

	case ICMP6_TIME_EXCEEDED:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
		switch (code) {
		case ICMP6_TIME_EXCEED_TRANSIT:
			code = PRC_TIMXCEED_INTRANS;
			break;
		case ICMP6_TIME_EXCEED_REASSEMBLY:
			code = PRC_TIMXCEED_REASS;
			break;
		default:
			goto badcode;
		}
		goto deliver;
		break;

	case ICMP6_PARAM_PROB:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
		switch (code) {
		case ICMP6_PARAMPROB_NEXTHEADER:
			code = PRC_UNREACH_PROTOCOL;
			break;
		case ICMP6_PARAMPROB_HEADER:
		case ICMP6_PARAMPROB_OPTION:
			code = PRC_PARAMPROB;
			break;
		default:
			goto badcode;
		}
		goto deliver;
		break;

	case ICMP6_ECHO_REQUEST:
		/*
		 * The reply is built in a copy (n) and sent through
		 * icmp6_reflect(); m itself goes on to the raw sockets.
		 */
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
		if (code != 0)
			goto badcode;
		if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
			/* Give up remote */
			break;
		}
		if ((n->m_flags & M_EXT) != 0
		 || n->m_len < off + sizeof(struct icmp6_hdr)) {
			/*
			 * The headers of the copy are not rewritten in place
			 * here.  A fresh header mbuf receives the IPv6 and
			 * ICMPv6 headers and the remainder of the copy (n0)
			 * is chained behind it.
			 */
			struct mbuf *n0 = n;
			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
			int n0len;

			MGETHDR(n, M_DONTWAIT, n0->m_type);
			n0len = n0->m_pkthdr.len;	/* save for use below */
			if (n)
				M_MOVE_PKTHDR(n, n0);
			if (n && maxlen >= MHLEN) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL) {
				/* Give up remote */
				m_freem(n0);
				break;
			}
			/*
			 * Copy IPv6 and ICMPv6 only.
			 */
			nip6 = mtod(n, struct ip6_hdr *);
			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
			noff = sizeof(struct ip6_hdr);
			/* new mbuf contains only ipv6+icmpv6 headers */
			n->m_len = noff + sizeof(struct icmp6_hdr);
			/*
			 * Adjust mbuf.  ip6_plen will be adjusted in
			 * ip6_output().
			 */
			m_adj(n0, off + sizeof(struct icmp6_hdr));
			/* recalculate complete packet size */
			n->m_pkthdr.len = n0len + (noff - off);
			n->m_next = n0;
		} else {
			/*
			 * n->m_len >= off + sizeof(struct icmp6_hdr) holds
			 * in this branch, so IP6_EXTHDR_GET presumably
			 * cannot fail here -- confirm against the macro.
			 */
			nip6 = mtod(n, struct ip6_hdr *);
			IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
			    sizeof(*nicmp6));
			noff = off;
		}
		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
		nicmp6->icmp6_code = 0;
		if (n) {
			V_icmp6stat.icp6s_reflect++;
			V_icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
			icmp6_reflect(n, noff);
		}
		break;

	case ICMP6_ECHO_REPLY:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
		if (code != 0)
			goto badcode;
		break;

	/*
	 * MLD and neighbor discovery messages share one pattern.  The
	 * protocol handler gets a copy (n) and m stays for the raw sockets.
	 * If the copy cannot be made, the handler gets m itself, m is set to
	 * NULL and the raw socket delivery is skipped via freeit.
	 */
	case MLD_LISTENER_QUERY:
	case MLD_LISTENER_REPORT:
		if (icmp6len < sizeof(struct mld_hdr))
			goto badlen;
		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
		else
			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
			/* give up local */
			mld6_input(m, off);
			m = NULL;
			goto freeit;
		}
		mld6_input(n, off);
		/* m stays. */
		break;

	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
			goto badlen;
		break;		/* nothing to be done in kernel */

	case MLD_MTRACE_RESP:
	case MLD_MTRACE:
		/* XXX: these two are experimental.  not officially defined. */
		/* XXX: per-interface statistics? */
		break;		/* just pass it to applications */

	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
	    {
		/*
		 * The message length selects the mode.  Exactly the ICMPv6
		 * header plus 4 bytes is WRU, at least a full struct
		 * icmp6_nodeinfo is FQDN, anything else is a bad length.
		 */
		enum { WRU, FQDN } mode;

		if (!V_icmp6_nodeinfo)
			break;

		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
			mode = WRU;
		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
			mode = FQDN;
		else
			goto badlen;

#define hostnamelen	strlen(V_hostname)
		if (mode == FQDN) {
#ifndef PULLDOWN_TEST
			IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
			    IPPROTO_DONE);
#endif
			/* ni6_input() turns the copy into the reply, or NULL. */
			n = m_copy(m, 0, M_COPYALL);
			if (n)
				n = ni6_input(n, off);
			/* XXX meaningless if n == NULL */
			noff = sizeof(struct ip6_hdr);
		} else {
			u_char *p;
			int maxlen, maxhlen;

			/*
			 * XXX: this combination of flags is pointless,
			 * but should we keep this for compatibility?
			 */
			if ((V_icmp6_nodeinfo & 5) != 5)
				break;

			if (code != 0)
				goto badcode;
			/* IPv6 header + ICMPv6 header + 4 bytes preceding the name. */
			maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
			if (maxlen >= MCLBYTES) {
				/* Give up remote */
				break;
			}
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n && maxlen > MHLEN) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) {
				/*
				 * Previous code did a blind M_COPY_PKTHDR
				 * and said "just for rcvif".  If true, then
				 * we could tolerate the dup failing (due to
				 * the deep copy of the tag chain).  For now
				 * be conservative and just fail.
				 */
				m_free(n);
				n = NULL;
			}
			if (n == NULL) {
				/* Give up remote */
				break;
			}
			n->m_pkthdr.rcvif = NULL;
			n->m_len = 0;
			/* Room left for the host name after the fixed part. */
			maxhlen = M_TRAILINGSPACE(n) - maxlen;
			/* V_hostname is read with hostname_mtx held. */
			mtx_lock(&hostname_mtx);
			if (maxhlen > hostnamelen)
				maxhlen = hostnamelen;
			/*
			 * Copy IPv6 and ICMPv6 only.
			 */
			nip6 = mtod(n, struct ip6_hdr *);
			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
			p = (u_char *)(nicmp6 + 1);
			bzero(p, 4);
			bcopy(V_hostname, p + 4, maxhlen); /* meaningless TTL */
			mtx_unlock(&hostname_mtx);
			noff = sizeof(struct ip6_hdr);
			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
				sizeof(struct icmp6_hdr) + 4 + maxhlen;
			nicmp6->icmp6_type = ICMP6_WRUREPLY;
			nicmp6->icmp6_code = 0;
		}
#undef hostnamelen
		if (n) {
			V_icmp6stat.icp6s_reflect++;
			V_icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
			icmp6_reflect(n, noff);
		}
		break;
	    }

	case ICMP6_WRUREPLY:
		if (code != 0)
			goto badcode;
		break;

	case ND_ROUTER_SOLICIT:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_router_solicit))
			goto badlen;
		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_rs_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_rs_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_ROUTER_ADVERT:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_router_advert))
			goto badlen;
		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_ra_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_ra_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_NEIGHBOR_SOLICIT:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_neighbor_solicit))
			goto badlen;
		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_ns_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_ns_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_NEIGHBOR_ADVERT:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_neighbor_advert))
			goto badlen;
		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
			/* give up local */
			nd6_na_input(m, off, icmp6len);
			m = NULL;
			goto freeit;
		}
		nd6_na_input(n, off, icmp6len);
		/* m stays. */
		break;

	case ND_REDIRECT:
		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct nd_redirect))
			goto badlen;
		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
			/* give up local */
			icmp6_redirect_input(m, off);
			m = NULL;
			goto freeit;
		}
		icmp6_redirect_input(n, off);
		/* m stays. */
		break;

	case ICMP6_ROUTER_RENUMBERING:
		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
			goto badcode;
		if (icmp6len < sizeof(struct icmp6_router_renum))
			goto badlen;
		break;

	default:
		nd6log((LOG_DEBUG,
		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
		    icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
		    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
		    m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
			/* ICMPv6 error: MUST deliver it by spec... */
			code = PRC_NCMDS;
			/* deliver */
		} else {
			/* ICMPv6 informational: MUST not deliver */
			break;
		}
	/*
	 * The three labels below are jump targets for the cases above.
	 * They sit inside the switch so that "break" leaves the switch and
	 * continues with the raw socket delivery.
	 */
	deliver:
		if (icmp6_notify_error(&m, off, icmp6len, code)) {
			/* In this case, m should've been freed. */
			return (IPPROTO_DONE);
		}
		break;

	badcode:
		V_icmp6stat.icp6s_badcode++;
		break;

	badlen:
		V_icmp6stat.icp6s_badlen++;
		break;
	}

	/* deliver the packet to appropriate sockets */
	icmp6_rip6_input(&m, *offp);

	return IPPROTO_DONE;

 freeit:
	/* m may be NULL here when a handler above took the original. */
	m_freem(m);
	return IPPROTO_DONE;
}
870
/*
 * Pass a received ICMPv6 error on to the upper-layer protocol of the
 * packet that is embedded in it.
 *
 * mp       - in: the received packet; out: set to m on success.
 * off      - offset of the ICMPv6 header within the packet.
 * icmp6len - length of the ICMPv6 message (header and payload).
 * code     - value passed as the first argument to the protocol's
 *            pr_ctlinput handler (the caller passes a PRC_* value).
 *
 * The extension headers of the embedded packet are walked to find the
 * upper-layer protocol and, if an unfinished type-0 routing header is
 * present, the final destination.  For Packet Too Big messages
 * icmp6_mtudisc_update() is called first.
 *
 * Returns 0 on success.  Returns -1 when the packet could not be
 * processed; on the freeit path m has been freed here, and the caller
 * treats any non-zero return as "m is gone".
 */
static int
icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
{
	INIT_VNET_INET6(curvnet);
	struct mbuf *m = *mp;
	struct icmp6_hdr *icmp6;
	struct ip6_hdr *eip6;
	u_int32_t notifymtu;
	struct sockaddr_in6 icmp6src, icmp6dst;

	/* An error must at least carry the offending IPv6 header. */
	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
		V_icmp6stat.icp6s_tooshort++;
		goto freeit;
	}
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off,
	    sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1);
	icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
#else
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
	    sizeof(*icmp6) + sizeof(struct ip6_hdr));
	if (icmp6 == NULL) {
		V_icmp6stat.icp6s_tooshort++;
		return (-1);
	}
#endif
	/* The embedded (offending) IPv6 header follows the ICMPv6 header. */
	eip6 = (struct ip6_hdr *)(icmp6 + 1);

	/* Detect the upper level protocol */
	{
		void (*ctlfunc)(int, struct sockaddr *, void *);
		u_int8_t nxt = eip6->ip6_nxt;
		/* eoff: offset of the header currently being examined. */
		int eoff = off + sizeof(struct icmp6_hdr) +
		    sizeof(struct ip6_hdr);
		struct ip6ctlparam ip6cp;
		struct in6_addr *finaldst = NULL;
		int icmp6type = icmp6->icmp6_type;
		struct ip6_frag *fh;
		struct ip6_rthdr *rth;
		struct ip6_rthdr0 *rth0;
		int rthlen;

		/*
		 * Each pass either advances eoff past one extension header
		 * or leaves the loop through "goto notify" or a macro return.
		 */
		while (1) { /* XXX: should avoid infinite loop explicitly? */
			struct ip6_ext *eh;

			switch (nxt) {
			case IPPROTO_HOPOPTS:
			case IPPROTO_DSTOPTS:
			case IPPROTO_AH:
#ifndef PULLDOWN_TEST
				IP6_EXTHDR_CHECK(m, 0,
				    eoff + sizeof(struct ip6_ext), -1);
				eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff);
#else
				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
				    eoff, sizeof(*eh));
				if (eh == NULL) {
					V_icmp6stat.icp6s_tooshort++;
					return (-1);
				}
#endif

				/*
				 * AH: (len + 2) units of 4 bytes; the
				 * others: (len + 1) units of 8 bytes.
				 */
				if (nxt == IPPROTO_AH)
					eoff += (eh->ip6e_len + 2) << 2;
				else
					eoff += (eh->ip6e_len + 1) << 3;
				nxt = eh->ip6e_nxt;
				break;
			case IPPROTO_ROUTING:
				/*
				 * When the erroneous packet contains a
				 * routing header, we should examine the
				 * header to determine the final destination.
				 * Otherwise, we can't properly update
				 * information that depends on the final
				 * destination (e.g. path MTU).
				 */
#ifndef PULLDOWN_TEST
				IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1);
				rth = (struct ip6_rthdr *)
				    (mtod(m, caddr_t) + eoff);
#else
				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
				    eoff, sizeof(*rth));
				if (rth == NULL) {
					V_icmp6stat.icp6s_tooshort++;
					return (-1);
				}
#endif
				rthlen = (rth->ip6r_len + 1) << 3;
				/*
				 * XXX: currently there is no
				 * officially defined type other
				 * than type-0.
				 * Note that if the segment left field
				 * is 0, all intermediate hops must
				 * have been passed.
				 */
				if (rth->ip6r_segleft &&
				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
					int hops;

#ifndef PULLDOWN_TEST
					IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1);
					rth0 = (struct ip6_rthdr0 *)
					    (mtod(m, caddr_t) + eoff);
#else
					IP6_EXTHDR_GET(rth0,
					    struct ip6_rthdr0 *, m,
					    eoff, rthlen);
					if (rth0 == NULL) {
						V_icmp6stat.icp6s_tooshort++;
						return (-1);
					}
#endif
					/*
					 * The header length is in 8-byte
					 * units and an address takes two,
					 * so the last listed address, taken
					 * as the final destination, is
					 * entry (len / 2) - 1.
					 */
					/* just ignore a bogus header */
					if ((rth0->ip6r0_len % 2) == 0 &&
					    (hops = rth0->ip6r0_len/2))
						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
				}
				eoff += rthlen;
				/*
				 * NOTE(review): in the PULLDOWN_TEST variant
				 * rth is read after a further
				 * IP6_EXTHDR_GET on m -- confirm that rth
				 * cannot go stale.
				 */
				nxt = rth->ip6r_nxt;
				break;
			case IPPROTO_FRAGMENT:
#ifndef PULLDOWN_TEST
				IP6_EXTHDR_CHECK(m, 0, eoff +
				    sizeof(struct ip6_frag), -1);
				fh = (struct ip6_frag *)(mtod(m, caddr_t) +
				    eoff);
#else
				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
				    eoff, sizeof(*fh));
				if (fh == NULL) {
					V_icmp6stat.icp6s_tooshort++;
					return (-1);
				}
#endif
				/*
				 * Data after a fragment header is meaningless
				 * unless it is the first fragment, but
				 * we'll go to the notify label for path MTU
				 * discovery.
				 */
				if (fh->ip6f_offlg & IP6F_OFF_MASK)
					goto notify;

				eoff += sizeof(struct ip6_frag);
				nxt = fh->ip6f_nxt;
				break;
			default:
				/*
				 * This case includes ESP and the No Next
				 * Header.  In such cases going to the notify
				 * label does not have any meaning
				 * (i.e. ctlfunc will be NULL), but we go
				 * anyway since we might have to update
				 * path MTU information.
				 */
				goto notify;
			}
		}
	  notify:
		/* Recompute icmp6 after the header walk above. */
#ifndef PULLDOWN_TEST
		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
#else
		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
		    sizeof(*icmp6) + sizeof(struct ip6_hdr));
		if (icmp6 == NULL) {
			V_icmp6stat.icp6s_tooshort++;
			return (-1);
		}
#endif

		/*
		 * retrieve parameters from the inner IPv6 header, and convert
		 * them into sockaddr structures.
		 * XXX: there is no guarantee that the source or destination
		 * addresses of the inner packet are in the same scope as
		 * the addresses of the icmp packet.  But there is no other
		 * way to determine the zone.
		 */
		eip6 = (struct ip6_hdr *)(icmp6 + 1);

		/* Destination: routing header's final hop if one was found. */
		bzero(&icmp6dst, sizeof(icmp6dst));
		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
		icmp6dst.sin6_family = AF_INET6;
		if (finaldst == NULL)
			icmp6dst.sin6_addr = eip6->ip6_dst;
		else
			icmp6dst.sin6_addr = *finaldst;
		if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL))
			goto freeit;
		bzero(&icmp6src, sizeof(icmp6src));
		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
		icmp6src.sin6_family = AF_INET6;
		icmp6src.sin6_addr = eip6->ip6_src;
		if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL))
			goto freeit;
		icmp6src.sin6_flowinfo =
		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);

		/* Bundle what the protocol handlers get to see. */
		if (finaldst == NULL)
			finaldst = &eip6->ip6_dst;
		ip6cp.ip6c_m = m;
		ip6cp.ip6c_icmp6 = icmp6;
		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
		ip6cp.ip6c_off = eoff;
		ip6cp.ip6c_finaldst = finaldst;
		ip6cp.ip6c_src = &icmp6src;
		ip6cp.ip6c_nxt = nxt;

		/* Path MTU is updated before the protocol is notified. */
		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
			notifymtu = ntohl(icmp6->icmp6_mtu);
			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
			icmp6_mtudisc_update(&ip6cp, 1);	/*XXX*/
		}

		/* Call the control-input handler of protocol nxt, if any. */
		ctlfunc = (void (*)(int, struct sockaddr *, void *))
		    (inet6sw[ip6_protox[nxt]].pr_ctlinput);
		if (ctlfunc) {
			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
			    &ip6cp);
		}
	}
	*mp = m;
	return (0);

  freeit:
	m_freem(m);
	return (-1);
}
1102
1103void
1104icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
1105{
1106	INIT_VNET_INET6(curvnet);
1107	struct in6_addr *dst = ip6cp->ip6c_finaldst;
1108	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
1109	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
1110	u_int mtu = ntohl(icmp6->icmp6_mtu);
1111	struct in_conninfo inc;
1112
1113#if 0
1114	/*
1115	 * RFC2460 section 5, last paragraph.
1116	 * even though minimum link MTU for IPv6 is IPV6_MMTU,
1117	 * we may see ICMPv6 too big with mtu < IPV6_MMTU
1118	 * due to packet translator in the middle.
1119	 * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for
1120	 * special handling.
1121	 */
1122	if (mtu < IPV6_MMTU)
1123		return;
1124#endif
1125
1126	/*
1127	 * we reject ICMPv6 too big with abnormally small value.
1128	 * XXX what is the good definition of "abnormally small"?
1129	 */
1130	if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8)
1131		return;
1132
1133	if (!validated)
1134		return;
1135
1136	/*
1137	 * In case the suggested mtu is less than IPV6_MMTU, we
1138	 * only need to remember that it was for above mentioned
1139	 * "alwaysfrag" case.
1140	 * Try to be as close to the spec as possible.
1141	 */
1142	if (mtu < IPV6_MMTU)
1143		mtu = IPV6_MMTU - 8;
1144
1145	bzero(&inc, sizeof(inc));
1146	inc.inc_flags = 1; /* IPv6 */
1147	inc.inc6_faddr = *dst;
1148	if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
1149		return;
1150
1151	if (mtu < tcp_maxmtu6(&inc, NULL)) {
1152		tcp_hc_updatemtu(&inc, mtu);
1153		V_icmp6stat.icp6s_pmtuchg++;
1154	}
1155}
1156
1157/*
1158 * Process a Node Information Query packet, based on
1159 * draft-ietf-ipngwg-icmp-name-lookups-07.
1160 *
1161 * Spec incompatibilities:
1162 * - IPv6 Subject address handling
1163 * - IPv4 Subject address handling support missing
1164 * - Proxy reply (answer even if it's not for me)
1165 * - joins NI group address at in6_ifattach() time only, does not cope
1166 *   with hostname changes by sethostname(3)
1167 */
1168#define hostnamelen	strlen(V_hostname)
1169static struct mbuf *
1170ni6_input(struct mbuf *m, int off)
1171{
1172	INIT_VNET_INET6(curvnet);
1173	INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
1174	struct icmp6_nodeinfo *ni6, *nni6;
1175	struct mbuf *n = NULL;
1176	u_int16_t qtype;
1177	int subjlen;
1178	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
1179	struct ni_reply_fqdn *fqdn;
1180	int addrs;		/* for NI_QTYPE_NODEADDR */
1181	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
1182	struct in6_addr in6_subj; /* subject address */
1183	struct ip6_hdr *ip6;
1184	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
1185	char *subj = NULL;
1186	struct in6_ifaddr *ia6 = NULL;
1187
1188	ip6 = mtod(m, struct ip6_hdr *);
1189#ifndef PULLDOWN_TEST
1190	ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
1191#else
1192	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
1193	if (ni6 == NULL) {
1194		/* m is already reclaimed */
1195		return (NULL);
1196	}
1197#endif
1198
1199	/*
1200	 * Validate IPv6 source address.
1201	 * The default configuration MUST be to refuse answering queries from
1202	 * global-scope addresses according to RFC4602.
1203	 * Notes:
1204	 *  - it's not very clear what "refuse" means; this implementation
1205	 *    simply drops it.
1206	 *  - it's not very easy to identify global-scope (unicast) addresses
1207	 *    since there are many prefixes for them.  It should be safer
1208	 *    and in practice sufficient to check "all" but loopback and
1209	 *    link-local (note that site-local unicast was deprecated and
1210	 *    ULA is defined as global scope-wise)
1211	 */
1212	if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 &&
1213	    !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
1214	    !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
1215		goto bad;
1216
1217	/*
1218	 * Validate IPv6 destination address.
1219	 *
1220	 * The Responder must discard the Query without further processing
1221	 * unless it is one of the Responder's unicast or anycast addresses, or
1222	 * a link-local scope multicast address which the Responder has joined.
1223	 * [RFC4602, Section 5.]
1224	 */
1225	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1226		if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
1227			goto bad;
1228		/* else it's a link-local multicast, fine */
1229	} else {		/* unicast or anycast */
1230		if ((ia6 = ip6_getdstifaddr(m)) == NULL)
1231			goto bad; /* XXX impossible */
1232
1233		if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
1234		    !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) {
1235			nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
1236				"a temporary address in %s:%d",
1237			       __FILE__, __LINE__));
1238			goto bad;
1239		}
1240	}
1241
1242	/* validate query Subject field. */
1243	qtype = ntohs(ni6->ni_qtype);
1244	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
1245	switch (qtype) {
1246	case NI_QTYPE_NOOP:
1247	case NI_QTYPE_SUPTYPES:
1248		/* 07 draft */
1249		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
1250			break;
1251		/* FALLTHROUGH */
1252	case NI_QTYPE_FQDN:
1253	case NI_QTYPE_NODEADDR:
1254	case NI_QTYPE_IPV4ADDR:
1255		switch (ni6->ni_code) {
1256		case ICMP6_NI_SUBJ_IPV6:
1257#if ICMP6_NI_SUBJ_IPV6 != 0
1258		case 0:
1259#endif
1260			/*
1261			 * backward compatibility - try to accept 03 draft
1262			 * format, where no Subject is present.
1263			 */
1264			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
1265			    subjlen == 0) {
1266				oldfqdn++;
1267				break;
1268			}
1269#if ICMP6_NI_SUBJ_IPV6 != 0
1270			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
1271				goto bad;
1272#endif
1273
1274			if (subjlen != sizeof(struct in6_addr))
1275				goto bad;
1276
1277			/*
1278			 * Validate Subject address.
1279			 *
1280			 * Not sure what exactly "address belongs to the node"
1281			 * means in the spec, is it just unicast, or what?
1282			 *
1283			 * At this moment we consider Subject address as
1284			 * "belong to the node" if the Subject address equals
1285			 * to the IPv6 destination address; validation for
1286			 * IPv6 destination address should have done enough
1287			 * check for us.
1288			 *
1289			 * We do not do proxy at this moment.
1290			 */
1291			/* m_pulldown instead of copy? */
1292			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
1293			    subjlen, (caddr_t)&in6_subj);
1294			if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL))
1295				goto bad;
1296
1297			subj = (char *)&in6_subj;
1298			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
1299				break;
1300
1301			/*
1302			 * XXX if we are to allow other cases, we should really
1303			 * be careful about scope here.
1304			 * basically, we should disallow queries toward IPv6
1305			 * destination X with subject Y,
1306			 * if scope(X) > scope(Y).
1307			 * if we allow scope(X) > scope(Y), it will result in
1308			 * information leakage across scope boundary.
1309			 */
1310			goto bad;
1311
1312		case ICMP6_NI_SUBJ_FQDN:
1313			/*
1314			 * Validate Subject name with gethostname(3).
1315			 *
1316			 * The behavior may need some debate, since:
1317			 * - we are not sure if the node has FQDN as
1318			 *   hostname (returned by gethostname(3)).
1319			 * - the code does wildcard match for truncated names.
1320			 *   however, we are not sure if we want to perform
1321			 *   wildcard match, if gethostname(3) side has
1322			 *   truncated hostname.
1323			 */
1324			mtx_lock(&hostname_mtx);
1325			n = ni6_nametodns(V_hostname, hostnamelen, 0);
1326			mtx_unlock(&hostname_mtx);
1327			if (!n || n->m_next || n->m_len == 0)
1328				goto bad;
1329			IP6_EXTHDR_GET(subj, char *, m,
1330			    off + sizeof(struct icmp6_nodeinfo), subjlen);
1331			if (subj == NULL)
1332				goto bad;
1333			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
1334			    n->m_len)) {
1335				goto bad;
1336			}
1337			m_freem(n);
1338			n = NULL;
1339			break;
1340
1341		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
1342		default:
1343			goto bad;
1344		}
1345		break;
1346	}
1347
1348	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
1349	switch (qtype) {
1350	case NI_QTYPE_FQDN:
1351		if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0)
1352			goto bad;
1353		break;
1354	case NI_QTYPE_NODEADDR:
1355	case NI_QTYPE_IPV4ADDR:
1356		if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0)
1357			goto bad;
1358		break;
1359	}
1360
1361	/* guess reply length */
1362	switch (qtype) {
1363	case NI_QTYPE_NOOP:
1364		break;		/* no reply data */
1365	case NI_QTYPE_SUPTYPES:
1366		replylen += sizeof(u_int32_t);
1367		break;
1368	case NI_QTYPE_FQDN:
1369		/* XXX will append an mbuf */
1370		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1371		break;
1372	case NI_QTYPE_NODEADDR:
1373		addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj);
1374		if ((replylen += addrs * (sizeof(struct in6_addr) +
1375		    sizeof(u_int32_t))) > MCLBYTES)
1376			replylen = MCLBYTES; /* XXX: will truncate pkt later */
1377		break;
1378	case NI_QTYPE_IPV4ADDR:
1379		/* unsupported - should respond with unknown Qtype? */
1380		break;
1381	default:
1382		/*
1383		 * XXX: We must return a reply with the ICMP6 code
1384		 * `unknown Qtype' in this case.  However we regard the case
1385		 * as an FQDN query for backward compatibility.
1386		 * Older versions set a random value to this field,
1387		 * so it rarely varies in the defined qtypes.
1388		 * But the mechanism is not reliable...
1389		 * maybe we should obsolete older versions.
1390		 */
1391		qtype = NI_QTYPE_FQDN;
1392		/* XXX will append an mbuf */
1393		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
1394		oldfqdn++;
1395		break;
1396	}
1397
1398	/* allocate an mbuf to reply. */
1399	MGETHDR(n, M_DONTWAIT, m->m_type);
1400	if (n == NULL) {
1401		m_freem(m);
1402		return (NULL);
1403	}
1404	M_MOVE_PKTHDR(n, m); /* just for recvif */
1405	if (replylen > MHLEN) {
1406		if (replylen > MCLBYTES) {
1407			/*
1408			 * XXX: should we try to allocate more? But MCLBYTES
1409			 * is probably much larger than IPV6_MMTU...
1410			 */
1411			goto bad;
1412		}
1413		MCLGET(n, M_DONTWAIT);
1414		if ((n->m_flags & M_EXT) == 0) {
1415			goto bad;
1416		}
1417	}
1418	n->m_pkthdr.len = n->m_len = replylen;
1419
1420	/* copy mbuf header and IPv6 + Node Information base headers */
1421	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
1422	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
1423	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
1424
1425	/* qtype dependent procedure */
1426	switch (qtype) {
1427	case NI_QTYPE_NOOP:
1428		nni6->ni_code = ICMP6_NI_SUCCESS;
1429		nni6->ni_flags = 0;
1430		break;
1431	case NI_QTYPE_SUPTYPES:
1432	{
1433		u_int32_t v;
1434		nni6->ni_code = ICMP6_NI_SUCCESS;
1435		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
1436		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
1437		v = (u_int32_t)htonl(0x0000000f);
1438		bcopy(&v, nni6 + 1, sizeof(u_int32_t));
1439		break;
1440	}
1441	case NI_QTYPE_FQDN:
1442		nni6->ni_code = ICMP6_NI_SUCCESS;
1443		fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
1444		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
1445		nni6->ni_flags = 0; /* XXX: meaningless TTL */
1446		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
1447		/*
1448		 * XXX do we really have FQDN in variable "hostname"?
1449		 */
1450		mtx_lock(&hostname_mtx);
1451		n->m_next = ni6_nametodns(V_hostname, hostnamelen, oldfqdn);
1452		mtx_unlock(&hostname_mtx);
1453		if (n->m_next == NULL)
1454			goto bad;
1455		/* XXX we assume that n->m_next is not a chain */
1456		if (n->m_next->m_next != NULL)
1457			goto bad;
1458		n->m_pkthdr.len += n->m_next->m_len;
1459		break;
1460	case NI_QTYPE_NODEADDR:
1461	{
1462		int lenlim, copied;
1463
1464		nni6->ni_code = ICMP6_NI_SUCCESS;
1465		n->m_pkthdr.len = n->m_len =
1466		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
1467		lenlim = M_TRAILINGSPACE(n);
1468		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
1469		/* XXX: reset mbuf length */
1470		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
1471		    sizeof(struct icmp6_nodeinfo) + copied;
1472		break;
1473	}
1474	default:
1475		break;		/* XXX impossible! */
1476	}
1477
1478	nni6->ni_type = ICMP6_NI_REPLY;
1479	m_freem(m);
1480	return (n);
1481
1482  bad:
1483	m_freem(m);
1484	if (n)
1485		m_freem(n);
1486	return (NULL);
1487}
1488#undef hostnamelen
1489
1490/*
1491 * make a mbuf with DNS-encoded string.  no compression support.
1492 *
1493 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
1494 * treated as truncated name (two \0 at the end).  this is a wild guess.
1495 *
1496 * old - return pascal string if non-zero
1497 */
1498static struct mbuf *
1499ni6_nametodns(const char *name, int namelen, int old)
1500{
1501	struct mbuf *m;
1502	char *cp, *ep;
1503	const char *p, *q;
1504	int i, len, nterm;
1505
1506	if (old)
1507		len = namelen + 1;
1508	else
1509		len = MCLBYTES;
1510
1511	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
1512	MGET(m, M_DONTWAIT, MT_DATA);
1513	if (m && len > MLEN) {
1514		MCLGET(m, M_DONTWAIT);
1515		if ((m->m_flags & M_EXT) == 0)
1516			goto fail;
1517	}
1518	if (!m)
1519		goto fail;
1520	m->m_next = NULL;
1521
1522	if (old) {
1523		m->m_len = len;
1524		*mtod(m, char *) = namelen;
1525		bcopy(name, mtod(m, char *) + 1, namelen);
1526		return m;
1527	} else {
1528		m->m_len = 0;
1529		cp = mtod(m, char *);
1530		ep = mtod(m, char *) + M_TRAILINGSPACE(m);
1531
1532		/* if not certain about my name, return empty buffer */
1533		if (namelen == 0)
1534			return m;
1535
1536		/*
1537		 * guess if it looks like shortened hostname, or FQDN.
1538		 * shortened hostname needs two trailing "\0".
1539		 */
1540		i = 0;
1541		for (p = name; p < name + namelen; p++) {
1542			if (*p && *p == '.')
1543				i++;
1544		}
1545		if (i < 2)
1546			nterm = 2;
1547		else
1548			nterm = 1;
1549
1550		p = name;
1551		while (cp < ep && p < name + namelen) {
1552			i = 0;
1553			for (q = p; q < name + namelen && *q && *q != '.'; q++)
1554				i++;
1555			/* result does not fit into mbuf */
1556			if (cp + i + 1 >= ep)
1557				goto fail;
1558			/*
1559			 * DNS label length restriction, RFC1035 page 8.
1560			 * "i == 0" case is included here to avoid returning
1561			 * 0-length label on "foo..bar".
1562			 */
1563			if (i <= 0 || i >= 64)
1564				goto fail;
1565			*cp++ = i;
1566			bcopy(p, cp, i);
1567			cp += i;
1568			p = q;
1569			if (p < name + namelen && *p == '.')
1570				p++;
1571		}
1572		/* termination */
1573		if (cp + nterm >= ep)
1574			goto fail;
1575		while (nterm-- > 0)
1576			*cp++ = '\0';
1577		m->m_len = cp - mtod(m, char *);
1578		return m;
1579	}
1580
1581	panic("should not reach here");
1582	/* NOTREACHED */
1583
1584 fail:
1585	if (m)
1586		m_freem(m);
1587	return NULL;
1588}
1589
/*
 * Check whether two DNS-encoded strings match.  Takes care of the
 * truncated form (with \0\0 at the end): a truncated name matches any
 * name that starts with the same labels.  No compression support.
 * XXX upper/lowercase match (see RFC2065)
 *
 * a, alen - first encoded name and its length in bytes
 * b, blen - second encoded name and its length in bytes
 *
 * Returns 1 on match, 0 otherwise.
 */
static int
ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
{
	const char *pa, *pb, *aend, *bend;
	int arem, brem, la, lb;

	/* simplest case - need validation? */
	if (alen == blen && bcmp(a, b, alen) == 0)
		return 1;

	/* termination is mandatory */
	if (alen < 2 || blen < 2)
		return 0;
	if (a[alen - 1] != '\0' || b[blen - 1] != '\0')
		return 0;

	/* leave the final terminator out of the walk */
	aend = a + (alen - 1);
	bend = b + (blen - 1);

	for (pa = a, pb = b; pa < aend && pb < bend;
	    pa += 1 + la, pb += 1 + la) {
		arem = (int)(aend - pa);
		brem = (int)(bend - pb);
		la = (unsigned char)pa[0];
		lb = (unsigned char)pb[0];

		/*
		 * label lengths must be below 64; anything else is either
		 * invalid or a compression pointer, which we don't support
		 * yet.
		 */
		if (la >= 64 || lb >= 64)
			return 0;

		/* truncated case: a zero label right before the terminator */
		if (la == 0 && arem == 1)
			return 1;
		if (lb == 0 && brem == 1)
			return 1;
		if (la == 0 || lb == 0)
			return 0;

		if (la != lb)
			return 0;
		/* the label body must fit inside both names */
		if (la >= arem || la >= brem)
			return 0;
		if (bcmp(pa + 1, pb + 1, la) != 0)
			return 0;
	}

	/* both names must be consumed exactly */
	return (pa == aend && pb == bend) ? 1 : 0;
}
1651
1652/*
1653 * calculate the number of addresses to be returned in the node info reply.
1654 */
1655static int
1656ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
1657    struct in6_addr *subj)
1658{
1659	INIT_VNET_NET(curvnet);
1660	INIT_VNET_INET6(curvnet);
1661	struct ifnet *ifp;
1662	struct in6_ifaddr *ifa6;
1663	struct ifaddr *ifa;
1664	int addrs = 0, addrsofif, iffound = 0;
1665	int niflags = ni6->ni_flags;
1666
1667	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
1668		switch (ni6->ni_code) {
1669		case ICMP6_NI_SUBJ_IPV6:
1670			if (subj == NULL) /* must be impossible... */
1671				return (0);
1672			break;
1673		default:
1674			/*
1675			 * XXX: we only support IPv6 subject address for
1676			 * this Qtype.
1677			 */
1678			return (0);
1679		}
1680	}
1681
1682	IFNET_RLOCK();
1683	for (ifp = TAILQ_FIRST(&V_ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
1684		addrsofif = 0;
1685		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1686			if (ifa->ifa_addr->sa_family != AF_INET6)
1687				continue;
1688			ifa6 = (struct in6_ifaddr *)ifa;
1689
1690			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
1691			    IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr))
1692				iffound = 1;
1693
1694			/*
1695			 * IPv4-mapped addresses can only be returned by a
1696			 * Node Information proxy, since they represent
1697			 * addresses of IPv4-only nodes, which perforce do
1698			 * not implement this protocol.
1699			 * [icmp-name-lookups-07, Section 5.4]
1700			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
1701			 * this function at this moment.
1702			 */
1703
1704			/* What do we have to do about ::1? */
1705			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1706			case IPV6_ADDR_SCOPE_LINKLOCAL:
1707				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1708					continue;
1709				break;
1710			case IPV6_ADDR_SCOPE_SITELOCAL:
1711				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1712					continue;
1713				break;
1714			case IPV6_ADDR_SCOPE_GLOBAL:
1715				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1716					continue;
1717				break;
1718			default:
1719				continue;
1720			}
1721
1722			/*
1723			 * check if anycast is okay.
1724			 * XXX: just experimental.  not in the spec.
1725			 */
1726			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1727			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1728				continue; /* we need only unicast addresses */
1729			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1730			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
1731				continue;
1732			}
1733			addrsofif++; /* count the address */
1734		}
1735		if (iffound) {
1736			*ifpp = ifp;
1737			IFNET_RUNLOCK();
1738			return (addrsofif);
1739		}
1740
1741		addrs += addrsofif;
1742	}
1743	IFNET_RUNLOCK();
1744
1745	return (addrs);
1746}
1747
1748static int
1749ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
1750    struct ifnet *ifp0, int resid)
1751{
1752	INIT_VNET_NET(curvnet);
1753	INIT_VNET_INET6(curvnet);
1754	struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet);
1755	struct in6_ifaddr *ifa6;
1756	struct ifaddr *ifa;
1757	struct ifnet *ifp_dep = NULL;
1758	int copied = 0, allow_deprecated = 0;
1759	u_char *cp = (u_char *)(nni6 + 1);
1760	int niflags = ni6->ni_flags;
1761	u_int32_t ltime;
1762
1763	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
1764		return (0);	/* needless to copy */
1765
1766	IFNET_RLOCK();
1767  again:
1768
1769	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
1770		for (ifa = ifp->if_addrlist.tqh_first; ifa;
1771		     ifa = ifa->ifa_list.tqe_next) {
1772			if (ifa->ifa_addr->sa_family != AF_INET6)
1773				continue;
1774			ifa6 = (struct in6_ifaddr *)ifa;
1775
1776			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
1777			    allow_deprecated == 0) {
1778				/*
1779				 * prefererred address should be put before
1780				 * deprecated addresses.
1781				 */
1782
1783				/* record the interface for later search */
1784				if (ifp_dep == NULL)
1785					ifp_dep = ifp;
1786
1787				continue;
1788			} else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
1789			    allow_deprecated != 0)
1790				continue; /* we now collect deprecated addrs */
1791
1792			/* What do we have to do about ::1? */
1793			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
1794			case IPV6_ADDR_SCOPE_LINKLOCAL:
1795				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
1796					continue;
1797				break;
1798			case IPV6_ADDR_SCOPE_SITELOCAL:
1799				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
1800					continue;
1801				break;
1802			case IPV6_ADDR_SCOPE_GLOBAL:
1803				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
1804					continue;
1805				break;
1806			default:
1807				continue;
1808			}
1809
1810			/*
1811			 * check if anycast is okay.
1812			 * XXX: just experimental.  not in the spec.
1813			 */
1814			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
1815			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
1816				continue;
1817			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1818			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
1819				continue;
1820			}
1821
1822			/* now we can copy the address */
1823			if (resid < sizeof(struct in6_addr) +
1824			    sizeof(u_int32_t)) {
1825				/*
1826				 * We give up much more copy.
1827				 * Set the truncate flag and return.
1828				 */
1829				nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
1830				IFNET_RUNLOCK();
1831				return (copied);
1832			}
1833
1834			/*
1835			 * Set the TTL of the address.
1836			 * The TTL value should be one of the following
1837			 * according to the specification:
1838			 *
1839			 * 1. The remaining lifetime of a DHCP lease on the
1840			 *    address, or
1841			 * 2. The remaining Valid Lifetime of a prefix from
1842			 *    which the address was derived through Stateless
1843			 *    Autoconfiguration.
1844			 *
1845			 * Note that we currently do not support stateful
1846			 * address configuration by DHCPv6, so the former
1847			 * case can't happen.
1848			 */
1849			if (ifa6->ia6_lifetime.ia6t_expire == 0)
1850				ltime = ND6_INFINITE_LIFETIME;
1851			else {
1852				if (ifa6->ia6_lifetime.ia6t_expire >
1853				    time_second)
1854					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
1855				else
1856					ltime = 0;
1857			}
1858
1859			bcopy(&ltime, cp, sizeof(u_int32_t));
1860			cp += sizeof(u_int32_t);
1861
1862			/* copy the address itself */
1863			bcopy(&ifa6->ia_addr.sin6_addr, cp,
1864			    sizeof(struct in6_addr));
1865			in6_clearscope((struct in6_addr *)cp); /* XXX */
1866			cp += sizeof(struct in6_addr);
1867
1868			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
1869			copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
1870		}
1871		if (ifp0)	/* we need search only on the specified IF */
1872			break;
1873	}
1874
1875	if (allow_deprecated == 0 && ifp_dep != NULL) {
1876		ifp = ifp_dep;
1877		allow_deprecated = 1;
1878
1879		goto again;
1880	}
1881
1882	IFNET_RUNLOCK();
1883
1884	return (copied);
1885}
1886
1887/*
1888 * XXX almost dup'ed code with rip6_input.
1889 */
1890static int
1891icmp6_rip6_input(struct mbuf **mp, int off)
1892{
1893	INIT_VNET_INET(curvnet);
1894	INIT_VNET_INET6(curvnet);
1895	struct mbuf *m = *mp;
1896	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1897	struct in6pcb *in6p;
1898	struct in6pcb *last = NULL;
1899	struct sockaddr_in6 fromsa;
1900	struct icmp6_hdr *icmp6;
1901	struct mbuf *opts = NULL;
1902
1903#ifndef PULLDOWN_TEST
1904	/* this is assumed to be safe. */
1905	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
1906#else
1907	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
1908	if (icmp6 == NULL) {
1909		/* m is already reclaimed */
1910		return (IPPROTO_DONE);
1911	}
1912#endif
1913
1914	/*
1915	 * XXX: the address may have embedded scope zone ID, which should be
1916	 * hidden from applications.
1917	 */
1918	bzero(&fromsa, sizeof(fromsa));
1919	fromsa.sin6_family = AF_INET6;
1920	fromsa.sin6_len = sizeof(struct sockaddr_in6);
1921	fromsa.sin6_addr = ip6->ip6_src;
1922	if (sa6_recoverscope(&fromsa)) {
1923		m_freem(m);
1924		return (IPPROTO_DONE);
1925	}
1926
1927	INP_INFO_RLOCK(&V_ripcbinfo);
1928	LIST_FOREACH(in6p, &V_ripcb, inp_list) {
1929		if ((in6p->inp_vflag & INP_IPV6) == 0)
1930			continue;
1931		if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
1932			continue;
1933		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
1934		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
1935			continue;
1936		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
1937		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
1938			continue;
1939		INP_RLOCK(in6p);
1940		if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
1941		    in6p->in6p_icmp6filt)) {
1942			INP_RUNLOCK(in6p);
1943			continue;
1944		}
1945		if (last) {
1946			struct	mbuf *n = NULL;
1947
1948			/*
1949			 * Recent network drivers tend to allocate a single
1950			 * mbuf cluster, rather than to make a couple of
1951			 * mbufs without clusters.  Also, since the IPv6 code
1952			 * path tries to avoid m_pullup(), it is highly
1953			 * probable that we still have an mbuf cluster here
1954			 * even though the necessary length can be stored in an
1955			 * mbuf's internal buffer.
1956			 * Meanwhile, the default size of the receive socket
1957			 * buffer for raw sockets is not so large.  This means
1958			 * the possibility of packet loss is relatively higher
1959			 * than before.  To avoid this scenario, we copy the
1960			 * received data to a separate mbuf that does not use
1961			 * a cluster, if possible.
1962			 * XXX: it is better to copy the data after stripping
1963			 * intermediate headers.
1964			 */
1965			if ((m->m_flags & M_EXT) && m->m_next == NULL &&
1966			    m->m_len <= MHLEN) {
1967				MGET(n, M_DONTWAIT, m->m_type);
1968				if (n != NULL) {
1969					if (m_dup_pkthdr(n, m, M_NOWAIT)) {
1970						bcopy(m->m_data, n->m_data,
1971						      m->m_len);
1972						n->m_len = m->m_len;
1973					} else {
1974						m_free(n);
1975						n = NULL;
1976					}
1977				}
1978			}
1979			if (n != NULL ||
1980			    (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
1981				if (last->in6p_flags & IN6P_CONTROLOPTS)
1982					ip6_savecontrol(last, n, &opts);
1983				/* strip intermediate headers */
1984				m_adj(n, off);
1985				SOCKBUF_LOCK(&last->in6p_socket->so_rcv);
1986				if (sbappendaddr_locked(
1987				    &last->in6p_socket->so_rcv,
1988				    (struct sockaddr *)&fromsa, n, opts)
1989				    == 0) {
1990					/* should notify about lost packet */
1991					m_freem(n);
1992					if (opts) {
1993						m_freem(opts);
1994					}
1995					SOCKBUF_UNLOCK(
1996					    &last->in6p_socket->so_rcv);
1997				} else
1998					sorwakeup_locked(last->in6p_socket);
1999				opts = NULL;
2000			}
2001			INP_RUNLOCK(last);
2002		}
2003		last = in6p;
2004	}
2005	INP_INFO_RUNLOCK(&V_ripcbinfo);
2006	if (last) {
2007		if (last->in6p_flags & IN6P_CONTROLOPTS)
2008			ip6_savecontrol(last, m, &opts);
2009		/* strip intermediate headers */
2010		m_adj(m, off);
2011
2012		/* avoid using mbuf clusters if possible (see above) */
2013		if ((m->m_flags & M_EXT) && m->m_next == NULL &&
2014		    m->m_len <= MHLEN) {
2015			struct mbuf *n;
2016
2017			MGET(n, M_DONTWAIT, m->m_type);
2018			if (n != NULL) {
2019				if (m_dup_pkthdr(n, m, M_NOWAIT)) {
2020					bcopy(m->m_data, n->m_data, m->m_len);
2021					n->m_len = m->m_len;
2022
2023					m_freem(m);
2024					m = n;
2025				} else {
2026					m_freem(n);
2027					n = NULL;
2028				}
2029			}
2030		}
2031		SOCKBUF_LOCK(&last->in6p_socket->so_rcv);
2032		if (sbappendaddr_locked(&last->in6p_socket->so_rcv,
2033		    (struct sockaddr *)&fromsa, m, opts) == 0) {
2034			m_freem(m);
2035			if (opts)
2036				m_freem(opts);
2037			SOCKBUF_UNLOCK(&last->in6p_socket->so_rcv);
2038		} else
2039			sorwakeup_locked(last->in6p_socket);
2040		INP_RUNLOCK(last);
2041	} else {
2042		m_freem(m);
2043		V_ip6stat.ip6s_delivered--;
2044	}
2045	return IPPROTO_DONE;
2046}
2047
2048/*
2049 * Reflect the ip6 packet back to the source.
2050 * OFF points to the icmp6 header, counted from the top of the mbuf.
2051 */
2052void
2053icmp6_reflect(struct mbuf *m, size_t off)
2054{
2055	INIT_VNET_INET6(curvnet);
2056	struct ip6_hdr *ip6;
2057	struct icmp6_hdr *icmp6;
2058	struct in6_ifaddr *ia;
2059	int plen;
2060	int type, code;
2061	struct ifnet *outif = NULL;
2062	struct in6_addr origdst, *src = NULL;
2063
2064	/* too short to reflect */
2065	if (off < sizeof(struct ip6_hdr)) {
2066		nd6log((LOG_DEBUG,
2067		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
2068		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
2069		    __FILE__, __LINE__));
2070		goto bad;
2071	}
2072
2073	/*
2074	 * If there are extra headers between IPv6 and ICMPv6, strip
2075	 * off that header first.
2076	 */
2077#ifdef DIAGNOSTIC
2078	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
2079		panic("assumption failed in icmp6_reflect");
2080#endif
2081	if (off > sizeof(struct ip6_hdr)) {
2082		size_t l;
2083		struct ip6_hdr nip6;
2084
2085		l = off - sizeof(struct ip6_hdr);
2086		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
2087		m_adj(m, l);
2088		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2089		if (m->m_len < l) {
2090			if ((m = m_pullup(m, l)) == NULL)
2091				return;
2092		}
2093		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
2094	} else /* off == sizeof(struct ip6_hdr) */ {
2095		size_t l;
2096		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
2097		if (m->m_len < l) {
2098			if ((m = m_pullup(m, l)) == NULL)
2099				return;
2100		}
2101	}
2102	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
2103	ip6 = mtod(m, struct ip6_hdr *);
2104	ip6->ip6_nxt = IPPROTO_ICMPV6;
2105	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
2106	type = icmp6->icmp6_type; /* keep type for statistics */
2107	code = icmp6->icmp6_code; /* ditto. */
2108
2109	origdst = ip6->ip6_dst;
2110	/*
2111	 * ip6_input() drops a packet if its src is multicast.
2112	 * So, the src is never multicast.
2113	 */
2114	ip6->ip6_dst = ip6->ip6_src;
2115
2116	/*
2117	 * If the incoming packet was addressed directly to us (i.e. unicast),
2118	 * use dst as the src for the reply.
2119	 * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
2120	 * (for example) when we encounter an error while forwarding procedure
2121	 * destined to a duplicated address of ours.
2122	 * Note that ip6_getdstifaddr() may fail if we are in an error handling
2123	 * procedure of an outgoing packet of our own, in which case we need
2124	 * to search in the ifaddr list.
2125	 */
2126	if (!IN6_IS_ADDR_MULTICAST(&origdst)) {
2127		if ((ia = ip6_getdstifaddr(m))) {
2128			if (!(ia->ia6_flags &
2129			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)))
2130				src = &ia->ia_addr.sin6_addr;
2131		} else {
2132			struct sockaddr_in6 d;
2133
2134			bzero(&d, sizeof(d));
2135			d.sin6_family = AF_INET6;
2136			d.sin6_len = sizeof(d);
2137			d.sin6_addr = origdst;
2138			ia = (struct in6_ifaddr *)
2139			    ifa_ifwithaddr((struct sockaddr *)&d);
2140			if (ia &&
2141			    !(ia->ia6_flags &
2142			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
2143				src = &ia->ia_addr.sin6_addr;
2144			}
2145		}
2146	}
2147
2148	if (src == NULL) {
2149		int e;
2150		struct sockaddr_in6 sin6;
2151		struct route_in6 ro;
2152
2153		/*
2154		 * This case matches to multicasts, our anycast, or unicasts
2155		 * that we do not own.  Select a source address based on the
2156		 * source address of the erroneous packet.
2157		 */
2158		bzero(&sin6, sizeof(sin6));
2159		sin6.sin6_family = AF_INET6;
2160		sin6.sin6_len = sizeof(sin6);
2161		sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
2162
2163		bzero(&ro, sizeof(ro));
2164		src = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &e);
2165		if (ro.ro_rt)
2166			RTFREE(ro.ro_rt); /* XXX: we could use this */
2167		if (src == NULL) {
2168			char ip6buf[INET6_ADDRSTRLEN];
2169			nd6log((LOG_DEBUG,
2170			    "icmp6_reflect: source can't be determined: "
2171			    "dst=%s, error=%d\n",
2172			    ip6_sprintf(ip6buf, &sin6.sin6_addr), e));
2173			goto bad;
2174		}
2175	}
2176
2177	ip6->ip6_src = *src;
2178	ip6->ip6_flow = 0;
2179	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2180	ip6->ip6_vfc |= IPV6_VERSION;
2181	ip6->ip6_nxt = IPPROTO_ICMPV6;
2182	if (outif)
2183		ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
2184	else if (m->m_pkthdr.rcvif) {
2185		/* XXX: This may not be the outgoing interface */
2186		ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
2187	} else
2188		ip6->ip6_hlim = V_ip6_defhlim;
2189
2190	icmp6->icmp6_cksum = 0;
2191	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
2192	    sizeof(struct ip6_hdr), plen);
2193
2194	/*
2195	 * XXX option handling
2196	 */
2197
2198	m->m_flags &= ~(M_BCAST|M_MCAST);
2199
2200	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
2201	if (outif)
2202		icmp6_ifoutstat_inc(outif, type, code);
2203
2204	return;
2205
2206 bad:
2207	m_freem(m);
2208	return;
2209}
2210
/*
 * Fast timeout hook for ICMPv6.  There is no periodic work to do here,
 * so the body is intentionally empty.
 */
void
icmp6_fasttimo(void)
{
}
2217
2218static const char *
2219icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
2220    struct in6_addr *tgt6)
2221{
2222	static char buf[1024];
2223	char ip6bufs[INET6_ADDRSTRLEN];
2224	char ip6bufd[INET6_ADDRSTRLEN];
2225	char ip6buft[INET6_ADDRSTRLEN];
2226	snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
2227	    ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6),
2228	    ip6_sprintf(ip6buft, tgt6));
2229	return buf;
2230}
2231
2232void
2233icmp6_redirect_input(struct mbuf *m, int off)
2234{
2235	INIT_VNET_INET6(curvnet);
2236	struct ifnet *ifp;
2237	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
2238	struct nd_redirect *nd_rd;
2239	int icmp6len = ntohs(ip6->ip6_plen);
2240	char *lladdr = NULL;
2241	int lladdrlen = 0;
2242	u_char *redirhdr = NULL;
2243	int redirhdrlen = 0;
2244	struct rtentry *rt = NULL;
2245	int is_router;
2246	int is_onlink;
2247	struct in6_addr src6 = ip6->ip6_src;
2248	struct in6_addr redtgt6;
2249	struct in6_addr reddst6;
2250	union nd_opts ndopts;
2251	char ip6buf[INET6_ADDRSTRLEN];
2252
2253	if (!m)
2254		return;
2255
2256	ifp = m->m_pkthdr.rcvif;
2257
2258	if (!ifp)
2259		return;
2260
2261	/* XXX if we are router, we don't update route by icmp6 redirect */
2262	if (V_ip6_forwarding)
2263		goto freeit;
2264	if (!V_icmp6_rediraccept)
2265		goto freeit;
2266
2267#ifndef PULLDOWN_TEST
2268	IP6_EXTHDR_CHECK(m, off, icmp6len,);
2269	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
2270#else
2271	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
2272	if (nd_rd == NULL) {
2273		V_icmp6stat.icp6s_tooshort++;
2274		return;
2275	}
2276#endif
2277	redtgt6 = nd_rd->nd_rd_target;
2278	reddst6 = nd_rd->nd_rd_dst;
2279
2280	if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) ||
2281	    in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) {
2282		goto freeit;
2283	}
2284
2285	/* validation */
2286	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
2287		nd6log((LOG_ERR,
2288		    "ICMP6 redirect sent from %s rejected; "
2289		    "must be from linklocal\n",
2290		    ip6_sprintf(ip6buf, &src6)));
2291		goto bad;
2292	}
2293	if (ip6->ip6_hlim != 255) {
2294		nd6log((LOG_ERR,
2295		    "ICMP6 redirect sent from %s rejected; "
2296		    "hlim=%d (must be 255)\n",
2297		    ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim));
2298		goto bad;
2299	}
2300    {
2301	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
2302	struct sockaddr_in6 sin6;
2303	struct in6_addr *gw6;
2304
2305	bzero(&sin6, sizeof(sin6));
2306	sin6.sin6_family = AF_INET6;
2307	sin6.sin6_len = sizeof(struct sockaddr_in6);
2308	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
2309	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
2310	if (rt) {
2311		if (rt->rt_gateway == NULL ||
2312		    rt->rt_gateway->sa_family != AF_INET6) {
2313			nd6log((LOG_ERR,
2314			    "ICMP6 redirect rejected; no route "
2315			    "with inet6 gateway found for redirect dst: %s\n",
2316			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2317			RTFREE_LOCKED(rt);
2318			goto bad;
2319		}
2320
2321		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
2322		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
2323			nd6log((LOG_ERR,
2324			    "ICMP6 redirect rejected; "
2325			    "not equal to gw-for-src=%s (must be same): "
2326			    "%s\n",
2327			    ip6_sprintf(ip6buf, gw6),
2328			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2329			RTFREE_LOCKED(rt);
2330			goto bad;
2331		}
2332	} else {
2333		nd6log((LOG_ERR,
2334		    "ICMP6 redirect rejected; "
2335		    "no route found for redirect dst: %s\n",
2336		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2337		goto bad;
2338	}
2339	RTFREE_LOCKED(rt);
2340	rt = NULL;
2341    }
2342	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
2343		nd6log((LOG_ERR,
2344		    "ICMP6 redirect rejected; "
2345		    "redirect dst must be unicast: %s\n",
2346		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2347		goto bad;
2348	}
2349
2350	is_router = is_onlink = 0;
2351	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
2352		is_router = 1;	/* router case */
2353	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
2354		is_onlink = 1;	/* on-link destination case */
2355	if (!is_router && !is_onlink) {
2356		nd6log((LOG_ERR,
2357		    "ICMP6 redirect rejected; "
2358		    "neither router case nor onlink case: %s\n",
2359		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2360		goto bad;
2361	}
2362	/* validation passed */
2363
2364	icmp6len -= sizeof(*nd_rd);
2365	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
2366	if (nd6_options(&ndopts) < 0) {
2367		nd6log((LOG_INFO, "icmp6_redirect_input: "
2368		    "invalid ND option, rejected: %s\n",
2369		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2370		/* nd6_options have incremented stats */
2371		goto freeit;
2372	}
2373
2374	if (ndopts.nd_opts_tgt_lladdr) {
2375		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
2376		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
2377	}
2378
2379	if (ndopts.nd_opts_rh) {
2380		redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
2381		redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */
2382	}
2383
2384	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
2385		nd6log((LOG_INFO,
2386		    "icmp6_redirect_input: lladdrlen mismatch for %s "
2387		    "(if %d, icmp6 packet %d): %s\n",
2388		    ip6_sprintf(ip6buf, &redtgt6),
2389		    ifp->if_addrlen, lladdrlen - 2,
2390		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
2391		goto bad;
2392	}
2393
2394	/* RFC 2461 8.3 */
2395	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
2396	    is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
2397
2398	if (!is_onlink) {	/* better router case.  perform rtredirect. */
2399		/* perform rtredirect */
2400		struct sockaddr_in6 sdst;
2401		struct sockaddr_in6 sgw;
2402		struct sockaddr_in6 ssrc;
2403
2404		bzero(&sdst, sizeof(sdst));
2405		bzero(&sgw, sizeof(sgw));
2406		bzero(&ssrc, sizeof(ssrc));
2407		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
2408		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
2409			sizeof(struct sockaddr_in6);
2410		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
2411		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2412		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
2413		rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
2414		    (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
2415		    (struct sockaddr *)&ssrc);
2416	}
2417	/* finally update cached route in each socket via pfctlinput */
2418    {
2419	struct sockaddr_in6 sdst;
2420
2421	bzero(&sdst, sizeof(sdst));
2422	sdst.sin6_family = AF_INET6;
2423	sdst.sin6_len = sizeof(struct sockaddr_in6);
2424	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
2425	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
2426#ifdef IPSEC
2427	key_sa_routechange((struct sockaddr *)&sdst);
2428#endif /* IPSEC */
2429    }
2430
2431 freeit:
2432	m_freem(m);
2433	return;
2434
2435 bad:
2436	V_icmp6stat.icp6s_badredirect++;
2437	m_freem(m);
2438}
2439
2440void
2441icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
2442{
2443	INIT_VNET_INET6(curvnet);
2444	struct ifnet *ifp;	/* my outgoing interface */
2445	struct in6_addr *ifp_ll6;
2446	struct in6_addr *router_ll6;
2447	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
2448	struct mbuf *m = NULL;	/* newly allocated one */
2449	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
2450	struct nd_redirect *nd_rd;
2451	size_t maxlen;
2452	u_char *p;
2453	struct ifnet *outif = NULL;
2454	struct sockaddr_in6 src_sa;
2455
2456	icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
2457
2458	/* if we are not router, we don't send icmp6 redirect */
2459	if (!V_ip6_forwarding)
2460		goto fail;
2461
2462	/* sanity check */
2463	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
2464		goto fail;
2465
2466	/*
2467	 * Address check:
2468	 *  the source address must identify a neighbor, and
2469	 *  the destination address must not be a multicast address
2470	 *  [RFC 2461, sec 8.2]
2471	 */
2472	sip6 = mtod(m0, struct ip6_hdr *);
2473	bzero(&src_sa, sizeof(src_sa));
2474	src_sa.sin6_family = AF_INET6;
2475	src_sa.sin6_len = sizeof(src_sa);
2476	src_sa.sin6_addr = sip6->ip6_src;
2477	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
2478		goto fail;
2479	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
2480		goto fail;	/* what should we do here? */
2481
2482	/* rate limit */
2483	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
2484		goto fail;
2485
2486	/*
2487	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
2488	 * we almost always ask for an mbuf cluster for simplicity.
2489	 * (MHLEN < IPV6_MMTU is almost always true)
2490	 */
2491#if IPV6_MMTU >= MCLBYTES
2492# error assumption failed about IPV6_MMTU and MCLBYTES
2493#endif
2494	MGETHDR(m, M_DONTWAIT, MT_HEADER);
2495	if (m && IPV6_MMTU >= MHLEN)
2496		MCLGET(m, M_DONTWAIT);
2497	if (!m)
2498		goto fail;
2499	m->m_pkthdr.rcvif = NULL;
2500	m->m_len = 0;
2501	maxlen = M_TRAILINGSPACE(m);
2502	maxlen = min(IPV6_MMTU, maxlen);
2503	/* just for safety */
2504	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
2505	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
2506		goto fail;
2507	}
2508
2509	{
2510		/* get ip6 linklocal address for ifp(my outgoing interface). */
2511		struct in6_ifaddr *ia;
2512		if ((ia = in6ifa_ifpforlinklocal(ifp,
2513						 IN6_IFF_NOTREADY|
2514						 IN6_IFF_ANYCAST)) == NULL)
2515			goto fail;
2516		ifp_ll6 = &ia->ia_addr.sin6_addr;
2517	}
2518
2519	/* get ip6 linklocal address for the router. */
2520	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
2521		struct sockaddr_in6 *sin6;
2522		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
2523		router_ll6 = &sin6->sin6_addr;
2524		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
2525			router_ll6 = (struct in6_addr *)NULL;
2526	} else
2527		router_ll6 = (struct in6_addr *)NULL;
2528
2529	/* ip6 */
2530	ip6 = mtod(m, struct ip6_hdr *);
2531	ip6->ip6_flow = 0;
2532	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2533	ip6->ip6_vfc |= IPV6_VERSION;
2534	/* ip6->ip6_plen will be set later */
2535	ip6->ip6_nxt = IPPROTO_ICMPV6;
2536	ip6->ip6_hlim = 255;
2537	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
2538	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
2539	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
2540
2541	/* ND Redirect */
2542	nd_rd = (struct nd_redirect *)(ip6 + 1);
2543	nd_rd->nd_rd_type = ND_REDIRECT;
2544	nd_rd->nd_rd_code = 0;
2545	nd_rd->nd_rd_reserved = 0;
2546	if (rt->rt_flags & RTF_GATEWAY) {
2547		/*
2548		 * nd_rd->nd_rd_target must be a link-local address in
2549		 * better router cases.
2550		 */
2551		if (!router_ll6)
2552			goto fail;
2553		bcopy(router_ll6, &nd_rd->nd_rd_target,
2554		    sizeof(nd_rd->nd_rd_target));
2555		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2556		    sizeof(nd_rd->nd_rd_dst));
2557	} else {
2558		/* make sure redtgt == reddst */
2559		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
2560		    sizeof(nd_rd->nd_rd_target));
2561		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
2562		    sizeof(nd_rd->nd_rd_dst));
2563	}
2564
2565	p = (u_char *)(nd_rd + 1);
2566
2567	if (!router_ll6)
2568		goto nolladdropt;
2569
2570	{
2571		/* target lladdr option */
2572		struct rtentry *rt_router = NULL;
2573		int len;
2574		struct sockaddr_dl *sdl;
2575		struct nd_opt_hdr *nd_opt;
2576		char *lladdr;
2577
2578		rt_router = nd6_lookup(router_ll6, 0, ifp);
2579		if (!rt_router)
2580			goto nolladdropt;
2581		len = sizeof(*nd_opt) + ifp->if_addrlen;
2582		len = (len + 7) & ~7;	/* round by 8 */
2583		/* safety check */
2584		if (len + (p - (u_char *)ip6) > maxlen)
2585			goto nolladdropt;
2586		if (!(rt_router->rt_flags & RTF_GATEWAY) &&
2587		    (rt_router->rt_flags & RTF_LLINFO) &&
2588		    (rt_router->rt_gateway->sa_family == AF_LINK) &&
2589		    (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
2590		    sdl->sdl_alen) {
2591			nd_opt = (struct nd_opt_hdr *)p;
2592			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
2593			nd_opt->nd_opt_len = len >> 3;
2594			lladdr = (char *)(nd_opt + 1);
2595			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
2596			p += len;
2597		}
2598	}
2599nolladdropt:;
2600
2601	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2602
2603	/* just to be safe */
2604#ifdef M_DECRYPTED	/*not openbsd*/
2605	if (m0->m_flags & M_DECRYPTED)
2606		goto noredhdropt;
2607#endif
2608	if (p - (u_char *)ip6 > maxlen)
2609		goto noredhdropt;
2610
2611	{
2612		/* redirected header option */
2613		int len;
2614		struct nd_opt_rd_hdr *nd_opt_rh;
2615
2616		/*
2617		 * compute the maximum size for icmp6 redirect header option.
2618		 * XXX room for auth header?
2619		 */
2620		len = maxlen - (p - (u_char *)ip6);
2621		len &= ~7;
2622
2623		/* This is just for simplicity. */
2624		if (m0->m_pkthdr.len != m0->m_len) {
2625			if (m0->m_next) {
2626				m_freem(m0->m_next);
2627				m0->m_next = NULL;
2628			}
2629			m0->m_pkthdr.len = m0->m_len;
2630		}
2631
2632		/*
2633		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
2634		 * about padding/truncate rule for the original IP packet.
2635		 * From the discussion on IPv6imp in Feb 1999,
2636		 * the consensus was:
2637		 * - "attach as much as possible" is the goal
2638		 * - pad if not aligned (original size can be guessed by
2639		 *   original ip6 header)
2640		 * Following code adds the padding if it is simple enough,
2641		 * and truncates if not.
2642		 */
2643		if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
2644			panic("assumption failed in %s:%d", __FILE__,
2645			    __LINE__);
2646
2647		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
2648			/* not enough room, truncate */
2649			m0->m_pkthdr.len = m0->m_len = len -
2650			    sizeof(*nd_opt_rh);
2651		} else {
2652			/* enough room, pad or truncate */
2653			size_t extra;
2654
2655			extra = m0->m_pkthdr.len % 8;
2656			if (extra) {
2657				/* pad if easy enough, truncate if not */
2658				if (8 - extra <= M_TRAILINGSPACE(m0)) {
2659					/* pad */
2660					m0->m_len += (8 - extra);
2661					m0->m_pkthdr.len += (8 - extra);
2662				} else {
2663					/* truncate */
2664					m0->m_pkthdr.len -= extra;
2665					m0->m_len -= extra;
2666				}
2667			}
2668			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
2669			m0->m_pkthdr.len = m0->m_len = len -
2670			    sizeof(*nd_opt_rh);
2671		}
2672
2673		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
2674		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
2675		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
2676		nd_opt_rh->nd_opt_rh_len = len >> 3;
2677		p += sizeof(*nd_opt_rh);
2678		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
2679
2680		/* connect m0 to m */
2681		m_tag_delete_chain(m0, NULL);
2682		m0->m_flags &= ~M_PKTHDR;
2683		m->m_next = m0;
2684		m->m_pkthdr.len = m->m_len + m0->m_len;
2685		m0 = NULL;
2686	}
2687noredhdropt:;
2688	if (m0) {
2689		m_freem(m0);
2690		m0 = NULL;
2691	}
2692
2693	/* XXX: clear embedded link IDs in the inner header */
2694	in6_clearscope(&sip6->ip6_src);
2695	in6_clearscope(&sip6->ip6_dst);
2696	in6_clearscope(&nd_rd->nd_rd_target);
2697	in6_clearscope(&nd_rd->nd_rd_dst);
2698
2699	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
2700
2701	nd_rd->nd_rd_cksum = 0;
2702	nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
2703	    sizeof(*ip6), ntohs(ip6->ip6_plen));
2704
2705	/* send the packet to outside... */
2706	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
2707	if (outif) {
2708		icmp6_ifstat_inc(outif, ifs6_out_msg);
2709		icmp6_ifstat_inc(outif, ifs6_out_redirect);
2710	}
2711	V_icmp6stat.icp6s_outhist[ND_REDIRECT]++;
2712
2713	return;
2714
2715fail:
2716	if (m)
2717		m_freem(m);
2718	if (m0)
2719		m_freem(m0);
2720}
2721
2722/*
2723 * ICMPv6 socket option processing.
2724 */
2725int
2726icmp6_ctloutput(struct socket *so, struct sockopt *sopt)
2727{
2728	int error = 0;
2729	int optlen;
2730	struct inpcb *inp = sotoinpcb(so);
2731	int level, op, optname;
2732
2733	if (sopt) {
2734		level = sopt->sopt_level;
2735		op = sopt->sopt_dir;
2736		optname = sopt->sopt_name;
2737		optlen = sopt->sopt_valsize;
2738	} else
2739		level = op = optname = optlen = 0;
2740
2741	if (level != IPPROTO_ICMPV6) {
2742		return EINVAL;
2743	}
2744
2745	switch (op) {
2746	case PRCO_SETOPT:
2747		switch (optname) {
2748		case ICMP6_FILTER:
2749		    {
2750			struct icmp6_filter ic6f;
2751
2752			if (optlen != sizeof(ic6f)) {
2753				error = EMSGSIZE;
2754				break;
2755			}
2756			error = sooptcopyin(sopt, &ic6f, optlen, optlen);
2757			if (error == 0) {
2758				INP_WLOCK(inp);
2759				*inp->in6p_icmp6filt = ic6f;
2760				INP_WUNLOCK(inp);
2761			}
2762			break;
2763		    }
2764
2765		default:
2766			error = ENOPROTOOPT;
2767			break;
2768		}
2769		break;
2770
2771	case PRCO_GETOPT:
2772		switch (optname) {
2773		case ICMP6_FILTER:
2774		    {
2775			struct icmp6_filter ic6f;
2776
2777			INP_RLOCK(inp);
2778			ic6f = *inp->in6p_icmp6filt;
2779			INP_RUNLOCK(inp);
2780			error = sooptcopyout(sopt, &ic6f, sizeof(ic6f));
2781			break;
2782		    }
2783
2784		default:
2785			error = ENOPROTOOPT;
2786			break;
2787		}
2788		break;
2789	}
2790
2791	return (error);
2792}
2793
2794/*
2795 * Perform rate limit check.
2796 * Returns 0 if it is okay to send the icmp6 packet.
2797 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
2798 * limitation.
2799 *
2800 * XXX per-destination/type check necessary?
2801 *
2802 * dst - not used at this moment
2803 * type - not used at this moment
2804 * code - not used at this moment
2805 */
2806static int
2807icmp6_ratelimit(const struct in6_addr *dst, const int type,
2808    const int code)
2809{
2810	INIT_VNET_INET6(curvnet);
2811	int ret;
2812
2813	ret = 0;	/* okay to send */
2814
2815	/* PPS limit */
2816	if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count,
2817	    V_icmp6errppslim)) {
2818		/* The packet is subject to rate limit */
2819		ret++;
2820	}
2821
2822	return ret;
2823}
2824