nd6_nbr.c revision 293633
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/netinet6/nd6_nbr.c 293633 2016-01-10 13:40:29Z melifaro $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37#include "opt_ipsec.h"
38#include "opt_mpath.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/malloc.h>
43#include <sys/libkern.h>
44#include <sys/lock.h>
45#include <sys/rwlock.h>
46#include <sys/mbuf.h>
47#include <sys/socket.h>
48#include <sys/sockio.h>
49#include <sys/time.h>
50#include <sys/kernel.h>
51#include <sys/errno.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/queue.h>
55#include <sys/callout.h>
56#include <sys/refcount.h>
57
58#include <net/if.h>
59#include <net/if_types.h>
60#include <net/if_dl.h>
61#include <net/if_var.h>
62#include <net/route.h>
63#ifdef RADIX_MPATH
64#include <net/radix_mpath.h>
65#endif
66#include <net/vnet.h>
67
68#include <netinet/in.h>
69#include <netinet/in_var.h>
70#include <net/if_llatbl.h>
71#include <netinet6/in6_var.h>
72#include <netinet6/in6_ifattach.h>
73#include <netinet/ip6.h>
74#include <netinet6/ip6_var.h>
75#include <netinet6/scope6_var.h>
76#include <netinet6/nd6.h>
77#include <netinet/icmp6.h>
78#include <netinet/ip_carp.h>
79#include <netinet6/send.h>
80
/*
 * View a generic sockaddr pointer as a link-level sockaddr.  The argument
 * is parenthesised so that expression arguments (e.g. pointer arithmetic)
 * are fully evaluated before the cast is applied.
 */
#define SDL(s) ((struct sockaddr_dl *)(s))
82
83struct dadq;
84static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
85static void nd6_dad_add(struct dadq *dp);
86static void nd6_dad_del(struct dadq *dp);
87static void nd6_dad_rele(struct dadq *);
88static void nd6_dad_starttimer(struct dadq *, int, int);
89static void nd6_dad_stoptimer(struct dadq *);
90static void nd6_dad_timer(struct dadq *);
91static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
92static void nd6_dad_ns_output(struct dadq *);
93static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
94static void nd6_dad_na_input(struct ifaddr *);
95static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
96    const struct in6_addr *, u_long, int, struct sockaddr *, u_int);
97static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
98    const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int);
99
100static VNET_DEFINE(int, dad_enhanced) = 1;
101#define	V_dad_enhanced			VNET(dad_enhanced)
102
103SYSCTL_DECL(_net_inet6_ip6);
104SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
105    &VNET_NAME(dad_enhanced), 0,
106    "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
107
108static VNET_DEFINE(int, dad_maxtry) = 15;	/* max # of *tries* to
109						   transmit DAD packet */
110#define	V_dad_maxtry			VNET(dad_maxtry)
111
112/*
113 * Input a Neighbor Solicitation Message.
114 *
115 * Based on RFC 2461
116 * Based on RFC 2462 (duplicate address detection)
117 */
118void
119nd6_ns_input(struct mbuf *m, int off, int icmp6len)
120{
121	struct ifnet *ifp = m->m_pkthdr.rcvif;
122	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
123	struct nd_neighbor_solicit *nd_ns;
124	struct in6_addr saddr6 = ip6->ip6_src;
125	struct in6_addr daddr6 = ip6->ip6_dst;
126	struct in6_addr taddr6;
127	struct in6_addr myaddr6;
128	char *lladdr = NULL;
129	struct ifaddr *ifa = NULL;
130	int lladdrlen = 0;
131	int anycast = 0, proxy = 0, tentative = 0;
132	int tlladdr;
133	int rflag;
134	union nd_opts ndopts;
135	struct sockaddr_dl proxydl;
136	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
137
138	rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
139	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
140		rflag = 0;
141#ifndef PULLDOWN_TEST
142	IP6_EXTHDR_CHECK(m, off, icmp6len,);
143	nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
144#else
145	IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len);
146	if (nd_ns == NULL) {
147		ICMP6STAT_INC(icp6s_tooshort);
148		return;
149	}
150#endif
151	ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */
152	taddr6 = nd_ns->nd_ns_target;
153	if (in6_setscope(&taddr6, ifp, NULL) != 0)
154		goto bad;
155
156	if (ip6->ip6_hlim != 255) {
157		nd6log((LOG_ERR,
158		    "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
159		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
160		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
161		goto bad;
162	}
163
164	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
165		/* dst has to be a solicited node multicast address. */
166		if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
167		    /* don't check ifindex portion */
168		    daddr6.s6_addr32[1] == 0 &&
169		    daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE &&
170		    daddr6.s6_addr8[12] == 0xff) {
171			; /* good */
172		} else {
173			nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
174			    "(wrong ip6 dst)\n"));
175			goto bad;
176		}
177	} else if (!V_nd6_onlink_ns_rfc4861) {
178		struct sockaddr_in6 src_sa6;
179
180		/*
181		 * According to recent IETF discussions, it is not a good idea
182		 * to accept a NS from an address which would not be deemed
183		 * to be a neighbor otherwise.  This point is expected to be
184		 * clarified in future revisions of the specification.
185		 */
186		bzero(&src_sa6, sizeof(src_sa6));
187		src_sa6.sin6_family = AF_INET6;
188		src_sa6.sin6_len = sizeof(src_sa6);
189		src_sa6.sin6_addr = saddr6;
190		if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) {
191			nd6log((LOG_INFO, "nd6_ns_input: "
192				"NS packet from non-neighbor\n"));
193			goto bad;
194		}
195	}
196
197	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
198		nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n"));
199		goto bad;
200	}
201
202	icmp6len -= sizeof(*nd_ns);
203	nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
204	if (nd6_options(&ndopts) < 0) {
205		nd6log((LOG_INFO,
206		    "nd6_ns_input: invalid ND option, ignored\n"));
207		/* nd6_options have incremented stats */
208		goto freeit;
209	}
210
211	if (ndopts.nd_opts_src_lladdr) {
212		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
213		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
214	}
215
216	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) {
217		nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
218		    "(link-layer address option)\n"));
219		goto bad;
220	}
221
222	/*
223	 * Attaching target link-layer address to the NA?
224	 * (RFC 2461 7.2.4)
225	 *
226	 * NS IP dst is unicast/anycast			MUST NOT add
227	 * NS IP dst is solicited-node multicast	MUST add
228	 *
229	 * In implementation, we add target link-layer address by default.
230	 * We do not add one in MUST NOT cases.
231	 */
232	if (!IN6_IS_ADDR_MULTICAST(&daddr6))
233		tlladdr = 0;
234	else
235		tlladdr = 1;
236
237	/*
238	 * Target address (taddr6) must be either:
239	 * (1) Valid unicast/anycast address for my receiving interface,
240	 * (2) Unicast address for which I'm offering proxy service, or
241	 * (3) "tentative" address on which DAD is being performed.
242	 */
243	/* (1) and (3) check. */
244	if (ifp->if_carp)
245		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
246	else
247		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
248
249	/* (2) check. */
250	if (ifa == NULL) {
251		struct sockaddr_dl rt_gateway;
252		struct rt_addrinfo info;
253		struct sockaddr_in6 dst6;
254
255		bzero(&dst6, sizeof(dst6));
256		dst6.sin6_len = sizeof(struct sockaddr_in6);
257		dst6.sin6_family = AF_INET6;
258		dst6.sin6_addr = taddr6;
259
260		bzero(&rt_gateway, sizeof(rt_gateway));
261		rt_gateway.sdl_len = sizeof(rt_gateway);
262		bzero(&info, sizeof(info));
263		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
264
265		/* Always use the default FIB. */
266		if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6,
267		    0, 0, &info) == 0) {
268			if ((info.rti_flags & RTF_ANNOUNCE) != 0 &&
269			    rt_gateway.sdl_family == AF_LINK) {
270
271				/*
272				 * proxy NDP for single entry
273				 */
274				proxydl = *SDL(&rt_gateway);
275				ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(
276				    ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
277				if (ifa)
278					proxy = 1;
279			}
280		}
281	}
282	if (ifa == NULL) {
283		/*
284		 * We've got an NS packet, and we don't have that adddress
285		 * assigned for us.  We MUST silently ignore it.
286		 * See RFC2461 7.2.3.
287		 */
288		goto freeit;
289	}
290	myaddr6 = *IFA_IN6(ifa);
291	anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST;
292	tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
293	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED)
294		goto freeit;
295
296	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
297		nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
298		    "(if %d, NS packet %d)\n",
299		    ip6_sprintf(ip6bufs, &taddr6),
300		    ifp->if_addrlen, lladdrlen - 2));
301		goto bad;
302	}
303
304	if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
305		nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
306		    ip6_sprintf(ip6bufs, &saddr6)));
307		goto freeit;
308	}
309
310	/*
311	 * We have neighbor solicitation packet, with target address equals to
312	 * one of my tentative address.
313	 *
314	 * src addr	how to process?
315	 * ---		---
316	 * multicast	of course, invalid (rejected in ip6_input)
317	 * unicast	somebody is doing address resolution -> ignore
318	 * unspec	dup address detection
319	 *
320	 * The processing is defined in RFC 2462.
321	 */
322	if (tentative) {
323		/*
324		 * If source address is unspecified address, it is for
325		 * duplicate address detection.
326		 *
327		 * If not, the packet is for addess resolution;
328		 * silently ignore it.
329		 */
330		if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
331			nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);
332
333		goto freeit;
334	}
335
336	/*
337	 * If the source address is unspecified address, entries must not
338	 * be created or updated.
339	 * It looks that sender is performing DAD.  Output NA toward
340	 * all-node multicast address, to tell the sender that I'm using
341	 * the address.
342	 * S bit ("solicited") must be zero.
343	 */
344	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
345		struct in6_addr in6_all;
346
347		in6_all = in6addr_linklocal_allnodes;
348		if (in6_setscope(&in6_all, ifp, NULL) != 0)
349			goto bad;
350		nd6_na_output_fib(ifp, &in6_all, &taddr6,
351		    ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
352		    rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL,
353		    M_GETFIB(m));
354		goto freeit;
355	}
356
357	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
358	    ND_NEIGHBOR_SOLICIT, 0);
359
360	nd6_na_output_fib(ifp, &saddr6, &taddr6,
361	    ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
362	    rflag | ND_NA_FLAG_SOLICITED, tlladdr,
363	    proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m));
364 freeit:
365	if (ifa != NULL)
366		ifa_free(ifa);
367	m_freem(m);
368	return;
369
370 bad:
371	nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
372		ip6_sprintf(ip6bufs, &saddr6)));
373	nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
374		ip6_sprintf(ip6bufs, &daddr6)));
375	nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
376		ip6_sprintf(ip6bufs, &taddr6)));
377	ICMP6STAT_INC(icp6s_badns);
378	if (ifa != NULL)
379		ifa_free(ifa);
380	m_freem(m);
381}
382
383/*
384 * Output a Neighbor Solicitation Message. Caller specifies:
385 *	- ICMP6 header source IP6 address
386 *	- ND6 header target IP6 address
387 *	- ND6 header source datalink address
388 *
389 * Based on RFC 2461
390 * Based on RFC 2462 (duplicate address detection)
391 *
392 *    ln - for source address determination
393 * nonce - If non-NULL, NS is used for duplicate address detection and
394 *         the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
395 */
396static void
397nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
398    const struct in6_addr *daddr6, const struct in6_addr *taddr6,
399    uint8_t *nonce, u_int fibnum)
400{
401	struct mbuf *m;
402	struct m_tag *mtag;
403	struct ip6_hdr *ip6;
404	struct nd_neighbor_solicit *nd_ns;
405	struct ip6_moptions im6o;
406	int icmp6len;
407	int maxlen;
408	caddr_t mac;
409
410	if (IN6_IS_ADDR_MULTICAST(taddr6))
411		return;
412
413	/* estimate the size of message */
414	maxlen = sizeof(*ip6) + sizeof(*nd_ns);
415	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
416	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
417	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
418	    __func__, max_linkhdr, maxlen, MCLBYTES));
419
420	if (max_linkhdr + maxlen > MHLEN)
421		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
422	else
423		m = m_gethdr(M_NOWAIT, MT_DATA);
424	if (m == NULL)
425		return;
426	M_SETFIB(m, fibnum);
427
428	if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) {
429		m->m_flags |= M_MCAST;
430		im6o.im6o_multicast_ifp = ifp;
431		im6o.im6o_multicast_hlim = 255;
432		im6o.im6o_multicast_loop = 0;
433	}
434
435	icmp6len = sizeof(*nd_ns);
436	m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
437	m->m_data += max_linkhdr;	/* or M_ALIGN() equivalent? */
438
439	/* fill neighbor solicitation packet */
440	ip6 = mtod(m, struct ip6_hdr *);
441	ip6->ip6_flow = 0;
442	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
443	ip6->ip6_vfc |= IPV6_VERSION;
444	/* ip6->ip6_plen will be set later */
445	ip6->ip6_nxt = IPPROTO_ICMPV6;
446	ip6->ip6_hlim = 255;
447	if (daddr6)
448		ip6->ip6_dst = *daddr6;
449	else {
450		ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
451		ip6->ip6_dst.s6_addr16[1] = 0;
452		ip6->ip6_dst.s6_addr32[1] = 0;
453		ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
454		ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3];
455		ip6->ip6_dst.s6_addr8[12] = 0xff;
456		if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
457			goto bad;
458	}
459	if (nonce == NULL) {
460		struct ifaddr *ifa = NULL;
461
462		/*
463		 * RFC2461 7.2.2:
464		 * "If the source address of the packet prompting the
465		 * solicitation is the same as one of the addresses assigned
466		 * to the outgoing interface, that address SHOULD be placed
467		 * in the IP Source Address of the outgoing solicitation.
468		 * Otherwise, any one of the addresses assigned to the
469		 * interface should be used."
470		 *
471		 * We use the source address for the prompting packet
472		 * (saddr6), if saddr6 belongs to the outgoing interface.
473		 * Otherwise, we perform the source address selection as usual.
474		 */
475
476		if (saddr6 != NULL)
477			ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6);
478		if (ifa != NULL) {
479			/* ip6_src set already. */
480			ip6->ip6_src = *saddr6;
481			ifa_free(ifa);
482		} else {
483			int error;
484			struct in6_addr dst6, src6;
485			uint32_t scopeid;
486
487			in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
488			error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
489			    scopeid, ifp, &src6, NULL);
490			if (error) {
491				char ip6buf[INET6_ADDRSTRLEN];
492				nd6log((LOG_DEBUG, "%s: source can't be "
493				    "determined: dst=%s, error=%d\n", __func__,
494				    ip6_sprintf(ip6buf, &dst6),
495				    error));
496				goto bad;
497			}
498			ip6->ip6_src = src6;
499		}
500	} else {
501		/*
502		 * Source address for DAD packet must always be IPv6
503		 * unspecified address. (0::0)
504		 * We actually don't have to 0-clear the address (we did it
505		 * above), but we do so here explicitly to make the intention
506		 * clearer.
507		 */
508		bzero(&ip6->ip6_src, sizeof(ip6->ip6_src));
509	}
510	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
511	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
512	nd_ns->nd_ns_code = 0;
513	nd_ns->nd_ns_reserved = 0;
514	nd_ns->nd_ns_target = *taddr6;
515	in6_clearscope(&nd_ns->nd_ns_target); /* XXX */
516
517	/*
518	 * Add source link-layer address option.
519	 *
520	 *				spec		implementation
521	 *				---		---
522	 * DAD packet			MUST NOT	do not add the option
523	 * there's no link layer address:
524	 *				impossible	do not add the option
525	 * there's link layer address:
526	 *	Multicast NS		MUST add one	add the option
527	 *	Unicast NS		SHOULD add one	add the option
528	 */
529	if (nonce == NULL && (mac = nd6_ifptomac(ifp))) {
530		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
531		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
532		/* 8 byte alignments... */
533		optlen = (optlen + 7) & ~7;
534
535		m->m_pkthdr.len += optlen;
536		m->m_len += optlen;
537		icmp6len += optlen;
538		bzero((caddr_t)nd_opt, optlen);
539		nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
540		nd_opt->nd_opt_len = optlen >> 3;
541		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
542	}
543	/*
544	 * Add a Nonce option (RFC 3971) to detect looped back NS messages.
545	 * This behavior is documented as Enhanced Duplicate Address
546	 * Detection in RFC 7527.
547	 * net.inet6.ip6.dad_enhanced=0 disables this.
548	 */
549	if (V_dad_enhanced != 0 && nonce != NULL) {
550		int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN;
551		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
552		/* 8-byte alignment is required. */
553		optlen = (optlen + 7) & ~7;
554
555		m->m_pkthdr.len += optlen;
556		m->m_len += optlen;
557		icmp6len += optlen;
558		bzero((caddr_t)nd_opt, optlen);
559		nd_opt->nd_opt_type = ND_OPT_NONCE;
560		nd_opt->nd_opt_len = optlen >> 3;
561		bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN);
562	}
563	ip6->ip6_plen = htons((u_short)icmp6len);
564	nd_ns->nd_ns_cksum = 0;
565	nd_ns->nd_ns_cksum =
566	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len);
567
568	if (send_sendso_input_hook != NULL) {
569		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
570			sizeof(unsigned short), M_NOWAIT);
571		if (mtag == NULL)
572			goto bad;
573		*(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type;
574		m_tag_prepend(m, mtag);
575	}
576
577	ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0,
578	    &im6o, NULL, NULL);
579	icmp6_ifstat_inc(ifp, ifs6_out_msg);
580	icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
581	ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
582
583	return;
584
585  bad:
586	m_freem(m);
587	return;
588}
589
590#ifndef BURN_BRIDGES
591void
592nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6,
593    const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce)
594{
595
596	nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB);
597}
598#endif
599/*
600 * Neighbor advertisement input handling.
601 *
602 * Based on RFC 2461
603 * Based on RFC 2462 (duplicate address detection)
604 *
605 * the following items are not implemented yet:
606 * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
607 * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
608 */
609void
610nd6_na_input(struct mbuf *m, int off, int icmp6len)
611{
612	struct ifnet *ifp = m->m_pkthdr.rcvif;
613	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
614	struct nd_neighbor_advert *nd_na;
615	struct in6_addr daddr6 = ip6->ip6_dst;
616	struct in6_addr taddr6;
617	int flags;
618	int is_router;
619	int is_solicited;
620	int is_override;
621	char *lladdr = NULL;
622	int lladdrlen = 0;
623	int checklink = 0;
624	struct ifaddr *ifa;
625	struct llentry *ln = NULL;
626	union nd_opts ndopts;
627	struct mbuf *chain = NULL;
628	struct sockaddr_in6 sin6;
629	u_char linkhdr[LLE_MAX_LINKHDR];
630	size_t linkhdrsize;
631	int lladdr_off;
632	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
633
634	if (ip6->ip6_hlim != 255) {
635		nd6log((LOG_ERR,
636		    "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
637		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
638		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
639		goto bad;
640	}
641
642#ifndef PULLDOWN_TEST
643	IP6_EXTHDR_CHECK(m, off, icmp6len,);
644	nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off);
645#else
646	IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len);
647	if (nd_na == NULL) {
648		ICMP6STAT_INC(icp6s_tooshort);
649		return;
650	}
651#endif
652
653	flags = nd_na->nd_na_flags_reserved;
654	is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
655	is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
656	is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
657	memset(&sin6, 0, sizeof(sin6));
658
659	taddr6 = nd_na->nd_na_target;
660	if (in6_setscope(&taddr6, ifp, NULL))
661		goto bad;	/* XXX: impossible */
662
663	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
664		nd6log((LOG_ERR,
665		    "nd6_na_input: invalid target address %s\n",
666		    ip6_sprintf(ip6bufs, &taddr6)));
667		goto bad;
668	}
669	if (IN6_IS_ADDR_MULTICAST(&daddr6))
670		if (is_solicited) {
671			nd6log((LOG_ERR,
672			    "nd6_na_input: a solicited adv is multicasted\n"));
673			goto bad;
674		}
675
676	icmp6len -= sizeof(*nd_na);
677	nd6_option_init(nd_na + 1, icmp6len, &ndopts);
678	if (nd6_options(&ndopts) < 0) {
679		nd6log((LOG_INFO,
680		    "nd6_na_input: invalid ND option, ignored\n"));
681		/* nd6_options have incremented stats */
682		goto freeit;
683	}
684
685	if (ndopts.nd_opts_tgt_lladdr) {
686		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
687		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
688	}
689
690	/*
691	 * This effectively disables the DAD check on a non-master CARP
692	 * address.
693	 */
694	if (ifp->if_carp)
695		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
696	else
697		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
698
699	/*
700	 * Target address matches one of my interface address.
701	 *
702	 * If my address is tentative, this means that there's somebody
703	 * already using the same address as mine.  This indicates DAD failure.
704	 * This is defined in RFC 2462.
705	 *
706	 * Otherwise, process as defined in RFC 2461.
707	 */
708	if (ifa
709	 && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
710		nd6_dad_na_input(ifa);
711		ifa_free(ifa);
712		goto freeit;
713	}
714
715	/* Just for safety, maybe unnecessary. */
716	if (ifa) {
717		ifa_free(ifa);
718		log(LOG_ERR,
719		    "nd6_na_input: duplicate IP6 address %s\n",
720		    ip6_sprintf(ip6bufs, &taddr6));
721		goto freeit;
722	}
723
724	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
725		nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s "
726		    "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6),
727		    ifp->if_addrlen, lladdrlen - 2));
728		goto bad;
729	}
730
731	/*
732	 * If no neighbor cache entry is found, NA SHOULD silently be
733	 * discarded.
734	 */
735	IF_AFDATA_RLOCK(ifp);
736	ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp);
737	IF_AFDATA_RUNLOCK(ifp);
738	if (ln == NULL) {
739		goto freeit;
740	}
741
742	if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
743		/*
744		 * If the link-layer has address, and no lladdr option came,
745		 * discard the packet.
746		 */
747		if (ifp->if_addrlen && lladdr == NULL) {
748			goto freeit;
749		}
750
751		/*
752		 * Record link-layer address, and update the state.
753		 */
754		linkhdrsize = sizeof(linkhdr);
755		if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
756		    linkhdr, &linkhdrsize, &lladdr_off) != 0)
757			return;
758
759		if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
760		    lladdr_off) == 0) {
761			ln = NULL;
762			goto freeit;
763		}
764		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
765		if (is_solicited)
766			nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
767		else
768			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
769		if ((ln->ln_router = is_router) != 0) {
770			/*
771			 * This means a router's state has changed from
772			 * non-reachable to probably reachable, and might
773			 * affect the status of associated prefixes..
774			 */
775			checklink = 1;
776		}
777	} else {
778		int llchange;
779
780		/*
781		 * Check if the link-layer address has changed or not.
782		 */
783		if (lladdr == NULL)
784			llchange = 0;
785		else {
786			if (ln->la_flags & LLE_VALID) {
787				if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
788					llchange = 1;
789				else
790					llchange = 0;
791			} else
792				llchange = 1;
793		}
794
795		/*
796		 * This is VERY complex.  Look at it with care.
797		 *
798		 * override solicit lladdr llchange	action
799		 *					(L: record lladdr)
800		 *
801		 *	0	0	n	--	(2c)
802		 *	0	0	y	n	(2b) L
803		 *	0	0	y	y	(1)    REACHABLE->STALE
804		 *	0	1	n	--	(2c)   *->REACHABLE
805		 *	0	1	y	n	(2b) L *->REACHABLE
806		 *	0	1	y	y	(1)    REACHABLE->STALE
807		 *	1	0	n	--	(2a)
808		 *	1	0	y	n	(2a) L
809		 *	1	0	y	y	(2a) L *->STALE
810		 *	1	1	n	--	(2a)   *->REACHABLE
811		 *	1	1	y	n	(2a) L *->REACHABLE
812		 *	1	1	y	y	(2a) L *->REACHABLE
813		 */
814		if (!is_override && (lladdr != NULL && llchange)) {  /* (1) */
815			/*
816			 * If state is REACHABLE, make it STALE.
817			 * no other updates should be done.
818			 */
819			if (ln->ln_state == ND6_LLINFO_REACHABLE)
820				nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
821			goto freeit;
822		} else if (is_override				   /* (2a) */
823			|| (!is_override && (lladdr != NULL && !llchange)) /* (2b) */
824			|| lladdr == NULL) {			   /* (2c) */
825			/*
826			 * Update link-local address, if any.
827			 */
828			if (lladdr != NULL) {
829				linkhdrsize = sizeof(linkhdr);
830				if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
831				    linkhdr, &linkhdrsize, &lladdr_off) != 0)
832					goto freeit;
833				if (lltable_try_set_entry_addr(ifp, ln, linkhdr,
834				    linkhdrsize, lladdr_off) == 0) {
835					ln = NULL;
836					goto freeit;
837				}
838				EVENTHANDLER_INVOKE(lle_event, ln,
839				    LLENTRY_RESOLVED);
840			}
841
842			/*
843			 * If solicited, make the state REACHABLE.
844			 * If not solicited and the link-layer address was
845			 * changed, make it STALE.
846			 */
847			if (is_solicited)
848				nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
849			else {
850				if (lladdr != NULL && llchange)
851					nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
852			}
853		}
854
855		if (ln->ln_router && !is_router) {
856			/*
857			 * The peer dropped the router flag.
858			 * Remove the sender from the Default Router List and
859			 * update the Destination Cache entries.
860			 */
861			struct nd_defrouter *dr;
862			struct in6_addr *in6;
863			struct ifnet *nd6_ifp;
864
865			in6 = &ln->r_l3addr.addr6;
866
867			/*
868			 * Lock to protect the default router list.
869			 * XXX: this might be unnecessary, since this function
870			 * is only called under the network software interrupt
871			 * context.  However, we keep it just for safety.
872			 */
873			nd6_ifp = lltable_get_ifp(ln->lle_tbl);
874			dr = defrouter_lookup(in6, nd6_ifp);
875			if (dr)
876				defrtrlist_del(dr);
877			else if (ND_IFINFO(nd6_ifp)->flags &
878			    ND6_IFF_ACCEPT_RTADV) {
879				/*
880				 * Even if the neighbor is not in the default
881				 * router list, the neighbor may be used
882				 * as a next hop for some destinations
883				 * (e.g. redirect case). So we must
884				 * call rt6_flush explicitly.
885				 */
886				rt6_flush(&ip6->ip6_src, ifp);
887			}
888		}
889		ln->ln_router = is_router;
890	}
891        /* XXX - QL
892	 *  Does this matter?
893	 *  rt->rt_flags &= ~RTF_REJECT;
894	 */
895	ln->la_asked = 0;
896	if (ln->la_hold != NULL)
897		nd6_grab_holdchain(ln, &chain, &sin6);
898 freeit:
899	if (ln != NULL)
900		LLE_WUNLOCK(ln);
901
902	if (chain != NULL)
903		nd6_flush_holdchain(ifp, ifp, chain, &sin6);
904
905	if (checklink)
906		pfxlist_onlink_check();
907
908	m_freem(m);
909	return;
910
911 bad:
912	if (ln != NULL)
913		LLE_WUNLOCK(ln);
914
915	ICMP6STAT_INC(icp6s_badna);
916	m_freem(m);
917}
918
919/*
920 * Neighbor advertisement output handling.
921 *
922 * Based on RFC 2461
923 *
924 * the following items are not implemented yet:
925 * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
926 * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
927 *
928 * tlladdr - 1 if include target link-layer address
929 * sdl0 - sockaddr_dl (= proxy NA) or NULL
930 */
931static void
932nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
933    const struct in6_addr *taddr6, u_long flags, int tlladdr,
934    struct sockaddr *sdl0, u_int fibnum)
935{
936	struct mbuf *m;
937	struct m_tag *mtag;
938	struct ip6_hdr *ip6;
939	struct nd_neighbor_advert *nd_na;
940	struct ip6_moptions im6o;
941	struct in6_addr daddr6, dst6, src6;
942	uint32_t scopeid;
943
944	int icmp6len, maxlen, error;
945	caddr_t mac = NULL;
946
947	daddr6 = *daddr6_0;	/* make a local copy for modification */
948
949	/* estimate the size of message */
950	maxlen = sizeof(*ip6) + sizeof(*nd_na);
951	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
952	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
953	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
954	    __func__, max_linkhdr, maxlen, MCLBYTES));
955
956	if (max_linkhdr + maxlen > MHLEN)
957		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
958	else
959		m = m_gethdr(M_NOWAIT, MT_DATA);
960	if (m == NULL)
961		return;
962	M_SETFIB(m, fibnum);
963
964	if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
965		m->m_flags |= M_MCAST;
966		im6o.im6o_multicast_ifp = ifp;
967		im6o.im6o_multicast_hlim = 255;
968		im6o.im6o_multicast_loop = 0;
969	}
970
971	icmp6len = sizeof(*nd_na);
972	m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
973	m->m_data += max_linkhdr;	/* or M_ALIGN() equivalent? */
974
975	/* fill neighbor advertisement packet */
976	ip6 = mtod(m, struct ip6_hdr *);
977	ip6->ip6_flow = 0;
978	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
979	ip6->ip6_vfc |= IPV6_VERSION;
980	ip6->ip6_nxt = IPPROTO_ICMPV6;
981	ip6->ip6_hlim = 255;
982	if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
983		/* reply to DAD */
984		daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
985		daddr6.s6_addr16[1] = 0;
986		daddr6.s6_addr32[1] = 0;
987		daddr6.s6_addr32[2] = 0;
988		daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
989		if (in6_setscope(&daddr6, ifp, NULL))
990			goto bad;
991
992		flags &= ~ND_NA_FLAG_SOLICITED;
993	}
994	ip6->ip6_dst = daddr6;
995
996	/*
997	 * Select a source whose scope is the same as that of the dest.
998	 */
999	in6_splitscope(&daddr6, &dst6, &scopeid);
1000	error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
1001	    scopeid, ifp, &src6, NULL);
1002	if (error) {
1003		char ip6buf[INET6_ADDRSTRLEN];
1004		nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
1005		    "determined: dst=%s, error=%d\n",
1006		    ip6_sprintf(ip6buf, &daddr6), error));
1007		goto bad;
1008	}
1009	ip6->ip6_src = src6;
1010	nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
1011	nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
1012	nd_na->nd_na_code = 0;
1013	nd_na->nd_na_target = *taddr6;
1014	in6_clearscope(&nd_na->nd_na_target); /* XXX */
1015
1016	/*
1017	 * "tlladdr" indicates NS's condition for adding tlladdr or not.
1018	 * see nd6_ns_input() for details.
1019	 * Basically, if NS packet is sent to unicast/anycast addr,
1020	 * target lladdr option SHOULD NOT be included.
1021	 */
1022	if (tlladdr) {
1023		/*
1024		 * sdl0 != NULL indicates proxy NA.  If we do proxy, use
1025		 * lladdr in sdl0.  If we are not proxying (sending NA for
1026		 * my address) use lladdr configured for the interface.
1027		 */
1028		if (sdl0 == NULL) {
1029			if (ifp->if_carp)
1030				mac = (*carp_macmatch6_p)(ifp, m, taddr6);
1031			if (mac == NULL)
1032				mac = nd6_ifptomac(ifp);
1033		} else if (sdl0->sa_family == AF_LINK) {
1034			struct sockaddr_dl *sdl;
1035			sdl = (struct sockaddr_dl *)sdl0;
1036			if (sdl->sdl_alen == ifp->if_addrlen)
1037				mac = LLADDR(sdl);
1038		}
1039	}
1040	if (tlladdr && mac) {
1041		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
1042		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1);
1043
1044		/* roundup to 8 bytes alignment! */
1045		optlen = (optlen + 7) & ~7;
1046
1047		m->m_pkthdr.len += optlen;
1048		m->m_len += optlen;
1049		icmp6len += optlen;
1050		bzero((caddr_t)nd_opt, optlen);
1051		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1052		nd_opt->nd_opt_len = optlen >> 3;
1053		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
1054	} else
1055		flags &= ~ND_NA_FLAG_OVERRIDE;
1056
1057	ip6->ip6_plen = htons((u_short)icmp6len);
1058	nd_na->nd_na_flags_reserved = flags;
1059	nd_na->nd_na_cksum = 0;
1060	nd_na->nd_na_cksum =
1061	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len);
1062
1063	if (send_sendso_input_hook != NULL) {
1064		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
1065		    sizeof(unsigned short), M_NOWAIT);
1066		if (mtag == NULL)
1067			goto bad;
1068		*(unsigned short *)(mtag + 1) = nd_na->nd_na_type;
1069		m_tag_prepend(m, mtag);
1070	}
1071
1072	ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL);
1073	icmp6_ifstat_inc(ifp, ifs6_out_msg);
1074	icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
1075	ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]);
1076
1077	return;
1078
1079  bad:
1080	m_freem(m);
1081	return;
1082}
1083
1084#ifndef BURN_BRIDGES
1085void
1086nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0,
1087    const struct in6_addr *taddr6, u_long flags, int tlladdr,
1088    struct sockaddr *sdl0)
1089{
1090
1091	nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0,
1092	    RT_DEFAULT_FIB);
1093}
1094#endif
1095
1096caddr_t
1097nd6_ifptomac(struct ifnet *ifp)
1098{
1099	switch (ifp->if_type) {
1100	case IFT_ARCNET:
1101	case IFT_ETHER:
1102	case IFT_FDDI:
1103	case IFT_IEEE1394:
1104	case IFT_L2VLAN:
1105	case IFT_IEEE80211:
1106	case IFT_INFINIBAND:
1107	case IFT_BRIDGE:
1108	case IFT_ISO88025:
1109		return IF_LLADDR(ifp);
1110	default:
1111		return NULL;
1112	}
1113}
1114
1115struct dadq {
1116	TAILQ_ENTRY(dadq) dad_list;
1117	struct ifaddr *dad_ifa;
1118	int dad_count;		/* max NS to send */
1119	int dad_ns_tcount;	/* # of trials to send NS */
1120	int dad_ns_ocount;	/* NS sent so far */
1121	int dad_ns_icount;
1122	int dad_na_icount;
1123	int dad_ns_lcount;	/* looped back NS */
1124	int dad_loopbackprobe;	/* probing state for loopback detection */
1125	struct callout dad_timer_ch;
1126	struct vnet *dad_vnet;
1127	u_int dad_refcnt;
1128#define	ND_OPT_NONCE_LEN32 \
1129		((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t))
1130	uint32_t dad_nonce[ND_OPT_NONCE_LEN32];
1131};
1132
1133static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
1134static VNET_DEFINE(struct rwlock, dad_rwlock);
1135#define	V_dadq			VNET(dadq)
1136#define	V_dad_rwlock		VNET(dad_rwlock)
1137
1138#define	DADQ_RLOCK()		rw_rlock(&V_dad_rwlock)
1139#define	DADQ_RUNLOCK()		rw_runlock(&V_dad_rwlock)
1140#define	DADQ_WLOCK()		rw_wlock(&V_dad_rwlock)
1141#define	DADQ_WUNLOCK()		rw_wunlock(&V_dad_rwlock)
1142
1143static void
1144nd6_dad_add(struct dadq *dp)
1145{
1146
1147	DADQ_WLOCK();
1148	TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
1149	DADQ_WUNLOCK();
1150}
1151
1152static void
1153nd6_dad_del(struct dadq *dp)
1154{
1155
1156	DADQ_WLOCK();
1157	TAILQ_REMOVE(&V_dadq, dp, dad_list);
1158	DADQ_WUNLOCK();
1159	nd6_dad_rele(dp);
1160}
1161
1162static struct dadq *
1163nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
1164{
1165	struct dadq *dp;
1166
1167	DADQ_RLOCK();
1168	TAILQ_FOREACH(dp, &V_dadq, dad_list) {
1169		if (dp->dad_ifa != ifa)
1170			continue;
1171		/*
1172		 * Skip if the nonce matches the received one.
1173		 * +2 in the length is required because of type and
1174		 * length fields are included in a header.
1175		 */
1176		if (n != NULL &&
1177		    n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
1178		    memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
1179		        ND_OPT_NONCE_LEN) == 0) {
1180			dp->dad_ns_lcount++;
1181			continue;
1182		}
1183		refcount_acquire(&dp->dad_refcnt);
1184		break;
1185	}
1186	DADQ_RUNLOCK();
1187
1188	return (dp);
1189}
1190
1191static void
1192nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns)
1193{
1194
1195	if (send_ns != 0)
1196		nd6_dad_ns_output(dp);
1197	callout_reset(&dp->dad_timer_ch, ticks,
1198	    (void (*)(void *))nd6_dad_timer, (void *)dp);
1199}
1200
1201static void
1202nd6_dad_stoptimer(struct dadq *dp)
1203{
1204
1205	callout_drain(&dp->dad_timer_ch);
1206}
1207
1208static void
1209nd6_dad_rele(struct dadq *dp)
1210{
1211
1212	if (refcount_release(&dp->dad_refcnt)) {
1213		ifa_free(dp->dad_ifa);
1214		free(dp, M_IP6NDP);
1215	}
1216}
1217
1218void
1219nd6_dad_init(void)
1220{
1221
1222	rw_init(&V_dad_rwlock, "nd6 DAD queue");
1223	TAILQ_INIT(&V_dadq);
1224}
1225
1226/*
1227 * Start Duplicate Address Detection (DAD) for specified interface address.
1228 */
1229void
1230nd6_dad_start(struct ifaddr *ifa, int delay)
1231{
1232	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
1233	struct dadq *dp;
1234	char ip6buf[INET6_ADDRSTRLEN];
1235	int send_ns;
1236
1237	/*
1238	 * If we don't need DAD, don't do it.
1239	 * There are several cases:
1240	 * - DAD is disabled (ip6_dad_count == 0)
1241	 * - the interface address is anycast
1242	 */
1243	if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) {
1244		log(LOG_DEBUG,
1245			"nd6_dad_start: called with non-tentative address "
1246			"%s(%s)\n",
1247			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1248			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1249		return;
1250	}
1251	if (ia->ia6_flags & IN6_IFF_ANYCAST) {
1252		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1253		return;
1254	}
1255	if (!V_ip6_dad_count) {
1256		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1257		return;
1258	}
1259	if (ifa->ifa_ifp == NULL)
1260		panic("nd6_dad_start: ifa->ifa_ifp == NULL");
1261	if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) {
1262		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1263		return;
1264	}
1265	if (!(ifa->ifa_ifp->if_flags & IFF_UP) ||
1266	    !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
1267	    (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) {
1268		ia->ia6_flags |= IN6_IFF_TENTATIVE;
1269		return;
1270	}
1271	if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
1272		/*
1273		 * DAD already in progress.  Let the existing entry
1274		 * to finish it.
1275		 */
1276		return;
1277	}
1278
1279	dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO);
1280	if (dp == NULL) {
1281		log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
1282			"%s(%s)\n",
1283			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1284			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1285		return;
1286	}
1287	callout_init(&dp->dad_timer_ch, 0);
1288#ifdef VIMAGE
1289	dp->dad_vnet = curvnet;
1290#endif
1291	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
1292	    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
1293
1294	/*
1295	 * Send NS packet for DAD, ip6_dad_count times.
1296	 * Note that we must delay the first transmission, if this is the
1297	 * first packet to be sent from the interface after interface
1298	 * (re)initialization.
1299	 */
1300	dp->dad_ifa = ifa;
1301	ifa_ref(dp->dad_ifa);
1302	dp->dad_count = V_ip6_dad_count;
1303	dp->dad_ns_icount = dp->dad_na_icount = 0;
1304	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
1305	dp->dad_ns_lcount = dp->dad_loopbackprobe = 0;
1306	refcount_init(&dp->dad_refcnt, 1);
1307	nd6_dad_add(dp);
1308	send_ns = 0;
1309	if (delay == 0) {
1310		send_ns = 1;
1311		delay = (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000;
1312	}
1313	nd6_dad_starttimer(dp, delay, send_ns);
1314}
1315
1316/*
1317 * terminate DAD unconditionally.  used for address removals.
1318 */
1319void
1320nd6_dad_stop(struct ifaddr *ifa)
1321{
1322	struct dadq *dp;
1323
1324	dp = nd6_dad_find(ifa, NULL);
1325	if (!dp) {
1326		/* DAD wasn't started yet */
1327		return;
1328	}
1329
1330	nd6_dad_stoptimer(dp);
1331
1332	/*
1333	 * The DAD queue entry may have been removed by nd6_dad_timer() while
1334	 * we were waiting for it to stop, so re-do the lookup.
1335	 */
1336	nd6_dad_rele(dp);
1337	if (nd6_dad_find(ifa, NULL) == NULL)
1338		return;
1339
1340	nd6_dad_del(dp);
1341	nd6_dad_rele(dp);
1342}
1343
1344static void
1345nd6_dad_timer(struct dadq *dp)
1346{
1347	CURVNET_SET(dp->dad_vnet);
1348	struct ifaddr *ifa = dp->dad_ifa;
1349	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
1350	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
1351	char ip6buf[INET6_ADDRSTRLEN];
1352
1353	/* Sanity check */
1354	if (ia == NULL) {
1355		log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
1356		goto err;
1357	}
1358	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
1359		/* Do not need DAD for ifdisabled interface. */
1360		log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
1361		    "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
1362		goto err;
1363	}
1364	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
1365		log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
1366			"%s(%s)\n",
1367			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1368			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1369		goto err;
1370	}
1371	if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
1372		log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
1373			"%s(%s)\n",
1374			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1375			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1376		goto err;
1377	}
1378
1379	/* Stop DAD if the interface is down even after dad_maxtry attempts. */
1380	if ((dp->dad_ns_tcount > V_dad_maxtry) &&
1381	    (((ifp->if_flags & IFF_UP) == 0) ||
1382	     ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) {
1383		nd6log((LOG_INFO, "%s: could not run DAD "
1384		    "because the interface was down or not running.\n",
1385		    if_name(ifa->ifa_ifp)));
1386		goto err;
1387	}
1388
1389	/* Need more checks? */
1390	if (dp->dad_ns_ocount < dp->dad_count) {
1391		/*
1392		 * We have more NS to go.  Send NS packet for DAD.
1393		 */
1394		nd6_dad_starttimer(dp,
1395		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1);
1396		goto done;
1397	} else {
1398		/*
1399		 * We have transmitted sufficient number of DAD packets.
1400		 * See what we've got.
1401		 */
1402		if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
1403			/* We've seen NS or NA, means DAD has failed. */
1404			nd6_dad_duplicated(ifa, dp);
1405		else if (V_dad_enhanced != 0 &&
1406		    dp->dad_ns_lcount > 0 &&
1407		    dp->dad_ns_lcount > dp->dad_loopbackprobe) {
1408			/*
1409			 * Sec. 4.1 in RFC 7527 requires transmission of
1410			 * additional probes until the loopback condition
1411			 * becomes clear when a looped back probe is detected.
1412			 */
1413			log(LOG_ERR, "%s: a looped back NS message is "
1414			    "detected during DAD for %s.  "
1415			    "Another DAD probes are being sent.\n",
1416			    if_name(ifa->ifa_ifp),
1417			    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
1418			dp->dad_loopbackprobe = dp->dad_ns_lcount;
1419			/*
1420			 * Send an NS immediately and increase dad_count by
1421			 * V_nd6_mmaxtries - 1.
1422			 */
1423			dp->dad_count =
1424			    dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
1425			nd6_dad_starttimer(dp,
1426			    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000,
1427			    1);
1428			goto done;
1429		} else {
1430			/*
1431			 * We are done with DAD.  No NA came, no NS came.
1432			 * No duplicate address found.  Check IFDISABLED flag
1433			 * again in case that it is changed between the
1434			 * beginning of this function and here.
1435			 */
1436			if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0)
1437				ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1438
1439			nd6log((LOG_DEBUG,
1440			    "%s: DAD complete for %s - no duplicates found\n",
1441			    if_name(ifa->ifa_ifp),
1442			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
1443			if (dp->dad_ns_lcount > 0)
1444				log(LOG_ERR, "%s: DAD completed while "
1445				    "a looped back NS message is detected "
1446				    "during DAD for %s.\n",
1447				    if_name(ifa->ifa_ifp),
1448				    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
1449		}
1450	}
1451err:
1452	nd6_dad_del(dp);
1453done:
1454	CURVNET_RESTORE();
1455}
1456
1457static void
1458nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
1459{
1460	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
1461	struct ifnet *ifp;
1462	char ip6buf[INET6_ADDRSTRLEN];
1463
1464	log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
1465	    "NS in/out/loopback=%d/%d/%d, NA in=%d\n",
1466	    if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1467	    dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount,
1468	    dp->dad_na_icount);
1469
1470	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1471	ia->ia6_flags |= IN6_IFF_DUPLICATED;
1472
1473	ifp = ifa->ifa_ifp;
1474	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
1475	    if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
1476	log(LOG_ERR, "%s: manual intervention required\n",
1477	    if_name(ifp));
1478
1479	/*
1480	 * If the address is a link-local address formed from an interface
1481	 * identifier based on the hardware address which is supposed to be
1482	 * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
1483	 * operation on the interface SHOULD be disabled.
1484	 * [RFC 4862, Section 5.4.5]
1485	 */
1486	if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
1487		struct in6_addr in6;
1488
1489		/*
1490		 * To avoid over-reaction, we only apply this logic when we are
1491		 * very sure that hardware addresses are supposed to be unique.
1492		 */
1493		switch (ifp->if_type) {
1494		case IFT_ETHER:
1495		case IFT_FDDI:
1496		case IFT_ATM:
1497		case IFT_IEEE1394:
1498		case IFT_IEEE80211:
1499		case IFT_INFINIBAND:
1500			in6 = ia->ia_addr.sin6_addr;
1501			if (in6_get_hw_ifid(ifp, &in6) == 0 &&
1502			    IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
1503				ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
1504				log(LOG_ERR, "%s: possible hardware address "
1505				    "duplication detected, disable IPv6\n",
1506				    if_name(ifp));
1507			}
1508			break;
1509		}
1510	}
1511}
1512
1513static void
1514nd6_dad_ns_output(struct dadq *dp)
1515{
1516	struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa;
1517	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
1518	int i;
1519
1520	dp->dad_ns_tcount++;
1521	if ((ifp->if_flags & IFF_UP) == 0) {
1522		return;
1523	}
1524	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1525		return;
1526	}
1527
1528	dp->dad_ns_ocount++;
1529	if (V_dad_enhanced != 0) {
1530		for (i = 0; i < ND_OPT_NONCE_LEN32; i++)
1531			dp->dad_nonce[i] = arc4random();
1532		/*
1533		 * XXXHRS: Note that in the case that
1534		 * DupAddrDetectTransmits > 1, multiple NS messages with
1535		 * different nonces can be looped back in an unexpected
1536		 * order.  The current implementation recognizes only
1537		 * the latest nonce on the sender side.  Practically it
1538		 * should work well in almost all cases.
1539		 */
1540	}
1541	nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr,
1542	    (uint8_t *)&dp->dad_nonce[0]);
1543}
1544
1545static void
1546nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
1547{
1548	struct in6_ifaddr *ia;
1549	struct ifnet *ifp;
1550	const struct in6_addr *taddr6;
1551	struct dadq *dp;
1552
1553	if (ifa == NULL)
1554		panic("ifa == NULL in nd6_dad_ns_input");
1555
1556	ia = (struct in6_ifaddr *)ifa;
1557	ifp = ifa->ifa_ifp;
1558	taddr6 = &ia->ia_addr.sin6_addr;
1559	/* Ignore Nonce option when Enhanced DAD is disabled. */
1560	if (V_dad_enhanced == 0)
1561		ndopt_nonce = NULL;
1562	dp = nd6_dad_find(ifa, ndopt_nonce);
1563	if (dp == NULL)
1564		return;
1565
1566	dp->dad_ns_icount++;
1567	nd6_dad_rele(dp);
1568}
1569
1570static void
1571nd6_dad_na_input(struct ifaddr *ifa)
1572{
1573	struct dadq *dp;
1574
1575	if (ifa == NULL)
1576		panic("ifa == NULL in nd6_dad_na_input");
1577
1578	dp = nd6_dad_find(ifa, NULL);
1579	if (dp != NULL) {
1580		dp->dad_na_icount++;
1581		nd6_dad_rele(dp);
1582	}
1583}
1584