1/*
2 * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Prefix-based Neighbor Discovery Proxy
31 *
32 * When an interface is marked with the ND6_IFF_PROXY_PREFIXES flag, all
33 * of current and future non-scoped on-link prefixes configured on the
34 * interface will be shared with the scoped variant of such prefixes on
35 * other interfaces.  This allows for one or more prefixes to be shared
 * across multiple links, with full support for Duplicate Address Detection,
37 * Address Resolution and Neighbor Unreachability Detection.
38 *
39 * A non-scoped prefix may be configured statically, or dynamically via
40 * Router Advertisement.  An interface is said to be an "upstream" interface
41 * when it is marked with ND6_IFF_PROXY_PREFIXES and has at least one prefix
42 * that is non-scoped (global, not scoped.)  Such prefixes are marked with
43 * the NDPRF_PRPROXY flag.
44 *
45 * A scoped prefix typically gets configured by way of adding an address
46 * to a "downstream" interface, when the added address is part of an existing
47 * prefix that is allowed to be shared (i.e. NDPRF_PRPROXY prefixes.)  Unlike
48 * non-scoped prefixes, however, scoped prefixes will never be marked with
49 * the NDPRF_PRPROXY flag.
50 *
51 * The setting of NDPRF_PRPROXY depends on whether the prefix is on-link;
52 * an off-link prefix on an interface marked with ND6_IFF_PROXY_PREFIXES
53 * will not cause NDPRF_PRPROXY to be set (it will only happen when that
54 * prefix goes on-link.)  Likewise, a previously on-link prefix that has
55 * transitioned to off-link will cause its NDPRF_PRPROXY flag to be cleared.
56 *
57 * Prefix proxying relies on IPv6 Scoped Routing to be in effect, as it would
58 * otherwise be impossible to install scoped prefix route entries in the
59 * routing table.  By default, such cloning prefix routes will generate cloned
60 * routes that are scoped according to their interfaces.  Because prefix
61 * proxying is essentially creating a larger network comprised of multiple
62 * links sharing a prefix, we need to treat the cloned routes as if they
63 * weren't scoped route entries.  This requires marking such cloning prefix
64 * routes with the RTF_PROXY flag, which serves as an indication that the
65 * route entry (and its clones) are part of a proxied prefix, and that the
66 * entries are non-scoped.
67 *
68 * In order to handle solicited-node destined ND packets (Address Resolution,
69 * Neighbor Unreachability Detection), prefix proxying also requires that the
70 * "upstream" and "downstream" interfaces be configured for all-multicast mode.
71 *
72 * The setting and clearing of RTF_PROXY flag, as well as the entering and
73 * exiting of all-multicast mode on those interfaces happen when a prefix
 * transitions from on-link to off-link, and vice versa.
75 *
76 * Note that this is not a strict implementation of RFC 4389, but rather a
77 * derivative based on similar concept.  In particular, we only proxy NS and
78 * NA packets; RA packets are never proxied.  Care should be taken to enable
79 * prefix proxying only on non-looping network topology.
80 */
81
82#include <sys/param.h>
83#include <sys/systm.h>
84#include <sys/malloc.h>
85#include <sys/mbuf.h>
86#include <sys/errno.h>
87#include <sys/syslog.h>
88#include <sys/sysctl.h>
89#include <sys/mcache.h>
90#include <sys/protosw.h>
91
92#include <kern/queue.h>
93#include <kern/zalloc.h>
94
95#include <net/if.h>
96#include <net/if_var.h>
97#include <net/if_types.h>
98#include <net/route.h>
99
100#include <netinet/in.h>
101#include <netinet/in_var.h>
102#include <netinet6/in6_var.h>
103#include <netinet/ip6.h>
104#include <netinet6/ip6_var.h>
105#include <netinet/icmp6.h>
106#include <netinet6/nd6.h>
107#include <netinet6/scope6_var.h>
108
/*
 * Work-list element.  Prefixes of interest are collected into singly-linked
 * lists of these while walking nd_prefix, and are acted upon afterwards.
 * Each element carries a reference on the prefix(es) it points to.
 */
struct nd6_prproxy_prelist {
	SLIST_ENTRY(nd6_prproxy_prelist) ndprl_le;	/* list linkage */
	struct nd_prefix *ndprl_pr;		/* prefix */
	/*
	 * Upstream prefix that ndprl_pr is a downstream of; left NULL on
	 * elements that describe an upstream prefix themselves.
	 */
	struct nd_prefix *ndprl_up;		/* non-NULL for upstream */
	struct ifnet	*ndprl_fwd_ifp;		/* outgoing interface */
	boolean_t	ndprl_sol;		/* unicast solicitor? */
	struct in6_addr	ndprl_sol_saddr;	/* solicitor's address */
};
117
/*
 * Soliciting node (source) record.
 *
 * One record per queued solicitor; records are linked onto the soltgt_q
 * tail queue of the solicited-target record they belong to.
 */
struct nd6_prproxy_solsrc {
	TAILQ_ENTRY(nd6_prproxy_solsrc) solsrc_tqe;	/* queue linkage */
	struct in6_addr solsrc_saddr;		/* soliciting (src) address */
	struct ifnet	*solsrc_ifp;		/* iface where NS arrived on */
};
126
/*
 * Solicited node (target) record.
 *
 * Records are kept in a red-black tree (prproxy_sols_tree) ordered by
 * soltgt_cmp(); each record owns a queue of soliciting-source records.
 */
struct nd6_prproxy_soltgt {
	RB_ENTRY(nd6_prproxy_soltgt) soltgt_link; /* RB tree links */
	struct soltgt_key_s {
		struct in6_addr taddr;		/* solicited (tgt) address */
	} soltgt_key;
	u_int64_t	soltgt_expire;		/* expiration time */
	u_int32_t	soltgt_cnt;		/* total # of solicitors */
	TAILQ_HEAD(, nd6_prproxy_solsrc) soltgt_q;	/* queued solicitors */
};
139
/* Named list head type for lists of nd6_prproxy_prelist elements */
SLIST_HEAD(nd6_prproxy_prelist_head, nd6_prproxy_prelist);

/* Forward declarations of file-local routines */
static void nd6_prproxy_prelist_setroute(boolean_t enable,
    struct nd6_prproxy_prelist_head *, struct nd6_prproxy_prelist_head *);
static struct nd6_prproxy_prelist *nd6_ndprl_alloc(int);
static void nd6_ndprl_free(struct nd6_prproxy_prelist *);
static struct nd6_prproxy_solsrc *nd6_solsrc_alloc(int);
static void nd6_solsrc_free(struct nd6_prproxy_solsrc *);
static boolean_t nd6_solsrc_enq(struct nd_prefix *, struct ifnet *,
    struct in6_addr *, struct in6_addr *);
static boolean_t nd6_solsrc_deq(struct nd_prefix *, struct in6_addr *,
    struct in6_addr *, struct ifnet **);
static struct nd6_prproxy_soltgt *nd6_soltgt_alloc(int);
static void nd6_soltgt_free(struct nd6_prproxy_soltgt *);
static void nd6_soltgt_prune(struct nd6_prproxy_soltgt *, u_int32_t);
static __inline int soltgt_cmp(const struct nd6_prproxy_soltgt *,
    const struct nd6_prproxy_soltgt *);
static void nd6_prproxy_sols_purge(struct nd_prefix *, u_int64_t);

/* Prototypes for the red-black tree of solicited-target records */
RB_PROTOTYPE_SC_PREV(__private_extern__, prproxy_sols_tree, nd6_prproxy_soltgt,
    soltgt_link, soltgt_cmp);
161
/*
 * Time (in seconds) before a target record expires (is idle).
 */
#define	ND6_TGT_SOLS_EXPIRE			5

/*
 * Maximum number of queued soliciting (source) records per target.
 */
#define	ND6_MAX_SRC_SOLS_DEFAULT		4

/*
 * Maximum number of queued solicited (target) records per prefix.
 */
#define	ND6_MAX_TGT_SOLS_DEFAULT		8

/*
 * Run-time values of the two limits above; both are exported read-write
 * through the sysctls declared further down.
 */
static u_int32_t nd6_max_tgt_sols = ND6_MAX_TGT_SOLS_DEFAULT;
static u_int32_t nd6_max_src_sols = ND6_MAX_SRC_SOLS_DEFAULT;

/*
 * Zone bookkeeping for the three record types.  The sizes and zone
 * pointers are filled in by nd6_prproxy_init().
 */
static unsigned int ndprl_size;			/* size of zone element */
static struct zone *ndprl_zone;			/* nd6_prproxy_prelist zone */

#define	NDPRL_ZONE_MAX	256			/* maximum elements in zone */
#define	NDPRL_ZONE_NAME	"nd6_prproxy_prelist"	/* name for zone */

static unsigned int solsrc_size;		/* size of zone element */
static struct zone *solsrc_zone;		/* nd6_prproxy_solsrc zone */

#define	SOLSRC_ZONE_MAX	 256			/* maximum elements in zone */
#define	SOLSRC_ZONE_NAME "nd6_prproxy_solsrc"	/* name for zone */

static unsigned int soltgt_size;		/* size of zone element */
static struct zone *soltgt_zone;		/* nd6_prproxy_soltgt zone */

#define	SOLTGT_ZONE_MAX	 256			/* maximum elements in zone */
#define	SOLTGT_ZONE_NAME "nd6_prproxy_soltgt"	/* name for zone */

/* The following is protected by ndpr_lock */
RB_GENERATE_PREV(prproxy_sols_tree, nd6_prproxy_soltgt,
    soltgt_link, soltgt_cmp);

/*
 * Number of proxied prefixes; exported read-only as prproxy_cnt.
 * The following is protected by proxy6_lock (for updates)
 */
u_int32_t nd6_prproxy;

extern lck_mtx_t *nd6_mutex;

SYSCTL_DECL(_net_inet6_icmp6);

/* Read-write: limit on solicited targets tracked per prefix */
SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, nd6_maxsolstgt,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_max_tgt_sols, ND6_MAX_TGT_SOLS_DEFAULT,
    "maximum number of outstanding solicited targets per prefix");

/* Read-write: limit on solicitations queued per target */
SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, nd6_maxproxiedsol,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_max_src_sols, ND6_MAX_SRC_SOLS_DEFAULT,
    "maximum number of outstanding solicitations per target");

/* Read-only: current value of nd6_prproxy */
SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, prproxy_cnt,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nd6_prproxy, 0,
    "total number of proxied prefixes");
220
221/*
222 * Called by nd6_init() during initialization time.
223 */
224void
225nd6_prproxy_init(void)
226{
227	ndprl_size = sizeof (struct nd6_prproxy_prelist);
228	ndprl_zone = zinit(ndprl_size, NDPRL_ZONE_MAX * ndprl_size, 0,
229	    NDPRL_ZONE_NAME);
230	if (ndprl_zone == NULL)
231		panic("%s: failed allocating ndprl_zone", __func__);
232
233	zone_change(ndprl_zone, Z_EXPAND, TRUE);
234	zone_change(ndprl_zone, Z_CALLERACCT, FALSE);
235
236	solsrc_size = sizeof (struct nd6_prproxy_solsrc);
237	solsrc_zone = zinit(solsrc_size, SOLSRC_ZONE_MAX * solsrc_size, 0,
238	    SOLSRC_ZONE_NAME);
239	if (solsrc_zone == NULL)
240		panic("%s: failed allocating solsrc_zone", __func__);
241
242	zone_change(solsrc_zone, Z_EXPAND, TRUE);
243	zone_change(solsrc_zone, Z_CALLERACCT, FALSE);
244
245	soltgt_size = sizeof (struct nd6_prproxy_soltgt);
246	soltgt_zone = zinit(soltgt_size, SOLTGT_ZONE_MAX * soltgt_size, 0,
247	    SOLTGT_ZONE_NAME);
248	if (soltgt_zone == NULL)
249		panic("%s: failed allocating soltgt_zone", __func__);
250
251	zone_change(soltgt_zone, Z_EXPAND, TRUE);
252	zone_change(soltgt_zone, Z_CALLERACCT, FALSE);
253}
254
255static struct nd6_prproxy_prelist *
256nd6_ndprl_alloc(int how)
257{
258	struct nd6_prproxy_prelist *ndprl;
259
260	ndprl = (how == M_WAITOK) ? zalloc(ndprl_zone) :
261	    zalloc_noblock(ndprl_zone);
262	if (ndprl != NULL)
263		bzero(ndprl, ndprl_size);
264
265	return (ndprl);
266}
267
/*
 * Return an element obtained from nd6_ndprl_alloc() to ndprl_zone.  Any
 * prefix references recorded in the element are NOT released here; the
 * callers in this file drop them before freeing.
 */
static void
nd6_ndprl_free(struct nd6_prproxy_prelist *ndprl)
{
	zfree(ndprl_zone, ndprl);
}
273
/*
 * Apply routing function on the affected upstream and downstream prefixes,
 * i.e. either set or clear RTF_PROXY on the cloning prefix route; all route
 * entries that were cloned off these prefixes will be blown away.  Caller
 * must have acquired proxy6_lock and must not be holding nd6_mutex.
 *
 * enable:    TRUE to turn proxying on for the listed prefixes, FALSE to
 *            turn it off.
 * up_head:   list of upstream prefixes (elements with ndprl_up == NULL).
 * down_head: list of downstream prefixes (elements with ndprl_up set).
 *
 * Both lists are consumed: each element is unlinked, the prefix
 * reference(s) it carries are released, and the element is freed.
 */
static void
nd6_prproxy_prelist_setroute(boolean_t enable,
    struct nd6_prproxy_prelist_head *up_head,
    struct nd6_prproxy_prelist_head *down_head)
{
	struct nd6_prproxy_prelist *up, *down, *ndprl_tmp;
	struct nd_prefix *pr;

	lck_mtx_assert(&proxy6_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);

	/* Pass 1: upstream prefixes */
	SLIST_FOREACH_SAFE(up, up_head, ndprl_le, ndprl_tmp) {
		struct rtentry *rt;
		boolean_t prproxy, set_allmulti = FALSE;
		int allmulti_sw;
		struct ifnet *ifp = NULL;

		SLIST_REMOVE(up_head, up, nd6_prproxy_prelist, ndprl_le);
		pr = up->ndprl_pr;
		VERIFY(up->ndprl_up == NULL);

		NDPR_LOCK(pr);
		ifp = pr->ndpr_ifp;
		prproxy = (pr->ndpr_stateflags & NDPRF_PRPROXY);
		/* A proxied prefix must be on-link and non-scoped */
		VERIFY(!prproxy || ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
		    !(pr->ndpr_stateflags & NDPRF_IFSCOPE)));

		/* Drop solicitation state; none may remain afterwards */
		nd6_prproxy_sols_reap(pr);
		VERIFY(pr->ndpr_prproxy_sols_cnt == 0);
		VERIFY(RB_EMPTY(&pr->ndpr_prproxy_sols));

		/*
		 * Toggle all-multicast only on an actual transition of the
		 * per-prefix count; the global nd6_prproxy counter follows
		 * the same transitions (upstream prefixes only).
		 */
		if (enable && pr->ndpr_allmulti_cnt == 0) {
			nd6_prproxy++;
			pr->ndpr_allmulti_cnt++;
			set_allmulti = TRUE;
			allmulti_sw = TRUE;
		} else if (!enable && pr->ndpr_allmulti_cnt > 0) {
			nd6_prproxy--;
			pr->ndpr_allmulti_cnt--;
			set_allmulti = TRUE;
			allmulti_sw = FALSE;
		}

		/*
		 * Touch the prefix route only if the requested state agrees
		 * with NDPRF_PRPROXY; take a route reference so it remains
		 * usable once the prefix lock is dropped.
		 */
		if ((rt = pr->ndpr_rt) != NULL) {
			if ((enable && prproxy) || (!enable && !prproxy))
				RT_ADDREF(rt);
			else
				rt = NULL;
			NDPR_UNLOCK(pr);
		} else {
			NDPR_UNLOCK(pr);
		}

		/* Call the following ioctl after releasing NDPR lock */
		if (set_allmulti && ifp != NULL)
			if_allmulti(ifp, allmulti_sw);


		/* Release the list's prefix reference, then fix the route */
		NDPR_REMREF(pr);
		if (rt != NULL) {
			rt_set_proxy(rt, enable);
			rtfree(rt);
		}
		nd6_ndprl_free(up);
	}

	/* Pass 2: downstream (scoped) prefixes */
	SLIST_FOREACH_SAFE(down, down_head, ndprl_le, ndprl_tmp) {
		struct nd_prefix *pr_up;
		struct rtentry *rt;
		boolean_t prproxy, set_allmulti = FALSE;
		int allmulti_sw;
		struct ifnet *ifp = NULL;

		SLIST_REMOVE(down_head, down, nd6_prproxy_prelist, ndprl_le);
		pr = down->ndprl_pr;
		pr_up = down->ndprl_up;
		VERIFY(pr_up != NULL);

		/* The proxy state is dictated by the upstream prefix */
		NDPR_LOCK(pr_up);
		/*
		 * NOTE(review): pr->ndpr_ifp is read here while only pr_up's
		 * lock is held (the upstream pass reads it under pr's own
		 * lock) -- confirm ndpr_ifp does not need pr's lock.
		 */
		ifp = pr->ndpr_ifp;
		prproxy = (pr_up->ndpr_stateflags & NDPRF_PRPROXY);
		VERIFY(!prproxy || ((pr_up->ndpr_stateflags & NDPRF_ONLINK) &&
		    !(pr_up->ndpr_stateflags & NDPRF_IFSCOPE)));
		NDPR_UNLOCK(pr_up);

		NDPR_LOCK(pr);
		/* Same transition rule as above; nd6_prproxy is not touched */
		if (enable && pr->ndpr_allmulti_cnt == 0) {
			pr->ndpr_allmulti_cnt++;
			set_allmulti = TRUE;
			allmulti_sw = TRUE;
		} else if (!enable && pr->ndpr_allmulti_cnt > 0) {
			pr->ndpr_allmulti_cnt--;
			set_allmulti = TRUE;
			allmulti_sw = FALSE;
		}

		if ((rt = pr->ndpr_rt) != NULL) {
			if ((enable && prproxy) || (!enable && !prproxy))
				RT_ADDREF(rt);
			else
				rt = NULL;
			NDPR_UNLOCK(pr);
		} else {
			NDPR_UNLOCK(pr);
		}
		/* As above, the ioctl is issued with no prefix lock held */
		if (set_allmulti && ifp != NULL)
			if_allmulti(ifp, allmulti_sw);

		/* A downstream element holds a reference on both prefixes */
		NDPR_REMREF(pr);
		NDPR_REMREF(pr_up);
		if (rt != NULL) {
			rt_set_proxy(rt, enable);
			rtfree(rt);
		}
		nd6_ndprl_free(down);
	}
}
397
398/*
399 * Enable/disable prefix proxying on an interface; typically called
400 * as part of handling SIOCSIFINFO_FLAGS[IFEF_IPV6_ROUTER].
401 */
402int
403nd6_if_prproxy(struct ifnet *ifp, boolean_t enable)
404{
405	SLIST_HEAD(, nd6_prproxy_prelist) up_head;
406	SLIST_HEAD(, nd6_prproxy_prelist) down_head;
407	struct nd6_prproxy_prelist *up, *down;
408	struct nd_prefix *pr;
409
410	/* Can't be enabled if we are an advertising router on the interface */
411	ifnet_lock_shared(ifp);
412	if (enable && (ifp->if_eflags & IFEF_IPV6_ROUTER)) {
413		ifnet_lock_done(ifp);
414		return (EBUSY);
415	}
416	ifnet_lock_done(ifp);
417
418	SLIST_INIT(&up_head);
419	SLIST_INIT(&down_head);
420
421	/*
422	 * Serialize the clearing/setting of NDPRF_PRPROXY.
423	 */
424	lck_mtx_lock(&proxy6_lock);
425
426	/*
427	 * First build a list of upstream prefixes on this interface for
428	 * which we need to enable/disable prefix proxy functionality.
429	 */
430	lck_mtx_lock(nd6_mutex);
431	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
432		NDPR_LOCK(pr);
433		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
434		    (!enable && !(pr->ndpr_stateflags & NDPRF_PRPROXY)) ||
435		    (enable && (pr->ndpr_stateflags & NDPRF_PRPROXY)) ||
436		    (pr->ndpr_stateflags & NDPRF_IFSCOPE) ||
437		    pr->ndpr_ifp != ifp) {
438			NDPR_UNLOCK(pr);
439			continue;
440		}
441
442		/*
443		 * At present, in order for the prefix to be eligible
444		 * as a proxying/proxied prefix, we require that the
445		 * prefix route entry be marked as a cloning route with
446		 * RTF_PROXY; i.e. nd6_need_cache() needs to return
447		 * true for the interface type.
448		 */
449		if (enable && (pr->ndpr_stateflags & NDPRF_ONLINK) &&
450		    nd6_need_cache(ifp)) {
451			pr->ndpr_stateflags |= NDPRF_PRPROXY;
452			NDPR_ADDREF_LOCKED(pr);
453			NDPR_UNLOCK(pr);
454		} else if (!enable) {
455			pr->ndpr_stateflags &= ~NDPRF_PRPROXY;
456			NDPR_ADDREF_LOCKED(pr);
457			NDPR_UNLOCK(pr);
458		} else {
459			NDPR_UNLOCK(pr);
460			pr = NULL;	/* don't go further */
461		}
462
463		if (pr == NULL)
464			break;
465
466		up = nd6_ndprl_alloc(M_WAITOK);
467		if (up == NULL) {
468			NDPR_REMREF(pr);
469			continue;
470		}
471
472		up->ndprl_pr = pr;	/* keep reference from above */
473		SLIST_INSERT_HEAD(&up_head, up, ndprl_le);
474	}
475
476	/*
477	 * Now build a list of matching (scoped) downstream prefixes on other
478	 * interfaces which need to be enabled/disabled accordingly.  Note that
479	 * the NDPRF_PRPROXY is never set/cleared on the downstream prefixes.
480	 */
481	SLIST_FOREACH(up, &up_head, ndprl_le) {
482		struct nd_prefix *fwd;
483		struct in6_addr pr_addr;
484		u_char pr_len;
485
486		pr = up->ndprl_pr;
487
488		NDPR_LOCK(pr);
489		bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
490		pr_len = pr->ndpr_plen;
491		NDPR_UNLOCK(pr);
492
493		for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
494			NDPR_LOCK(fwd);
495			if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
496			    !(fwd->ndpr_stateflags & NDPRF_IFSCOPE) ||
497			    fwd->ndpr_plen != pr_len ||
498			    !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
499			    &pr_addr, pr_len)) {
500				NDPR_UNLOCK(fwd);
501				continue;
502			}
503			NDPR_UNLOCK(fwd);
504
505			down = nd6_ndprl_alloc(M_WAITOK);
506			if (down == NULL)
507				continue;
508
509			NDPR_ADDREF(fwd);
510			down->ndprl_pr = fwd;
511			NDPR_ADDREF(pr);
512			down->ndprl_up = pr;
513			SLIST_INSERT_HEAD(&down_head, down, ndprl_le);
514		}
515	}
516	lck_mtx_unlock(nd6_mutex);
517
518	/*
519	 * Apply routing function on prefixes; callee will free resources.
520	 */
521	nd6_prproxy_prelist_setroute(enable,
522	    (struct nd6_prproxy_prelist_head *)&up_head,
523	    (struct nd6_prproxy_prelist_head *)&down_head);
524
525	VERIFY(SLIST_EMPTY(&up_head));
526	VERIFY(SLIST_EMPTY(&down_head));
527
528	lck_mtx_unlock(&proxy6_lock);
529
530	return (0);
531}
532
533/*
534 * Called from the input path to determine whether the packet is destined
535 * to a proxied node; if so, mark the mbuf with PKTFF_PROXY_DST so that
536 * icmp6_input() knows that this is not to be delivered to socket(s).
537 */
538boolean_t
539nd6_prproxy_isours(struct mbuf *m, struct ip6_hdr *ip6, struct route_in6 *ro6,
540    unsigned int ifscope)
541{
542	struct rtentry *rt;
543	boolean_t ours = FALSE;
544
545	if (ip6->ip6_hlim != IPV6_MAXHLIM || ip6->ip6_nxt != IPPROTO_ICMPV6)
546		goto done;
547
548	if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst) ||
549	    IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) {
550		VERIFY(ro6 == NULL);
551		ours = TRUE;
552		goto done;
553	} else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
554		goto done;
555	}
556
557	if (ro6 == NULL)
558		goto done;
559
560	if ((rt = ro6->ro_rt) != NULL)
561		RT_LOCK(rt);
562
563	if (ROUTE_UNUSABLE(ro6)) {
564		if (rt != NULL)
565			RT_UNLOCK(rt);
566
567		ROUTE_RELEASE(ro6);
568
569		/* Caller must have ensured this condition (not srcrt) */
570		VERIFY(IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
571		    &ro6->ro_dst.sin6_addr));
572
573		rtalloc_scoped_ign((struct route *)ro6, RTF_PRCLONING, ifscope);
574		if ((rt = ro6->ro_rt) == NULL)
575			goto done;
576
577		RT_LOCK(rt);
578	}
579
580	ours = (rt->rt_flags & RTF_PROXY) ? TRUE : FALSE;
581	RT_UNLOCK(rt);
582
583done:
584	if (ours)
585		m->m_pkthdr.pkt_flags |= PKTF_PROXY_DST;
586
587	return (ours);
588}
589
/*
 * Called from the input path to determine whether or not the proxy
 * route entry is pointing to the correct interface, and to perform
 * the necessary route fixups otherwise.
 *
 * ifp: interface the packet arrived on.
 * ro6: route whose ro_dst is the destination being forwarded; on return
 *      ro_rt is either a RTF_PROXY route or NULL.
 *
 * Takes nd6_mutex while searching the prefix list and rnh_lock while
 * manipulating routes.
 */
void
nd6_proxy_find_fwdroute(struct ifnet *ifp, struct route_in6 *ro6)
{
	struct in6_addr *dst6 = &ro6->ro_dst.sin6_addr;
	struct ifnet *fwd_ifp = NULL;
	struct nd_prefix *pr;
	struct rtentry *rt;

	if ((rt = ro6->ro_rt) != NULL) {
		RT_LOCK(rt);
		/* Unsuitable if not a proxy route, or if it points back at ifp */
		if (!(rt->rt_flags & RTF_PROXY) || rt->rt_ifp == ifp) {
			nd6log2((LOG_DEBUG, "%s: found incorrect prefix "
			    "proxy route for dst %s on %s\n", if_name(ifp),
			    ip6_sprintf(dst6),
			    if_name(rt->rt_ifp)));
			RT_UNLOCK(rt);
			/* look it up below */
		} else {
			RT_UNLOCK(rt);
			/*
			 * The route is already marked with RTF_PROXY and
			 * it isn't pointing back to the inbound interface;
			 * optimistically return (see notes below).
			 */
			return;
		}
	}

	/*
	 * Find out where we should forward this packet to, by searching
	 * for another interface that is proxying for the prefix.  Our
	 * current implementation assumes that the proxied prefix is shared
	 * to no more than one downstream interfaces (typically a bridge
	 * interface).
	 */
	lck_mtx_lock(nd6_mutex);
	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
		struct in6_addr pr_addr;
		struct nd_prefix *fwd;
		u_char pr_len;

		NDPR_LOCK(pr);
		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
		    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
		    !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
		    dst6, &pr->ndpr_mask)) {
			NDPR_UNLOCK(pr);
			continue;
		}

		VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
		bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
		pr_len = pr->ndpr_plen;
		NDPR_UNLOCK(pr);

		/*
		 * Pick the first on-link prefix with the same address and
		 * length that sits on an interface other than the inbound one.
		 */
		for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
			NDPR_LOCK(fwd);
			if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
			    fwd->ndpr_ifp == ifp ||
			    fwd->ndpr_plen != pr_len ||
			    !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
			    &pr_addr, pr_len)) {
				NDPR_UNLOCK(fwd);
				continue;
			}

			fwd_ifp = fwd->ndpr_ifp;
			NDPR_UNLOCK(fwd);
			break;
		}
		/* Only the first matching proxied prefix is considered */
		break;
	}
	lck_mtx_unlock(nd6_mutex);

	lck_mtx_lock(rnh_lock);
	ROUTE_RELEASE_LOCKED(ro6);

	/*
	 * Lookup a forwarding route; delete the route if it's incorrect,
	 * or return to caller if the correct one got created prior to
	 * our acquiring the rnh_lock.
	 */
	if ((rt = rtalloc1_scoped_locked(SA(&ro6->ro_dst), 0,
	    RTF_CLONING | RTF_PRCLONING, IFSCOPE_NONE)) != NULL) {
		RT_LOCK(rt);
		if (rt->rt_ifp != fwd_ifp || !(rt->rt_flags & RTF_PROXY)) {
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			(void) rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
			rtfree_locked(rt);
			rt = NULL;
		} else {
			nd6log2((LOG_DEBUG, "%s: found prefix proxy route "
			    "for dst %s\n", if_name(rt->rt_ifp),
			    ip6_sprintf(dst6)));
			RT_UNLOCK(rt);
			ro6->ro_rt = rt;	/* refcnt held by rtalloc1 */
			lck_mtx_unlock(rnh_lock);
			return;
		}
	}
	VERIFY(rt == NULL && ro6->ro_rt == NULL);

	/*
	 * Clone a route from the correct parent prefix route and return it.
	 */
	if (fwd_ifp != NULL && (rt = rtalloc1_scoped_locked(SA(&ro6->ro_dst), 1,
	    RTF_PRCLONING, fwd_ifp->if_index)) != NULL) {
		RT_LOCK(rt);
		if (!(rt->rt_flags & RTF_PROXY)) {
			/* Not a proxy route after all; give it back */
			RT_UNLOCK(rt);
			rtfree_locked(rt);
			rt = NULL;
		} else {
			nd6log2((LOG_DEBUG, "%s: allocated prefix proxy "
			    "route for dst %s\n", if_name(rt->rt_ifp),
			    ip6_sprintf(dst6)));
			RT_UNLOCK(rt);
			ro6->ro_rt = rt;	/* refcnt held by rtalloc1 */
		}
	}
	VERIFY(rt != NULL || ro6->ro_rt == NULL);

	if (fwd_ifp == NULL || rt == NULL) {
		nd6log2((LOG_ERR, "%s: failed to find forwarding prefix "
		    "proxy entry for dst %s\n", if_name(ifp),
		    ip6_sprintf(dst6)));
	}
	lck_mtx_unlock(rnh_lock);
}
726
727/*
728 * Called when a prefix transitions between on-link and off-link.  Perform
729 * routing (RTF_PROXY) and interface (all-multicast) related operations on
730 * the affected prefixes.
731 */
732void
733nd6_prproxy_prelist_update(struct nd_prefix *pr_cur, struct nd_prefix *pr_up)
734{
735	SLIST_HEAD(, nd6_prproxy_prelist) up_head;
736	SLIST_HEAD(, nd6_prproxy_prelist) down_head;
737	struct nd6_prproxy_prelist *up, *down;
738	struct nd_prefix *pr;
739	struct in6_addr pr_addr;
740	boolean_t enable;
741	u_char pr_len;
742
743	SLIST_INIT(&up_head);
744	SLIST_INIT(&down_head);
745	VERIFY(pr_cur != NULL);
746
747	lck_mtx_assert(&proxy6_lock, LCK_MTX_ASSERT_OWNED);
748
749	/*
750	 * Upstream prefix.  If caller did not specify one, search for one
751	 * based on the information in current prefix.  Caller is expected
752	 * to have held an extra reference for the passed-in prefixes.
753	 */
754	lck_mtx_lock(nd6_mutex);
755	if (pr_up == NULL) {
756		NDPR_LOCK(pr_cur);
757		bcopy(&pr_cur->ndpr_prefix.sin6_addr, &pr_addr,
758		    sizeof (pr_addr));
759		pr_len = pr_cur->ndpr_plen;
760		NDPR_UNLOCK(pr_cur);
761
762		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
763			NDPR_LOCK(pr);
764			if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
765			    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
766			    pr->ndpr_plen != pr_len ||
767			    !in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
768			    &pr_addr, pr_len)) {
769				NDPR_UNLOCK(pr);
770				continue;
771			}
772			NDPR_UNLOCK(pr);
773			break;
774		}
775
776		if ((pr_up = pr) == NULL) {
777			lck_mtx_unlock(nd6_mutex);
778			goto done;
779		}
780		NDPR_LOCK(pr_up);
781	} else {
782		NDPR_LOCK(pr_up);
783		bcopy(&pr_up->ndpr_prefix.sin6_addr, &pr_addr,
784		    sizeof (pr_addr));
785		pr_len = pr_up->ndpr_plen;
786	}
787	NDPR_LOCK_ASSERT_HELD(pr_up);
788	/*
789	 * Upstream prefix could be offlink by now; therefore we cannot
790	 * assert that NDPRF_PRPROXY is set; however, we can insist that
791	 * it must not be a scoped prefix.
792	 */
793	VERIFY(!(pr_up->ndpr_stateflags & NDPRF_IFSCOPE));
794	enable = (pr_up->ndpr_stateflags & NDPRF_PRPROXY);
795	NDPR_UNLOCK(pr_up);
796
797	up = nd6_ndprl_alloc(M_WAITOK);
798	if (up == NULL) {
799		lck_mtx_unlock(nd6_mutex);
800		goto done;
801	}
802
803	NDPR_ADDREF(pr_up);
804	up->ndprl_pr = pr_up;
805	SLIST_INSERT_HEAD(&up_head, up, ndprl_le);
806
807	/*
808	 * Now build a list of matching (scoped) downstream prefixes on other
809	 * interfaces which need to be enabled/disabled accordingly.  Note that
810	 * the NDPRF_PRPROXY is never set/cleared on the downstream prefixes.
811	 */
812	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
813		NDPR_LOCK(pr);
814		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
815		    !(pr->ndpr_stateflags & NDPRF_IFSCOPE) ||
816		    pr->ndpr_plen != pr_len ||
817		    !in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
818		    &pr_addr, pr_len)) {
819			NDPR_UNLOCK(pr);
820			continue;
821		}
822		NDPR_UNLOCK(pr);
823
824		down = nd6_ndprl_alloc(M_WAITOK);
825		if (down == NULL)
826			continue;
827
828		NDPR_ADDREF(pr);
829		down->ndprl_pr = pr;
830		NDPR_ADDREF(pr_up);
831		down->ndprl_up = pr_up;
832		SLIST_INSERT_HEAD(&down_head, down, ndprl_le);
833	}
834	lck_mtx_unlock(nd6_mutex);
835
836	/*
837	 * Apply routing function on prefixes; callee will free resources.
838	 */
839	nd6_prproxy_prelist_setroute(enable,
840	    (struct nd6_prproxy_prelist_head *)&up_head,
841	    (struct nd6_prproxy_prelist_head *)&down_head);
842
843done:
844	VERIFY(SLIST_EMPTY(&up_head));
845	VERIFY(SLIST_EMPTY(&down_head));
846}
847
848/*
849 * Given an interface address, determine whether or not the address
850 * is part of of a proxied prefix.
851 */
852boolean_t
853nd6_prproxy_ifaddr(struct in6_ifaddr *ia)
854{
855	struct nd_prefix *pr;
856	struct in6_addr addr, pr_mask;
857	u_int32_t pr_len;
858	boolean_t proxied = FALSE;
859
860	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
861
862	IFA_LOCK(&ia->ia_ifa);
863	bcopy(&ia->ia_addr.sin6_addr, &addr, sizeof (addr));
864	bcopy(&ia->ia_prefixmask.sin6_addr, &pr_mask, sizeof (pr_mask));
865	pr_len = ia->ia_plen;
866	IFA_UNLOCK(&ia->ia_ifa);
867
868	lck_mtx_lock(nd6_mutex);
869	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
870		NDPR_LOCK(pr);
871		if ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
872		    (pr->ndpr_stateflags & NDPRF_PRPROXY) &&
873		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
874		    &addr, pr_len)) {
875			NDPR_UNLOCK(pr);
876			proxied = TRUE;
877			break;
878		}
879		NDPR_UNLOCK(pr);
880	}
881	lck_mtx_unlock(nd6_mutex);
882
883	return (proxied);
884}
885
886/*
887 * Perform automatic proxy function with NS output.
888 *
889 * If the target address matches a global prefix obtained from a router
890 * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES
891 * flag set, then we send solicitations for the target address to all other
892 * interfaces where a matching prefix is currently on-link, in addition to
893 * the original interface.
894 */
895void
896nd6_prproxy_ns_output(struct ifnet *ifp, struct ifnet *exclifp,
897    struct in6_addr *daddr, struct in6_addr *taddr, struct llinfo_nd6 *ln)
898{
899	SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
900	struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
901	struct nd_prefix *pr, *fwd;
902	struct ifnet *fwd_ifp;
903	struct in6_addr pr_addr;
904	u_char pr_len;
905
906	/*
907	 * Ignore excluded interface if it's the same as the original;
908	 * we always send a NS on the original interface down below.
909	 */
910	if (exclifp != NULL && exclifp == ifp)
911		exclifp = NULL;
912
913	if (exclifp == NULL)
914		nd6log2((LOG_DEBUG, "%s: sending NS who has %s on ALL\n",
915		    if_name(ifp), ip6_sprintf(taddr)));
916	else
917		nd6log2((LOG_DEBUG, "%s: sending NS who has %s on ALL "
918		    "(except %s)\n", if_name(ifp),
919		    ip6_sprintf(taddr), if_name(exclifp)));
920
921	SLIST_INIT(&ndprl_head);
922
923	lck_mtx_lock(nd6_mutex);
924
925	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
926		NDPR_LOCK(pr);
927		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
928		    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
929		    !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
930		    taddr, &pr->ndpr_mask)) {
931			NDPR_UNLOCK(pr);
932			continue;
933		}
934
935		VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
936		bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
937		pr_len = pr->ndpr_plen;
938		NDPR_UNLOCK(pr);
939
940		for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
941			NDPR_LOCK(fwd);
942			if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
943			    fwd->ndpr_ifp == ifp || fwd->ndpr_ifp == exclifp ||
944			    fwd->ndpr_plen != pr_len ||
945			    !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
946			    &pr_addr, pr_len)) {
947				NDPR_UNLOCK(fwd);
948				continue;
949			}
950
951			fwd_ifp = fwd->ndpr_ifp;
952			NDPR_UNLOCK(fwd);
953
954			ndprl = nd6_ndprl_alloc(M_WAITOK);
955			if (ndprl == NULL)
956				continue;
957
958			NDPR_ADDREF(fwd);
959			ndprl->ndprl_pr = fwd;
960			ndprl->ndprl_fwd_ifp = fwd_ifp;
961
962			SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
963		}
964		break;
965	}
966
967	lck_mtx_unlock(nd6_mutex);
968
969	SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
970		SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
971
972		pr = ndprl->ndprl_pr;
973		fwd_ifp = ndprl->ndprl_fwd_ifp;
974
975		if ((fwd_ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
976			NDPR_REMREF(pr);
977			nd6_ndprl_free(ndprl);
978			continue;
979		}
980
981		NDPR_LOCK(pr);
982		if (pr->ndpr_stateflags & NDPRF_ONLINK) {
983			NDPR_UNLOCK(pr);
984			nd6log2((LOG_DEBUG,
985			    "%s: Sending cloned NS who has %s, originally "
986			    "on %s\n", if_name(fwd_ifp),
987			    ip6_sprintf(taddr), if_name(ifp)));
988
989			nd6_ns_output(fwd_ifp, daddr, taddr, NULL, 0);
990		} else {
991			NDPR_UNLOCK(pr);
992		}
993		NDPR_REMREF(pr);
994
995		nd6_ndprl_free(ndprl);
996	}
997	VERIFY(SLIST_EMPTY(&ndprl_head));
998
999	nd6_ns_output(ifp, daddr, taddr, ln, 0);
1000}
1001
1002/*
1003 * Perform automatic proxy function with NS input.
1004 *
1005 * If the target address matches a global prefix obtained from a router
1006 * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES
1007 * flag set, then we send solicitations for the target address to all other
1008 * interfaces where a matching prefix is currently on-link.
1009 */
1010void
1011nd6_prproxy_ns_input(struct ifnet *ifp, struct in6_addr *saddr,
1012    char *lladdr, int lladdrlen, struct in6_addr *daddr, struct in6_addr *taddr)
1013{
1014	SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
1015	struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
1016	struct nd_prefix *pr, *fwd;
1017	struct ifnet *fwd_ifp;
1018	struct in6_addr pr_addr;
1019	u_char pr_len;
1020	boolean_t solrec = FALSE;
1021
1022	SLIST_INIT(&ndprl_head);
1023
1024	lck_mtx_lock(nd6_mutex);
1025
1026	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1027		NDPR_LOCK(pr);
1028		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
1029		    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
1030		    !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1031		    taddr, &pr->ndpr_mask)) {
1032			NDPR_UNLOCK(pr);
1033			continue;
1034		}
1035
1036		VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
1037		bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
1038		pr_len = pr->ndpr_plen;
1039
1040		/*
1041		 * If this is a NS for NUD/AR, record it so that we know
1042		 * how to forward the NA reply later on (if/when it arrives.)
1043		 * Give up if we fail to save the NS info.
1044		 */
1045		if ((solrec = !IN6_IS_ADDR_UNSPECIFIED(saddr)) &&
1046		    !nd6_solsrc_enq(pr, ifp, saddr, taddr)) {
1047			NDPR_UNLOCK(pr);
1048			solrec = FALSE;
1049			break;			/* bail out */
1050		} else {
1051			NDPR_UNLOCK(pr);
1052		}
1053
1054		for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
1055			NDPR_LOCK(fwd);
1056			if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
1057			    fwd->ndpr_ifp == ifp ||
1058			    fwd->ndpr_plen != pr_len ||
1059			    !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
1060			    &pr_addr, pr_len)) {
1061				NDPR_UNLOCK(fwd);
1062				continue;
1063			}
1064
1065			fwd_ifp = fwd->ndpr_ifp;
1066			NDPR_UNLOCK(fwd);
1067
1068			ndprl = nd6_ndprl_alloc(M_WAITOK);
1069			if (ndprl == NULL)
1070				continue;
1071
1072			NDPR_ADDREF(fwd);
1073			ndprl->ndprl_pr = fwd;
1074			ndprl->ndprl_fwd_ifp = fwd_ifp;
1075			ndprl->ndprl_sol = solrec;
1076
1077			SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
1078		}
1079		break;
1080	}
1081
1082	lck_mtx_unlock(nd6_mutex);
1083
1084	/*
1085	 * If this is a recorded solicitation (NS for NUD/AR), create
1086	 * or update the neighbor cache entry for the soliciting node.
1087	 * Later on, when the NA reply arrives, we will need this cache
1088	 * entry in order to send the NA back to the original solicitor.
1089	 * Without a neighbor cache entry, we'd end up with an endless
1090	 * cycle of NS ping-pong between the us (the proxy) and the node
1091	 * which is soliciting for the address.
1092	 */
1093	if (solrec) {
1094		VERIFY(!IN6_IS_ADDR_UNSPECIFIED(saddr));
1095		nd6_cache_lladdr(ifp, saddr, lladdr, lladdrlen,
1096		    ND_NEIGHBOR_SOLICIT, 0);
1097	}
1098
1099	SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
1100		SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
1101
1102		pr = ndprl->ndprl_pr;
1103		fwd_ifp = ndprl->ndprl_fwd_ifp;
1104
1105		if ((fwd_ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
1106			NDPR_REMREF(pr);
1107			nd6_ndprl_free(ndprl);
1108			continue;
1109		}
1110
1111		NDPR_LOCK(pr);
1112		if (pr->ndpr_stateflags & NDPRF_ONLINK) {
1113			NDPR_UNLOCK(pr);
1114			nd6log2((LOG_DEBUG,
1115			    "%s: Forwarding NS (%s) from %s to %s who "
1116			    "has %s, originally on %s\n", if_name(fwd_ifp),
1117			    ndprl->ndprl_sol ? "NUD/AR" :
1118			    "DAD", ip6_sprintf(saddr), ip6_sprintf(daddr),
1119			    ip6_sprintf(taddr), if_name(ifp)));
1120
1121			nd6_ns_output(fwd_ifp, ndprl->ndprl_sol ? taddr : NULL,
1122			    taddr, NULL, !ndprl->ndprl_sol);
1123		} else {
1124			NDPR_UNLOCK(pr);
1125		}
1126		NDPR_REMREF(pr);
1127
1128		nd6_ndprl_free(ndprl);
1129	}
1130	VERIFY(SLIST_EMPTY(&ndprl_head));
1131}
1132
1133/*
1134 * Perform automatic proxy function with NA input.
1135 *
1136 * If the target address matches a global prefix obtained from a router
1137 * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES flag
1138 * set, then we send neighbor advertisements for the target address on all
1139 * other interfaces where a matching prefix is currently on link.
1140 */
1141void
1142nd6_prproxy_na_input(struct ifnet *ifp, struct in6_addr *saddr,
1143    struct in6_addr *daddr0, struct in6_addr *taddr, int flags)
1144{
1145	SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
1146	struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
1147	struct nd_prefix *pr;
1148	struct ifnet *fwd_ifp;
1149	struct in6_addr daddr;
1150
1151	SLIST_INIT(&ndprl_head);
1152
1153
1154	lck_mtx_lock(nd6_mutex);
1155
1156	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1157		NDPR_LOCK(pr);
1158		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
1159		    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
1160		    !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1161		    taddr, &pr->ndpr_mask)) {
1162			NDPR_UNLOCK(pr);
1163			continue;
1164		}
1165
1166		VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
1167		/*
1168		 * If this is a NA for NUD, see if there is a record created
1169		 * for the corresponding NS; upon success, we get back the
1170		 * interface where the NS originally arrived on, as well as
1171		 * the soliciting node's address.  Give up if we can't find it.
1172		 */
1173		if (!IN6_IS_ADDR_MULTICAST(daddr0)) {
1174			fwd_ifp = NULL;
1175			bzero(&daddr, sizeof (daddr));
1176			if (!nd6_solsrc_deq(pr, taddr, &daddr, &fwd_ifp)) {
1177				NDPR_UNLOCK(pr);
1178				break;		/* bail out */
1179			}
1180			VERIFY(!IN6_IS_ADDR_UNSPECIFIED(&daddr) && fwd_ifp);
1181			NDPR_UNLOCK(pr);
1182
1183			ndprl = nd6_ndprl_alloc(M_WAITOK);
1184			if (ndprl == NULL)
1185				break;		/* bail out */
1186
1187			ndprl->ndprl_fwd_ifp = fwd_ifp;
1188			ndprl->ndprl_sol = TRUE;
1189			ndprl->ndprl_sol_saddr = *(&daddr);
1190
1191			SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
1192		} else {
1193			struct nd_prefix *fwd;
1194			struct in6_addr pr_addr;
1195			u_char pr_len;
1196
1197			bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr,
1198			    sizeof (pr_addr));
1199			pr_len = pr->ndpr_plen;
1200			NDPR_UNLOCK(pr);
1201
1202			for (fwd = nd_prefix.lh_first; fwd;
1203			    fwd = fwd->ndpr_next) {
1204				NDPR_LOCK(fwd);
1205				if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
1206				    fwd->ndpr_ifp == ifp ||
1207				    fwd->ndpr_plen != pr_len ||
1208				    !in6_are_prefix_equal(
1209				    &fwd->ndpr_prefix.sin6_addr,
1210				    &pr_addr, pr_len)) {
1211					NDPR_UNLOCK(fwd);
1212					continue;
1213				}
1214
1215				fwd_ifp = fwd->ndpr_ifp;
1216				NDPR_UNLOCK(fwd);
1217
1218				ndprl = nd6_ndprl_alloc(M_WAITOK);
1219				if (ndprl == NULL)
1220					continue;
1221
1222				NDPR_ADDREF(fwd);
1223				ndprl->ndprl_pr = fwd;
1224				ndprl->ndprl_fwd_ifp = fwd_ifp;
1225
1226				SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
1227			}
1228		}
1229		break;
1230	}
1231
1232	lck_mtx_unlock(nd6_mutex);
1233
1234	SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
1235		boolean_t send_na;
1236
1237		SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
1238
1239		pr = ndprl->ndprl_pr;
1240		fwd_ifp = ndprl->ndprl_fwd_ifp;
1241
1242		if (ndprl->ndprl_sol) {
1243			VERIFY(pr == NULL);
1244			daddr = *(&ndprl->ndprl_sol_saddr);
1245			VERIFY(!IN6_IS_ADDR_UNSPECIFIED(&daddr));
1246			send_na = (in6_setscope(&daddr, fwd_ifp, NULL) == 0);
1247		} else {
1248			VERIFY(pr != NULL);
1249			daddr = *daddr0;
1250			NDPR_LOCK(pr);
1251			send_na = ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
1252			    in6_setscope(&daddr, fwd_ifp, NULL) == 0);
1253			NDPR_UNLOCK(pr);
1254		}
1255
1256		if (send_na) {
1257			if (!ndprl->ndprl_sol) {
1258				nd6log2((LOG_DEBUG,
1259				    "%s: Forwarding NA (DAD) from %s to %s "
1260				    "tgt is %s, originally on %s\n",
1261				    if_name(fwd_ifp),
1262				    ip6_sprintf(saddr), ip6_sprintf(&daddr),
1263				    ip6_sprintf(taddr), if_name(ifp)));
1264			} else {
1265				nd6log2((LOG_DEBUG,
1266				    "%s: Forwarding NA (NUD/AR) from %s to "
1267				    "%s (was %s) tgt is %s, originally on "
1268				    "%s\n", if_name(fwd_ifp),
1269				    ip6_sprintf(saddr),
1270				    ip6_sprintf(&daddr), ip6_sprintf(daddr0),
1271				    ip6_sprintf(taddr), if_name(ifp)));
1272			}
1273
1274			nd6_na_output(fwd_ifp, &daddr, taddr, flags, 1, NULL);
1275		}
1276
1277		if (pr != NULL)
1278			NDPR_REMREF(pr);
1279
1280		nd6_ndprl_free(ndprl);
1281	}
1282	VERIFY(SLIST_EMPTY(&ndprl_head));
1283}
1284
1285static struct nd6_prproxy_solsrc *
1286nd6_solsrc_alloc(int how)
1287{
1288	struct nd6_prproxy_solsrc *ssrc;
1289
1290	ssrc = (how == M_WAITOK) ? zalloc(solsrc_zone) :
1291	    zalloc_noblock(solsrc_zone);
1292	if (ssrc != NULL)
1293		bzero(ssrc, solsrc_size);
1294
1295	return (ssrc);
1296}
1297
/*
 * Give a solicitation source record back to solsrc_zone.
 *
 * The record is handed to zfree() as is; nothing here unlinks it from a
 * queue, so that is left to the caller.
 */
static void
nd6_solsrc_free(struct nd6_prproxy_solsrc *ssrc)
{
	zfree(solsrc_zone, ssrc);
}
1303
1304static void
1305nd6_prproxy_sols_purge(struct nd_prefix *pr, u_int64_t max_stgt)
1306{
1307	struct nd6_prproxy_soltgt *soltgt, *tmp;
1308	u_int64_t expire = (max_stgt > 0) ? net_uptime() : 0;
1309
1310	NDPR_LOCK_ASSERT_HELD(pr);
1311
1312	/* Either trim all or those that have expired or are idle */
1313	RB_FOREACH_SAFE(soltgt, prproxy_sols_tree,
1314	    &pr->ndpr_prproxy_sols, tmp) {
1315		VERIFY(pr->ndpr_prproxy_sols_cnt > 0);
1316		if (expire == 0 || soltgt->soltgt_expire <= expire ||
1317		    soltgt->soltgt_cnt == 0) {
1318			pr->ndpr_prproxy_sols_cnt--;
1319			RB_REMOVE(prproxy_sols_tree,
1320			    &pr->ndpr_prproxy_sols, soltgt);
1321			nd6_soltgt_free(soltgt);
1322		}
1323	}
1324
1325	if (max_stgt == 0 || pr->ndpr_prproxy_sols_cnt < max_stgt) {
1326		VERIFY(max_stgt != 0 || (pr->ndpr_prproxy_sols_cnt == 0 &&
1327		    RB_EMPTY(&pr->ndpr_prproxy_sols)));
1328		return;
1329	}
1330
1331	/* Brute force; mercilessly evict entries until we are under limit */
1332	RB_FOREACH_SAFE(soltgt, prproxy_sols_tree,
1333	    &pr->ndpr_prproxy_sols, tmp) {
1334		VERIFY(pr->ndpr_prproxy_sols_cnt > 0);
1335		pr->ndpr_prproxy_sols_cnt--;
1336		RB_REMOVE(prproxy_sols_tree, &pr->ndpr_prproxy_sols, soltgt);
1337		nd6_soltgt_free(soltgt);
1338		if (pr->ndpr_prproxy_sols_cnt < max_stgt)
1339			break;
1340	}
1341}
1342
/*
 * Purges all solicitation records on a given prefix.
 * Caller is responsible for holding prefix lock.
 *
 * This is nd6_prproxy_sols_purge() with a limit of 0, which is the value
 * that makes it remove every record instead of trimming down to a limit.
 */
void
nd6_prproxy_sols_reap(struct nd_prefix *pr)
{
	nd6_prproxy_sols_purge(pr, 0);
}
1352
/*
 * Purges expired or idle solicitation records on a given prefix, then
 * evicts further records if needed until fewer than max_stgt remain.
 * Caller is responsible for holding prefix lock.
 *
 * max_stgt is handed unchanged to nd6_prproxy_sols_purge(); note that a
 * value of 0 makes that routine remove every record, i.e. it then behaves
 * like nd6_prproxy_sols_reap().
 */
void
nd6_prproxy_sols_prune(struct nd_prefix *pr, u_int32_t max_stgt)
{
	nd6_prproxy_sols_purge(pr, max_stgt);
}
1362
1363/*
1364 * Enqueue a soliciation record in the target record of a prefix.
1365 */
1366static boolean_t
1367nd6_solsrc_enq(struct nd_prefix *pr, struct ifnet *ifp,
1368    struct in6_addr *saddr, struct in6_addr *taddr)
1369{
1370	struct nd6_prproxy_soltgt find, *soltgt;
1371	struct nd6_prproxy_solsrc *ssrc;
1372	u_int32_t max_stgt = nd6_max_tgt_sols;
1373	u_int32_t max_ssrc = nd6_max_src_sols;
1374
1375	NDPR_LOCK_ASSERT_HELD(pr);
1376	VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
1377	VERIFY((pr->ndpr_stateflags & (NDPRF_ONLINK|NDPRF_PRPROXY)) ==
1378	    (NDPRF_ONLINK|NDPRF_PRPROXY));
1379	VERIFY(!IN6_IS_ADDR_UNSPECIFIED(saddr));
1380
1381	ssrc = nd6_solsrc_alloc(M_WAITOK);
1382	if (ssrc == NULL)
1383		return (FALSE);
1384
1385	ssrc->solsrc_saddr = *saddr;
1386	ssrc->solsrc_ifp = ifp;
1387
1388	find.soltgt_key.taddr = *taddr;		/* search key */
1389
1390	soltgt = RB_FIND(prproxy_sols_tree, &pr->ndpr_prproxy_sols, &find);
1391	if (soltgt == NULL) {
1392		if (max_stgt != 0 && pr->ndpr_prproxy_sols_cnt >= max_stgt) {
1393			VERIFY(!RB_EMPTY(&pr->ndpr_prproxy_sols));
1394			nd6_prproxy_sols_prune(pr, max_stgt);
1395			VERIFY(pr->ndpr_prproxy_sols_cnt < max_stgt);
1396		}
1397
1398		soltgt = nd6_soltgt_alloc(M_WAITOK);
1399		if (soltgt == NULL) {
1400			nd6_solsrc_free(ssrc);
1401			return (FALSE);
1402		}
1403
1404		soltgt->soltgt_key.taddr = *taddr;
1405		VERIFY(soltgt->soltgt_cnt == 0);
1406		VERIFY(TAILQ_EMPTY(&soltgt->soltgt_q));
1407
1408		pr->ndpr_prproxy_sols_cnt++;
1409		VERIFY(pr->ndpr_prproxy_sols_cnt != 0);
1410		RB_INSERT(prproxy_sols_tree, &pr->ndpr_prproxy_sols, soltgt);
1411	}
1412
1413	if (max_ssrc != 0 && soltgt->soltgt_cnt >= max_ssrc) {
1414		VERIFY(!TAILQ_EMPTY(&soltgt->soltgt_q));
1415		nd6_soltgt_prune(soltgt, max_ssrc);
1416		VERIFY(soltgt->soltgt_cnt < max_ssrc);
1417	}
1418
1419	soltgt->soltgt_cnt++;
1420	VERIFY(soltgt->soltgt_cnt != 0);
1421	TAILQ_INSERT_TAIL(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1422	if (soltgt->soltgt_cnt == 1)
1423		soltgt->soltgt_expire = net_uptime() + ND6_TGT_SOLS_EXPIRE;
1424
1425	return (TRUE);
1426}
1427
1428/*
1429 * Dequeue a solicitation record from a target record of a prefix.
1430 */
1431static boolean_t
1432nd6_solsrc_deq(struct nd_prefix *pr, struct in6_addr *taddr,
1433    struct in6_addr *daddr, struct ifnet **ifp)
1434{
1435	struct nd6_prproxy_soltgt find, *soltgt;
1436	struct nd6_prproxy_solsrc *ssrc;
1437
1438	NDPR_LOCK_ASSERT_HELD(pr);
1439	VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
1440	VERIFY((pr->ndpr_stateflags & (NDPRF_ONLINK|NDPRF_PRPROXY)) ==
1441	    (NDPRF_ONLINK|NDPRF_PRPROXY));
1442
1443	bzero(daddr, sizeof (*daddr));
1444	*ifp = NULL;
1445
1446	find.soltgt_key.taddr = *taddr;		/* search key */
1447
1448	soltgt = RB_FIND(prproxy_sols_tree, &pr->ndpr_prproxy_sols, &find);
1449	if (soltgt == NULL || soltgt->soltgt_cnt == 0) {
1450		VERIFY(soltgt == NULL || TAILQ_EMPTY(&soltgt->soltgt_q));
1451		return (FALSE);
1452	}
1453
1454	VERIFY(soltgt->soltgt_cnt != 0);
1455	--soltgt->soltgt_cnt;
1456	ssrc = TAILQ_FIRST(&soltgt->soltgt_q);
1457	VERIFY(ssrc != NULL);
1458	TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1459	*daddr = *(&ssrc->solsrc_saddr);
1460	*ifp = ssrc->solsrc_ifp;
1461	nd6_solsrc_free(ssrc);
1462
1463	return (TRUE);
1464}
1465
1466static struct nd6_prproxy_soltgt *
1467nd6_soltgt_alloc(int how)
1468{
1469	struct nd6_prproxy_soltgt *soltgt;
1470
1471	soltgt = (how == M_WAITOK) ? zalloc(soltgt_zone) :
1472	    zalloc_noblock(soltgt_zone);
1473	if (soltgt != NULL) {
1474		bzero(soltgt, soltgt_size);
1475		TAILQ_INIT(&soltgt->soltgt_q);
1476	}
1477	return (soltgt);
1478}
1479
1480static void
1481nd6_soltgt_free(struct nd6_prproxy_soltgt *soltgt)
1482{
1483	struct nd6_prproxy_solsrc *ssrc, *tssrc;
1484
1485	TAILQ_FOREACH_SAFE(ssrc, &soltgt->soltgt_q, solsrc_tqe, tssrc) {
1486		VERIFY(soltgt->soltgt_cnt > 0);
1487		soltgt->soltgt_cnt--;
1488		TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1489		nd6_solsrc_free(ssrc);
1490	}
1491
1492	VERIFY(soltgt->soltgt_cnt == 0);
1493	VERIFY(TAILQ_EMPTY(&soltgt->soltgt_q));
1494
1495	zfree(soltgt_zone, soltgt);
1496}
1497
1498static void
1499nd6_soltgt_prune(struct nd6_prproxy_soltgt *soltgt, u_int32_t max_ssrc)
1500{
1501	while (soltgt->soltgt_cnt >= max_ssrc) {
1502		struct nd6_prproxy_solsrc *ssrc;
1503
1504		VERIFY(soltgt->soltgt_cnt != 0);
1505		--soltgt->soltgt_cnt;
1506		ssrc = TAILQ_FIRST(&soltgt->soltgt_q);
1507		VERIFY(ssrc != NULL);
1508		TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1509		nd6_solsrc_free(ssrc);
1510	}
1511}
1512
1513/*
1514 * Solicited target tree comparison function.
1515 *
1516 * An ordered predicate is necessary; bcmp() is not documented to return
1517 * an indication of order, memcmp() is, and is an ISO C99 requirement.
1518 */
1519static __inline int
1520soltgt_cmp(const struct nd6_prproxy_soltgt *a,
1521    const struct nd6_prproxy_soltgt *b)
1522{
1523	return (memcmp(&a->soltgt_key, &b->soltgt_key, sizeof (a->soltgt_key)));
1524}
1525