1/*
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Prefix-based Neighbor Discovery Proxy
31 *
32 * When an interface is marked with the ND6_IFF_PROXY_PREFIXES flag, all
33 * of current and future non-scoped on-link prefixes configured on the
34 * interface will be shared with the scoped variant of such prefixes on
35 * other interfaces.  This allows for one or more prefixes to be shared
 * across multiple links, with full support for Duplicate Address Detection,
37 * Address Resolution and Neighbor Unreachability Detection.
38 *
39 * A non-scoped prefix may be configured statically, or dynamically via
40 * Router Advertisement.  An interface is said to be an "upstream" interface
41 * when it is marked with ND6_IFF_PROXY_PREFIXES and has at least one prefix
42 * that is non-scoped (global, not scoped.)  Such prefixes are marked with
43 * the NDPRF_PRPROXY flag.
44 *
45 * A scoped prefix typically gets configured by way of adding an address
46 * to a "downstream" interface, when the added address is part of an existing
47 * prefix that is allowed to be shared (i.e. NDPRF_PRPROXY prefixes.)  Unlike
48 * non-scoped prefixes, however, scoped prefixes will never be marked with
49 * the NDPRF_PRPROXY flag.
50 *
51 * The setting of NDPRF_PRPROXY depends on whether the prefix is on-link;
52 * an off-link prefix on an interface marked with ND6_IFF_PROXY_PREFIXES
53 * will not cause NDPRF_PRPROXY to be set (it will only happen when that
54 * prefix goes on-link.)  Likewise, a previously on-link prefix that has
55 * transitioned to off-link will cause its NDPRF_PRPROXY flag to be cleared.
56 *
57 * Prefix proxying relies on IPv6 Scoped Routing to be in effect, as it would
58 * otherwise be impossible to install scoped prefix route entries in the
59 * routing table.  By default, such cloning prefix routes will generate cloned
60 * routes that are scoped according to their interfaces.  Because prefix
61 * proxying is essentially creating a larger network comprised of multiple
62 * links sharing a prefix, we need to treat the cloned routes as if they
63 * weren't scoped route entries.  This requires marking such cloning prefix
64 * routes with the RTF_PROXY flag, which serves as an indication that the
65 * route entry (and its clones) are part of a proxied prefix, and that the
66 * entries are non-scoped.
67 *
68 * In order to handle solicited-node destined ND packets (Address Resolution,
69 * Neighbor Unreachability Detection), prefix proxying also requires that the
70 * "upstream" and "downstream" interfaces be configured for all-multicast mode.
71 *
72 * The setting and clearing of RTF_PROXY flag, as well as the entering and
73 * exiting of all-multicast mode on those interfaces happen when a prefix
74 * transitions between on-link and off-link (vice versa.)
75 *
76 * Note that this is not a strict implementation of RFC 4389, but rather a
77 * derivative based on similar concept.  In particular, we only proxy NS and
78 * NA packets; RA packets are never proxied.  Care should be taken to enable
79 * prefix proxying only on non-looping network topology.
80 */
81
82#include <sys/param.h>
83#include <sys/systm.h>
84#include <sys/malloc.h>
85#include <sys/mbuf.h>
86#include <sys/errno.h>
87#include <sys/syslog.h>
88#include <sys/sysctl.h>
89#include <sys/mcache.h>
90#include <sys/protosw.h>
91
92#include <kern/queue.h>
93#include <kern/zalloc.h>
94
95#include <net/if.h>
96#include <net/if_var.h>
97#include <net/if_types.h>
98#include <net/route.h>
99
100#include <netinet/in.h>
101#include <netinet/in_var.h>
102#include <netinet6/in6_var.h>
103#include <netinet/ip6.h>
104#include <netinet6/ip6_var.h>
105#include <netinet/icmp6.h>
106#include <netinet6/nd6.h>
107#include <netinet6/scope6_var.h>
108
/*
 * Work-list entry.  The routines in this file collect prefixes into
 * function-local SLISTs of these entries while walking nd_prefix under
 * nd6_mutex, then act on the entries after dropping that mutex.  Each
 * entry holds a reference on the prefix(es) it points to.
 */
struct nd6_prproxy_prelist {
	SLIST_ENTRY(nd6_prproxy_prelist) ndprl_le;
	struct nd_prefix *ndprl_pr;		/* prefix */
	struct nd_prefix *ndprl_up;		/* upstream prefix; set only on downstream entries, NULL otherwise */
	struct ifnet	*ndprl_fwd_ifp;		/* outgoing interface */
	boolean_t	ndprl_sol;		/* unicast solicitor? */
	struct in6_addr	ndprl_sol_saddr;	/* solicitor's address */
};
117
/*
 * Soliciting node (source) record.  Records of this type are queued on
 * the soltgt_q of a solicited-target record (struct nd6_prproxy_soltgt).
 */
struct nd6_prproxy_solsrc {
	TAILQ_ENTRY(nd6_prproxy_solsrc) solsrc_tqe;
	struct in6_addr solsrc_saddr;		/* soliciting (src) address */
	struct ifnet	*solsrc_ifp;		/* iface where NS arrived on */
};
126
/*
 * Solicited node (target) record.  Linked into the prproxy_sols_tree
 * red-black tree through soltgt_link, and heads the queue of soliciting
 * (source) records outstanding for the target address.
 */
struct nd6_prproxy_soltgt {
	RB_ENTRY(nd6_prproxy_soltgt) soltgt_link; /* RB tree links */
	struct soltgt_key_s {
		struct in6_addr taddr;		/* solicited (tgt) address */
	} soltgt_key;
	u_int64_t	soltgt_expire;		/* expiration time */
	u_int32_t	soltgt_cnt;		/* total # of solicitors */
	TAILQ_HEAD(, nd6_prproxy_solsrc) soltgt_q;	/* queued solicitors */
};
139
/* Head type for singly-linked lists of nd6_prproxy_prelist entries. */
SLIST_HEAD(nd6_prproxy_prelist_head, nd6_prproxy_prelist);

/* Forward declarations of the file-local helpers. */
static void nd6_prproxy_prelist_setroute(boolean_t enable,
    struct nd6_prproxy_prelist_head *, struct nd6_prproxy_prelist_head *);
static struct nd6_prproxy_prelist *nd6_ndprl_alloc(int);
static void nd6_ndprl_free(struct nd6_prproxy_prelist *);
static struct nd6_prproxy_solsrc *nd6_solsrc_alloc(int);
static void nd6_solsrc_free(struct nd6_prproxy_solsrc *);
static boolean_t nd6_solsrc_enq(struct nd_prefix *, struct ifnet *,
    struct in6_addr *, struct in6_addr *);
static boolean_t nd6_solsrc_deq(struct nd_prefix *, struct in6_addr *,
    struct in6_addr *, struct ifnet **);
static struct nd6_prproxy_soltgt *nd6_soltgt_alloc(int);
static void nd6_soltgt_free(struct nd6_prproxy_soltgt *);
static void nd6_soltgt_prune(struct nd6_prproxy_soltgt *, u_int32_t);
static __inline int soltgt_cmp(const struct nd6_prproxy_soltgt *,
    const struct nd6_prproxy_soltgt *);
static void nd6_prproxy_sols_purge(struct nd_prefix *, u_int64_t);

/*
 * Red-black tree of solicited-target records, linked through soltgt_link
 * and ordered by soltgt_cmp().
 */
RB_PROTOTYPE_SC_PREV(__private_extern__, prproxy_sols_tree, nd6_prproxy_soltgt,
    soltgt_link, soltgt_cmp);
161
/*
 * Time (in seconds) before a target record expires (is idle).
 */
#define	ND6_TGT_SOLS_EXPIRE			5

/*
 * Maximum number of queued soliciting (source) records per target.
 */
#define	ND6_MAX_SRC_SOLS_DEFAULT		4

/*
 * Maximum number of queued solicited (target) records per prefix.
 */
#define	ND6_MAX_TGT_SOLS_DEFAULT		8

/*
 * Current limits; they start at the defaults above and are exported
 * read-write through the sysctl declarations in this file.
 */
static u_int32_t nd6_max_tgt_sols = ND6_MAX_TGT_SOLS_DEFAULT;
static u_int32_t nd6_max_src_sols = ND6_MAX_SRC_SOLS_DEFAULT;

/* Zone backing struct nd6_prproxy_prelist; set up in nd6_prproxy_init() */
static unsigned int ndprl_size;			/* size of zone element */
static struct zone *ndprl_zone;			/* nd6_prproxy_prelist zone */

#define	NDPRL_ZONE_MAX	256			/* maximum elements in zone */
#define	NDPRL_ZONE_NAME	"nd6_prproxy_prelist"	/* name for zone */

/* Zone backing struct nd6_prproxy_solsrc; set up in nd6_prproxy_init() */
static unsigned int solsrc_size;		/* size of zone element */
static struct zone *solsrc_zone;		/* nd6_prproxy_solsrc zone */

#define	SOLSRC_ZONE_MAX	 256			/* maximum elements in zone */
#define	SOLSRC_ZONE_NAME "nd6_prproxy_solsrc"	/* name for zone */

/* Zone backing struct nd6_prproxy_soltgt; set up in nd6_prproxy_init() */
static unsigned int soltgt_size;		/* size of zone element */
static struct zone *soltgt_zone;		/* nd6_prproxy_soltgt zone */

#define	SOLTGT_ZONE_MAX	 256			/* maximum elements in zone */
#define	SOLTGT_ZONE_NAME "nd6_prproxy_soltgt"	/* name for zone */
197
/* The following is protected by ndpr_lock */
RB_GENERATE_PREV(prproxy_sols_tree, nd6_prproxy_soltgt,
    soltgt_link, soltgt_cmp);

/*
 * Count of proxied (upstream) prefixes; exported read-only as prproxy_cnt.
 * The following is protected by proxy6_lock (for updates)
 */
u_int32_t nd6_prproxy;

extern lck_mtx_t *nd6_mutex;

SYSCTL_DECL(_net_inet6_icmp6);

/* Read-write: limit on solicited-target records kept per prefix. */
SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, nd6_maxsolstgt,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_max_tgt_sols, ND6_MAX_TGT_SOLS_DEFAULT,
    "maximum number of outstanding solicited targets per prefix");

/* Read-write: limit on soliciting-source records kept per target. */
SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, nd6_maxproxiedsol,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_max_src_sols, ND6_MAX_SRC_SOLS_DEFAULT,
    "maximum number of outstanding solicitations per target");

/* Read-only: current value of nd6_prproxy. */
SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, prproxy_cnt,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nd6_prproxy, 0,
    "total number of proxied prefixes");
220
221/*
222 * Called by nd6_init() during initialization time.
223 */
224void
225nd6_prproxy_init(void)
226{
227	ndprl_size = sizeof (struct nd6_prproxy_prelist);
228	ndprl_zone = zinit(ndprl_size, NDPRL_ZONE_MAX * ndprl_size, 0,
229	    NDPRL_ZONE_NAME);
230	if (ndprl_zone == NULL)
231		panic("%s: failed allocating ndprl_zone", __func__);
232
233	zone_change(ndprl_zone, Z_EXPAND, TRUE);
234	zone_change(ndprl_zone, Z_CALLERACCT, FALSE);
235
236	solsrc_size = sizeof (struct nd6_prproxy_solsrc);
237	solsrc_zone = zinit(solsrc_size, SOLSRC_ZONE_MAX * solsrc_size, 0,
238	    SOLSRC_ZONE_NAME);
239	if (solsrc_zone == NULL)
240		panic("%s: failed allocating solsrc_zone", __func__);
241
242	zone_change(solsrc_zone, Z_EXPAND, TRUE);
243	zone_change(solsrc_zone, Z_CALLERACCT, FALSE);
244
245	soltgt_size = sizeof (struct nd6_prproxy_soltgt);
246	soltgt_zone = zinit(soltgt_size, SOLTGT_ZONE_MAX * soltgt_size, 0,
247	    SOLTGT_ZONE_NAME);
248	if (soltgt_zone == NULL)
249		panic("%s: failed allocating soltgt_zone", __func__);
250
251	zone_change(soltgt_zone, Z_EXPAND, TRUE);
252	zone_change(soltgt_zone, Z_CALLERACCT, FALSE);
253}
254
255static struct nd6_prproxy_prelist *
256nd6_ndprl_alloc(int how)
257{
258	struct nd6_prproxy_prelist *ndprl;
259
260	ndprl = (how == M_WAITOK) ? zalloc(ndprl_zone) :
261	    zalloc_noblock(ndprl_zone);
262	if (ndprl != NULL)
263		bzero(ndprl, ndprl_size);
264
265	return (ndprl);
266}
267
/*
 * Return a prelist entry to ndprl_zone.  Only the entry itself is
 * released; references on the prefixes it points to are dropped by the
 * callers before they call this.
 */
static void
nd6_ndprl_free(struct nd6_prproxy_prelist *ndprl)
{
	zfree(ndprl_zone, ndprl);
}
273
274/*
275 * Apply routing function on the affected upstream and downstream prefixes,
276 * i.e. either set or clear RTF_PROXY on the cloning prefix route; all route
277 * entries that were cloned off these prefixes will be blown away.  Caller
278 * must have acquried proxy6_lock and must not be holding nd6_mutex.
279 */
280static void
281nd6_prproxy_prelist_setroute(boolean_t enable,
282    struct nd6_prproxy_prelist_head *up_head,
283    struct nd6_prproxy_prelist_head *down_head)
284{
285	struct nd6_prproxy_prelist *up, *down, *ndprl_tmp;
286	struct nd_prefix *pr;
287
288	lck_mtx_assert(&proxy6_lock, LCK_MTX_ASSERT_OWNED);
289	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
290
291	SLIST_FOREACH_SAFE(up, up_head, ndprl_le, ndprl_tmp) {
292		struct rtentry *rt;
293		boolean_t prproxy;
294
295		SLIST_REMOVE(up_head, up, nd6_prproxy_prelist, ndprl_le);
296		pr = up->ndprl_pr;
297		VERIFY(up->ndprl_up == NULL);
298
299		NDPR_LOCK(pr);
300		prproxy = (pr->ndpr_stateflags & NDPRF_PRPROXY);
301		VERIFY(!prproxy || ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
302		    !(pr->ndpr_stateflags & NDPRF_IFSCOPE)));
303
304		nd6_prproxy_sols_reap(pr);
305		VERIFY(pr->ndpr_prproxy_sols_cnt == 0);
306		VERIFY(RB_EMPTY(&pr->ndpr_prproxy_sols));
307
308		if (enable && pr->ndpr_allmulti_cnt == 0) {
309			nd6_prproxy++;
310			pr->ndpr_allmulti_cnt++;
311			if_allmulti(pr->ndpr_ifp, TRUE);
312		} else if (!enable && pr->ndpr_allmulti_cnt > 0) {
313			nd6_prproxy--;
314			pr->ndpr_allmulti_cnt--;
315			if_allmulti(pr->ndpr_ifp, FALSE);
316		}
317
318		if ((rt = pr->ndpr_rt) != NULL) {
319			if ((enable && prproxy) || (!enable && !prproxy))
320				RT_ADDREF(rt);
321			else
322				rt = NULL;
323			NDPR_UNLOCK(pr);
324		} else {
325			NDPR_UNLOCK(pr);
326		}
327		NDPR_REMREF(pr);
328		if (rt != NULL) {
329			rt_set_proxy(rt, enable);
330			rtfree(rt);
331		}
332		nd6_ndprl_free(up);
333	}
334
335	SLIST_FOREACH_SAFE(down, down_head, ndprl_le, ndprl_tmp) {
336		struct nd_prefix *pr_up;
337		struct rtentry *rt;
338		boolean_t prproxy;
339
340		SLIST_REMOVE(down_head, down, nd6_prproxy_prelist, ndprl_le);
341		pr = down->ndprl_pr;
342		pr_up = down->ndprl_up;
343		VERIFY(pr_up != NULL);
344
345		NDPR_LOCK(pr_up);
346		prproxy = (pr_up->ndpr_stateflags & NDPRF_PRPROXY);
347		VERIFY(!prproxy || ((pr_up->ndpr_stateflags & NDPRF_ONLINK) &&
348		    !(pr_up->ndpr_stateflags & NDPRF_IFSCOPE)));
349		NDPR_UNLOCK(pr_up);
350
351		NDPR_LOCK(pr);
352		if (enable && pr->ndpr_allmulti_cnt == 0) {
353			pr->ndpr_allmulti_cnt++;
354			if_allmulti(pr->ndpr_ifp, TRUE);
355		} else if (!enable && pr->ndpr_allmulti_cnt > 0) {
356			pr->ndpr_allmulti_cnt--;
357			if_allmulti(pr->ndpr_ifp, FALSE);
358		}
359
360		if ((rt = pr->ndpr_rt) != NULL) {
361			if ((enable && prproxy) || (!enable && !prproxy))
362				RT_ADDREF(rt);
363			else
364				rt = NULL;
365			NDPR_UNLOCK(pr);
366		} else {
367			NDPR_UNLOCK(pr);
368		}
369		NDPR_REMREF(pr);
370		NDPR_REMREF(pr_up);
371		if (rt != NULL) {
372			rt_set_proxy(rt, enable);
373			rtfree(rt);
374		}
375		nd6_ndprl_free(down);
376	}
377}
378
379/*
380 * Enable/disable prefix proxying on an interface; typically called
381 * as part of handling SIOCSIFINFO_FLAGS[IFEF_IPV6_ROUTER].
382 */
383int
384nd6_if_prproxy(struct ifnet *ifp, boolean_t enable)
385{
386	SLIST_HEAD(, nd6_prproxy_prelist) up_head;
387	SLIST_HEAD(, nd6_prproxy_prelist) down_head;
388	struct nd6_prproxy_prelist *up, *down;
389	struct nd_prefix *pr;
390
391	/* Can't be enabled if we are an advertising router on the interface */
392	ifnet_lock_shared(ifp);
393	if (enable && (ifp->if_eflags & IFEF_IPV6_ROUTER)) {
394		ifnet_lock_done(ifp);
395		return (EBUSY);
396	}
397	ifnet_lock_done(ifp);
398
399	SLIST_INIT(&up_head);
400	SLIST_INIT(&down_head);
401
402	/*
403	 * Serialize the clearing/setting of NDPRF_PRPROXY.
404	 */
405	lck_mtx_lock(&proxy6_lock);
406
407	/*
408	 * First build a list of upstream prefixes on this interface for
409	 * which we need to enable/disable prefix proxy functionality.
410	 */
411	lck_mtx_lock(nd6_mutex);
412	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
413		NDPR_LOCK(pr);
414		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
415		    (!enable && !(pr->ndpr_stateflags & NDPRF_PRPROXY)) ||
416		    (enable && (pr->ndpr_stateflags & NDPRF_PRPROXY)) ||
417		    (pr->ndpr_stateflags & NDPRF_IFSCOPE) ||
418		    pr->ndpr_ifp != ifp) {
419			NDPR_UNLOCK(pr);
420			continue;
421		}
422
423		/*
424		 * At present, in order for the prefix to be eligible
425		 * as a proxying/proxied prefix, we require that the
426		 * prefix route entry be marked as a cloning route with
427		 * RTF_PROXY; i.e. nd6_need_cache() needs to return
428		 * true for the interface type.
429		 */
430		if (enable && (pr->ndpr_stateflags & NDPRF_ONLINK) &&
431		    nd6_need_cache(ifp)) {
432			pr->ndpr_stateflags |= NDPRF_PRPROXY;
433			NDPR_ADDREF_LOCKED(pr);
434			NDPR_UNLOCK(pr);
435		} else if (!enable) {
436			pr->ndpr_stateflags &= ~NDPRF_PRPROXY;
437			NDPR_ADDREF_LOCKED(pr);
438			NDPR_UNLOCK(pr);
439		} else {
440			NDPR_UNLOCK(pr);
441			pr = NULL;	/* don't go further */
442		}
443
444		if (pr == NULL)
445			continue;
446
447		up = nd6_ndprl_alloc(M_WAITOK);
448		if (up == NULL) {
449			NDPR_REMREF(pr);
450			continue;
451		}
452
453		up->ndprl_pr = pr;	/* keep reference from above */
454		SLIST_INSERT_HEAD(&up_head, up, ndprl_le);
455	}
456
457	/*
458	 * Now build a list of matching (scoped) downstream prefixes on other
459	 * interfaces which need to be enabled/disabled accordingly.  Note that
460	 * the NDPRF_PRPROXY is never set/cleared on the downstream prefixes.
461	 */
462	SLIST_FOREACH(up, &up_head, ndprl_le) {
463		struct nd_prefix *fwd;
464		struct in6_addr pr_addr;
465		u_char pr_len;
466
467		pr = up->ndprl_pr;
468
469		NDPR_LOCK(pr);
470		bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
471		pr_len = pr->ndpr_plen;
472		NDPR_UNLOCK(pr);
473
474		for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
475			NDPR_LOCK(fwd);
476			if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
477			    !(fwd->ndpr_stateflags & NDPRF_IFSCOPE) ||
478			    fwd->ndpr_plen != pr_len ||
479			    !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
480			    &pr_addr, pr_len)) {
481				NDPR_UNLOCK(fwd);
482				continue;
483			}
484			NDPR_UNLOCK(fwd);
485
486			down = nd6_ndprl_alloc(M_WAITOK);
487			if (down == NULL)
488				continue;
489
490			NDPR_ADDREF(fwd);
491			down->ndprl_pr = fwd;
492			NDPR_ADDREF(pr);
493			down->ndprl_up = pr;
494			SLIST_INSERT_HEAD(&down_head, down, ndprl_le);
495		}
496	}
497	lck_mtx_unlock(nd6_mutex);
498
499	/*
500	 * Apply routing function on prefixes; callee will free resources.
501	 */
502	nd6_prproxy_prelist_setroute(enable,
503	    (struct nd6_prproxy_prelist_head *)&up_head,
504	    (struct nd6_prproxy_prelist_head *)&down_head);
505
506	VERIFY(SLIST_EMPTY(&up_head));
507	VERIFY(SLIST_EMPTY(&down_head));
508
509	lck_mtx_unlock(&proxy6_lock);
510
511	return (0);
512}
513
514/*
515 * Called from the input path to determine whether the packet is destined
516 * to a proxied node; if so, mark the mbuf with MAUXF_PROXY_DST so that
517 * icmp6_input() knows that this is not to be delivered to socket(s).
518 */
519boolean_t
520nd6_prproxy_isours(struct mbuf *m, struct ip6_hdr *ip6, struct route_in6 *ro6,
521    unsigned int ifscope)
522{
523	struct rtentry *rt;
524	boolean_t ours = FALSE;
525
526	if (ip6->ip6_hlim != IPV6_MAXHLIM || ip6->ip6_nxt != IPPROTO_ICMPV6)
527		goto done;
528
529	if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst) ||
530	    IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) {
531		VERIFY(ro6 == NULL);
532		ours = TRUE;
533		goto done;
534	} else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
535		goto done;
536	}
537
538	if (ro6 == NULL)
539		goto done;
540
541	if ((rt = ro6->ro_rt) != NULL)
542		RT_LOCK(rt);
543
544	if (rt == NULL || !(rt->rt_flags & RTF_UP) ||
545	    rt->generation_id != route_generation) {
546		if (rt != NULL) {
547			RT_UNLOCK(rt);
548			rtfree(rt);
549			rt = ro6->ro_rt = NULL;
550		}
551
552		/* Caller must have ensured this condition (not srcrt) */
553		VERIFY(IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
554		    &ro6->ro_dst.sin6_addr));
555
556		rtalloc_scoped_ign((struct route *)ro6, RTF_PRCLONING, ifscope);
557		if ((rt = ro6->ro_rt) == NULL)
558			goto done;
559
560		RT_LOCK(rt);
561	}
562
563	ours = (rt->rt_flags & RTF_PROXY) ? TRUE : FALSE;
564	RT_UNLOCK(rt);
565
566done:
567	if (ours)
568		m->m_pkthdr.aux_flags |= MAUXF_PROXY_DST;
569
570	return (ours);
571}
572
573/*
574 * Called when a prefix transitions between on-link and off-link.  Perform
575 * routing (RTF_PROXY) and interface (all-multicast) related operations on
576 * the affected prefixes.
577 */
578void
579nd6_prproxy_prelist_update(struct nd_prefix *pr_cur, struct nd_prefix *pr_up)
580{
581	SLIST_HEAD(, nd6_prproxy_prelist) up_head;
582	SLIST_HEAD(, nd6_prproxy_prelist) down_head;
583	struct nd6_prproxy_prelist *up, *down;
584	struct nd_prefix *pr;
585	struct in6_addr pr_addr;
586	boolean_t enable;
587	u_char pr_len;
588
589	SLIST_INIT(&up_head);
590	SLIST_INIT(&down_head);
591	VERIFY(pr_cur != NULL);
592
593	lck_mtx_assert(&proxy6_lock, LCK_MTX_ASSERT_OWNED);
594
595	/*
596	 * Upstream prefix.  If caller did not specify one, search for one
597	 * based on the information in current prefix.  Caller is expected
598	 * to have held an extra reference for the passed-in prefixes.
599	 */
600	lck_mtx_lock(nd6_mutex);
601	if (pr_up == NULL) {
602		NDPR_LOCK(pr_cur);
603		bcopy(&pr_cur->ndpr_prefix.sin6_addr, &pr_addr,
604		    sizeof (pr_addr));
605		pr_len = pr_cur->ndpr_plen;
606		NDPR_UNLOCK(pr_cur);
607
608		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
609			NDPR_LOCK(pr);
610			if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
611			    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
612			    pr->ndpr_plen != pr_len ||
613			    !in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
614			    &pr_addr, pr_len)) {
615				NDPR_UNLOCK(pr);
616				continue;
617			}
618			NDPR_UNLOCK(pr);
619			break;
620		}
621
622		if ((pr_up = pr) == NULL) {
623			lck_mtx_unlock(nd6_mutex);
624			goto done;
625		}
626		NDPR_LOCK(pr_up);
627	} else {
628		NDPR_LOCK(pr_up);
629		bcopy(&pr_up->ndpr_prefix.sin6_addr, &pr_addr,
630		    sizeof (pr_addr));
631		pr_len = pr_up->ndpr_plen;
632	}
633	NDPR_LOCK_ASSERT_HELD(pr_up);
634	/*
635	 * Upstream prefix could be offlink by now; therefore we cannot
636	 * assert that NDPRF_PRPROXY is set; however, we can insist that
637	 * it must not be a scoped prefix.
638	 */
639	VERIFY(!(pr_up->ndpr_stateflags & NDPRF_IFSCOPE));
640	enable = (pr_up->ndpr_stateflags & NDPRF_PRPROXY);
641	NDPR_UNLOCK(pr_up);
642
643	up = nd6_ndprl_alloc(M_WAITOK);
644	if (up == NULL) {
645		lck_mtx_unlock(nd6_mutex);
646		goto done;
647	}
648
649	NDPR_ADDREF(pr_up);
650	up->ndprl_pr = pr_up;
651	SLIST_INSERT_HEAD(&up_head, up, ndprl_le);
652
653	/*
654	 * Now build a list of matching (scoped) downstream prefixes on other
655	 * interfaces which need to be enabled/disabled accordingly.  Note that
656	 * the NDPRF_PRPROXY is never set/cleared on the downstream prefixes.
657	 */
658	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
659		NDPR_LOCK(pr);
660		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
661		    !(pr->ndpr_stateflags & NDPRF_IFSCOPE) ||
662		    pr->ndpr_plen != pr_len ||
663		    !in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
664		    &pr_addr, pr_len)) {
665			NDPR_UNLOCK(pr);
666			continue;
667		}
668		NDPR_UNLOCK(pr);
669
670		down = nd6_ndprl_alloc(M_WAITOK);
671		if (down == NULL)
672			continue;
673
674		NDPR_ADDREF(pr);
675		down->ndprl_pr = pr;
676		NDPR_ADDREF(pr_up);
677		down->ndprl_up = pr_up;
678		SLIST_INSERT_HEAD(&down_head, down, ndprl_le);
679	}
680	lck_mtx_unlock(nd6_mutex);
681
682	/*
683	 * Apply routing function on prefixes; callee will free resources.
684	 */
685	nd6_prproxy_prelist_setroute(enable,
686	    (struct nd6_prproxy_prelist_head *)&up_head,
687	    (struct nd6_prproxy_prelist_head *)&down_head);
688
689done:
690	VERIFY(SLIST_EMPTY(&up_head));
691	VERIFY(SLIST_EMPTY(&down_head));
692}
693
694/*
695 * Given an interface address, determine whether or not the address
696 * is part of of a proxied prefix.
697 */
698boolean_t
699nd6_prproxy_ifaddr(struct in6_ifaddr *ia)
700{
701	struct nd_prefix *pr;
702	struct in6_addr addr, pr_mask;
703	u_int32_t pr_len;
704	boolean_t proxied = FALSE;
705
706	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
707
708	IFA_LOCK(&ia->ia_ifa);
709	bcopy(&ia->ia_addr.sin6_addr, &addr, sizeof (addr));
710	bcopy(&ia->ia_prefixmask.sin6_addr, &pr_mask, sizeof (pr_mask));
711	pr_len = ia->ia_plen;
712	IFA_UNLOCK(&ia->ia_ifa);
713
714	lck_mtx_lock(nd6_mutex);
715	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
716		NDPR_LOCK(pr);
717		if ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
718		    (pr->ndpr_stateflags & NDPRF_PRPROXY) &&
719		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
720		    &addr, pr_len)) {
721			NDPR_UNLOCK(pr);
722			proxied = TRUE;
723			break;
724		}
725		NDPR_UNLOCK(pr);
726	}
727	lck_mtx_unlock(nd6_mutex);
728
729	return (proxied);
730}
731
732/*
733 * Perform automatic proxy function with NS output.
734 *
735 * If the target address matches a global prefix obtained from a router
736 * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES
737 * flag set, then we send solicitations for the target address to all other
738 * interfaces where a matching prefix is currently on-link, in addition to
739 * the original interface.
740 */
741void
742nd6_prproxy_ns_output(struct ifnet *ifp, struct in6_addr *daddr,
743    struct in6_addr *taddr, struct llinfo_nd6 *ln)
744{
745	SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
746	struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
747	struct nd_prefix *pr, *fwd;
748	struct ifnet *fwd_ifp;
749	struct in6_addr pr_addr;
750	u_char pr_len;
751
752	SLIST_INIT(&ndprl_head);
753
754	lck_mtx_lock(nd6_mutex);
755
756	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
757		NDPR_LOCK(pr);
758		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
759		    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
760		    !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
761		    taddr, &pr->ndpr_mask)) {
762			NDPR_UNLOCK(pr);
763			continue;
764		}
765
766		VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
767		bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
768		pr_len = pr->ndpr_plen;
769		NDPR_UNLOCK(pr);
770
771		for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
772			NDPR_LOCK(fwd);
773			if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
774			    fwd->ndpr_ifp == ifp ||
775			    fwd->ndpr_plen != pr_len ||
776			    !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
777			    &pr_addr, pr_len)) {
778				NDPR_UNLOCK(fwd);
779				continue;
780			}
781
782			fwd_ifp = fwd->ndpr_ifp;
783			NDPR_UNLOCK(fwd);
784
785			ndprl = nd6_ndprl_alloc(M_WAITOK);
786			if (ndprl == NULL)
787				continue;
788
789			NDPR_ADDREF(fwd);
790			ndprl->ndprl_pr = fwd;
791			ndprl->ndprl_fwd_ifp = fwd_ifp;
792
793			SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
794		}
795		break;
796	}
797
798	lck_mtx_unlock(nd6_mutex);
799
800	SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
801		SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
802
803		pr = ndprl->ndprl_pr;
804		fwd_ifp = ndprl->ndprl_fwd_ifp;
805
806		if ((fwd_ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
807			NDPR_REMREF(pr);
808			nd6_ndprl_free(ndprl);
809			continue;
810		}
811
812		NDPR_LOCK(pr);
813		if (pr->ndpr_stateflags & NDPRF_ONLINK) {
814			NDPR_UNLOCK(pr);
815			nd6log2((LOG_DEBUG,
816			    "%s%d: Sending cloned NS who has %s on %s%d\n",
817			    fwd_ifp->if_name, fwd_ifp->if_unit,
818			    ip6_sprintf(taddr), ifp->if_name,
819			    ifp->if_unit));
820
821			nd6_ns_output(fwd_ifp, daddr, taddr, NULL, 0);
822		} else {
823			NDPR_UNLOCK(pr);
824		}
825		NDPR_REMREF(pr);
826
827		nd6_ndprl_free(ndprl);
828	}
829	VERIFY(SLIST_EMPTY(&ndprl_head));
830
831	nd6_ns_output(ifp, daddr, taddr, ln, 0);
832}
833
834/*
835 * Perform automatic proxy function with NS input.
836 *
837 * If the target address matches a global prefix obtained from a router
838 * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES
839 * flag set, then we send solicitations for the target address to all other
840 * interfaces where a matching prefix is currently on-link.
841 */
842void
843nd6_prproxy_ns_input(struct ifnet *ifp, struct in6_addr *saddr,
844    char *lladdr, int lladdrlen, struct in6_addr *daddr, struct in6_addr *taddr)
845{
846	SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
847	struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
848	struct nd_prefix *pr, *fwd;
849	struct ifnet *fwd_ifp;
850	struct in6_addr pr_addr;
851	u_char pr_len;
852	boolean_t solrec = FALSE;
853
854	SLIST_INIT(&ndprl_head);
855
856	lck_mtx_lock(nd6_mutex);
857
858	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
859		NDPR_LOCK(pr);
860		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
861		    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
862		    !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
863		    taddr, &pr->ndpr_mask)) {
864			NDPR_UNLOCK(pr);
865			continue;
866		}
867
868		VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
869		bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
870		pr_len = pr->ndpr_plen;
871
872		/*
873		 * If this is a NS for NUD/AR, record it so that we know
874		 * how to forward the NA reply later on (if/when it arrives.)
875		 * Give up if we fail to save the NS info.
876		 */
877		if ((solrec = !IN6_IS_ADDR_UNSPECIFIED(saddr)) &&
878		    !nd6_solsrc_enq(pr, ifp, saddr, taddr)) {
879			NDPR_UNLOCK(pr);
880			solrec = FALSE;
881			break;			/* bail out */
882		} else {
883			NDPR_UNLOCK(pr);
884		}
885
886		for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
887			NDPR_LOCK(fwd);
888			if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
889			    fwd->ndpr_ifp == ifp ||
890			    fwd->ndpr_plen != pr_len ||
891			    !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
892			    &pr_addr, pr_len)) {
893				NDPR_UNLOCK(fwd);
894				continue;
895			}
896
897			fwd_ifp = fwd->ndpr_ifp;
898			NDPR_UNLOCK(fwd);
899
900			ndprl = nd6_ndprl_alloc(M_WAITOK);
901			if (ndprl == NULL)
902				continue;
903
904			NDPR_ADDREF(fwd);
905			ndprl->ndprl_pr = fwd;
906			ndprl->ndprl_fwd_ifp = fwd_ifp;
907			ndprl->ndprl_sol = solrec;
908
909			SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
910		}
911		break;
912	}
913
914	lck_mtx_unlock(nd6_mutex);
915
916	/*
917	 * If this is a recorded solicitation (NS for NUD/AR), create
918	 * or update the neighbor cache entry for the soliciting node.
919	 * Later on, when the NA reply arrives, we will need this cache
920	 * entry in order to send the NA back to the original solicitor.
921	 * Without a neighbor cache entry, we'd end up with an endless
922	 * cycle of NS ping-pong between the us (the proxy) and the node
923	 * which is soliciting for the address.
924	 */
925	if (solrec) {
926		VERIFY(!IN6_IS_ADDR_UNSPECIFIED(saddr));
927		nd6_cache_lladdr(ifp, saddr, lladdr, lladdrlen,
928		    ND_NEIGHBOR_SOLICIT, 0);
929	}
930
931	SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
932		SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
933
934		pr = ndprl->ndprl_pr;
935		fwd_ifp = ndprl->ndprl_fwd_ifp;
936
937		if ((fwd_ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
938			NDPR_REMREF(pr);
939			nd6_ndprl_free(ndprl);
940			continue;
941		}
942
943		NDPR_LOCK(pr);
944		if (pr->ndpr_stateflags & NDPRF_ONLINK) {
945			NDPR_UNLOCK(pr);
946			nd6log2((LOG_DEBUG,
947			    "%s%d: Forwarding NS (%s) from %s to %s who has %s "
948			    "on %s%d\n", fwd_ifp->if_name, fwd_ifp->if_unit,
949			    ndprl->ndprl_sol ? "NUD/AR" : "DAD",
950			    ip6_sprintf(saddr), ip6_sprintf(daddr),
951			    ip6_sprintf(taddr), ifp->if_name, ifp->if_unit));
952
953			nd6_ns_output(fwd_ifp, ndprl->ndprl_sol ? taddr : NULL,
954			    taddr, NULL, !ndprl->ndprl_sol);
955		} else {
956			NDPR_UNLOCK(pr);
957		}
958		NDPR_REMREF(pr);
959
960		nd6_ndprl_free(ndprl);
961	}
962	VERIFY(SLIST_EMPTY(&ndprl_head));
963}
964
965/*
966 * Perform automatic proxy function with NA input.
967 *
968 * If the target address matches a global prefix obtained from a router
969 * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES flag
970 * set, then we send neighbor advertisements for the target address on all
971 * other interfaces where a matching prefix is currently on link.
972 */
973void
974nd6_prproxy_na_input(struct ifnet *ifp, struct in6_addr *saddr,
975    struct in6_addr *daddr0, struct in6_addr *taddr, int flags)
976{
977	SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
978	struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
979	struct nd_prefix *pr;
980	struct ifnet *fwd_ifp;
981	struct in6_addr daddr;
982
983	SLIST_INIT(&ndprl_head);
984
985
986	lck_mtx_lock(nd6_mutex);
987
988	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
989		NDPR_LOCK(pr);
990		if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
991		    !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
992		    !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
993		    taddr, &pr->ndpr_mask)) {
994			NDPR_UNLOCK(pr);
995			continue;
996		}
997
998		VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
999		/*
1000		 * If this is a NA for NUD, see if there is a record created
1001		 * for the corresponding NS; upon success, we get back the
1002		 * interface where the NS originally arrived on, as well as
1003		 * the soliciting node's address.  Give up if we can't find it.
1004		 */
1005		if (!IN6_IS_ADDR_MULTICAST(daddr0)) {
1006			fwd_ifp = NULL;
1007			bzero(&daddr, sizeof (daddr));
1008			if (!nd6_solsrc_deq(pr, taddr, &daddr, &fwd_ifp)) {
1009				NDPR_UNLOCK(pr);
1010				break;		/* bail out */
1011			}
1012			VERIFY(!IN6_IS_ADDR_UNSPECIFIED(&daddr) && fwd_ifp);
1013			NDPR_UNLOCK(pr);
1014
1015			ndprl = nd6_ndprl_alloc(M_WAITOK);
1016			if (ndprl == NULL)
1017				break;		/* bail out */
1018
1019			ndprl->ndprl_fwd_ifp = fwd_ifp;
1020			ndprl->ndprl_sol = TRUE;
1021			ndprl->ndprl_sol_saddr = *(&daddr);
1022
1023			SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
1024		} else {
1025			struct nd_prefix *fwd;
1026			struct in6_addr pr_addr;
1027			u_char pr_len;
1028
1029			bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr,
1030			    sizeof (pr_addr));
1031			pr_len = pr->ndpr_plen;
1032			NDPR_UNLOCK(pr);
1033
1034			for (fwd = nd_prefix.lh_first; fwd;
1035			    fwd = fwd->ndpr_next) {
1036				NDPR_LOCK(fwd);
1037				if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
1038				    fwd->ndpr_ifp == ifp ||
1039				    fwd->ndpr_plen != pr_len ||
1040				    !in6_are_prefix_equal(
1041				    &fwd->ndpr_prefix.sin6_addr,
1042				    &pr_addr, pr_len)) {
1043					NDPR_UNLOCK(fwd);
1044					continue;
1045				}
1046
1047				fwd_ifp = fwd->ndpr_ifp;
1048				NDPR_UNLOCK(fwd);
1049
1050				ndprl = nd6_ndprl_alloc(M_WAITOK);
1051				if (ndprl == NULL)
1052					continue;
1053
1054				NDPR_ADDREF(fwd);
1055				ndprl->ndprl_pr = fwd;
1056				ndprl->ndprl_fwd_ifp = fwd_ifp;
1057
1058				SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
1059			}
1060		}
1061		break;
1062	}
1063
1064	lck_mtx_unlock(nd6_mutex);
1065
1066	SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
1067		boolean_t send_na;
1068
1069		SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
1070
1071		pr = ndprl->ndprl_pr;
1072		fwd_ifp = ndprl->ndprl_fwd_ifp;
1073
1074		if (ndprl->ndprl_sol) {
1075			VERIFY(pr == NULL);
1076			daddr = *(&ndprl->ndprl_sol_saddr);
1077			VERIFY(!IN6_IS_ADDR_UNSPECIFIED(&daddr));
1078			send_na = (in6_setscope(&daddr, fwd_ifp, NULL) == 0);
1079		} else {
1080			VERIFY(pr != NULL);
1081			daddr = *daddr0;
1082			NDPR_LOCK(pr);
1083			send_na = ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
1084			    in6_setscope(&daddr, fwd_ifp, NULL) == 0);
1085			NDPR_UNLOCK(pr);
1086		}
1087
1088		if (send_na) {
1089			if (!ndprl->ndprl_sol) {
1090				nd6log2((LOG_DEBUG,
1091				    "%s%d: Forwarding NA (DAD) from %s to %s "
1092				    "tgt is %s on %s%d\n",
1093				    fwd_ifp->if_name, fwd_ifp->if_unit,
1094				    ip6_sprintf(saddr), ip6_sprintf(&daddr),
1095				    ip6_sprintf(taddr), ifp->if_name,
1096				    ifp->if_unit));
1097			} else {
1098				nd6log2((LOG_DEBUG,
1099				    "%s%d: Forwarding NA (NUD/AR) from %s to "
1100				    "%s (was %s) tgt is %s on %s%d\n",
1101				    fwd_ifp->if_name, fwd_ifp->if_unit,
1102				    ip6_sprintf(saddr), ip6_sprintf(&daddr),
1103				    ip6_sprintf(daddr0), ip6_sprintf(taddr),
1104				    ifp->if_name, ifp->if_unit));
1105			}
1106
1107			nd6_na_output(fwd_ifp, &daddr, taddr, flags, 1, NULL);
1108		}
1109
1110		if (pr != NULL)
1111			NDPR_REMREF(pr);
1112
1113		nd6_ndprl_free(ndprl);
1114	}
1115	VERIFY(SLIST_EMPTY(&ndprl_head));
1116}
1117
1118static struct nd6_prproxy_solsrc *
1119nd6_solsrc_alloc(int how)
1120{
1121	struct nd6_prproxy_solsrc *ssrc;
1122
1123	ssrc = (how == M_WAITOK) ? zalloc(solsrc_zone) :
1124	    zalloc_noblock(solsrc_zone);
1125	if (ssrc != NULL)
1126		bzero(ssrc, solsrc_size);
1127
1128	return (ssrc);
1129}
1130
/*
 * Hand a solicitation source record back to solsrc_zone, the zone that
 * nd6_solsrc_alloc() allocates from.  The pointer is passed to zfree()
 * as is; no NULL check is made here.
 */
static void
nd6_solsrc_free(struct nd6_prproxy_solsrc *ssrc)
{
	zfree(solsrc_zone, ssrc);
}
1136
1137static void
1138nd6_prproxy_sols_purge(struct nd_prefix *pr, u_int64_t max_stgt)
1139{
1140	struct nd6_prproxy_soltgt *soltgt, *tmp;
1141	u_int64_t expire = (max_stgt > 0) ? net_uptime() : 0;
1142
1143	NDPR_LOCK_ASSERT_HELD(pr);
1144
1145	/* Either trim all or those that have expired or are idle */
1146	RB_FOREACH_SAFE(soltgt, prproxy_sols_tree,
1147	    &pr->ndpr_prproxy_sols, tmp) {
1148		VERIFY(pr->ndpr_prproxy_sols_cnt > 0);
1149		if (expire == 0 || soltgt->soltgt_expire <= expire ||
1150		    soltgt->soltgt_cnt == 0) {
1151			pr->ndpr_prproxy_sols_cnt--;
1152			RB_REMOVE(prproxy_sols_tree,
1153			    &pr->ndpr_prproxy_sols, soltgt);
1154			nd6_soltgt_free(soltgt);
1155		}
1156	}
1157
1158	if (max_stgt == 0 || pr->ndpr_prproxy_sols_cnt < max_stgt) {
1159		VERIFY(max_stgt != 0 || (pr->ndpr_prproxy_sols_cnt == 0 &&
1160		    RB_EMPTY(&pr->ndpr_prproxy_sols)));
1161		return;
1162	}
1163
1164	/* Brute force; mercilessly evict entries until we are under limit */
1165	RB_FOREACH_SAFE(soltgt, prproxy_sols_tree,
1166	    &pr->ndpr_prproxy_sols, tmp) {
1167		VERIFY(pr->ndpr_prproxy_sols_cnt > 0);
1168		pr->ndpr_prproxy_sols_cnt--;
1169		RB_REMOVE(prproxy_sols_tree, &pr->ndpr_prproxy_sols, soltgt);
1170		nd6_soltgt_free(soltgt);
1171		if (pr->ndpr_prproxy_sols_cnt < max_stgt)
1172			break;
1173	}
1174}
1175
/*
 * Purges all solicitation records on a given prefix.
 * Caller is responsible for holding prefix lock.
 */
void
nd6_prproxy_sols_reap(struct nd_prefix *pr)
{
	/* A limit of 0 makes the purge routine remove every target record */
	nd6_prproxy_sols_purge(pr, 0);
}
1185
/*
 * Purges expired or idle solicitation records on a given prefix; if the
 * prefix still holds max_stgt or more target records afterwards, further
 * records are evicted until the count is below max_stgt.
 * Caller is responsible for holding prefix lock.
 *
 * A max_stgt of 0 is passed through unchanged, which the purge routine
 * treats as a request to remove every record.
 */
void
nd6_prproxy_sols_prune(struct nd_prefix *pr, u_int32_t max_stgt)
{
	/* max_stgt widens to the u_int64_t limit taken by the purge routine */
	nd6_prproxy_sols_purge(pr, max_stgt);
}
1195
1196/*
1197 * Enqueue a soliciation record in the target record of a prefix.
1198 */
1199static boolean_t
1200nd6_solsrc_enq(struct nd_prefix *pr, struct ifnet *ifp,
1201    struct in6_addr *saddr, struct in6_addr *taddr)
1202{
1203	struct nd6_prproxy_soltgt find, *soltgt;
1204	struct nd6_prproxy_solsrc *ssrc;
1205	u_int32_t max_stgt = nd6_max_tgt_sols;
1206	u_int32_t max_ssrc = nd6_max_src_sols;
1207
1208	NDPR_LOCK_ASSERT_HELD(pr);
1209	VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
1210	VERIFY((pr->ndpr_stateflags & (NDPRF_ONLINK|NDPRF_PRPROXY)) ==
1211	    (NDPRF_ONLINK|NDPRF_PRPROXY));
1212	VERIFY(!IN6_IS_ADDR_UNSPECIFIED(saddr));
1213
1214	ssrc = nd6_solsrc_alloc(M_WAITOK);
1215	if (ssrc == NULL)
1216		return (FALSE);
1217
1218	ssrc->solsrc_saddr = *saddr;
1219	ssrc->solsrc_ifp = ifp;
1220
1221	find.soltgt_key.taddr = *taddr;		/* search key */
1222
1223	soltgt = RB_FIND(prproxy_sols_tree, &pr->ndpr_prproxy_sols, &find);
1224	if (soltgt == NULL) {
1225		if (max_stgt != 0 && pr->ndpr_prproxy_sols_cnt >= max_stgt) {
1226			VERIFY(!RB_EMPTY(&pr->ndpr_prproxy_sols));
1227			nd6_prproxy_sols_prune(pr, max_stgt);
1228			VERIFY(pr->ndpr_prproxy_sols_cnt < max_stgt);
1229		}
1230
1231		soltgt = nd6_soltgt_alloc(M_WAITOK);
1232		if (soltgt == NULL) {
1233			nd6_solsrc_free(ssrc);
1234			return (FALSE);
1235		}
1236
1237		soltgt->soltgt_key.taddr = *taddr;
1238		VERIFY(soltgt->soltgt_cnt == 0);
1239		VERIFY(TAILQ_EMPTY(&soltgt->soltgt_q));
1240
1241		pr->ndpr_prproxy_sols_cnt++;
1242		VERIFY(pr->ndpr_prproxy_sols_cnt != 0);
1243		RB_INSERT(prproxy_sols_tree, &pr->ndpr_prproxy_sols, soltgt);
1244	}
1245
1246	if (max_ssrc != 0 && soltgt->soltgt_cnt >= max_ssrc) {
1247		VERIFY(!TAILQ_EMPTY(&soltgt->soltgt_q));
1248		nd6_soltgt_prune(soltgt, max_ssrc);
1249		VERIFY(soltgt->soltgt_cnt < max_ssrc);
1250	}
1251
1252	soltgt->soltgt_cnt++;
1253	VERIFY(soltgt->soltgt_cnt != 0);
1254	TAILQ_INSERT_TAIL(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1255	if (soltgt->soltgt_cnt == 1)
1256		soltgt->soltgt_expire = net_uptime() + ND6_TGT_SOLS_EXPIRE;
1257
1258	return (TRUE);
1259}
1260
1261/*
1262 * Dequeue a solicitation record from a target record of a prefix.
1263 */
1264static boolean_t
1265nd6_solsrc_deq(struct nd_prefix *pr, struct in6_addr *taddr,
1266    struct in6_addr *daddr, struct ifnet **ifp)
1267{
1268	struct nd6_prproxy_soltgt find, *soltgt;
1269	struct nd6_prproxy_solsrc *ssrc;
1270
1271	NDPR_LOCK_ASSERT_HELD(pr);
1272	VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
1273	VERIFY((pr->ndpr_stateflags & (NDPRF_ONLINK|NDPRF_PRPROXY)) ==
1274	    (NDPRF_ONLINK|NDPRF_PRPROXY));
1275
1276	bzero(daddr, sizeof (*daddr));
1277	*ifp = NULL;
1278
1279	find.soltgt_key.taddr = *taddr;		/* search key */
1280
1281	soltgt = RB_FIND(prproxy_sols_tree, &pr->ndpr_prproxy_sols, &find);
1282	if (soltgt == NULL || soltgt->soltgt_cnt == 0) {
1283		VERIFY(soltgt == NULL || TAILQ_EMPTY(&soltgt->soltgt_q));
1284		return (FALSE);
1285	}
1286
1287	VERIFY(soltgt->soltgt_cnt != 0);
1288	--soltgt->soltgt_cnt;
1289	ssrc = TAILQ_FIRST(&soltgt->soltgt_q);
1290	VERIFY(ssrc != NULL);
1291	TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1292	*daddr = *(&ssrc->solsrc_saddr);
1293	*ifp = ssrc->solsrc_ifp;
1294	nd6_solsrc_free(ssrc);
1295
1296	return (TRUE);
1297}
1298
1299static struct nd6_prproxy_soltgt *
1300nd6_soltgt_alloc(int how)
1301{
1302	struct nd6_prproxy_soltgt *soltgt;
1303
1304	soltgt = (how == M_WAITOK) ? zalloc(soltgt_zone) :
1305	    zalloc_noblock(soltgt_zone);
1306	if (soltgt != NULL) {
1307		bzero(soltgt, soltgt_size);
1308		TAILQ_INIT(&soltgt->soltgt_q);
1309	}
1310	return (soltgt);
1311}
1312
1313static void
1314nd6_soltgt_free(struct nd6_prproxy_soltgt *soltgt)
1315{
1316	struct nd6_prproxy_solsrc *ssrc, *tssrc;
1317
1318	TAILQ_FOREACH_SAFE(ssrc, &soltgt->soltgt_q, solsrc_tqe, tssrc) {
1319		VERIFY(soltgt->soltgt_cnt > 0);
1320		soltgt->soltgt_cnt--;
1321		TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1322		nd6_solsrc_free(ssrc);
1323	}
1324
1325	VERIFY(soltgt->soltgt_cnt == 0);
1326	VERIFY(TAILQ_EMPTY(&soltgt->soltgt_q));
1327
1328	zfree(soltgt_zone, soltgt);
1329}
1330
1331static void
1332nd6_soltgt_prune(struct nd6_prproxy_soltgt *soltgt, u_int32_t max_ssrc)
1333{
1334	while (soltgt->soltgt_cnt >= max_ssrc) {
1335		struct nd6_prproxy_solsrc *ssrc;
1336
1337		VERIFY(soltgt->soltgt_cnt != 0);
1338		--soltgt->soltgt_cnt;
1339		ssrc = TAILQ_FIRST(&soltgt->soltgt_q);
1340		VERIFY(ssrc != NULL);
1341		TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
1342		nd6_solsrc_free(ssrc);
1343	}
1344}
1345
/*
 * Solicited target tree comparison function.
 *
 * An ordered predicate is necessary; bcmp() is not documented to return
 * an indication of order, memcmp() is, and is an ISO C99 requirement.
 *
 * The two records are ordered by a byte-wise comparison of their entire
 * soltgt_key member; the return value is whatever memcmp() yields
 * (negative, zero or positive).  No other field of the record is looked
 * at.
 */
static __inline int
soltgt_cmp(const struct nd6_prproxy_soltgt *a,
    const struct nd6_prproxy_soltgt *b)
{
	return (memcmp(&a->soltgt_key, &b->soltgt_key, sizeof (a->soltgt_key)));
}
1358