1/*
2 * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1989, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 */
61
62#include <kern/debug.h>
63#include <netinet/in_arp.h>
64#include <sys/types.h>
65#include <sys/param.h>
66#include <sys/kernel_types.h>
67#include <sys/syslog.h>
68#include <sys/systm.h>
69#include <sys/time.h>
70#include <sys/kernel.h>
71#include <sys/mbuf.h>
72#include <sys/sysctl.h>
73#include <sys/mcache.h>
74#include <sys/protosw.h>
75#include <string.h>
76#include <net/if_arp.h>
77#include <net/if_dl.h>
78#include <net/dlil.h>
79#include <net/if_types.h>
80#include <net/if_llreach.h>
81#include <net/route.h>
82#include <netinet/if_ether.h>
83#include <netinet/in_var.h>
84#include <kern/zalloc.h>
85
/*
 * Const pointer to the bytes found sdl_nlen bytes into a sockaddr_dl's
 * sdl_data, i.e. the same location LLADDR() is used for elsewhere in
 * this file to reach the link-layer address.
 */
#define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen))
/*
 * Byte-wise comparison of two sockaddr-style objects over the first
 * argument's sa_len bytes; non-zero when bcmp() finds no difference.
 * Note that a1 is evaluated twice.
 */
#define	equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)

/* NOTE(review): presumably the longest hardware address handled; confirm at use sites */
static const size_t MAX_HW_LEN = 10;
90
/*
 * Declare the parent sysctl node and create the "inet" child node under
 * which all of the ARP tunables in this file are registered.
 */
SYSCTL_DECL(_net_link_ether);
SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "");

/* timer values */
static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
static int arpt_down = 20;	/* once declared down, don't send for 20 sec */

/* Apple Hardware SUM16 checksumming */
int apple_hwcksum_tx = 1;
int apple_hwcksum_rx = 1;

/*
 * Base link-layer reachability lifetime handed to ifnet_llreach_alloc().
 * A value of 0 disables the link-layer reachability handling in
 * arp_llreach_set_reachable(), arp_llreach_reachable() and
 * arp_llreach_alloc().
 */
static int arp_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */

/* Interval, in seconds, at which arptimer() re-arms itself */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, "");

SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, "");

SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, "");

SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &apple_hwcksum_tx, 0, "");

SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &apple_hwcksum_rx, 0, "");

SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, LL_BASE_REACHABLE,
    "default ARP link-layer reachability max lifetime (in seconds)");
123
/*
 * Per-route ARP state.  arp_rtrequest() allocates one of these for an ARP
 * route entry, stores it in rt->rt_llinfo and links it onto the global
 * llinfo_arp list.
 */
struct llinfo_arp {
	/*
	 * The following are protected by rnh_lock
	 */
	LIST_ENTRY(llinfo_arp) la_le;	/* linkage on the global list */
	struct	rtentry *la_rt;		/* back pointer to the route entry */
	/*
	 * The following are protected by rt_lock
	 */
	struct	mbuf *la_hold;		/* last packet until resolved/timeout */
	struct	if_llreach *la_llreach;	/* link-layer reachability record */
	u_int64_t la_lastused;		/* last used timestamp */
	u_int32_t la_asked;		/* # of requests sent */
	u_int32_t la_persist;		/* expirable, but stays around */
};
139
140/*
141 * Synchronization notes:
142 *
 * The global list of ARP entries is stored in llinfo_arp; an entry
144 * gets inserted into the list when the route is created and gets
145 * removed from the list when it is deleted; this is done as part
146 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in arp_rtrequest().
147 *
148 * Because rnh_lock and rt_lock for the entry are held during those
149 * operations, the same locks (and thus lock ordering) must be used
150 * elsewhere to access the relevant data structure fields:
151 *
152 * la_le.{le_next,le_prev}, la_rt
153 *
154 *	- Routing lock (rnh_lock)
155 *
156 * la_hold, la_asked, la_llreach, la_lastused
157 *
158 *	- Routing entry lock (rt_lock)
159 *
160 * Due to the dependency on rt_lock, llinfo_arp has the same lifetime
161 * as the route entry itself.  When a route is deleted (RTM_DELETE),
162 * it is simply removed from the global list but the memory is not
163 * freed until the route itself is freed.
164 */
/* Head of the global list of ARP entries; walked by in_arpdrain() */
static LIST_HEAD(, llinfo_arp) llinfo_arp;

/*
 * Counters maintained by arp_rtrequest(): both are bumped when an entry
 * is allocated, and arp_inuse is decremented on RTM_DELETE.
 */
static int	arp_inuse, arp_allocated;

/* Compared against la_asked before arp_lookup_ip() sends another request */
static u_int32_t arp_maxtries = 5;
static int	useloopback = 1; /* use loopback interface for local traffic */
static int	arp_proxyall = 0;
static int	arp_sendllconflict = 0;

/* Export the tunables above under the ARP sysctl node */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &arp_maxtries, 0, "");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &useloopback, 0, "");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &arp_proxyall, 0, "");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &arp_sendllconflict, 0, "");

/* When non-zero, the LOG_DEBUG diagnostics in this file are emitted */
static int log_arp_warnings = 0;	/* Thread safe: no accumulated state */

SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings,
	CTLFLAG_RW | CTLFLAG_LOCKED,
	&log_arp_warnings, 0,
	"log arp warning messages");

static int keep_announcements = 1;	/* Thread safe: no aging of state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, keep_announcements,
	CTLFLAG_RW | CTLFLAG_LOCKED,
	&keep_announcements, 0,
	"keep arp announcements");

static int send_conflicting_probes = 1;	/* Thread safe: no accumulated state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes,
	CTLFLAG_RW | CTLFLAG_LOCKED,
	&send_conflicting_probes, 0,
	"send conflicting link-local arp probes");
201
/* Forward declarations for the file-local helpers defined below */
static errno_t arp_lookup_route(const struct in_addr *, int,
    int, route_t *, unsigned int);
static void arptimer(void *);
static struct llinfo_arp *arp_llinfo_alloc(void);
/* The next four are installed as rt_llinfo_* callbacks by arp_rtrequest() */
static void arp_llinfo_free(void *);
static void arp_llinfo_purge(struct rtentry *);
static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);

/* Link-layer reachability helpers */
static __inline void arp_llreach_use(struct llinfo_arp *);
static __inline int arp_llreach_reachable(struct llinfo_arp *);
static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *,
    unsigned int, boolean_t);

extern u_int32_t	ipv4_ll_arp_aware;

/* Set to 1 by arp_init(); arp_rtrequest() panics while it is still 0 */
static int arpinit_done;

/* Zone backing arp_llinfo_alloc() and arp_llinfo_free(); created in arp_init() */
static struct zone *llinfo_arp_zone;
#define	LLINFO_ARP_ZONE_MAX	256		/* maximum elements in zone */
#define	LLINFO_ARP_ZONE_NAME	"llinfo_arp"	/* name for zone */
223
224void
225arp_init(void)
226{
227	if (arpinit_done) {
228		log(LOG_NOTICE, "arp_init called more than once (ignored)\n");
229		return;
230	}
231
232	LIST_INIT(&llinfo_arp);
233
234	llinfo_arp_zone = zinit(sizeof (struct llinfo_arp),
235	    LLINFO_ARP_ZONE_MAX * sizeof (struct llinfo_arp), 0,
236	    LLINFO_ARP_ZONE_NAME);
237	if (llinfo_arp_zone == NULL)
238		panic("%s: failed allocating llinfo_arp_zone", __func__);
239
240	zone_change(llinfo_arp_zone, Z_EXPAND, TRUE);
241	zone_change(llinfo_arp_zone, Z_CALLERACCT, FALSE);
242
243	arpinit_done = 1;
244
245	/* start timer */
246	timeout(arptimer, (caddr_t)0, hz);
247}
248
249static struct llinfo_arp *
250arp_llinfo_alloc(void)
251{
252	return (zalloc(llinfo_arp_zone));
253}
254
255static void
256arp_llinfo_free(void *arg)
257{
258	struct llinfo_arp *la = arg;
259
260	if (la->la_le.le_next != NULL || la->la_le.le_prev != NULL) {
261		panic("%s: trying to free %p when it is in use", __func__, la);
262		/* NOTREACHED */
263	}
264
265	/* Just in case there's anything there, free it */
266	if (la->la_hold != NULL) {
267		m_freem(la->la_hold);
268		la->la_hold = NULL;
269	}
270
271	/* Purge any link-layer info caching */
272	VERIFY(la->la_rt->rt_llinfo == la);
273	if (la->la_rt->rt_llinfo_purge != NULL)
274		la->la_rt->rt_llinfo_purge(la->la_rt);
275
276	zfree(llinfo_arp_zone, la);
277}
278
279static void
280arp_llinfo_purge(struct rtentry *rt)
281{
282	struct llinfo_arp *la = rt->rt_llinfo;
283
284	RT_LOCK_ASSERT_HELD(rt);
285	VERIFY(rt->rt_llinfo_purge == arp_llinfo_purge && la != NULL);
286
287	if (la->la_llreach != NULL) {
288		RT_CONVERT_LOCK(rt);
289		ifnet_llreach_free(la->la_llreach);
290		la->la_llreach = NULL;
291	}
292	la->la_lastused = 0;
293}
294
295static void
296arp_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
297{
298	struct llinfo_arp *la = rt->rt_llinfo;
299	struct if_llreach *lr = la->la_llreach;
300
301	if (lr == NULL) {
302		bzero(ri, sizeof (*ri));
303		ri->ri_rssi = IFNET_RSSI_UNKNOWN;
304		ri->ri_lqm = IFNET_LQM_THRESH_OFF;
305		ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
306	} else {
307		IFLR_LOCK(lr);
308		/* Export to rt_reach_info structure */
309		ifnet_lr2ri(lr, ri);
310		/* Export ARP send expiration (calendar) time */
311		ri->ri_snd_expire =
312		    ifnet_llreach_up2calexp(lr, la->la_lastused);
313		IFLR_UNLOCK(lr);
314	}
315}
316
317static void
318arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
319{
320	struct llinfo_arp *la = rt->rt_llinfo;
321	struct if_llreach *lr = la->la_llreach;
322
323	if (lr == NULL) {
324		bzero(iflri, sizeof (*iflri));
325		iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
326		iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
327		iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
328	} else {
329		IFLR_LOCK(lr);
330		/* Export to ifnet_llreach_info structure */
331		ifnet_lr2iflri(lr, iflri);
332		/* Export ARP send expiration (uptime) time */
333		iflri->iflri_snd_expire =
334		    ifnet_llreach_up2upexp(lr, la->la_lastused);
335		IFLR_UNLOCK(lr);
336	}
337}
338
339void
340arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
341{
342	/* Nothing more to do if it's disabled */
343	if (arp_llreach_base == 0)
344		return;
345
346	ifnet_llreach_set_reachable(ifp, ETHERTYPE_IP, addr, alen);
347}
348
349static __inline void
350arp_llreach_use(struct llinfo_arp *la)
351{
352	if (la->la_llreach != NULL)
353		la->la_lastused = net_uptime();
354}
355
356static __inline int
357arp_llreach_reachable(struct llinfo_arp *la)
358{
359	struct if_llreach *lr;
360	const char *why = NULL;
361
362	/* Nothing more to do if it's disabled; pretend it's reachable  */
363	if (arp_llreach_base == 0)
364		return (1);
365
366	if ((lr = la->la_llreach) == NULL) {
367		/*
368		 * Link-layer reachability record isn't present for this
369		 * ARP entry; pretend it's reachable and use it as is.
370		 */
371		return (1);
372	} else if (ifnet_llreach_reachable(lr)) {
373		/*
374		 * Record is present, it's not shared with other ARP
375		 * entries and a packet has recently been received
376		 * from the remote host; consider it reachable.
377		 */
378		if (lr->lr_reqcnt == 1)
379			return (1);
380
381		/* Prime it up, if this is the first time */
382		if (la->la_lastused == 0) {
383			VERIFY(la->la_llreach != NULL);
384			arp_llreach_use(la);
385		}
386
387		/*
388		 * Record is present and shared with one or more ARP
389		 * entries, and a packet has recently been received
390		 * from the remote host.  Since it's shared by more
391		 * than one IP addresses, we can't rely on the link-
392		 * layer reachability alone; consider it reachable if
393		 * this ARP entry has been used "recently."
394		 */
395		if (ifnet_llreach_reachable_delta(lr, la->la_lastused))
396			return (1);
397
398		why = "has alias(es) and hasn't been used in a while";
399	} else {
400		why = "haven't heard from it in a while";
401	}
402
403	if (log_arp_warnings) {
404		char tmp[MAX_IPv4_STR_LEN];
405		u_int64_t now = net_uptime();
406
407		log(LOG_DEBUG, "%s%d: ARP probe(s) needed for %s; "
408		    "%s [lastused %lld, lastrcvd %lld] secs ago\n",
409		    lr->lr_ifp->if_name, lr->lr_ifp->if_unit, inet_ntop(AF_INET,
410		    &SIN(rt_key(la->la_rt))->sin_addr, tmp, sizeof (tmp)), why,
411		    (la->la_lastused ?  (int64_t)(now - la->la_lastused) : -1),
412		    (lr->lr_lastrcvd ?  (int64_t)(now - lr->lr_lastrcvd) : -1));
413
414	}
415	return (0);
416}
417
418/*
419 * Obtain a link-layer source cache entry for the sender.
420 *
421 * NOTE: This is currently only for ARP/Ethernet.
422 */
423static void
424arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr,
425    unsigned int alen, boolean_t solicited)
426{
427	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
428	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
429	if (arp_llreach_base != 0 &&
430	    rt->rt_expire != 0 && rt->rt_ifp != lo_ifp &&
431	    ifp->if_addrlen == IF_LLREACH_MAXLEN &&	/* Ethernet */
432	    alen == ifp->if_addrlen) {
433		struct llinfo_arp *la = rt->rt_llinfo;
434		struct if_llreach *lr;
435		const char *why = NULL, *type = "";
436
437		/* Become a regular mutex, just in case */
438		RT_CONVERT_LOCK(rt);
439
440		if ((lr = la->la_llreach) != NULL) {
441			type = (solicited ? "ARP reply" : "ARP announcement");
442			/*
443			 * If target has changed, create a new record;
444			 * otherwise keep existing record.
445			 */
446			IFLR_LOCK(lr);
447			if (bcmp(addr, lr->lr_key.addr, alen) != 0) {
448				IFLR_UNLOCK(lr);
449				/* Purge any link-layer info caching */
450				VERIFY(rt->rt_llinfo_purge != NULL);
451				rt->rt_llinfo_purge(rt);
452				lr = NULL;
453				why = " for different target HW address; "
454				    "using new llreach record";
455			} else {
456				lr->lr_probes = 0;	/* reset probe count */
457				IFLR_UNLOCK(lr);
458				if (solicited) {
459					why = " for same target HW address; "
460					    "keeping existing llreach record";
461				}
462			}
463		}
464
465		if (lr == NULL) {
466			lr = la->la_llreach = ifnet_llreach_alloc(ifp,
467			    ETHERTYPE_IP, addr, alen, arp_llreach_base);
468			if (lr != NULL) {
469				lr->lr_probes = 0;	/* reset probe count */
470				if (why == NULL)
471					why = "creating new llreach record";
472			}
473		}
474
475		if (log_arp_warnings && lr != NULL && why != NULL) {
476			char tmp[MAX_IPv4_STR_LEN];
477
478			log(LOG_DEBUG, "%s%d: %s%s for %s\n", ifp->if_name,
479			    ifp->if_unit, type, why, inet_ntop(AF_INET,
480			    &SIN(rt_key(rt))->sin_addr, tmp, sizeof (tmp)));
481		}
482	}
483}
484
485/*
486 * Free an arp entry.
487 */
488static void
489arptfree(struct llinfo_arp *la)
490{
491	struct rtentry *rt = la->la_rt;
492	struct sockaddr_dl *sdl;
493
494	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
495	RT_LOCK_ASSERT_HELD(rt);
496
497	if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) &&
498	    sdl->sdl_family == AF_LINK) {
499		sdl->sdl_alen = 0;
500		la->la_asked = 0;
501		rt->rt_flags &= ~RTF_REJECT;
502		RT_UNLOCK(rt);
503	} else if (la->la_persist) {
504		/*
505		 * Instead of issuing RTM_DELETE, stop this route entry
506		 * from holding an interface idle reference count; if
507		 * the route is later reused, arp_validate() will revert
508		 * this action.
509		 */
510		if (rt->rt_refcnt == 0)
511			rt_clear_idleref(rt);
512		RT_UNLOCK(rt);
513	} else {
514		/*
515		 * Safe to drop rt_lock and use rt_key, since holding
516		 * rnh_lock here prevents another thread from calling
517		 * rt_setgate() on this route.
518		 */
519		RT_UNLOCK(rt);
520		rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
521		    0, NULL);
522	}
523}
524
525void
526in_arpdrain(void *ignored_arg)
527{
528#pragma unused (ignored_arg)
529	struct llinfo_arp *la, *ola;
530	uint64_t timenow;
531
532	lck_mtx_lock(rnh_lock);
533	la = llinfo_arp.lh_first;
534	timenow = net_uptime();
535	while ((ola = la) != 0) {
536		struct rtentry *rt = la->la_rt;
537		la = la->la_le.le_next;
538		RT_LOCK(rt);
539		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
540		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
541		if (rt->rt_expire && rt->rt_expire <= timenow)
542			arptfree(ola); /* timer has expired, clear */
543		else
544			RT_UNLOCK(rt);
545	}
546	lck_mtx_unlock(rnh_lock);
547}
548
549void
550arp_validate(struct rtentry *rt)
551{
552	struct llinfo_arp *la = rt->rt_llinfo;
553
554	RT_LOCK_ASSERT_HELD(rt);
555	/*
556	 * If this is a persistent ARP entry, make it count towards the
557	 * interface idleness just like before arptfree() was called.
558	 */
559	if (la->la_persist)
560		rt_set_idleref(rt);
561}
562
563/*
564 * Timeout routine.  Age arp_tab entries periodically.
565 */
566/* ARGSUSED */
567static void
568arptimer(void *ignored_arg)
569{
570#pragma unused (ignored_arg)
571	in_arpdrain(NULL);
572	timeout(arptimer, (caddr_t)0, arpt_prune * hz);
573}
574
/*
 * Parallel to llc_rtrequest.
 *
 * Maintains the ARP link-layer state (struct llinfo_arp) attached to a
 * route entry in response to a routing request.
 *
 *	req	request being processed; only RTM_ADD, RTM_RESOLVE and
 *		RTM_DELETE are acted upon, any other value is ignored
 *	rt	route entry being added, resolved or deleted
 *	sa	unused
 *
 * Panics if arp_init() has not run.  Asserts that rnh_lock is owned and
 * that rt is locked; rt_lock is dropped and retaken around the calls to
 * dlil_send_arp() and dlil_resolve_multi().  Routes with RTF_GATEWAY set
 * are left untouched.
 */
static void
arp_rtrequest(
	int req,
	struct rtentry *rt,
	__unused struct sockaddr *sa)
{
	struct sockaddr *gate = rt->rt_gateway;
	struct llinfo_arp *la = rt->rt_llinfo;
	/* Empty AF_LINK address used as the initial gateway of cloning routes */
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}};
	uint64_t timenow;

	if (!arpinit_done) {
		panic("%s: ARP has not been initialized", __func__);
		/* NOTREACHED */
	}
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (rt->rt_flags & RTF_GATEWAY)
		return;
	timenow = net_uptime();
	switch (req) {

	case RTM_ADD:
		/*
		 * XXX: If this is a manually added route to interface
		 * such as older version of routed or gated might provide,
		 * restore cloning bit.
		 */
		if ((rt->rt_flags & RTF_HOST) == 0 && rt_mask(rt) != NULL &&
		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
			rt->rt_flags |= RTF_CLONING;
		if (rt->rt_flags & RTF_CLONING) {
			/*
			 * Case 1: This route should come from a route to iface.
			 */
			if (rt_setgate(rt, rt_key(rt),
			    (struct sockaddr *)&null_sdl) == 0) {
				/* rt_setgate() succeeded; pick up the new gateway */
				gate = rt->rt_gateway;
				SDL(gate)->sdl_type = rt->rt_ifp->if_type;
				SDL(gate)->sdl_index = rt->rt_ifp->if_index;
				/*
				 * In case we're called before 1.0 sec.
				 * has elapsed.
				 */
				rt_setexpire(rt, MAX(timenow, 1));
			}
			break;
		}
		/* Announce a new entry if requested. */
		if (rt->rt_flags & RTF_ANNOUNCE) {
			if (la != NULL)
				arp_llreach_use(la); /* Mark use timestamp */
			/* The ARP request is sent with the route unlocked */
			RT_UNLOCK(rt);
			dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
			    SDL(gate), rt_key(rt), NULL, rt_key(rt), 0);
			RT_LOCK(rt);
		}
		/*FALLTHROUGH*/
	case RTM_RESOLVE:
		/* The gateway must be an AF_LINK address of sufficient size */
		if (gate->sa_family != AF_LINK ||
		    gate->sa_len < sizeof(null_sdl)) {
		        if (log_arp_warnings)
				log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
			break;
		}
		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
		if (la != 0)
			break; /* This happens on a route change */
		/*
		 * Case 2:  This route may come from cloning, or a manual route
		 * add with a LL address.
		 */
		rt->rt_llinfo = la = arp_llinfo_alloc();
		if (la == NULL) {
			if (log_arp_warnings)
				log(LOG_DEBUG, "%s: malloc failed\n", __func__);
			break;
		}
		/* Install the llinfo callbacks implemented in this file */
		rt->rt_llinfo_get_ri = arp_llinfo_get_ri;
		rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri;
		rt->rt_llinfo_purge = arp_llinfo_purge;
		rt->rt_llinfo_free = arp_llinfo_free;

		/* Account for the new entry and link it onto the global list */
		arp_inuse++, arp_allocated++;
		Bzero(la, sizeof(*la));
		la->la_rt = rt;
		rt->rt_flags |= RTF_LLINFO;
		LIST_INSERT_HEAD(&llinfo_arp, la, la_le);

		/*
		 * This keeps the multicast addresses from showing up
		 * in `arp -a' listings as unresolved.  It's not actually
		 * functional.  Then the same for broadcast.  For IPv4
		 * link-local address, keep the entry around even after
		 * it has expired.
		 */
		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
			/* Multicast resolution is done with the route unlocked */
			RT_UNLOCK(rt);
			dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
			    sizeof(struct sockaddr_dl));
			RT_LOCK(rt);
			rt_setexpire(rt, 0);
		}
		else if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) {
			struct sockaddr_dl	*gate_ll = SDL(gate);
			size_t	broadcast_len;
			/*
			 * NOTE(review): the return value is not checked here and
			 * broadcast_len has no initializer; confirm that
			 * ifnet_llbroadcast_copy_bytes() always stores a length.
			 */
			ifnet_llbroadcast_copy_bytes(rt->rt_ifp,
			    LLADDR(gate_ll), sizeof(gate_ll->sdl_data),
			    &broadcast_len);
			gate_ll->sdl_alen = broadcast_len;
			gate_ll->sdl_family = AF_LINK;
			gate_ll->sdl_len = sizeof(struct sockaddr_dl);
			/* In case we're called before 1.0 sec. has elapsed */
			rt_setexpire(rt, MAX(timenow, 1));
		} else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
			/*
			 * The persistent bit implies that once the ARP
			 * entry has reached it expiration time, the idle
			 * reference count to the interface will be released,
			 * but the ARP entry itself stays in the routing table
			 * until it is explicitly removed.
			 */
			la->la_persist = 1;
			rt->rt_flags |= RTF_STATIC;
		}

		/* Become a regular mutex, just in case */
		RT_CONVERT_LOCK(rt);
		IFA_LOCK_SPIN(rt->rt_ifa);
		/* Is the route's destination the interface address itself? */
		if (SIN(rt_key(rt))->sin_addr.s_addr ==
		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
			IFA_UNLOCK(rt->rt_ifa);
			/*
			 * This test used to be
			 *	if (loif.if_flags & IFF_UP)
			 * It allowed local traffic to be forced through the
			 * hardware by configuring the loopback down.  However,
			 * it causes problems during network configuration
			 * for boards that can't receive packets they send.
			 * It is now necessary to clear "useloopback" and
			 * remove the route to force traffic out to the
			 * hardware.
			 */
			rt_setexpire(rt, 0);
			/* Resolve to the interface's own link-layer address */
			ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)),
			    SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
			if (useloopback) {
				if (rt->rt_ifp != lo_ifp) {
					/*
					 * Purge any link-layer info caching.
					 */
					if (rt->rt_llinfo_purge != NULL)
						rt->rt_llinfo_purge(rt);

					/*
					 * Adjust route ref count for the
					 * interfaces.
					 */
					if (rt->rt_if_ref_fn != NULL) {
						rt->rt_if_ref_fn(lo_ifp, 1);
						rt->rt_if_ref_fn(rt->rt_ifp, -1);
					}
				}
				rt->rt_ifp = lo_ifp;
			}
		} else {
			IFA_UNLOCK(rt->rt_ifa);
		}
		break;

	case RTM_DELETE:
		if (la == 0)
			break;
		arp_inuse--;
		/*
		 * Unchain it but defer the actual freeing until the route
		 * itself is to be freed.  rt->rt_llinfo still points to
		 * llinfo_arp, and likewise, la->la_rt still points to this
		 * route entry, except that RTF_LLINFO is now cleared.
		 */
		LIST_REMOVE(la, la_le);
		/* NULL links are what arp_llinfo_free() checks before freeing */
		la->la_le.le_next = NULL;
		la->la_le.le_prev = NULL;

		/*
		 * Purge any link-layer info caching.
		 */
		if (rt->rt_llinfo_purge != NULL)
			rt->rt_llinfo_purge(rt);

		rt->rt_flags &= ~RTF_LLINFO;
		/* Drop any packet still waiting for resolution */
		if (la->la_hold != NULL) {
			m_freem(la->la_hold);
			la->la_hold = NULL;
		}
	}
}
777
778/*
779 * convert hardware address to hex string for logging errors.
780 */
781static const char *
782sdl_addr_to_hex(const struct sockaddr_dl *sdl, char * orig_buf, int buflen)
783{
784	char *		buf = orig_buf;
785	int 		i;
786	const u_char *	lladdr = (u_char *)(size_t)sdl->sdl_data;
787	int			maxbytes = buflen / 3;
788
789	if (maxbytes > sdl->sdl_alen) {
790		maxbytes = sdl->sdl_alen;
791	}
792	*buf = '\0';
793	for (i = 0; i < maxbytes; i++) {
794		snprintf(buf, 3, "%02x", lladdr[i]);
795		buf += 2;
796		*buf = (i == maxbytes - 1) ? '\0' : ':';
797		buf++;
798	}
799	return (orig_buf);
800}
801
802/*
803 * arp_lookup_route will lookup the route for a given address.
804 *
805 * The address must be for a host on a local network on this interface.
806 * If the returned route is non-NULL, the route is locked and the caller
807 * is responsible for unlocking it and releasing its reference.
808 */
809static errno_t
810arp_lookup_route(const struct in_addr *addr, int create, int proxy,
811    route_t *route, unsigned int ifscope)
812{
813	struct sockaddr_inarp sin = {sizeof(sin), AF_INET, 0, {0}, {0}, 0, 0};
814	const char *why = NULL;
815	errno_t	error = 0;
816	route_t rt;
817
818	*route = NULL;
819
820	sin.sin_addr.s_addr = addr->s_addr;
821	sin.sin_other = proxy ? SIN_PROXY : 0;
822
823	/*
824	 * If the destination is a link-local address, don't
825	 * constrain the lookup (don't scope it).
826	 */
827	if (IN_LINKLOCAL(ntohl(addr->s_addr)))
828		ifscope = IFSCOPE_NONE;
829
830	rt = rtalloc1_scoped((struct sockaddr*)&sin, create, 0, ifscope);
831	if (rt == NULL)
832		return (ENETUNREACH);
833
834	RT_LOCK(rt);
835
836	if (rt->rt_flags & RTF_GATEWAY) {
837		why = "host is not on local network";
838		error = ENETUNREACH;
839	} else if (!(rt->rt_flags & RTF_LLINFO)) {
840		why = "could not allocate llinfo";
841		error = ENOMEM;
842	} else if (rt->rt_gateway->sa_family != AF_LINK) {
843		why = "gateway route is not ours";
844		error = EPROTONOSUPPORT;
845	}
846
847	if (error != 0) {
848		if (create && log_arp_warnings) {
849			char tmp[MAX_IPv4_STR_LEN];
850			log(LOG_DEBUG, "arplookup link#%d %s failed: %s\n",
851			    ifscope, inet_ntop(AF_INET, addr, tmp,
852			    sizeof (tmp)), why);
853		}
854
855		/*
856		 * If there are no references to this route, and it is
857		 * a cloned route, and not static, and ARP had created
858		 * the route, then purge it from the routing table as
859		 * it is probably bogus.
860		 */
861		if (rt->rt_refcnt == 1 &&
862		    (rt->rt_flags & (RTF_WASCLONED | RTF_STATIC)) ==
863		    RTF_WASCLONED) {
864			/*
865			 * Prevent another thread from modiying rt_key,
866			 * rt_gateway via rt_setgate() after rt_lock is
867			 * dropped by marking the route as defunct.
868			 */
869			rt->rt_flags |= RTF_CONDEMNED;
870			RT_UNLOCK(rt);
871			rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
872			    rt_mask(rt), rt->rt_flags, 0);
873			rtfree(rt);
874		} else {
875			RT_REMREF_LOCKED(rt);
876			RT_UNLOCK(rt);
877		}
878		return (error);
879	}
880
881	/*
882	 * Caller releases reference and does RT_UNLOCK(rt).
883	 */
884	*route = rt;
885	return (0);
886}
887
888/*
889 * This is the ARP pre-output routine; care must be taken to ensure that
890 * the "hint" route never gets freed via rtfree(), since the caller may
891 * have stored it inside a struct route with a reference held for that
892 * placeholder.
893 */
894errno_t
895arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
896    struct sockaddr_dl *ll_dest, size_t	ll_dest_len, route_t hint,
897    mbuf_t packet)
898{
899	route_t	route = NULL;	/* output route */
900	errno_t	result = 0;
901	struct sockaddr_dl	*gateway;
902	struct llinfo_arp	*llinfo = NULL;
903	uint64_t timenow;
904	int unreachable = 0;
905
906	if (net_dest->sin_family != AF_INET)
907		return (EAFNOSUPPORT);
908
909	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
910		return (ENETDOWN);
911
912	/*
913	 * If we were given a route, verify the route and grab the gateway
914	 */
915	if (hint != NULL) {
916		/*
917		 * Callee holds a reference on the route and returns
918		 * with the route entry locked, upon success.
919		 */
920		result = route_to_gwroute((const struct sockaddr *)
921		    net_dest, hint, &route);
922		if (result != 0)
923			return (result);
924		if (route != NULL)
925			RT_LOCK_ASSERT_HELD(route);
926	}
927
928	if (packet->m_flags & M_BCAST) {
929		size_t	broadcast_len;
930		bzero(ll_dest, ll_dest_len);
931		result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest),
932		    ll_dest_len - offsetof(struct sockaddr_dl, sdl_data),
933		    &broadcast_len);
934		if (result == 0) {
935			ll_dest->sdl_alen = broadcast_len;
936			ll_dest->sdl_family = AF_LINK;
937			ll_dest->sdl_len = sizeof(struct sockaddr_dl);
938		}
939		goto release;
940	}
941	if (packet->m_flags & M_MCAST) {
942		if (route != NULL)
943			RT_UNLOCK(route);
944		result = dlil_resolve_multi(ifp,
945		    (const struct sockaddr*)net_dest,
946		    (struct sockaddr*)ll_dest, ll_dest_len);
947		if (route != NULL)
948			RT_LOCK(route);
949		goto release;
950	}
951
952	/*
953	 * If we didn't find a route, or the route doesn't have
954	 * link layer information, trigger the creation of the
955	 * route and link layer information.
956	 */
957	if (route == NULL || route->rt_llinfo == NULL) {
958		/* Clean up now while we can */
959		if (route != NULL) {
960			if (route == hint) {
961				RT_REMREF_LOCKED(route);
962				RT_UNLOCK(route);
963			} else {
964				RT_UNLOCK(route);
965				rtfree(route);
966			}
967		}
968		/*
969		 * Callee holds a reference on the route and returns
970		 * with the route entry locked, upon success.
971		 */
972		result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route,
973		    ifp->if_index);
974		if (result == 0)
975			RT_LOCK_ASSERT_HELD(route);
976	}
977
978	if (result || route == NULL || (llinfo = route->rt_llinfo) == NULL) {
979		char	tmp[MAX_IPv4_STR_LEN];
980
981		/* In case result is 0 but no route, return an error */
982		if (result == 0)
983			result = EHOSTUNREACH;
984
985		if (log_arp_warnings &&
986		    route != NULL && route->rt_llinfo == NULL)
987			log(LOG_DEBUG, "arpresolve: can't allocate llinfo "
988			    "for %s\n", inet_ntop(AF_INET, &net_dest->sin_addr,
989			    tmp, sizeof(tmp)));
990		goto release;
991	}
992
993	/*
994	 * Now that we have the right route, is it filled in?
995	 */
996	gateway = SDL(route->rt_gateway);
997	timenow = net_uptime();
998	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
999	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
1000	if ((route->rt_expire == 0 ||
1001	    route->rt_expire > timenow) && gateway != NULL &&
1002	    gateway->sdl_family == AF_LINK && gateway->sdl_alen != 0 &&
1003	    !(unreachable = !arp_llreach_reachable(llinfo))) {
1004		bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len));
1005		result = 0;
1006		arp_llreach_use(llinfo);	/* Mark use timestamp */
1007		goto release;
1008	} else if (unreachable) {
1009		/*
1010		 * Discard existing answer in case we need to probe.
1011		 */
1012		gateway->sdl_alen = 0;
1013	}
1014
1015	if (ifp->if_flags & IFF_NOARP) {
1016		result = ENOTSUP;
1017		goto release;
1018	}
1019
1020	/*
1021	 * Route wasn't complete/valid. We need to arp.
1022	 */
1023	if (packet != NULL) {
1024		if (llinfo->la_hold != NULL)
1025			m_freem(llinfo->la_hold);
1026		llinfo->la_hold = packet;
1027	}
1028
1029	if (route->rt_expire) {
1030		route->rt_flags &= ~RTF_REJECT;
1031		if (llinfo->la_asked == 0 ||
1032		    route->rt_expire != timenow) {
1033			rt_setexpire(route, timenow);
1034			if (llinfo->la_asked++ < arp_maxtries) {
1035				struct ifaddr *rt_ifa = route->rt_ifa;
1036				struct sockaddr *sa;
1037				u_int32_t rtflags;
1038
1039				/* Become a regular mutex, just in case */
1040				RT_CONVERT_LOCK(route);
1041				/* Update probe count, if applicable */
1042				if (llinfo->la_llreach != NULL) {
1043					IFLR_LOCK_SPIN(llinfo->la_llreach);
1044					llinfo->la_llreach->lr_probes++;
1045					IFLR_UNLOCK(llinfo->la_llreach);
1046				}
1047				IFA_LOCK_SPIN(rt_ifa);
1048				IFA_ADDREF_LOCKED(rt_ifa);
1049				sa = rt_ifa->ifa_addr;
1050				IFA_UNLOCK(rt_ifa);
1051				arp_llreach_use(llinfo); /* Mark use timestamp */
1052				rtflags = route->rt_flags;
1053				RT_UNLOCK(route);
1054				dlil_send_arp(ifp, ARPOP_REQUEST, NULL,
1055				    sa, NULL, (const struct sockaddr*)net_dest,
1056				    rtflags);
1057				IFA_REMREF(rt_ifa);
1058				RT_LOCK(route);
1059				result = EJUSTRETURN;
1060				goto release;
1061			} else {
1062				route->rt_flags |= RTF_REJECT;
1063				rt_setexpire(route, rt_expiry(route,
1064				    route->rt_expire, arpt_down));
1065				llinfo->la_asked = 0;
1066				/*
1067				 * Clear la_hold; don't free the packet since
1068				 * we're not returning EJUSTRETURN; the caller
1069				 * will handle the freeing.
1070				 */
1071				llinfo->la_hold = NULL;
1072				result = EHOSTUNREACH;
1073				goto release;
1074			}
1075		}
1076	}
1077
1078	/* The packet is now held inside la_hold (can "packet" be NULL?) */
1079	result = EJUSTRETURN;
1080
1081release:
1082	if (route != NULL) {
1083		if (route == hint) {
1084			RT_REMREF_LOCKED(route);
1085			RT_UNLOCK(route);
1086		} else {
1087			RT_UNLOCK(route);
1088			rtfree(route);
1089		}
1090	}
1091	return (result);
1092}
1093
/*
 * Process a received IPv4 ARP packet on an interface.
 *
 * The function proceeds in these stages:
 *   1. Pick a local IPv4 address (best_ia) to act for.  Searched in order:
 *      an address equal to target_ip, an address equal to sender_ip, a
 *      bridge-member address (only when ifp is itself a bridge), and
 *      finally the first AF_INET address configured on ifp.
 *   2. Ignore packets whose sender hardware address equals ifp's own.
 *   3. When not bridged and the sender claims best_ia's address, log the
 *      duplicate and post a KEV_INET_ARPCOLLISION kernel event.
 *   4. Look up (and possibly create) the ARP route for sender_ip, store
 *      the sender's hardware address in it, refresh its expiration, and
 *      transmit any packet held on the entry.
 *   5. For ARPOP_REQUEST only, send an ARPOP_REPLY, either for our own
 *      address or on behalf of a proxied target.
 *
 * ifp		interface the packet was received on
 * arpop	ARP opcode; only ARPOP_REQUEST leads to a reply
 * sender_hw	link-layer address of the sender
 * sender_ip	IPv4 address of the sender
 * target_ip	IPv4 address the packet asks about
 *
 * Always returns 0; every early exit simply drops the packet.
 */
1094errno_t
1095arp_ip_handle_input(
1096	ifnet_t		ifp,
1097	u_short		arpop,
1098	const struct sockaddr_dl *sender_hw,
1099	const struct sockaddr_in *sender_ip,
1100	const struct sockaddr_in *target_ip)
1101{
1102	char	ipv4str[MAX_IPv4_STR_LEN];
1103	struct sockaddr_dl proxied;
1104	struct sockaddr_dl *gateway, *target_hw = NULL;
1105	struct ifaddr *ifa;
1106	struct in_ifaddr *ia;
1107	struct in_ifaddr *best_ia = NULL;
1108	struct sockaddr_in best_ia_sin;
1109	route_t	route = NULL;
1110	char buf[3 * MAX_HW_LEN]; // enough for MAX_HW_LEN byte hw address
1111	struct llinfo_arp *llinfo;
1112	errno_t	error;
1113	int created_announcement = 0;
1114	int bridged = 0, is_bridge = 0;
1115
1116	/* Do not respond to requests for 0.0.0.0 */
1117	if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST)
1118		goto done;
1119
	/*
	 * bridged: ifp has a non-NULL if_bridge (it is a bridge member).
	 * is_bridge: ifp is itself of type IFT_BRIDGE.
	 */
1120	if (ifp->if_bridge)
1121		bridged = 1;
1122	if (ifp->if_type == IFT_BRIDGE)
1123		is_bridge = 1;
1124
1125	/*
1126	 * Determine if this ARP is for us
1127	 * For a bridge, we want to check the address irrespective
1128	 * of the receive interface.
1129	 */
1130	lck_rw_lock_shared(in_ifaddr_rwlock);
1131	TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
1132		IFA_LOCK_SPIN(&ia->ia_ifa);
1133		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
1134		    (ia->ia_ifp == ifp)) &&
1135		    ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
1136			best_ia = ia;
1137			best_ia_sin = best_ia->ia_addr;
1138			IFA_ADDREF_LOCKED(&ia->ia_ifa);
1139			IFA_UNLOCK(&ia->ia_ifa);
1140			lck_rw_done(in_ifaddr_rwlock);
1141			goto match;
1142		}
1143		IFA_UNLOCK(&ia->ia_ifa);
1144	}
1145
	/*
	 * The target is not one of our addresses.  Look for the sender's
	 * address among ours instead, so that the conflict check after
	 * match: can detect another host using it.
	 */
1146	TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
1147		IFA_LOCK_SPIN(&ia->ia_ifa);
1148		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
1149		    (ia->ia_ifp == ifp)) &&
1150		    ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1151			best_ia = ia;
1152			best_ia_sin = best_ia->ia_addr;
1153			IFA_ADDREF_LOCKED(&ia->ia_ifa);
1154			IFA_UNLOCK(&ia->ia_ifa);
1155			lck_rw_done(in_ifaddr_rwlock);
1156			goto match;
1157		}
1158		IFA_UNLOCK(&ia->ia_ifa);
1159	}
1160
	/*
	 * True when ia's interface has if_bridge equal to ifp's if_softc,
	 * has the same link-layer address as ifp (compared over
	 * ifp->if_addrlen bytes), and ia's IPv4 address equals addr.
	 * The arguments are expanded without parentheses, so only simple
	 * expressions should be passed.
	 */
1161#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)					\
1162	(ia->ia_ifp->if_bridge == ifp->if_softc &&				\
1163	!bcmp(ifnet_lladdr(ia->ia_ifp), ifnet_lladdr(ifp), ifp->if_addrlen) &&	\
1164	addr == ia->ia_addr.sin_addr.s_addr)
1165	/*
1166	 * Check the case when bridge shares its MAC address with
1167	 * some of its children, so packets are claimed by bridge
1168	 * itself (bridge_input() does it first), but they are really
1169	 * meant to be destined to the bridge member.
1170	 */
1171	if (is_bridge) {
1172		TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr),
1173		    ia_hash) {
1174			IFA_LOCK_SPIN(&ia->ia_ifa);
1175			if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr,
1176			    ifp, ia)) {
1177				ifp = ia->ia_ifp;
1178				best_ia = ia;
1179				best_ia_sin = best_ia->ia_addr;
1180				IFA_ADDREF_LOCKED(&ia->ia_ifa);
1181				IFA_UNLOCK(&ia->ia_ifa);
1182				lck_rw_done(in_ifaddr_rwlock);
1183				goto match;
1184			}
1185			IFA_UNLOCK(&ia->ia_ifa);
1186		}
1187	}
1188	lck_rw_done(in_ifaddr_rwlock);
1189
1190	/*
1191	 * No match, use the first inet address on the receive interface
1192	 * as a dummy address for the rest of the function; we may be
1193	 * proxying for another address.
1194	 */
1195	ifnet_lock_shared(ifp);
1196	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1197		IFA_LOCK_SPIN(ifa);
1198		if (ifa->ifa_addr->sa_family != AF_INET) {
1199			IFA_UNLOCK(ifa);
1200			continue;
1201		}
1202		best_ia = (struct in_ifaddr *)ifa;
1203		best_ia_sin = best_ia->ia_addr;
1204		IFA_ADDREF_LOCKED(ifa);
1205		IFA_UNLOCK(ifa);
1206		ifnet_lock_done(ifp);
1207		goto match;
1208	}
1209	ifnet_lock_done(ifp);
1210
1211	/*
1212	 * If we're not a bridge member, or if we are but there's no
1213	 * IPv4 address to use for the interface, drop the packet.
1214	 */
	/*
	 * NOTE(review): every successful search above jumps straight to
	 * match:, so best_ia is still NULL when control reaches this test
	 * and the packet is always dropped here.
	 */
1215	if (!bridged || best_ia == NULL)
1216		goto done;
1217
1218match:
1219	/* If the packet is from this interface, ignore the packet */
1220	if (!bcmp(CONST_LLADDR(sender_hw), ifnet_lladdr(ifp), sender_hw->sdl_alen)) {
1221		goto done;
1222	}
1223
1224	/* Check for a conflict */
1225	if (!bridged && sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) {
1226		struct kev_msg        ev_msg;
1227		struct kev_in_collision	*in_collision;
1228		u_char	storage[sizeof(struct kev_in_collision) + MAX_HW_LEN];
1229		bzero(&ev_msg, sizeof(struct kev_msg));
1230		bzero(storage, (sizeof(struct kev_in_collision) + MAX_HW_LEN));
1231		in_collision = (struct kev_in_collision*)(void *)storage;
1232		log(LOG_ERR, "%s%d duplicate IP address %s sent from address %s\n",
1233			ifp->if_name, ifp->if_unit,
1234			inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, sizeof(ipv4str)),
1235			sdl_addr_to_hex(sender_hw, buf, sizeof(buf)));
1236
1237		/* Send a kernel event so anyone can learn of the conflict */
1238		in_collision->link_data.if_family = ifp->if_family;
1239		in_collision->link_data.if_unit = ifp->if_unit;
1240		strncpy(&in_collision->link_data.if_name[0], ifp->if_name, IFNAMSIZ);
1241		in_collision->ia_ipaddr = sender_ip->sin_addr;
1242		in_collision->hw_len = sender_hw->sdl_alen < MAX_HW_LEN ? sender_hw->sdl_alen : MAX_HW_LEN;
1243		bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr, in_collision->hw_len);
1244		ev_msg.vendor_code = KEV_VENDOR_APPLE;
1245		ev_msg.kev_class = KEV_NETWORK_CLASS;
1246		ev_msg.kev_subclass = KEV_INET_SUBCLASS;
1247		ev_msg.event_code = KEV_INET_ARPCOLLISION;
1248		ev_msg.dv[0].data_ptr = in_collision;
1249		ev_msg.dv[0].data_length = sizeof(struct kev_in_collision) + in_collision->hw_len;
1250		ev_msg.dv[1].data_length = 0;
1251		kev_post_msg(&ev_msg);
1252
		/*
		 * The sender is using our address: skip the route update
		 * below (its hardware address is not recorded) but still
		 * run the reply logic.
		 */
1253		goto respond;
1254	}
1255
1256	/*
1257	 * Look up the routing entry. If it doesn't exist and we are the
1258	 * target, and the sender isn't 0.0.0.0, go ahead and create one.
1259	 * Callee holds a reference on the route and returns with the route
1260	 * entry locked, upon success.
1261	 */
1262	error = arp_lookup_route(&sender_ip->sin_addr,
1263	    (target_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr &&
1264	    sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index);
1265
1266	if (error == 0)
1267		RT_LOCK_ASSERT_HELD(route);
1268
1269	if (error || route == 0 || route->rt_gateway == 0) {
1270		if (arpop != ARPOP_REQUEST) {
1271			goto respond;
1272		}
1273		if (arp_sendllconflict
1274		    && send_conflicting_probes != 0
1275		    && (ifp->if_eflags & IFEF_ARPLL) != 0
1276		    && IN_LINKLOCAL(ntohl(target_ip->sin_addr.s_addr))
1277		    && sender_ip->sin_addr.s_addr == 0) {
1278			/*
1279			 * Verify this ARP probe doesn't conflict with an IPv4LL we know of
1280			 * on another interface.
1281			 */
1282			if (route != NULL) {
1283				RT_REMREF_LOCKED(route);
1284				RT_UNLOCK(route);
1285				route = NULL;
1286			}
1287			/*
1288			 * Callee holds a reference on the route and returns
1289			 * with the route entry locked, upon success.
1290			 */
1291			error = arp_lookup_route(&target_ip->sin_addr, 0, 0,
1292			    &route, ifp->if_index);
1293
1294			if (error == 0)
1295				RT_LOCK_ASSERT_HELD(route);
1296
1297			if (error == 0 && route && route->rt_gateway) {
1298				gateway = SDL(route->rt_gateway);
1299				if (route->rt_ifp != ifp && gateway->sdl_alen != 0
1300				    && (gateway->sdl_alen != sender_hw->sdl_alen
1301					|| bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw),
1302						gateway->sdl_alen) != 0)) {
1303					/*
1304					 * A node is probing for an IPv4LL we know exists on a
1305					 * different interface. We respond with a conflicting probe
1306					 * to force the new device to pick a different IPv4LL
1307					 * address.
1308					 */
1309					if (log_arp_warnings) {
1310					    log(LOG_INFO,
1311						"arp: %s on %s%d sent probe for %s, already on %s%d\n",
1312						sdl_addr_to_hex(sender_hw, buf, sizeof(buf)),
1313						ifp->if_name, ifp->if_unit,
1314						inet_ntop(AF_INET, &target_ip->sin_addr, ipv4str,
1315								  sizeof(ipv4str)),
1316						route->rt_ifp->if_name, route->rt_ifp->if_unit);
1317					    log(LOG_INFO,
1318						"arp: sending conflicting probe to %s on %s%d\n",
1319						sdl_addr_to_hex(sender_hw, buf, sizeof(buf)),
1320						ifp->if_name, ifp->if_unit);
1321					}
1322					/* Mark use timestamp */
1323					if (route->rt_llinfo != NULL)
1324						arp_llreach_use(route->rt_llinfo);
1325					/* We're done with the route */
1326					RT_REMREF_LOCKED(route);
1327					RT_UNLOCK(route);
1328					route = NULL;
1329					/*
1330					 * Send a conservative unicast "ARP probe".
1331					 * This should force the other device to pick a new number.
1332					 * This will not force the device to pick a new number if the device
1333					 * has already assigned that number.
1334					 * This will not imply to the device that we own that address.
1335					 * The link address is always present; it's never freed.
1336					 */
1337					ifnet_lock_shared(ifp);
1338					ifa = ifp->if_lladdr;
1339					IFA_ADDREF(ifa);
1340					ifnet_lock_done(ifp);
1341					dlil_send_arp_internal(ifp, ARPOP_REQUEST,
1342						SDL(ifa->ifa_addr),
1343						(const struct sockaddr*)sender_ip, sender_hw,
1344						(const struct sockaddr*)target_ip);
1345					IFA_REMREF(ifa);
1346					ifa = NULL;
1347				}
1348			}
1349			goto respond;
1350		} else if (keep_announcements != 0
1351			   && target_ip->sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1352			/* don't create entry if link-local address and link-local is disabled */
1353			if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr))
1354			    || (ifp->if_eflags & IFEF_ARPLL) != 0) {
1355				if (route != NULL) {
1356					RT_REMREF_LOCKED(route);
1357					RT_UNLOCK(route);
1358					route = NULL;
1359				}
1360				/*
1361				 * Callee holds a reference on the route and
1362				 * returns with the route entry locked, upon
1363				 * success.
1364				 */
1365				error = arp_lookup_route(&sender_ip->sin_addr,
1366				    1, 0, &route, ifp->if_index);
1367
1368				if (error == 0)
1369					RT_LOCK_ASSERT_HELD(route);
1370
1371				if (error == 0 && route != NULL && route->rt_gateway != NULL) {
1372					created_announcement = 1;
1373				}
1374			}
1375			if (created_announcement == 0) {
1376				goto respond;
1377			}
1378		} else {
1379			goto respond;
1380		}
1381	}
1382
1383	RT_LOCK_ASSERT_HELD(route);
1384	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
1385	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
1386	gateway = SDL(route->rt_gateway);
1387	if (!bridged && route->rt_ifp != ifp) {
1388		if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || (ifp->if_eflags & IFEF_ARPLL) == 0) {
1389			if (log_arp_warnings)
1390				log(LOG_ERR, "arp: %s is on %s%d but got reply from %s on %s%d\n",
1391					inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
1392							  sizeof(ipv4str)),
1393					route->rt_ifp->if_name,
1394					route->rt_ifp->if_unit,
1395					sdl_addr_to_hex(sender_hw, buf, sizeof(buf)),
1396					ifp->if_name, ifp->if_unit);
1397			goto respond;
1398		}
1399		else {
1400			/* Don't change a permanent address */
1401			if (route->rt_expire == 0) {
1402				goto respond;
1403			}
1404
1405			/*
1406			 * We're about to check and/or change the route's ifp
1407			 * and ifa, so do the lock dance: drop rt_lock, hold
1408			 * rnh_lock and re-hold rt_lock to avoid violating the
1409			 * lock ordering.  We have an extra reference on the
1410			 * route, so it won't go away while we do this.
1411			 */
1412			RT_UNLOCK(route);
1413			lck_mtx_lock(rnh_lock);
1414			RT_LOCK(route);
1415			/*
1416			 * Don't change the cloned route away from the
1417			 * parent's interface if the address did resolve
1418			 * or if the route is defunct.  rt_ifp on both
1419			 * the parent and the clone can now be freely
1420			 * accessed now that we have acquired rnh_lock.
1421			 */
1422			gateway = SDL(route->rt_gateway);
1423			if ((gateway->sdl_alen != 0 && route->rt_parent &&
1424			    route->rt_parent->rt_ifp == route->rt_ifp) ||
1425			    (route->rt_flags & RTF_CONDEMNED)) {
1426				RT_REMREF_LOCKED(route);
1427				RT_UNLOCK(route);
1428				route = NULL;
1429				lck_mtx_unlock(rnh_lock);
1430				goto respond;
1431			}
1432			if (route->rt_ifp != ifp) {
1433				/*
1434				 * Purge any link-layer info caching.
1435				 */
1436				if (route->rt_llinfo_purge != NULL)
1437					route->rt_llinfo_purge(route);
1438
1439				/* Adjust route ref count for the interfaces */
1440				if (route->rt_if_ref_fn != NULL) {
1441					route->rt_if_ref_fn(ifp, 1);
1442					route->rt_if_ref_fn(route->rt_ifp, -1);
1443				}
1444			}
1445			/* Change the interface when the existing route is on */
1446			route->rt_ifp = ifp;
1447			rtsetifa(route, &best_ia->ia_ifa);
1448			gateway->sdl_index = ifp->if_index;
			/*
			 * Undo the lock dance: drop rt_lock, drop rnh_lock,
			 * then re-take rt_lock alone for the remainder of
			 * the function.
			 */
1449			RT_UNLOCK(route);
1450			lck_mtx_unlock(rnh_lock);
1451			RT_LOCK(route);
1452			/* Don't bother if the route is down */
1453			if (!(route->rt_flags & RTF_UP))
1454				goto respond;
1455			/* Refresh gateway pointer */
1456			gateway = SDL(route->rt_gateway);
1457		}
1458		RT_LOCK_ASSERT_HELD(route);
1459	}
1460
1461	if (gateway->sdl_alen && bcmp(LLADDR(gateway), CONST_LLADDR(sender_hw), gateway->sdl_alen)) {
1462		if (route->rt_expire && log_arp_warnings) {
1463			char buf2[3 * MAX_HW_LEN];
1464			log(LOG_INFO, "arp: %s moved from %s to %s on %s%d\n",
1465			    inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
1466			    sizeof(ipv4str)),
1467			    sdl_addr_to_hex(gateway, buf, sizeof(buf)),
1468			    sdl_addr_to_hex(sender_hw, buf2, sizeof(buf2)),
1469			    ifp->if_name, ifp->if_unit);
1470		}
1471		else if (route->rt_expire == 0) {
1472			if (log_arp_warnings) {
1473				log(LOG_ERR, "arp: %s attempts to modify "
1474				    "permanent entry for %s on %s%d\n",
1475				    sdl_addr_to_hex(sender_hw, buf,
1476				    sizeof(buf)),
1477				    inet_ntop(AF_INET, &sender_ip->sin_addr,
1478				    ipv4str, sizeof(ipv4str)),
1479				    ifp->if_name, ifp->if_unit);
1480			}
1481			goto respond;
1482		}
1483	}
1484
1485	/* Copy the sender hardware address in to the route's gateway address */
1486	gateway->sdl_alen = sender_hw->sdl_alen;
1487	bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen);
1488
1489	/* Update the expire time for the route and clear the reject flag */
1490	if (route->rt_expire) {
1491		uint64_t timenow;
1492
1493		timenow = net_uptime();
1494		rt_setexpire(route,
1495		    rt_expiry(route, timenow, arpt_keep));
1496	}
1497	route->rt_flags &= ~RTF_REJECT;
1498
1499	/* cache the gateway (sender HW) address */
1500	arp_llreach_alloc(route, ifp, LLADDR(gateway), gateway->sdl_alen,
1501	    (arpop == ARPOP_REPLY));
1502
1503	/* update the llinfo, send a queued packet if there is one */
1504	llinfo = route->rt_llinfo;
1505	llinfo->la_asked = 0;
1506	if (llinfo->la_hold) {
1507		struct mbuf *m0;
1508		m0 = llinfo->la_hold;
1509		llinfo->la_hold = NULL;
1510
		/*
		 * Transmit the held packet with the route unlocked.  The
		 * reference is then dropped without the lock held, and
		 * route is cleared so the cleanup under respond: is skipped.
		 */
1511		RT_UNLOCK(route);
1512		dlil_output(ifp, PF_INET, m0, (caddr_t)route, rt_key(route), 0, NULL);
1513		RT_REMREF(route);
1514		route = NULL;
1515	}
1516
1517respond:
1518	if (route != NULL) {
1519		/* Mark use timestamp if we're going to send a reply */
1520		if (arpop == ARPOP_REQUEST && route->rt_llinfo != NULL)
1521			arp_llreach_use(route->rt_llinfo);
1522		RT_REMREF_LOCKED(route);
1523		RT_UNLOCK(route);
1524		route = NULL;
1525	}
1526
1527	if (arpop != ARPOP_REQUEST)
1528		goto done;
1529
1530	/* If we are not the target, check if we should proxy */
1531	if (target_ip->sin_addr.s_addr != best_ia_sin.sin_addr.s_addr) {
1532		/*
1533		 * Find a proxy route; callee holds a reference on the
1534		 * route and returns with the route entry locked, upon
1535		 * success.
1536		 */
1537		error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY,
1538		    &route, ifp->if_index);
1539
1540		if (error == 0) {
1541			RT_LOCK_ASSERT_HELD(route);
1542			/*
1543			 * Return proxied ARP replies only on the interface
1544			 * or bridge cluster where this network resides.
1545			 * Otherwise we may conflict with the host we are
1546			 * proxying for.
1547			 */
1548			if (route->rt_ifp != ifp &&
1549				(route->rt_ifp->if_bridge != ifp->if_bridge ||
1550				 ifp->if_bridge == NULL)) {
1551					RT_REMREF_LOCKED(route);
1552					RT_UNLOCK(route);
1553					goto done;
1554				}
			/*
			 * Take a by-value copy: proxied is handed to
			 * dlil_send_arp() below, after the route has been
			 * unlocked and its reference dropped.
			 */
1555			proxied = *SDL(route->rt_gateway);
1556			target_hw = &proxied;
1557		} else {
1558			/*
1559			 * We don't have a route entry indicating we should
1560			 * use proxy.  If we aren't supposed to proxy all,
1561			 * we are done.
1562			 */
1563			if (!arp_proxyall)
1564				goto done;
1565
1566			/*
1567			 * See if we have a route to the target ip before
1568			 * we proxy it.
1569			 */
1570			route = rtalloc1_scoped((struct sockaddr *)
1571			    (size_t)target_ip, 0, 0, ifp->if_index);
1572			if (!route)
1573				goto done;
1574
1575			/*
1576			 * Don't proxy for hosts already on the same interface.
1577			 */
1578			RT_LOCK(route);
1579			if (route->rt_ifp == ifp) {
1580				RT_UNLOCK(route);
1581				rtfree(route);
1582				goto done;
1583			}
1584		}
1585		/* Mark use timestamp */
1586		if (route->rt_llinfo != NULL)
1587			arp_llreach_use(route->rt_llinfo);
1588		RT_REMREF_LOCKED(route);
1589		RT_UNLOCK(route);
1590	}
1591
	/*
	 * target_hw is NULL unless a proxy route supplied an address above.
	 * NOTE(review): presumably dlil_send_arp() then fills in the
	 * interface's own link-layer address -- confirm against its
	 * implementation.
	 */
1592	dlil_send_arp(ifp, ARPOP_REPLY,
1593	    target_hw, (const struct sockaddr*)target_ip,
1594	    sender_hw, (const struct sockaddr*)sender_ip, 0);
1595
1596done:
	/* Drop the reference taken on best_ia when it was selected. */
1597	if (best_ia != NULL)
1598		IFA_REMREF(&best_ia->ia_ifa);
1599	return 0;
1600}
1601
1602void
1603arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
1604{
1605	struct sockaddr *sa;
1606
1607	IFA_LOCK(ifa);
1608	ifa->ifa_rtrequest = arp_rtrequest;
1609	ifa->ifa_flags |= RTF_CLONING;
1610	sa = ifa->ifa_addr;
1611	IFA_UNLOCK(ifa);
1612	dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa, 0);
1613}
1614