1/*
2 * Copyright (c) 2004-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1989, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 */
61
62#include <kern/debug.h>
63#include <netinet/in_arp.h>
64#include <sys/types.h>
65#include <sys/param.h>
66#include <sys/kernel_types.h>
67#include <sys/syslog.h>
68#include <sys/systm.h>
69#include <sys/time.h>
70#include <sys/kernel.h>
71#include <sys/mbuf.h>
72#include <sys/sysctl.h>
73#include <sys/mcache.h>
74#include <sys/protosw.h>
75#include <string.h>
76#include <net/if_arp.h>
77#include <net/if_dl.h>
78#include <net/dlil.h>
79#include <net/if_types.h>
80#include <net/if_llreach.h>
81#include <net/route.h>
82
83#include <netinet/if_ether.h>
84#include <netinet/in_var.h>
85#include <kern/zalloc.h>
86
/*
 * Const-qualified accessor for a struct sockaddr_dl: yields a
 * (const u_char *) pointing at sdl_data advanced by sdl_nlen bytes.
 */
#define	CONST_LLADDR(s)	((const u_char*)((s)->sdl_data + (s)->sdl_nlen))

/*
 * NOTE(review): presumably the largest hardware address length (in bytes)
 * handled by this file; no user is visible in this portion -- confirm.
 */
static const size_t MAX_HW_LEN = 10;
90
91/*
92 * Synchronization notes:
93 *
 * The global list of ARP entries is stored in llinfo_arp; an entry
95 * gets inserted into the list when the route is created and gets
96 * removed from the list when it is deleted; this is done as part
97 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in arp_rtrequest().
98 *
99 * Because rnh_lock and rt_lock for the entry are held during those
100 * operations, the same locks (and thus lock ordering) must be used
101 * elsewhere to access the relevant data structure fields:
102 *
103 * la_le.{le_next,le_prev}, la_rt
104 *
105 *	- Routing lock (rnh_lock)
106 *
107 * la_hold, la_asked, la_llreach, la_lastused
108 *
109 *	- Routing entry lock (rt_lock)
110 *
111 * Due to the dependency on rt_lock, llinfo_arp has the same lifetime
112 * as the route entry itself.  When a route is deleted (RTM_DELETE),
113 * it is simply removed from the global list but the memory is not
114 * freed until the route itself is freed.
115 */
/*
 * Per-route ARP state.  One instance is attached to a route through
 * rt->rt_llinfo and linked on the global llinfo_arp list while the route
 * is installed (see arp_rtrequest()).
 */
struct llinfo_arp {
	/*
	 * The following are protected by rnh_lock
	 */
	LIST_ENTRY(llinfo_arp) la_le;	/* linkage on the global llinfo_arp list */
	struct	rtentry *la_rt;		/* back pointer to the owning route */
	/*
	 * The following are protected by rt_lock
	 */
	struct	mbuf *la_hold;		/* last packet until resolved/timeout */
	struct	if_llreach *la_llreach;	/* link-layer reachability record */
	u_int64_t la_lastused;		/* last used timestamp (net_uptime(); 0 if unset) */
	u_int32_t la_asked;		/* # of requests sent */
	u_int32_t la_maxtries;		/* retry limit */
};
/* Head of the global list of ARP entries; walked under rnh_lock */
static LIST_HEAD(, llinfo_arp) llinfo_arp;
132
/* Periodic aging timer state and entry points */
static int arp_timeout_run;		/* arp_timeout is scheduled to run */
static void arp_timeout(void *);
static void arp_sched_timeout(struct timeval *);

/* Entry aging/removal, route lookup and statistics export */
static void arptfree(struct llinfo_arp *, void *);
static errno_t arp_lookup_route(const struct in_addr *, int,
    int, route_t *, unsigned int);
static int arp_getstat SYSCTL_HANDLER_ARGS;

/* llinfo_arp allocation and the rt_llinfo_* callbacks installed on routes */
static struct llinfo_arp *arp_llinfo_alloc(int);
static void arp_llinfo_free(void *);
static void arp_llinfo_purge(struct rtentry *);
static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);

/* Link-layer reachability helpers */
static __inline void arp_llreach_use(struct llinfo_arp *);
static __inline int arp_llreach_reachable(struct llinfo_arp *);
static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *,
    unsigned int, boolean_t);

/* Converts a timeval into the tick count handed to timeout() */
extern int tvtohz(struct timeval *);

/* Set to 1 by arp_init(); VERIFY'd by arp_init() and arp_rtrequest() */
static int arpinit_done;
156
/*
 * Tunables and statistics exported under the net.link.ether.inet sysctl
 * node, followed by the zone used for llinfo_arp allocations.
 */
SYSCTL_DECL(_net_link_ether);
SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "");

/* timer values */
static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, "");

static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, "");

static int arpt_down = 20;	/* once declared down, don't send for 20 sec */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, "");

/* A value of 0 disables the link-layer reachability logic in this file */
static int arp_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, LL_BASE_REACHABLE,
	"default ARP link-layer reachability max lifetime (in seconds)");

#define	ARP_UNICAST_LIMIT 5	/* # of probes until ARP refresh broadcast */
static u_int32_t arp_unicast_lim = ARP_UNICAST_LIMIT;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_unicast_lim,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arp_unicast_lim, ARP_UNICAST_LIMIT,
	"number of unicast ARP refresh probes before using broadcast");

/* Default la_maxtries for new entries (see arp_rtrequest()) */
static u_int32_t arp_maxtries = 5;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxtries, 0, "");

static int useloopback = 1;	/* use loopback interface for local traffic */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback,
	CTLFLAG_RW | CTLFLAG_LOCKED, &useloopback, 0, "");

/* NOTE(review): consumer not visible in this portion of the file */
static int arp_proxyall = 0;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arp_proxyall, 0, "");

/* NOTE(review): consumer not visible in this portion of the file */
static int arp_sendllconflict = 0;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arp_sendllconflict, 0, "");

static int log_arp_warnings = 0;	/* Thread safe: no accumulated state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings,
	CTLFLAG_RW | CTLFLAG_LOCKED,
	&log_arp_warnings, 0,
	"log arp warning messages");

static int keep_announcements = 1;	/* Thread safe: no aging of state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, keep_announcements,
	CTLFLAG_RW | CTLFLAG_LOCKED,
	&keep_announcements, 0,
	"keep arp announcements");

static int send_conflicting_probes = 1;	/* Thread safe: no accumulated state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes,
	CTLFLAG_RW | CTLFLAG_LOCKED,
	&send_conflicting_probes, 0,
	"send conflicting link-local arp probes");

/* Non-zero enables LOG_DEBUG messages; values above 1 add llreach detail */
static int arp_verbose;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, verbose,
	CTLFLAG_RW | CTLFLAG_LOCKED, &arp_verbose, 0, "");

/* Global ARP counters, exported read-only through arp_getstat() */
struct arpstat arpstat;
SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
	0, 0, arp_getstat, "S,arpstat",
	"ARP statistics (struct arpstat, net/if_arp.h)");

/* these are deprecated (read-only); use net.link.generic.system node instead */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx,
	CTLFLAG_RD | CTLFLAG_LOCKED, &hwcksum_tx, 0, "");

SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx,
	CTLFLAG_RD | CTLFLAG_LOCKED, &hwcksum_rx, 0, "");

/* Zone backing struct llinfo_arp allocations; created in arp_init() */
static struct zone *llinfo_arp_zone;
#define	LLINFO_ARP_ZONE_MAX	256		/* maximum elements in zone */
#define	LLINFO_ARP_ZONE_NAME	"llinfo_arp"	/* name for zone */
237
238void
239arp_init(void)
240{
241	VERIFY(!arpinit_done);
242
243	LIST_INIT(&llinfo_arp);
244
245	llinfo_arp_zone = zinit(sizeof (struct llinfo_arp),
246	    LLINFO_ARP_ZONE_MAX * sizeof (struct llinfo_arp), 0,
247	    LLINFO_ARP_ZONE_NAME);
248	if (llinfo_arp_zone == NULL)
249		panic("%s: failed allocating llinfo_arp_zone", __func__);
250
251	zone_change(llinfo_arp_zone, Z_EXPAND, TRUE);
252	zone_change(llinfo_arp_zone, Z_CALLERACCT, FALSE);
253
254	arpinit_done = 1;
255}
256
257static struct llinfo_arp *
258arp_llinfo_alloc(int how)
259{
260	struct llinfo_arp *la;
261
262	la = (how == M_WAITOK) ? zalloc(llinfo_arp_zone) :
263	    zalloc_noblock(llinfo_arp_zone);
264	if (la != NULL)
265		bzero(la, sizeof (*la));
266
267	return (la);
268}
269
270static void
271arp_llinfo_free(void *arg)
272{
273	struct llinfo_arp *la = arg;
274
275	if (la->la_le.le_next != NULL || la->la_le.le_prev != NULL) {
276		panic("%s: trying to free %p when it is in use", __func__, la);
277		/* NOTREACHED */
278	}
279
280	/* Just in case there's anything there, free it */
281	if (la->la_hold != NULL) {
282		m_freem(la->la_hold);
283		la->la_hold = NULL;
284		arpstat.purged++;
285	}
286
287	/* Purge any link-layer info caching */
288	VERIFY(la->la_rt->rt_llinfo == la);
289	if (la->la_rt->rt_llinfo_purge != NULL)
290		la->la_rt->rt_llinfo_purge(la->la_rt);
291
292	zfree(llinfo_arp_zone, la);
293}
294
295static void
296arp_llinfo_purge(struct rtentry *rt)
297{
298	struct llinfo_arp *la = rt->rt_llinfo;
299
300	RT_LOCK_ASSERT_HELD(rt);
301	VERIFY(rt->rt_llinfo_purge == arp_llinfo_purge && la != NULL);
302
303	if (la->la_llreach != NULL) {
304		RT_CONVERT_LOCK(rt);
305		ifnet_llreach_free(la->la_llreach);
306		la->la_llreach = NULL;
307	}
308	la->la_lastused = 0;
309}
310
311static void
312arp_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
313{
314	struct llinfo_arp *la = rt->rt_llinfo;
315	struct if_llreach *lr = la->la_llreach;
316
317	if (lr == NULL) {
318		bzero(ri, sizeof (*ri));
319		ri->ri_rssi = IFNET_RSSI_UNKNOWN;
320		ri->ri_lqm = IFNET_LQM_THRESH_OFF;
321		ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
322	} else {
323		IFLR_LOCK(lr);
324		/* Export to rt_reach_info structure */
325		ifnet_lr2ri(lr, ri);
326		/* Export ARP send expiration (calendar) time */
327		ri->ri_snd_expire =
328		    ifnet_llreach_up2calexp(lr, la->la_lastused);
329		IFLR_UNLOCK(lr);
330	}
331}
332
333static void
334arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
335{
336	struct llinfo_arp *la = rt->rt_llinfo;
337	struct if_llreach *lr = la->la_llreach;
338
339	if (lr == NULL) {
340		bzero(iflri, sizeof (*iflri));
341		iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
342		iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
343		iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
344	} else {
345		IFLR_LOCK(lr);
346		/* Export to ifnet_llreach_info structure */
347		ifnet_lr2iflri(lr, iflri);
348		/* Export ARP send expiration (uptime) time */
349		iflri->iflri_snd_expire =
350		    ifnet_llreach_up2upexp(lr, la->la_lastused);
351		IFLR_UNLOCK(lr);
352	}
353}
354
355void
356arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
357{
358	/* Nothing more to do if it's disabled */
359	if (arp_llreach_base == 0)
360		return;
361
362	ifnet_llreach_set_reachable(ifp, ETHERTYPE_IP, addr, alen);
363}
364
365static __inline void
366arp_llreach_use(struct llinfo_arp *la)
367{
368	if (la->la_llreach != NULL)
369		la->la_lastused = net_uptime();
370}
371
372static __inline int
373arp_llreach_reachable(struct llinfo_arp *la)
374{
375	struct if_llreach *lr;
376	const char *why = NULL;
377
378	/* Nothing more to do if it's disabled; pretend it's reachable  */
379	if (arp_llreach_base == 0)
380		return (1);
381
382	if ((lr = la->la_llreach) == NULL) {
383		/*
384		 * Link-layer reachability record isn't present for this
385		 * ARP entry; pretend it's reachable and use it as is.
386		 */
387		return (1);
388	} else if (ifnet_llreach_reachable(lr)) {
389		/*
390		 * Record is present, it's not shared with other ARP
391		 * entries and a packet has recently been received
392		 * from the remote host; consider it reachable.
393		 */
394		if (lr->lr_reqcnt == 1)
395			return (1);
396
397		/* Prime it up, if this is the first time */
398		if (la->la_lastused == 0) {
399			VERIFY(la->la_llreach != NULL);
400			arp_llreach_use(la);
401		}
402
403		/*
404		 * Record is present and shared with one or more ARP
405		 * entries, and a packet has recently been received
406		 * from the remote host.  Since it's shared by more
407		 * than one IP addresses, we can't rely on the link-
408		 * layer reachability alone; consider it reachable if
409		 * this ARP entry has been used "recently."
410		 */
411		if (ifnet_llreach_reachable_delta(lr, la->la_lastused))
412			return (1);
413
414		why = "has alias(es) and hasn't been used in a while";
415	} else {
416		why = "haven't heard from it in a while";
417	}
418
419	if (arp_verbose > 1) {
420		char tmp[MAX_IPv4_STR_LEN];
421		u_int64_t now = net_uptime();
422
423		log(LOG_DEBUG, "%s: ARP probe(s) needed for %s; "
424		    "%s [lastused %lld, lastrcvd %lld] secs ago\n",
425		    if_name(lr->lr_ifp), inet_ntop(AF_INET,
426		    &SIN(rt_key(la->la_rt))->sin_addr, tmp, sizeof (tmp)), why,
427		    (la->la_lastused ? (int64_t)(now - la->la_lastused) : -1),
428		    (lr->lr_lastrcvd ? (int64_t)(now - lr->lr_lastrcvd) : -1));
429
430	}
431	return (0);
432}
433
434/*
435 * Obtain a link-layer source cache entry for the sender.
436 *
437 * NOTE: This is currently only for ARP/Ethernet.
438 */
439static void
440arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr,
441    unsigned int alen, boolean_t solicited)
442{
443	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
444	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
445
446	if (arp_llreach_base != 0 && rt->rt_expire != 0 &&
447	    !(rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
448	    ifp->if_addrlen == IF_LLREACH_MAXLEN &&	/* Ethernet */
449	    alen == ifp->if_addrlen) {
450		struct llinfo_arp *la = rt->rt_llinfo;
451		struct if_llreach *lr;
452		const char *why = NULL, *type = "";
453
454		/* Become a regular mutex, just in case */
455		RT_CONVERT_LOCK(rt);
456
457		if ((lr = la->la_llreach) != NULL) {
458			type = (solicited ? "ARP reply" : "ARP announcement");
459			/*
460			 * If target has changed, create a new record;
461			 * otherwise keep existing record.
462			 */
463			IFLR_LOCK(lr);
464			if (bcmp(addr, lr->lr_key.addr, alen) != 0) {
465				IFLR_UNLOCK(lr);
466				/* Purge any link-layer info caching */
467				VERIFY(rt->rt_llinfo_purge != NULL);
468				rt->rt_llinfo_purge(rt);
469				lr = NULL;
470				why = " for different target HW address; "
471				    "using new llreach record";
472			} else {
473				lr->lr_probes = 0;	/* reset probe count */
474				IFLR_UNLOCK(lr);
475				if (solicited) {
476					why = " for same target HW address; "
477					    "keeping existing llreach record";
478				}
479			}
480		}
481
482		if (lr == NULL) {
483			lr = la->la_llreach = ifnet_llreach_alloc(ifp,
484			    ETHERTYPE_IP, addr, alen, arp_llreach_base);
485			if (lr != NULL) {
486				lr->lr_probes = 0;	/* reset probe count */
487				if (why == NULL)
488					why = "creating new llreach record";
489			}
490		}
491
492		/* Bump up retry ceiling to accomodate unicast retries */
493		if (lr != NULL)
494			la->la_maxtries = arp_maxtries + arp_unicast_lim;
495
496		if (arp_verbose > 1 && lr != NULL && why != NULL) {
497			char tmp[MAX_IPv4_STR_LEN];
498
499			log(LOG_DEBUG, "%s: %s%s for %s\n", if_name(ifp),
500			    type, why, inet_ntop(AF_INET,
501			    &SIN(rt_key(rt))->sin_addr, tmp, sizeof (tmp)));
502		}
503	}
504}
505
/*
 * Argument block passed to arptfree() for one walk of the ARP list:
 * "draining" is the caller's input, the counters are filled in by
 * arptfree().
 */
struct arptf_arg {
	int draining;		/* non-zero: don't skip entries that haven't expired */
	uint32_t killed;	/* entries deleted from the routing table */
	uint32_t aging;		/* entries skipped because they haven't expired */
	uint32_t sticky;	/* entries that never expire or are RTF_STATIC */
	uint32_t found;		/* total entries visited */
};
513
514/*
515 * Free an arp entry.
516 */
517static void
518arptfree(struct llinfo_arp *la, void *arg)
519{
520	struct arptf_arg *ap = arg;
521	struct rtentry *rt = la->la_rt;
522
523	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
524
525	/* rnh_lock acquired by caller protects rt from going away */
526	RT_LOCK(rt);
527
528	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
529	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
530
531	ap->found++;
532	if (rt->rt_expire == 0 || (rt->rt_flags & RTF_STATIC)) {
533		ap->sticky++;
534		/* ARP entry is permanent? */
535		if (rt->rt_expire == 0) {
536			RT_UNLOCK(rt);
537			return;
538		}
539	}
540
541	/* ARP entry hasn't expired and we're not draining? */
542	if (!ap->draining && rt->rt_expire > net_uptime()) {
543		RT_UNLOCK(rt);
544		ap->aging++;
545		return;
546	}
547
548	if (rt->rt_refcnt > 0) {
549		/*
550		 * ARP entry has expired, with outstanding refcnt.
551		 * If we're not draining, force ARP query to be
552		 * generated next time this entry is used.
553		 */
554		if (!ap->draining) {
555			struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
556			if (sdl != NULL)
557				sdl->sdl_alen = 0;
558			la->la_asked = 0;
559			rt->rt_flags &= ~RTF_REJECT;
560		}
561		RT_UNLOCK(rt);
562	} else if (!(rt->rt_flags & RTF_STATIC)) {
563		/*
564		 * ARP entry has no outstanding refcnt, and we're either
565		 * draining or it has expired; delete it from the routing
566		 * table.  Safe to drop rt_lock and use rt_key, since holding
567		 * rnh_lock here prevents another thread from calling
568		 * rt_setgate() on this route.
569		 */
570		RT_UNLOCK(rt);
571		rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
572		    rt_mask(rt), 0, NULL);
573		arpstat.timeouts++;
574		ap->killed++;
575	} else {
576		/* ARP entry is static; let it linger */
577		RT_UNLOCK(rt);
578	}
579}
580
581void
582in_arpdrain(void *arg)
583{
584#pragma unused(arg)
585	struct llinfo_arp *la, *ola;
586	struct arptf_arg farg;
587
588	if (arp_verbose)
589		log(LOG_DEBUG, "%s: draining ARP entries\n", __func__);
590
591	lck_mtx_lock(rnh_lock);
592	la = llinfo_arp.lh_first;
593	bzero(&farg, sizeof (farg));
594	farg.draining = 1;
595	while ((ola = la) != NULL) {
596		la = la->la_le.le_next;
597		arptfree(ola, &farg);
598	}
599	if (arp_verbose) {
600		log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u\n",
601		    __func__, farg.found, farg.aging, farg.sticky, farg.killed);
602	}
603	lck_mtx_unlock(rnh_lock);
604}
605
606/*
607 * Timeout routine.  Age arp_tab entries periodically.
608 */
609static void
610arp_timeout(void *arg)
611{
612#pragma unused(arg)
613	struct llinfo_arp *la, *ola;
614	struct timeval atv;
615	struct arptf_arg farg;
616
617	lck_mtx_lock(rnh_lock);
618	la = llinfo_arp.lh_first;
619	bzero(&farg, sizeof (farg));
620	while ((ola = la) != NULL) {
621		la = la->la_le.le_next;
622		arptfree(ola, &farg);
623	}
624	if (arp_verbose) {
625		log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u\n",
626		    __func__, farg.found, farg.aging, farg.sticky, farg.killed);
627	}
628	atv.tv_usec = 0;
629	atv.tv_sec = arpt_prune;
630	/* re-arm the timer if there's work to do */
631	arp_timeout_run = 0;
632	if (farg.aging > 0)
633		arp_sched_timeout(&atv);
634	else if (arp_verbose)
635		log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
636	lck_mtx_unlock(rnh_lock);
637}
638
639static void
640arp_sched_timeout(struct timeval *atv)
641{
642	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
643
644	if (!arp_timeout_run) {
645		struct timeval tv;
646
647		if (atv == NULL) {
648			tv.tv_usec = 0;
649			tv.tv_sec = MAX(arpt_prune / 5, 1);
650			atv = &tv;
651		}
652		if (arp_verbose) {
653			log(LOG_DEBUG, "%s: timer scheduled in "
654			    "T+%llus.%lluu\n", __func__,
655			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
656		}
657		arp_timeout_run = 1;
658		timeout(arp_timeout, NULL, tvtohz(atv));
659	}
660}
661
662/*
663 * ifa_rtrequest() callback
664 */
665static void
666arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
667{
668#pragma unused(sa)
669	struct sockaddr *gate = rt->rt_gateway;
670	struct llinfo_arp *la = rt->rt_llinfo;
671	static struct sockaddr_dl null_sdl =
672	    { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK };
673	uint64_t timenow;
674	char buf[MAX_IPv4_STR_LEN];
675
676	VERIFY(arpinit_done);
677	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
678	RT_LOCK_ASSERT_HELD(rt);
679
680	if (rt->rt_flags & RTF_GATEWAY)
681		return;
682
683	timenow = net_uptime();
684	switch (req) {
685	case RTM_ADD:
686		/*
687		 * XXX: If this is a manually added route to interface
688		 * such as older version of routed or gated might provide,
689		 * restore cloning bit.
690		 */
691		if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL &&
692		    SIN(rt_mask(rt))->sin_addr.s_addr != INADDR_BROADCAST)
693			rt->rt_flags |= RTF_CLONING;
694
695		if (rt->rt_flags & RTF_CLONING) {
696			/*
697			 * Case 1: This route should come from a route to iface.
698			 */
699			if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
700				gate = rt->rt_gateway;
701				SDL(gate)->sdl_type = rt->rt_ifp->if_type;
702				SDL(gate)->sdl_index = rt->rt_ifp->if_index;
703				/*
704				 * In case we're called before 1.0 sec.
705				 * has elapsed.
706				 */
707				rt_setexpire(rt, MAX(timenow, 1));
708			}
709			break;
710		}
711		/* Announce a new entry if requested. */
712		if (rt->rt_flags & RTF_ANNOUNCE) {
713			if (la != NULL)
714				arp_llreach_use(la); /* Mark use timestamp */
715			RT_UNLOCK(rt);
716			dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
717			    SDL(gate), rt_key(rt), NULL, rt_key(rt), 0);
718			RT_LOCK(rt);
719			arpstat.txannounces++;
720		}
721		/* FALLTHRU */
722	case RTM_RESOLVE:
723		if (gate->sa_family != AF_LINK ||
724		    gate->sa_len < sizeof (null_sdl)) {
725			arpstat.invalidreqs++;
726			log(LOG_ERR, "%s: route to %s has bad gateway address "
727			    "(sa_family %u sa_len %u) on %s\n",
728			    __func__, inet_ntop(AF_INET,
729			    &SIN(rt_key(rt))->sin_addr.s_addr, buf,
730			    sizeof (buf)), gate->sa_family, gate->sa_len,
731			    if_name(rt->rt_ifp));
732			break;
733		}
734		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
735		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
736
737		if (la != NULL)
738			break; /* This happens on a route change */
739
740		/*
741		 * Case 2:  This route may come from cloning, or a manual route
742		 * add with a LL address.
743		 */
744		rt->rt_llinfo = la = arp_llinfo_alloc(M_WAITOK);
745		if (la == NULL) {
746			arpstat.reqnobufs++;
747			break;
748		}
749		rt->rt_llinfo_get_ri	= arp_llinfo_get_ri;
750		rt->rt_llinfo_get_iflri	= arp_llinfo_get_iflri;
751		rt->rt_llinfo_purge	= arp_llinfo_purge;
752		rt->rt_llinfo_free	= arp_llinfo_free;
753		rt->rt_flags |= RTF_LLINFO;
754		la->la_rt = rt;
755		LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
756		arpstat.inuse++;
757
758		/* We have at least one entry; arm the timer if not already */
759		arp_sched_timeout(NULL);
760
761		/*
762		 * This keeps the multicast addresses from showing up
763		 * in `arp -a' listings as unresolved.  It's not actually
764		 * functional.  Then the same for broadcast.  For IPv4
765		 * link-local address, keep the entry around even after
766		 * it has expired.
767		 */
768		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
769			RT_UNLOCK(rt);
770			dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
771			    sizeof (struct sockaddr_dl));
772			RT_LOCK(rt);
773			rt_setexpire(rt, 0);
774		} else if (in_broadcast(SIN(rt_key(rt))->sin_addr,
775		    rt->rt_ifp)) {
776			struct sockaddr_dl *gate_ll = SDL(gate);
777			size_t broadcast_len;
778			ifnet_llbroadcast_copy_bytes(rt->rt_ifp,
779			    LLADDR(gate_ll), sizeof (gate_ll->sdl_data),
780			    &broadcast_len);
781			gate_ll->sdl_alen = broadcast_len;
782			gate_ll->sdl_family = AF_LINK;
783			gate_ll->sdl_len = sizeof (struct sockaddr_dl);
784			/* In case we're called before 1.0 sec. has elapsed */
785			rt_setexpire(rt, MAX(timenow, 1));
786		} else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))->
787		    sin_addr.s_addr))) {
788			rt->rt_flags |= RTF_STATIC;
789		}
790
791		/* Set default maximum number of retries */
792		la->la_maxtries = arp_maxtries;
793
794		/* Become a regular mutex, just in case */
795		RT_CONVERT_LOCK(rt);
796		IFA_LOCK_SPIN(rt->rt_ifa);
797		if (SIN(rt_key(rt))->sin_addr.s_addr ==
798		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
799			IFA_UNLOCK(rt->rt_ifa);
800			/*
801			 * This test used to be
802			 *	if (loif.if_flags & IFF_UP)
803			 * It allowed local traffic to be forced through the
804			 * hardware by configuring the loopback down.  However,
805			 * it causes problems during network configuration
806			 * for boards that can't receive packets they send.
807			 * It is now necessary to clear "useloopback" and
808			 * remove the route to force traffic out to the
809			 * hardware.
810			 */
811			rt_setexpire(rt, 0);
812			ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)),
813			    SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
814			if (useloopback) {
815				if (rt->rt_ifp != lo_ifp) {
816					/*
817					 * Purge any link-layer info caching.
818					 */
819					if (rt->rt_llinfo_purge != NULL)
820						rt->rt_llinfo_purge(rt);
821
822					/*
823					 * Adjust route ref count for the
824					 * interfaces.
825					 */
826					if (rt->rt_if_ref_fn != NULL) {
827						rt->rt_if_ref_fn(lo_ifp, 1);
828						rt->rt_if_ref_fn(rt->rt_ifp, -1);
829					}
830				}
831				rt->rt_ifp = lo_ifp;
832				/*
833				 * If rmx_mtu is not locked, update it
834				 * to the MTU used by the new interface.
835				 */
836				if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
837					rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
838			}
839		} else {
840			IFA_UNLOCK(rt->rt_ifa);
841		}
842		break;
843
844	case RTM_DELETE:
845		if (la == NULL)
846			break;
847		/*
848		 * Unchain it but defer the actual freeing until the route
849		 * itself is to be freed.  rt->rt_llinfo still points to
850		 * llinfo_arp, and likewise, la->la_rt still points to this
851		 * route entry, except that RTF_LLINFO is now cleared.
852		 */
853		LIST_REMOVE(la, la_le);
854		la->la_le.le_next = NULL;
855		la->la_le.le_prev = NULL;
856		arpstat.inuse--;
857
858		/*
859		 * Purge any link-layer info caching.
860		 */
861		if (rt->rt_llinfo_purge != NULL)
862			rt->rt_llinfo_purge(rt);
863
864		rt->rt_flags &= ~RTF_LLINFO;
865		if (la->la_hold != NULL) {
866			m_freem(la->la_hold);
867			la->la_hold = NULL;
868			arpstat.purged++;
869		}
870	}
871}
872
873/*
874 * convert hardware address to hex string for logging errors.
875 */
876static const char *
877sdl_addr_to_hex(const struct sockaddr_dl *sdl, char *orig_buf, int buflen)
878{
879	char *buf = orig_buf;
880	int i;
881	const u_char *lladdr = (u_char *)(size_t)sdl->sdl_data;
882	int maxbytes = buflen / 3;
883
884	if (maxbytes > sdl->sdl_alen) {
885		maxbytes = sdl->sdl_alen;
886	}
887	*buf = '\0';
888	for (i = 0; i < maxbytes; i++) {
889		snprintf(buf, 3, "%02x", lladdr[i]);
890		buf += 2;
891		*buf = (i == maxbytes - 1) ? '\0' : ':';
892		buf++;
893	}
894	return (orig_buf);
895}
896
897/*
898 * arp_lookup_route will lookup the route for a given address.
899 *
900 * The address must be for a host on a local network on this interface.
901 * If the returned route is non-NULL, the route is locked and the caller
902 * is responsible for unlocking it and releasing its reference.
903 */
904static errno_t
905arp_lookup_route(const struct in_addr *addr, int create, int proxy,
906    route_t *route, unsigned int ifscope)
907{
908	struct sockaddr_inarp sin =
909	    { sizeof (sin), AF_INET, 0, { 0 }, { 0 }, 0, 0 };
910	const char *why = NULL;
911	errno_t	error = 0;
912	route_t rt;
913
914	*route = NULL;
915
916	sin.sin_addr.s_addr = addr->s_addr;
917	sin.sin_other = proxy ? SIN_PROXY : 0;
918
919	/*
920	 * If the destination is a link-local address, don't
921	 * constrain the lookup (don't scope it).
922	 */
923	if (IN_LINKLOCAL(ntohl(addr->s_addr)))
924		ifscope = IFSCOPE_NONE;
925
926	rt = rtalloc1_scoped((struct sockaddr *)&sin, create, 0, ifscope);
927	if (rt == NULL)
928		return (ENETUNREACH);
929
930	RT_LOCK(rt);
931
932	if (rt->rt_flags & RTF_GATEWAY) {
933		why = "host is not on local network";
934		error = ENETUNREACH;
935	} else if (!(rt->rt_flags & RTF_LLINFO)) {
936		why = "could not allocate llinfo";
937		error = ENOMEM;
938	} else if (rt->rt_gateway->sa_family != AF_LINK) {
939		why = "gateway route is not ours";
940		error = EPROTONOSUPPORT;
941	}
942
943	if (error != 0) {
944		if (create && (arp_verbose || log_arp_warnings)) {
945			char tmp[MAX_IPv4_STR_LEN];
946			log(LOG_DEBUG, "%s: link#%d %s failed: %s\n",
947			    __func__, ifscope, inet_ntop(AF_INET, addr, tmp,
948			    sizeof (tmp)), why);
949		}
950
951		/*
952		 * If there are no references to this route, and it is
953		 * a cloned route, and not static, and ARP had created
954		 * the route, then purge it from the routing table as
955		 * it is probably bogus.
956		 */
957		if (rt->rt_refcnt == 1 &&
958		    (rt->rt_flags & (RTF_WASCLONED | RTF_STATIC)) ==
959		    RTF_WASCLONED) {
960			/*
961			 * Prevent another thread from modiying rt_key,
962			 * rt_gateway via rt_setgate() after rt_lock is
963			 * dropped by marking the route as defunct.
964			 */
965			rt->rt_flags |= RTF_CONDEMNED;
966			RT_UNLOCK(rt);
967			rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
968			    rt_mask(rt), rt->rt_flags, NULL);
969			rtfree(rt);
970		} else {
971			RT_REMREF_LOCKED(rt);
972			RT_UNLOCK(rt);
973		}
974		return (error);
975	}
976
977	/*
978	 * Caller releases reference and does RT_UNLOCK(rt).
979	 */
980	*route = rt;
981	return (0);
982}
983
984/*
985 * This is the ARP pre-output routine; care must be taken to ensure that
986 * the "hint" route never gets freed via rtfree(), since the caller may
987 * have stored it inside a struct route with a reference held for that
988 * placeholder.
989 */
990errno_t
991arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
992    struct sockaddr_dl *ll_dest, size_t	ll_dest_len, route_t hint,
993    mbuf_t packet)
994{
995	route_t	route = NULL;	/* output route */
996	errno_t	result = 0;
997	struct sockaddr_dl *gateway;
998	struct llinfo_arp *llinfo = NULL;
999	uint64_t timenow;
1000	int unreachable = 0;
1001
1002	if (net_dest->sin_family != AF_INET)
1003		return (EAFNOSUPPORT);
1004
1005	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
1006		return (ENETDOWN);
1007
1008	/*
1009	 * If we were given a route, verify the route and grab the gateway
1010	 */
1011	if (hint != NULL) {
1012		/*
1013		 * Callee holds a reference on the route and returns
1014		 * with the route entry locked, upon success.
1015		 */
1016		result = route_to_gwroute((const struct sockaddr *)
1017		    net_dest, hint, &route);
1018		if (result != 0)
1019			return (result);
1020		if (route != NULL)
1021			RT_LOCK_ASSERT_HELD(route);
1022	}
1023
1024	if (packet->m_flags & M_BCAST) {
1025		size_t broadcast_len;
1026		bzero(ll_dest, ll_dest_len);
1027		result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest),
1028		    ll_dest_len - offsetof(struct sockaddr_dl, sdl_data),
1029		    &broadcast_len);
1030		if (result == 0) {
1031			ll_dest->sdl_alen = broadcast_len;
1032			ll_dest->sdl_family = AF_LINK;
1033			ll_dest->sdl_len = sizeof (struct sockaddr_dl);
1034		}
1035		goto release;
1036	}
1037	if (packet->m_flags & M_MCAST) {
1038		if (route != NULL)
1039			RT_UNLOCK(route);
1040		result = dlil_resolve_multi(ifp,
1041		    (const struct sockaddr *)net_dest,
1042		    (struct sockaddr *)ll_dest, ll_dest_len);
1043		if (route != NULL)
1044			RT_LOCK(route);
1045		goto release;
1046	}
1047
1048	/*
1049	 * If we didn't find a route, or the route doesn't have
1050	 * link layer information, trigger the creation of the
1051	 * route and link layer information.
1052	 */
1053	if (route == NULL || route->rt_llinfo == NULL) {
1054		/* Clean up now while we can */
1055		if (route != NULL) {
1056			if (route == hint) {
1057				RT_REMREF_LOCKED(route);
1058				RT_UNLOCK(route);
1059			} else {
1060				RT_UNLOCK(route);
1061				rtfree(route);
1062			}
1063		}
1064		/*
1065		 * Callee holds a reference on the route and returns
1066		 * with the route entry locked, upon success.
1067		 */
1068		result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route,
1069		    ifp->if_index);
1070		if (result == 0)
1071			RT_LOCK_ASSERT_HELD(route);
1072	}
1073
1074	if (result || route == NULL || (llinfo = route->rt_llinfo) == NULL) {
1075		/* In case result is 0 but no route, return an error */
1076		if (result == 0)
1077			result = EHOSTUNREACH;
1078
1079		if (route != NULL && route->rt_llinfo == NULL) {
1080			char tmp[MAX_IPv4_STR_LEN];
1081			log(LOG_ERR, "%s: can't allocate llinfo for %s\n",
1082			    __func__, inet_ntop(AF_INET, &net_dest->sin_addr,
1083			    tmp, sizeof (tmp)));
1084		}
1085		goto release;
1086	}
1087
1088	/*
1089	 * Now that we have the right route, is it filled in?
1090	 */
1091	gateway = SDL(route->rt_gateway);
1092	timenow = net_uptime();
1093	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
1094	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
1095	if ((route->rt_expire == 0 ||
1096	    route->rt_expire > timenow) && gateway != NULL &&
1097	    gateway->sdl_family == AF_LINK && gateway->sdl_alen != 0 &&
1098	    !(unreachable = !arp_llreach_reachable(llinfo))) {
1099		bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len));
1100		result = 0;
1101		arp_llreach_use(llinfo);	/* Mark use timestamp */
1102		goto release;
1103	} else if (unreachable) {
1104		/*
1105		 * Discard existing answer in case we need to probe.
1106		 */
1107		gateway->sdl_alen = 0;
1108	}
1109
1110	if (ifp->if_flags & IFF_NOARP) {
1111		result = ENOTSUP;
1112		goto release;
1113	}
1114
1115	/*
1116	 * Route wasn't complete/valid. We need to arp.
1117	 */
1118	if (packet != NULL) {
1119		if (llinfo->la_hold != NULL) {
1120			m_freem(llinfo->la_hold);
1121			arpstat.dropped++;
1122		}
1123		llinfo->la_hold = packet;
1124	}
1125
1126	if (route->rt_expire) {
1127		route->rt_flags &= ~RTF_REJECT;
1128		if (llinfo->la_asked == 0 || route->rt_expire != timenow) {
1129			rt_setexpire(route, timenow);
1130			if (llinfo->la_asked++ < llinfo->la_maxtries) {
1131				struct if_llreach *lr = llinfo->la_llreach;
1132				struct ifaddr *rt_ifa = route->rt_ifa;
1133				struct sockaddr_dl *hw_dest = NULL, sdl;
1134				struct sockaddr *sa;
1135				u_int32_t rtflags, alen;
1136
1137				/* Become a regular mutex, just in case */
1138				RT_CONVERT_LOCK(route);
1139				/* Update probe count, if applicable */
1140				if (lr != NULL) {
1141					IFLR_LOCK_SPIN(lr);
1142					lr->lr_probes++;
1143					alen = ifp->if_addrlen;
1144					/* Ethernet only for now */
1145					if (alen == IF_LLREACH_MAXLEN &&
1146					    lr->lr_probes <= arp_unicast_lim) {
1147						bzero(&sdl, sizeof (sdl));
1148						sdl.sdl_alen = alen;
1149						bcopy(&lr->lr_key.addr,
1150						    LLADDR(&sdl), alen);
1151						hw_dest = &sdl;
1152					}
1153					IFLR_UNLOCK(lr);
1154				}
1155				IFA_LOCK_SPIN(rt_ifa);
1156				IFA_ADDREF_LOCKED(rt_ifa);
1157				sa = rt_ifa->ifa_addr;
1158				IFA_UNLOCK(rt_ifa);
1159				arp_llreach_use(llinfo); /* Mark use tstamp */
1160				rtflags = route->rt_flags;
1161				RT_UNLOCK(route);
1162				dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa,
1163				    (const struct sockaddr_dl *)hw_dest,
1164				    (const struct sockaddr *)net_dest, rtflags);
1165				IFA_REMREF(rt_ifa);
1166				RT_LOCK(route);
1167				result = EJUSTRETURN;
1168				goto release;
1169			} else {
1170				route->rt_flags |= RTF_REJECT;
1171				rt_setexpire(route,
1172				    route->rt_expire + arpt_down);
1173				llinfo->la_asked = 0;
1174				/*
1175				 * Clear la_hold; don't free the packet since
1176				 * we're not returning EJUSTRETURN; the caller
1177				 * will handle the freeing.
1178				 */
1179				llinfo->la_hold = NULL;
1180				result = EHOSTUNREACH;
1181				goto release;
1182			}
1183		}
1184	}
1185
1186	/* The packet is now held inside la_hold (can "packet" be NULL?) */
1187	result = EJUSTRETURN;
1188
1189release:
1190	if (result == EHOSTUNREACH)
1191		arpstat.dropped++;
1192
1193	if (route != NULL) {
1194		if (route == hint) {
1195			RT_REMREF_LOCKED(route);
1196			RT_UNLOCK(route);
1197		} else {
1198			RT_UNLOCK(route);
1199			rtfree(route);
1200		}
1201	}
1202	return (result);
1203}
1204
1205errno_t
1206arp_ip_handle_input(ifnet_t ifp, u_short arpop,
1207    const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip,
1208    const struct sockaddr_in *target_ip)
1209{
1210	char ipv4str[MAX_IPv4_STR_LEN];
1211	struct sockaddr_dl proxied;
1212	struct sockaddr_dl *gateway, *target_hw = NULL;
1213	struct ifaddr *ifa;
1214	struct in_ifaddr *ia;
1215	struct in_ifaddr *best_ia = NULL;
1216	struct sockaddr_in best_ia_sin;
1217	route_t	route = NULL;
1218	char buf[3 * MAX_HW_LEN]; /* enough for MAX_HW_LEN byte hw address */
1219	struct llinfo_arp *llinfo;
1220	errno_t	error;
1221	int created_announcement = 0;
1222	int bridged = 0, is_bridge = 0;
1223
1224	arpstat.received++;
1225
1226	/* Do not respond to requests for 0.0.0.0 */
1227	if (target_ip->sin_addr.s_addr == INADDR_ANY && arpop == ARPOP_REQUEST)
1228		goto done;
1229
1230	if (ifp->if_bridge)
1231		bridged = 1;
1232	if (ifp->if_type == IFT_BRIDGE)
1233		is_bridge = 1;
1234
1235	if (arpop == ARPOP_REPLY)
1236		arpstat.rxreplies++;
1237
1238	/*
1239	 * Determine if this ARP is for us
1240	 * For a bridge, we want to check the address irrespective
1241	 * of the receive interface.
1242	 */
1243	lck_rw_lock_shared(in_ifaddr_rwlock);
1244	TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
1245		IFA_LOCK_SPIN(&ia->ia_ifa);
1246		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
1247		    (ia->ia_ifp == ifp)) &&
1248		    ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
1249			best_ia = ia;
1250			best_ia_sin = best_ia->ia_addr;
1251			IFA_ADDREF_LOCKED(&ia->ia_ifa);
1252			IFA_UNLOCK(&ia->ia_ifa);
1253			lck_rw_done(in_ifaddr_rwlock);
1254			goto match;
1255		}
1256		IFA_UNLOCK(&ia->ia_ifa);
1257	}
1258
1259	TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
1260		IFA_LOCK_SPIN(&ia->ia_ifa);
1261		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
1262		    (ia->ia_ifp == ifp)) &&
1263		    ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1264			best_ia = ia;
1265			best_ia_sin = best_ia->ia_addr;
1266			IFA_ADDREF_LOCKED(&ia->ia_ifa);
1267			IFA_UNLOCK(&ia->ia_ifa);
1268			lck_rw_done(in_ifaddr_rwlock);
1269			goto match;
1270		}
1271		IFA_UNLOCK(&ia->ia_ifa);
1272	}
1273
1274#define	BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)				     \
1275	(ia->ia_ifp->if_bridge == ifp->if_softc &&			     \
1276	bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) == 0 && \
1277	addr == ia->ia_addr.sin_addr.s_addr)
1278	/*
1279	 * Check the case when bridge shares its MAC address with
1280	 * some of its children, so packets are claimed by bridge
1281	 * itself (bridge_input() does it first), but they are really
1282	 * meant to be destined to the bridge member.
1283	 */
1284	if (is_bridge) {
1285		TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr),
1286		    ia_hash) {
1287			IFA_LOCK_SPIN(&ia->ia_ifa);
1288			if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr,
1289			    ifp, ia)) {
1290				ifp = ia->ia_ifp;
1291				best_ia = ia;
1292				best_ia_sin = best_ia->ia_addr;
1293				IFA_ADDREF_LOCKED(&ia->ia_ifa);
1294				IFA_UNLOCK(&ia->ia_ifa);
1295				lck_rw_done(in_ifaddr_rwlock);
1296				goto match;
1297			}
1298			IFA_UNLOCK(&ia->ia_ifa);
1299		}
1300	}
1301#undef BDG_MEMBER_MATCHES_ARP
1302	lck_rw_done(in_ifaddr_rwlock);
1303
1304	/*
1305	 * No match, use the first inet address on the receive interface
1306	 * as a dummy address for the rest of the function; we may be
1307	 * proxying for another address.
1308	 */
1309	ifnet_lock_shared(ifp);
1310	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1311		IFA_LOCK_SPIN(ifa);
1312		if (ifa->ifa_addr->sa_family != AF_INET) {
1313			IFA_UNLOCK(ifa);
1314			continue;
1315		}
1316		best_ia = (struct in_ifaddr *)ifa;
1317		best_ia_sin = best_ia->ia_addr;
1318		IFA_ADDREF_LOCKED(ifa);
1319		IFA_UNLOCK(ifa);
1320		ifnet_lock_done(ifp);
1321		goto match;
1322	}
1323	ifnet_lock_done(ifp);
1324
1325	/*
1326	 * If we're not a bridge member, or if we are but there's no
1327	 * IPv4 address to use for the interface, drop the packet.
1328	 */
1329	if (!bridged || best_ia == NULL)
1330		goto done;
1331
1332match:
1333	/* If the packet is from this interface, ignore the packet */
1334	if (bcmp(CONST_LLADDR(sender_hw), IF_LLADDR(ifp),
1335	    sender_hw->sdl_alen) == 0)
1336		goto done;
1337
1338	/* Check for a conflict */
1339	if (!bridged &&
1340	    sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) {
1341		struct kev_msg ev_msg;
1342		struct kev_in_collision	*in_collision;
1343		u_char storage[sizeof (struct kev_in_collision) + MAX_HW_LEN];
1344
1345		bzero(&ev_msg, sizeof (struct kev_msg));
1346		bzero(storage, (sizeof (struct kev_in_collision) + MAX_HW_LEN));
1347		in_collision = (struct kev_in_collision *)(void *)storage;
1348		log(LOG_ERR, "%s duplicate IP address %s sent from "
1349		    "address %s\n", if_name(ifp),
1350		    inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
1351		    sizeof (ipv4str)), sdl_addr_to_hex(sender_hw, buf,
1352		    sizeof (buf)));
1353
1354		/* Send a kernel event so anyone can learn of the conflict */
1355		in_collision->link_data.if_family = ifp->if_family;
1356		in_collision->link_data.if_unit = ifp->if_unit;
1357		strncpy(&in_collision->link_data.if_name[0],
1358		    ifp->if_name, IFNAMSIZ);
1359		in_collision->ia_ipaddr = sender_ip->sin_addr;
1360		in_collision->hw_len = (sender_hw->sdl_alen < MAX_HW_LEN) ?
1361		    sender_hw->sdl_alen : MAX_HW_LEN;
1362		bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr,
1363		    in_collision->hw_len);
1364		ev_msg.vendor_code = KEV_VENDOR_APPLE;
1365		ev_msg.kev_class = KEV_NETWORK_CLASS;
1366		ev_msg.kev_subclass = KEV_INET_SUBCLASS;
1367		ev_msg.event_code = KEV_INET_ARPCOLLISION;
1368		ev_msg.dv[0].data_ptr = in_collision;
1369		ev_msg.dv[0].data_length =
1370		    sizeof (struct kev_in_collision) + in_collision->hw_len;
1371		ev_msg.dv[1].data_length = 0;
1372		kev_post_msg(&ev_msg);
1373		arpstat.dupips++;
1374		goto respond;
1375	}
1376
1377	/*
1378	 * Look up the routing entry. If it doesn't exist and we are the
1379	 * target, and the sender isn't 0.0.0.0, go ahead and create one.
1380	 * Callee holds a reference on the route and returns with the route
1381	 * entry locked, upon success.
1382	 */
1383	error = arp_lookup_route(&sender_ip->sin_addr,
1384	    (target_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr &&
1385	    sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index);
1386
1387	if (error == 0)
1388		RT_LOCK_ASSERT_HELD(route);
1389
1390	if (error || route == NULL || route->rt_gateway == NULL) {
1391		if (arpop != ARPOP_REQUEST)
1392			goto respond;
1393
1394		if (arp_sendllconflict && send_conflicting_probes != 0 &&
1395		    (ifp->if_eflags & IFEF_ARPLL) &&
1396		    IN_LINKLOCAL(ntohl(target_ip->sin_addr.s_addr)) &&
1397		    sender_ip->sin_addr.s_addr == INADDR_ANY) {
1398			/*
1399			 * Verify this ARP probe doesn't conflict with
1400			 * an IPv4LL we know of on another interface.
1401			 */
1402			if (route != NULL) {
1403				RT_REMREF_LOCKED(route);
1404				RT_UNLOCK(route);
1405				route = NULL;
1406			}
1407			/*
1408			 * Callee holds a reference on the route and returns
1409			 * with the route entry locked, upon success.
1410			 */
1411			error = arp_lookup_route(&target_ip->sin_addr, 0, 0,
1412			    &route, ifp->if_index);
1413
1414			if (error != 0 || route == NULL ||
1415			    route->rt_gateway == NULL)
1416				goto respond;
1417
1418			RT_LOCK_ASSERT_HELD(route);
1419
1420			gateway = SDL(route->rt_gateway);
1421			if (route->rt_ifp != ifp && gateway->sdl_alen != 0 &&
1422			    (gateway->sdl_alen != sender_hw->sdl_alen ||
1423			    bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw),
1424			    gateway->sdl_alen) != 0)) {
1425				/*
1426				 * A node is probing for an IPv4LL we know
1427				 * exists on a different interface. We respond
1428				 * with a conflicting probe to force the new
1429				 * device to pick a different IPv4LL address.
1430				 */
1431				if (arp_verbose || log_arp_warnings) {
1432					log(LOG_INFO, "arp: %s on %s sent "
1433					    "probe for %s, already on %s\n",
1434					    sdl_addr_to_hex(sender_hw, buf,
1435					    sizeof (buf)), if_name(ifp),
1436					    inet_ntop(AF_INET,
1437					    &target_ip->sin_addr, ipv4str,
1438					    sizeof (ipv4str)),
1439					    if_name(route->rt_ifp));
1440					log(LOG_INFO, "arp: sending "
1441					    "conflicting probe to %s on %s\n",
1442					    sdl_addr_to_hex(sender_hw, buf,
1443					    sizeof (buf)), if_name(ifp));
1444				}
1445				/* Mark use timestamp */
1446				if (route->rt_llinfo != NULL)
1447					arp_llreach_use(route->rt_llinfo);
1448				/* We're done with the route */
1449				RT_REMREF_LOCKED(route);
1450				RT_UNLOCK(route);
1451				route = NULL;
1452				/*
1453				 * Send a conservative unicast "ARP probe".
1454				 * This should force the other device to pick
1455				 * a new number.  This will not force the
1456				 * device to pick a new number if the device
1457				 * has already assigned that number.  This will
1458				 * not imply to the device that we own that
1459				 * address.  The link address is always
1460				 * present; it's never freed.
1461				 */
1462				ifnet_lock_shared(ifp);
1463				ifa = ifp->if_lladdr;
1464				IFA_ADDREF(ifa);
1465				ifnet_lock_done(ifp);
1466				dlil_send_arp_internal(ifp, ARPOP_REQUEST,
1467				    SDL(ifa->ifa_addr),
1468				    (const struct sockaddr *)sender_ip,
1469				    sender_hw,
1470				    (const struct sockaddr *)target_ip);
1471				IFA_REMREF(ifa);
1472				ifa = NULL;
1473				arpstat.txconflicts++;
1474			}
1475			goto respond;
1476		} else if (keep_announcements != 0 &&
1477		    target_ip->sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1478			/*
1479			 * Don't create entry if link-local address and
1480			 * link-local is disabled
1481			 */
1482			if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
1483			    (ifp->if_eflags & IFEF_ARPLL)) {
1484				if (route != NULL) {
1485					RT_REMREF_LOCKED(route);
1486					RT_UNLOCK(route);
1487					route = NULL;
1488				}
1489				/*
1490				 * Callee holds a reference on the route and
1491				 * returns with the route entry locked, upon
1492				 * success.
1493				 */
1494				error = arp_lookup_route(&sender_ip->sin_addr,
1495				    1, 0, &route, ifp->if_index);
1496
1497				if (error == 0)
1498					RT_LOCK_ASSERT_HELD(route);
1499
1500				if (error == 0 && route != NULL &&
1501				    route->rt_gateway != NULL)
1502					created_announcement = 1;
1503			}
1504			if (created_announcement == 0)
1505				goto respond;
1506		} else {
1507			goto respond;
1508		}
1509	}
1510
1511	RT_LOCK_ASSERT_HELD(route);
1512	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
1513	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
1514
1515	gateway = SDL(route->rt_gateway);
1516	if (!bridged && route->rt_ifp != ifp) {
1517		if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
1518		    !(ifp->if_eflags & IFEF_ARPLL)) {
1519			if (arp_verbose || log_arp_warnings)
1520				log(LOG_ERR, "arp: %s is on %s but got "
1521				    "reply from %s on %s\n",
1522				    inet_ntop(AF_INET, &sender_ip->sin_addr,
1523				    ipv4str, sizeof (ipv4str)),
1524				    if_name(route->rt_ifp),
1525				    sdl_addr_to_hex(sender_hw, buf,
1526				    sizeof (buf)), if_name(ifp));
1527			goto respond;
1528		} else {
1529			/* Don't change a permanent address */
1530			if (route->rt_expire == 0)
1531				goto respond;
1532
1533			/*
1534			 * We're about to check and/or change the route's ifp
1535			 * and ifa, so do the lock dance: drop rt_lock, hold
1536			 * rnh_lock and re-hold rt_lock to avoid violating the
1537			 * lock ordering.  We have an extra reference on the
1538			 * route, so it won't go away while we do this.
1539			 */
1540			RT_UNLOCK(route);
1541			lck_mtx_lock(rnh_lock);
1542			RT_LOCK(route);
1543			/*
1544			 * Don't change the cloned route away from the
1545			 * parent's interface if the address did resolve
1546			 * or if the route is defunct.  rt_ifp on both
1547			 * the parent and the clone can now be freely
1548			 * accessed now that we have acquired rnh_lock.
1549			 */
1550			gateway = SDL(route->rt_gateway);
1551			if ((gateway->sdl_alen != 0 &&
1552			    route->rt_parent != NULL &&
1553			    route->rt_parent->rt_ifp == route->rt_ifp) ||
1554			    (route->rt_flags & RTF_CONDEMNED)) {
1555				RT_REMREF_LOCKED(route);
1556				RT_UNLOCK(route);
1557				route = NULL;
1558				lck_mtx_unlock(rnh_lock);
1559				goto respond;
1560			}
1561			if (route->rt_ifp != ifp) {
1562				/*
1563				 * Purge any link-layer info caching.
1564				 */
1565				if (route->rt_llinfo_purge != NULL)
1566					route->rt_llinfo_purge(route);
1567
1568				/* Adjust route ref count for the interfaces */
1569				if (route->rt_if_ref_fn != NULL) {
1570					route->rt_if_ref_fn(ifp, 1);
1571					route->rt_if_ref_fn(route->rt_ifp, -1);
1572				}
1573			}
1574			/* Change the interface when the existing route is on */
1575			route->rt_ifp = ifp;
1576			/*
1577			 * If rmx_mtu is not locked, update it
1578			 * to the MTU used by the new interface.
1579			 */
1580			if (!(route->rt_rmx.rmx_locks & RTV_MTU))
1581				route->rt_rmx.rmx_mtu = route->rt_ifp->if_mtu;
1582
1583			rtsetifa(route, &best_ia->ia_ifa);
1584			gateway->sdl_index = ifp->if_index;
1585			RT_UNLOCK(route);
1586			lck_mtx_unlock(rnh_lock);
1587			RT_LOCK(route);
1588			/* Don't bother if the route is down */
1589			if (!(route->rt_flags & RTF_UP))
1590				goto respond;
1591			/* Refresh gateway pointer */
1592			gateway = SDL(route->rt_gateway);
1593		}
1594		RT_LOCK_ASSERT_HELD(route);
1595	}
1596
1597	if (gateway->sdl_alen != 0 && bcmp(LLADDR(gateway),
1598	    CONST_LLADDR(sender_hw), gateway->sdl_alen) != 0) {
1599		if (route->rt_expire != 0 &&
1600		    (arp_verbose || log_arp_warnings)) {
1601			char buf2[3 * MAX_HW_LEN];
1602			log(LOG_INFO, "arp: %s moved from %s to %s on %s\n",
1603			    inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
1604			    sizeof (ipv4str)),
1605			    sdl_addr_to_hex(gateway, buf, sizeof (buf)),
1606			    sdl_addr_to_hex(sender_hw, buf2, sizeof (buf2)),
1607			    if_name(ifp));
1608		} else if (route->rt_expire == 0) {
1609			if (arp_verbose || log_arp_warnings) {
1610				log(LOG_ERR, "arp: %s attempts to modify "
1611				    "permanent entry for %s on %s\n",
1612				    sdl_addr_to_hex(sender_hw, buf,
1613				    sizeof (buf)),
1614				    inet_ntop(AF_INET, &sender_ip->sin_addr,
1615				    ipv4str, sizeof (ipv4str)),
1616				    if_name(ifp));
1617			}
1618			goto respond;
1619		}
1620	}
1621
1622	/* Copy the sender hardware address in to the route's gateway address */
1623	gateway->sdl_alen = sender_hw->sdl_alen;
1624	bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen);
1625
1626	/* Update the expire time for the route and clear the reject flag */
1627	if (route->rt_expire != 0)
1628		rt_setexpire(route, net_uptime() + arpt_keep);
1629	route->rt_flags &= ~RTF_REJECT;
1630
1631	/* cache the gateway (sender HW) address */
1632	arp_llreach_alloc(route, ifp, LLADDR(gateway), gateway->sdl_alen,
1633	    (arpop == ARPOP_REPLY));
1634
1635	/* update the llinfo, send a queued packet if there is one */
1636	llinfo = route->rt_llinfo;
1637	llinfo->la_asked = 0;
1638	if (llinfo->la_hold) {
1639		struct mbuf *m0 = llinfo->la_hold;
1640		llinfo->la_hold = NULL;
1641		RT_UNLOCK(route);
1642		dlil_output(ifp, PF_INET, m0, (caddr_t)route,
1643		    rt_key(route), 0, NULL);
1644		RT_REMREF(route);
1645		route = NULL;
1646	}
1647
1648respond:
1649	if (route != NULL) {
1650		/* Mark use timestamp if we're going to send a reply */
1651		if (arpop == ARPOP_REQUEST && route->rt_llinfo != NULL)
1652			arp_llreach_use(route->rt_llinfo);
1653		RT_REMREF_LOCKED(route);
1654		RT_UNLOCK(route);
1655		route = NULL;
1656	}
1657
1658	if (arpop != ARPOP_REQUEST)
1659		goto done;
1660
1661	arpstat.rxrequests++;
1662
1663	/* If we are not the target, check if we should proxy */
1664	if (target_ip->sin_addr.s_addr != best_ia_sin.sin_addr.s_addr) {
1665		/*
1666		 * Find a proxy route; callee holds a reference on the
1667		 * route and returns with the route entry locked, upon
1668		 * success.
1669		 */
1670		error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY,
1671		    &route, ifp->if_index);
1672
1673		if (error == 0) {
1674			RT_LOCK_ASSERT_HELD(route);
1675			/*
1676			 * Return proxied ARP replies only on the interface
1677			 * or bridge cluster where this network resides.
1678			 * Otherwise we may conflict with the host we are
1679			 * proxying for.
1680			 */
1681			if (route->rt_ifp != ifp &&
1682			    (route->rt_ifp->if_bridge != ifp->if_bridge ||
1683			    ifp->if_bridge == NULL)) {
1684				RT_REMREF_LOCKED(route);
1685				RT_UNLOCK(route);
1686				goto done;
1687			}
1688			proxied = *SDL(route->rt_gateway);
1689			target_hw = &proxied;
1690		} else {
1691			/*
1692			 * We don't have a route entry indicating we should
1693			 * use proxy.  If we aren't supposed to proxy all,
1694			 * we are done.
1695			 */
1696			if (!arp_proxyall)
1697				goto done;
1698
1699			/*
1700			 * See if we have a route to the target ip before
1701			 * we proxy it.
1702			 */
1703			route = rtalloc1_scoped((struct sockaddr *)
1704			    (size_t)target_ip, 0, 0, ifp->if_index);
1705			if (!route)
1706				goto done;
1707
1708			/*
1709			 * Don't proxy for hosts already on the same interface.
1710			 */
1711			RT_LOCK(route);
1712			if (route->rt_ifp == ifp) {
1713				RT_UNLOCK(route);
1714				rtfree(route);
1715				goto done;
1716			}
1717		}
1718		/* Mark use timestamp */
1719		if (route->rt_llinfo != NULL)
1720			arp_llreach_use(route->rt_llinfo);
1721		RT_REMREF_LOCKED(route);
1722		RT_UNLOCK(route);
1723	}
1724
1725	dlil_send_arp(ifp, ARPOP_REPLY,
1726	    target_hw, (const struct sockaddr *)target_ip,
1727	    sender_hw, (const struct sockaddr *)sender_ip, 0);
1728
1729done:
1730	if (best_ia != NULL)
1731		IFA_REMREF(&best_ia->ia_ifa);
1732	return (0);
1733}
1734
1735void
1736arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
1737{
1738	struct sockaddr *sa;
1739
1740	IFA_LOCK(ifa);
1741	ifa->ifa_rtrequest = arp_rtrequest;
1742	ifa->ifa_flags |= RTF_CLONING;
1743	sa = ifa->ifa_addr;
1744	IFA_UNLOCK(ifa);
1745	dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa, 0);
1746}
1747
1748static int
1749arp_getstat SYSCTL_HANDLER_ARGS
1750{
1751#pragma unused(oidp, arg1, arg2)
1752	if (req->oldptr == USER_ADDR_NULL)
1753		req->oldlen = (size_t)sizeof (struct arpstat);
1754
1755	return (SYSCTL_OUT(req, &arpstat, MIN(sizeof (arpstat), req->oldlen)));
1756}
1757