1/*	$NetBSD: if_arp.c,v 1.154 2012/01/02 22:17:11 liamjfoy Exp $	*/
2
3/*-
4 * Copyright (c) 1998, 2000, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Public Access Networks Corporation ("Panix").  It was developed under
9 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)if_ether.c	8.2 (Berkeley) 9/26/94
62 */
63
64/*
65 * Ethernet address resolution protocol.
66 * TODO:
67 *	add "inuse/lock" bit (or ref. count) along with valid bit
68 */
69
70#include <sys/cdefs.h>
71__KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.154 2012/01/02 22:17:11 liamjfoy Exp $");
72
73#include "opt_ddb.h"
74#include "opt_inet.h"
75
76#ifdef INET
77
78#include "bridge.h"
79
80#include <sys/param.h>
81#include <sys/systm.h>
82#include <sys/callout.h>
83#include <sys/malloc.h>
84#include <sys/mbuf.h>
85#include <sys/socket.h>
86#include <sys/time.h>
87#include <sys/timetc.h>
88#include <sys/kernel.h>
89#include <sys/errno.h>
90#include <sys/ioctl.h>
91#include <sys/syslog.h>
92#include <sys/proc.h>
93#include <sys/protosw.h>
94#include <sys/domain.h>
95#include <sys/sysctl.h>
96#include <sys/socketvar.h>
97#include <sys/percpu.h>
98
99#include <net/ethertypes.h>
100#include <net/if.h>
101#include <net/if_dl.h>
102#include <net/if_token.h>
103#include <net/if_types.h>
104#include <net/if_ether.h>
105#include <net/route.h>
106#include <net/net_stats.h>
107
108#include <netinet/in.h>
109#include <netinet/in_systm.h>
110#include <netinet/in_var.h>
111#include <netinet/ip.h>
112#include <netinet/if_inarp.h>
113
114#include "arcnet.h"
115#if NARCNET > 0
116#include <net/if_arc.h>
117#endif
118#include "fddi.h"
119#if NFDDI > 0
120#include <net/if_fddi.h>
121#endif
122#include "token.h"
123#include "carp.h"
124#if NCARP > 0
125#include <netinet/ip_carp.h>
126#endif
127
/*
 * Cast helpers: view a generic socket-address pointer as an IPv4
 * (sockaddr_in) or ARP (sockaddr_inarp) socket address.
 *
 * The argument is parenthesized so that an expression argument such as
 * `p + off' is cast as a whole; without the parentheses the cast would
 * bind to `p' alone and the addition would then be scaled by the size
 * of the target structure.
 */
#define SIN(s) ((struct sockaddr_in *)(s))
#define SRP(s) ((struct sockaddr_inarp *)(s))
130
/*
 * ARP trailer negotiation.  Trailer protocol is not IP specific,
 * but ARP request/response use IP addresses.
 */
#define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL

/*
 * timer values
 *
 * All values are in seconds; they are compared against / added to
 * time_second by the code in this file.
 */
int	arpt_prune = (5*60*1);	/* walk list every 5 minutes */
int	arpt_keep = (20*60);	/* once resolved, good for 20 more minutes */
int	arpt_down = 20;		/* once declared down, don't send for 20 secs */
int	arpt_refresh = (5*60);	/* time left before refreshing */
/* Shorthand for the route-metric fields this file uses as ARP timestamps. */
#define	rt_expire rt_rmx.rmx_expire
#define	rt_pksent rt_rmx.rmx_pksent

/* Forward declarations of file-local helpers. */
static	struct sockaddr *arp_setgate(struct rtentry *, struct sockaddr *,
	    const struct sockaddr *);
static	void arptfree(struct llinfo_arp *);
static	void arptimer(void *);
static	struct llinfo_arp *arplookup1(struct mbuf *, const struct in_addr *,
				      int, int, struct rtentry *);
static	struct llinfo_arp *arplookup(struct mbuf *, const struct in_addr *,
					  int, int);
static	void in_arpinput(struct mbuf *);
static	void arp_drainstub(void);

/*
 * List of every llinfo_arp record; entries are linked in by
 * arp_rtrequest() and walked by arptimer() and arp_drain().
 */
LIST_HEAD(, llinfo_arp) llinfo_arp;
/* Input queue of received ARP packets, drained by arpintr(). */
struct	ifqueue arpintrq = {
	.ifq_head = NULL,
	.ifq_tail = NULL,
	.ifq_len = 0,
	.ifq_maxlen = 50,
	.ifq_drops = 0,
};
/*
 * arp_inuse counts llinfo records currently linked (incremented on
 * allocation, decremented on RTM_DELETE); arp_allocated only ever grows.
 */
int	arp_inuse, arp_allocated;
/* Requests sent by arpresolve() before the route is marked RTF_REJECT. */
int	arp_maxtries = 5;
int	useloopback = 1;	/* use loopback interface for local traffic */
/* Set on the first arp_rtrequest() call, which also starts the ARP timer. */
int	arpinit_done = 0;

/* Per-CPU ARP statistics counters, allocated in arp_init(). */
static percpu_t *arpstat_percpu;

/* Bracket direct access to the counter array (see arprequest()). */
#define	ARP_STAT_GETREF()	_NET_STAT_GETREF(arpstat_percpu)
#define	ARP_STAT_PUTREF()	_NET_STAT_PUTREF(arpstat_percpu)

/* Bump a single counter by one / by v. */
#define	ARP_STATINC(x)		_NET_STATINC(arpstat_percpu, x)
#define	ARP_STATADD(x, v)	_NET_STATADD(arpstat_percpu, x, v)

/* Callout that periodically runs arptimer(). */
struct	callout arptimer_ch;

/* revarp state */
struct	in_addr myip, srv_ip;
int	myip_initialized = 0;
int	revarp_in_progress = 0;
struct	ifnet *myip_ifp = NULL;

#ifdef DDB
/* Kernel-debugger (DDB) print helpers; only declared when DDB is configured. */
static void db_print_sa(const struct sockaddr *);
static void db_print_ifa(struct ifaddr *);
static void db_print_llinfo(void *);
static int db_show_rtentry(struct rtentry *, void *);
#endif

/* Set by arp_drainstub(); consumed (and cleared) by arp_fasttimo(). */
static int arp_drainwanted;

/*
 * Logging switches consulted by in_arpinput(): address movements,
 * attempts to overwrite a static entry, and replies arriving on a
 * different interface than the one the entry belongs to.
 */
static int log_movements = 1;
static int log_permanent_modify = 1;
static int log_wrong_iface = 1;
197
198/*
199 * this should be elsewhere.
200 */
201
202static char *
203lla_snprintf(u_int8_t *, int);
204
205static char *
206lla_snprintf(u_int8_t *adrp, int len)
207{
208#define NUMBUFS 3
209	static char buf[NUMBUFS][16*3];
210	static int bnum = 0;
211
212	int i;
213	char *p;
214
215	p = buf[bnum];
216
217	*p++ = hexdigits[(*adrp)>>4];
218	*p++ = hexdigits[(*adrp++)&0xf];
219
220	for (i=1; i<len && i<16; i++) {
221		*p++ = ':';
222		*p++ = hexdigits[(*adrp)>>4];
223		*p++ = hexdigits[(*adrp++)&0xf];
224	}
225
226	*p = 0;
227	p = buf[bnum];
228	bnum = (bnum + 1) % NUMBUFS;
229	return p;
230}
231
232DOMAIN_DEFINE(arpdomain);	/* forward declare and add to link set */
233
234static void
235arp_fasttimo(void)
236{
237	if (arp_drainwanted) {
238		arp_drain();
239		arp_drainwanted = 0;
240	}
241}
242
/*
 * Protocol switch for the ARP pseudo-domain.  There is a single entry
 * and it carries no input/output/control handlers; the only hooks
 * installed are initialization, the fast timeout and the drain request.
 */
const struct protosw arpsw[] = {
	{ .pr_type = 0,
	  .pr_domain = &arpdomain,
	  .pr_protocol = 0,
	  .pr_flags = 0,
	  .pr_input = 0,
	  .pr_output = 0,
	  .pr_ctlinput = 0,
	  .pr_ctloutput = 0,
	  .pr_usrreq =  0,
	  .pr_init = arp_init,
	  .pr_fasttimo = arp_fasttimo,
	  .pr_slowtimo = 0,
	  /* only flags the request; arp_fasttimo() performs the drain */
	  .pr_drain = arp_drainstub,
	}
};


/*
 * The ARP domain itself: family PF_ARP, using the protocol switch above.
 * dom_protoswNPROTOSW points one past the last arpsw[] element.
 */
struct domain arpdomain = {
	.dom_family = PF_ARP,
	.dom_name = "arp",
	.dom_protosw = arpsw,
	.dom_protoswNPROTOSW = &arpsw[__arraycount(arpsw)],
};
267
268/*
269 * ARP table locking.
270 *
271 * to prevent lossage vs. the arp_drain routine (which may be called at
272 * any time, including in a device driver context), we do two things:
273 *
274 * 1) manipulation of la->la_hold is done at splnet() (for all of
275 * about two instructions).
276 *
277 * 2) manipulation of the arp table's linked list is done under the
278 * protection of the ARP_LOCK; if arp_drain() or arptimer is called
279 * while the arp table is locked, we punt and try again later.
280 */
281
static int	arp_locked;
static inline int arp_lock_try(int);
static inline void arp_unlock(void);

/*
 * Try to take the ARP table lock.
 *
 * arp_locked is a nesting counter.  With `recurse' zero the attempt
 * fails when the counter is already non-zero; with `recurse' non-zero
 * the counter is always bumped.  The counter is examined and updated
 * at splvm(), because we're blocking things that would cause mbuf
 * allocation.
 *
 * Returns 1 if the lock was taken, 0 otherwise.
 */
static inline int
arp_lock_try(int recurse)
{
	int ipl, acquired;

	ipl = splvm();
	acquired = (recurse || !arp_locked);
	if (acquired)
		arp_locked++;
	splx(ipl);

	return acquired;
}
304
305static inline void
306arp_unlock(void)
307{
308	int s;
309
310	s = splvm();
311	arp_locked--;
312	splx(s);
313}
314
/*
 * Lock wrappers.
 *
 * ARP_LOCK(recurse) takes the table lock via arp_lock_try().  On
 * DIAGNOSTIC kernels a failed attempt reports the call site and panics,
 * and ARP_LOCK_CHECK() panics if the lock is not held.
 *
 * NOTE(review): on non-DIAGNOSTIC kernels the result of arp_lock_try()
 * is discarded, so a failed ARP_LOCK(0) goes unnoticed and the matching
 * ARP_UNLOCK() would still decrement the counter.
 */
#ifdef DIAGNOSTIC
#define	ARP_LOCK(recurse)						\
do {									\
	if (arp_lock_try(recurse) == 0) {				\
		printf("%s:%d: arp already locked\n", __FILE__, __LINE__); \
		panic("arp_lock");					\
	}								\
} while (/*CONSTCOND*/ 0)
#define	ARP_LOCK_CHECK()						\
do {									\
	if (arp_locked == 0) {						\
		printf("%s:%d: arp lock not held\n", __FILE__, __LINE__); \
		panic("arp lock check");				\
	}								\
} while (/*CONSTCOND*/ 0)
#else
#define	ARP_LOCK(x)		(void) arp_lock_try(x)
#define	ARP_LOCK_CHECK()	/* nothing */
#endif

/* Release one level of the table lock. */
#define	ARP_UNLOCK()		arp_unlock()
336
337static void sysctl_net_inet_arp_setup(struct sysctllog **);
338
339void
340arp_init(void)
341{
342
343	sysctl_net_inet_arp_setup(NULL);
344	arpstat_percpu = percpu_alloc(sizeof(uint64_t) * ARP_NSTATS);
345}
346
/*
 * Drain hook installed as pr_drain in arpsw[].  It does no work itself;
 * it only raises arp_drainwanted so that arp_fasttimo() calls
 * arp_drain() on its next run.
 */
static void
arp_drainstub(void)
{
	arp_drainwanted = 1;
}
352
353/*
354 * ARP protocol drain routine.  Called when memory is in short supply.
355 * Called at splvm();  don't acquire softnet_lock as can be called from
356 * hardware interrupt handlers.
357 */
358void
359arp_drain(void)
360{
361	struct llinfo_arp *la, *nla;
362	int count = 0;
363	struct mbuf *mold;
364
365	KERNEL_LOCK(1, NULL);
366
367	if (arp_lock_try(0) == 0) {
368		KERNEL_UNLOCK_ONE(NULL);
369		return;
370	}
371
372	for (la = LIST_FIRST(&llinfo_arp); la != NULL; la = nla) {
373		nla = LIST_NEXT(la, la_list);
374
375		mold = la->la_hold;
376		la->la_hold = 0;
377
378		if (mold) {
379			m_freem(mold);
380			count++;
381		}
382	}
383	ARP_UNLOCK();
384	ARP_STATADD(ARP_STAT_DFRDROPPED, count);
385	KERNEL_UNLOCK_ONE(NULL);
386}
387
388
389/*
390 * Timeout routine.  Age arp_tab entries periodically.
391 */
392/* ARGSUSED */
393static void
394arptimer(void *arg)
395{
396	struct llinfo_arp *la, *nla;
397
398	mutex_enter(softnet_lock);
399	KERNEL_LOCK(1, NULL);
400
401	if (arp_lock_try(0) == 0) {
402		/* get it later.. */
403		KERNEL_UNLOCK_ONE(NULL);
404		mutex_exit(softnet_lock);
405		return;
406	}
407
408	callout_reset(&arptimer_ch, arpt_prune * hz, arptimer, NULL);
409	for (la = LIST_FIRST(&llinfo_arp); la != NULL; la = nla) {
410		struct rtentry *rt = la->la_rt;
411
412		nla = LIST_NEXT(la, la_list);
413		if (rt->rt_expire == 0)
414			continue;
415		if ((rt->rt_expire - time_second) < arpt_refresh &&
416		    rt->rt_pksent > (time_second - arpt_keep)) {
417			/*
418			 * If the entry has been used during since last
419			 * refresh, try to renew it before deleting.
420			 */
421			arprequest(rt->rt_ifp,
422			    &satocsin(rt->rt_ifa->ifa_addr)->sin_addr,
423			    &satocsin(rt_getkey(rt))->sin_addr,
424			    CLLADDR(rt->rt_ifp->if_sadl));
425		} else if (rt->rt_expire <= time_second)
426			arptfree(la); /* timer has expired; clear */
427	}
428
429	ARP_UNLOCK();
430
431	KERNEL_UNLOCK_ONE(NULL);
432	mutex_exit(softnet_lock);
433}
434
435/*
436 * We set the gateway for RTF_CLONING routes to a "prototype"
437 * link-layer sockaddr whose interface type (if_type) and interface
438 * index (if_index) fields are prepared.
439 */
440static struct sockaddr *
441arp_setgate(struct rtentry *rt, struct sockaddr *gate,
442    const struct sockaddr *netmask)
443{
444	const struct ifnet *ifp = rt->rt_ifp;
445	uint8_t namelen = strlen(ifp->if_xname);
446	uint8_t addrlen = ifp->if_addrlen;
447
448	/*
449	 * XXX: If this is a manually added route to interface
450	 * such as older version of routed or gated might provide,
451	 * restore cloning bit.
452	 */
453	if ((rt->rt_flags & RTF_HOST) == 0 && netmask != NULL &&
454	    satocsin(netmask)->sin_addr.s_addr != 0xffffffff)
455		rt->rt_flags |= RTF_CLONING;
456	if (rt->rt_flags & RTF_CLONING) {
457		union {
458			struct sockaddr sa;
459			struct sockaddr_storage ss;
460			struct sockaddr_dl sdl;
461		} u;
462		/*
463		 * Case 1: This route should come from a route to iface.
464		 */
465		sockaddr_dl_init(&u.sdl, sizeof(u.ss),
466		    ifp->if_index, ifp->if_type, NULL, namelen, NULL, addrlen);
467		rt_setgate(rt, &u.sa);
468		gate = rt->rt_gateway;
469	}
470	return gate;
471}
472
473/*
474 * Parallel to llc_rtrequest.
475 */
476void
477arp_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info)
478{
479	struct sockaddr *gate = rt->rt_gateway;
480	struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
481	size_t allocsize;
482	struct mbuf *mold;
483	int s;
484	struct in_ifaddr *ia;
485	struct ifaddr *ifa;
486	struct ifnet *ifp = rt->rt_ifp;
487
488	if (!arpinit_done) {
489		arpinit_done = 1;
490		/*
491		 * We generate expiration times from time_second
492		 * so avoid accidentally creating permanent routes.
493		 */
494		if (time_second == 0) {
495			struct timespec ts;
496			ts.tv_sec = 1;
497			ts.tv_nsec = 0;
498			tc_setclock(&ts);
499		}
500		callout_init(&arptimer_ch, CALLOUT_MPSAFE);
501		callout_reset(&arptimer_ch, hz, arptimer, NULL);
502	}
503
504	if (req == RTM_LLINFO_UPD) {
505		struct in_addr *in;
506
507		if ((ifa = info->rti_ifa) == NULL)
508			return;
509
510		in = &ifatoia(ifa)->ia_addr.sin_addr;
511
512		arprequest(ifa->ifa_ifp, in, in,
513		    CLLADDR(ifa->ifa_ifp->if_sadl));
514		return;
515	}
516
517	if ((rt->rt_flags & RTF_GATEWAY) != 0) {
518		if (req != RTM_ADD)
519			return;
520
521		/*
522		 * linklayers with particular link MTU limitation.
523		 */
524		switch(ifp->if_type) {
525#if NFDDI > 0
526		case IFT_FDDI:
527			if (ifp->if_mtu > FDDIIPMTU)
528				rt->rt_rmx.rmx_mtu = FDDIIPMTU;
529			break;
530#endif
531#if NARC > 0
532		case IFT_ARCNET:
533		    {
534			int arcipifmtu;
535
536			if (ifp->if_flags & IFF_LINK0)
537				arcipifmtu = arc_ipmtu;
538			else
539				arcipifmtu = ARCMTU;
540			if (ifp->if_mtu > arcipifmtu)
541				rt->rt_rmx.rmx_mtu = arcipifmtu;
542			break;
543		    }
544#endif
545		}
546		return;
547	}
548
549	ARP_LOCK(1);		/* we may already be locked here. */
550
551	switch (req) {
552	case RTM_SETGATE:
553		gate = arp_setgate(rt, gate, info->rti_info[RTAX_NETMASK]);
554		break;
555	case RTM_ADD:
556		gate = arp_setgate(rt, gate, info->rti_info[RTAX_NETMASK]);
557		if (rt->rt_flags & RTF_CLONING) {
558			/*
559			 * Give this route an expiration time, even though
560			 * it's a "permanent" route, so that routes cloned
561			 * from it do not need their expiration time set.
562			 */
563			rt->rt_expire = time_second;
564			/*
565			 * linklayers with particular link MTU limitation.
566			 */
567			switch (ifp->if_type) {
568#if NFDDI > 0
569			case IFT_FDDI:
570				if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
571				    (rt->rt_rmx.rmx_mtu > FDDIIPMTU ||
572				     (rt->rt_rmx.rmx_mtu == 0 &&
573				      ifp->if_mtu > FDDIIPMTU)))
574					rt->rt_rmx.rmx_mtu = FDDIIPMTU;
575				break;
576#endif
577#if NARC > 0
578			case IFT_ARCNET:
579			    {
580				int arcipifmtu;
581				if (ifp->if_flags & IFF_LINK0)
582					arcipifmtu = arc_ipmtu;
583				else
584					arcipifmtu = ARCMTU;
585
586				if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
587				    (rt->rt_rmx.rmx_mtu > arcipifmtu ||
588				     (rt->rt_rmx.rmx_mtu == 0 &&
589				      ifp->if_mtu > arcipifmtu)))
590					rt->rt_rmx.rmx_mtu = arcipifmtu;
591				break;
592			    }
593#endif
594			}
595			break;
596		}
597		/* Announce a new entry if requested. */
598		if (rt->rt_flags & RTF_ANNOUNCE) {
599			arprequest(ifp,
600			    &satocsin(rt_getkey(rt))->sin_addr,
601			    &satocsin(rt_getkey(rt))->sin_addr,
602			    CLLADDR(satocsdl(gate)));
603		}
604		/*FALLTHROUGH*/
605	case RTM_RESOLVE:
606		if (gate->sa_family != AF_LINK ||
607		    gate->sa_len < sockaddr_dl_measure(0, ifp->if_addrlen)) {
608			log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
609			break;
610		}
611		satosdl(gate)->sdl_type = ifp->if_type;
612		satosdl(gate)->sdl_index = ifp->if_index;
613		if (la != NULL)
614			break; /* This happens on a route change */
615		/*
616		 * Case 2:  This route may come from cloning, or a manual route
617		 * add with a LL address.
618		 */
619		switch (ifp->if_type) {
620#if NTOKEN > 0
621		case IFT_ISO88025:
622			allocsize = sizeof(*la) + sizeof(struct token_rif);
623			break;
624#endif /* NTOKEN > 0 */
625		default:
626			allocsize = sizeof(*la);
627		}
628		R_Malloc(la, struct llinfo_arp *, allocsize);
629		rt->rt_llinfo = (void *)la;
630		if (la == NULL) {
631			log(LOG_DEBUG, "arp_rtrequest: malloc failed\n");
632			break;
633		}
634		arp_inuse++, arp_allocated++;
635		memset(la, 0, allocsize);
636		la->la_rt = rt;
637		rt->rt_flags |= RTF_LLINFO;
638		LIST_INSERT_HEAD(&llinfo_arp, la, la_list);
639
640		INADDR_TO_IA(satocsin(rt_getkey(rt))->sin_addr, ia);
641		while (ia && ia->ia_ifp != ifp)
642			NEXT_IA_WITH_SAME_ADDR(ia);
643		if (ia) {
644			/*
645			 * This test used to be
646			 *	if (lo0ifp->if_flags & IFF_UP)
647			 * It allowed local traffic to be forced through
648			 * the hardware by configuring the loopback down.
649			 * However, it causes problems during network
650			 * configuration for boards that can't receive
651			 * packets they send.  It is now necessary to clear
652			 * "useloopback" and remove the route to force
653			 * traffic out to the hardware.
654			 *
655			 * In 4.4BSD, the above "if" statement checked
656			 * rt->rt_ifa against rt_getkey(rt).  It was changed
657			 * to the current form so that we can provide a
658			 * better support for multiple IPv4 addresses on a
659			 * interface.
660			 */
661			rt->rt_expire = 0;
662			if (sockaddr_dl_init(satosdl(gate), gate->sa_len,
663			    ifp->if_index, ifp->if_type, NULL, 0,
664			    CLLADDR(ifp->if_sadl), ifp->if_addrlen) == NULL) {
665				panic("%s(%s): sockaddr_dl_init cannot fail",
666				    __func__, ifp->if_xname);
667			}
668			if (useloopback)
669				ifp = rt->rt_ifp = lo0ifp;
670			/*
671			 * make sure to set rt->rt_ifa to the interface
672			 * address we are using, otherwise we will have trouble
673			 * with source address selection.
674			 */
675			ifa = &ia->ia_ifa;
676			if (ifa != rt->rt_ifa)
677				rt_replace_ifa(rt, ifa);
678		}
679		break;
680
681	case RTM_DELETE:
682		if (la == NULL)
683			break;
684		arp_inuse--;
685		LIST_REMOVE(la, la_list);
686		rt->rt_llinfo = NULL;
687		rt->rt_flags &= ~RTF_LLINFO;
688
689		s = splnet();
690		mold = la->la_hold;
691		la->la_hold = 0;
692		splx(s);
693
694		if (mold)
695			m_freem(mold);
696
697		Free((void *)la);
698	}
699	ARP_UNLOCK();
700}
701
702/*
703 * Broadcast an ARP request. Caller specifies:
704 *	- arp header source ip address
705 *	- arp header target ip address
706 *	- arp header source ethernet address
707 */
708void
709arprequest(struct ifnet *ifp,
710    const struct in_addr *sip, const struct in_addr *tip,
711    const u_int8_t *enaddr)
712{
713	struct mbuf *m;
714	struct arphdr *ah;
715	struct sockaddr sa;
716	uint64_t *arps;
717
718	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
719		return;
720	MCLAIM(m, &arpdomain.dom_mowner);
721	switch (ifp->if_type) {
722	case IFT_IEEE1394:
723		m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
724		    ifp->if_addrlen;
725		break;
726	default:
727		m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
728		    2 * ifp->if_addrlen;
729		break;
730	}
731	m->m_pkthdr.len = m->m_len;
732	MH_ALIGN(m, m->m_len);
733	ah = mtod(m, struct arphdr *);
734	memset(ah, 0, m->m_len);
735	switch (ifp->if_type) {
736	case IFT_IEEE1394:	/* RFC2734 */
737		/* fill it now for ar_tpa computation */
738		ah->ar_hrd = htons(ARPHRD_IEEE1394);
739		break;
740	default:
741		/* ifp->if_output will fill ar_hrd */
742		break;
743	}
744	ah->ar_pro = htons(ETHERTYPE_IP);
745	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
746	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
747	ah->ar_op = htons(ARPOP_REQUEST);
748	memcpy(ar_sha(ah), enaddr, ah->ar_hln);
749	memcpy(ar_spa(ah), sip, ah->ar_pln);
750	memcpy(ar_tpa(ah), tip, ah->ar_pln);
751	sa.sa_family = AF_ARP;
752	sa.sa_len = 2;
753	m->m_flags |= M_BCAST;
754	arps = ARP_STAT_GETREF();
755	arps[ARP_STAT_SNDTOTAL]++;
756	arps[ARP_STAT_SENDREQUEST]++;
757	ARP_STAT_PUTREF();
758	(*ifp->if_output)(ifp, m, &sa, NULL);
759}
760
761/*
762 * Resolve an IP address into an ethernet address.  If success,
763 * desten is filled in.  If there is no entry in arptab,
764 * set one up and broadcast a request for the IP address.
765 * Hold onto this mbuf and resend it once the address
766 * is finally resolved.  A return value of 1 indicates
767 * that desten has been filled in and the packet should be sent
768 * normally; a 0 return indicates that the packet has been
769 * taken over here, either now or for later transmission.
770 */
771int
772arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m,
773    const struct sockaddr *dst, u_char *desten)
774{
775	struct llinfo_arp *la;
776	const struct sockaddr_dl *sdl;
777	struct mbuf *mold;
778	int s;
779
780	if ((la = arplookup1(m, &satocsin(dst)->sin_addr, 1, 0, rt)) != NULL)
781		rt = la->la_rt;
782
783	if (la == NULL || rt == NULL) {
784		ARP_STATINC(ARP_STAT_ALLOCFAIL);
785		log(LOG_DEBUG,
786		    "arpresolve: can't allocate llinfo on %s for %s\n",
787		    ifp->if_xname, in_fmtaddr(satocsin(dst)->sin_addr));
788		m_freem(m);
789		return 0;
790	}
791	sdl = satocsdl(rt->rt_gateway);
792	/*
793	 * Check the address family and length is valid, the address
794	 * is resolved; otherwise, try to resolve.
795	 */
796	if ((rt->rt_expire == 0 || rt->rt_expire > time_second) &&
797	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
798		memcpy(desten, CLLADDR(sdl),
799		    min(sdl->sdl_alen, ifp->if_addrlen));
800		rt->rt_pksent = time_second; /* Time for last pkt sent */
801		return 1;
802	}
803	/*
804	 * There is an arptab entry, but no ethernet address
805	 * response yet.  Replace the held mbuf with this
806	 * latest one.
807	 */
808
809	ARP_STATINC(ARP_STAT_DFRTOTAL);
810	s = splnet();
811	mold = la->la_hold;
812	la->la_hold = m;
813	splx(s);
814
815	if (mold) {
816		ARP_STATINC(ARP_STAT_DFRDROPPED);
817		m_freem(mold);
818	}
819
820	/*
821	 * Re-send the ARP request when appropriate.
822	 */
823#ifdef	DIAGNOSTIC
824	if (rt->rt_expire == 0) {
825		/* This should never happen. (Should it? -gwr) */
826		printf("arpresolve: unresolved and rt_expire == 0\n");
827		/* Set expiration time to now (expired). */
828		rt->rt_expire = time_second;
829	}
830#endif
831	if (rt->rt_expire) {
832		rt->rt_flags &= ~RTF_REJECT;
833		if (la->la_asked == 0 || rt->rt_expire != time_second) {
834			rt->rt_expire = time_second;
835			if (la->la_asked++ < arp_maxtries) {
836				arprequest(ifp,
837				    &satocsin(rt->rt_ifa->ifa_addr)->sin_addr,
838				    &satocsin(dst)->sin_addr,
839#if NCARP > 0
840				    (rt->rt_ifp->if_type == IFT_CARP) ?
841				    CLLADDR(rt->rt_ifp->if_sadl):
842#endif
843				    CLLADDR(ifp->if_sadl));
844			} else {
845				rt->rt_flags |= RTF_REJECT;
846				rt->rt_expire += arpt_down;
847				la->la_asked = 0;
848			}
849		}
850	}
851	return 0;
852}
853
854/*
855 * Common length and type checks are done here,
856 * then the protocol-specific routine is called.
857 */
858void
859arpintr(void)
860{
861	struct mbuf *m;
862	struct arphdr *ar;
863	int s;
864	int arplen;
865
866	mutex_enter(softnet_lock);
867	KERNEL_LOCK(1, NULL);
868	while (arpintrq.ifq_head) {
869		s = splnet();
870		IF_DEQUEUE(&arpintrq, m);
871		splx(s);
872		if (m == 0 || (m->m_flags & M_PKTHDR) == 0)
873			panic("arpintr");
874
875		MCLAIM(m, &arpdomain.dom_mowner);
876		ARP_STATINC(ARP_STAT_RCVTOTAL);
877
878		/*
879		 * First, make sure we have at least struct arphdr.
880		 */
881		if (m->m_len < sizeof(struct arphdr) ||
882		    (ar = mtod(m, struct arphdr *)) == NULL)
883			goto badlen;
884
885		switch (m->m_pkthdr.rcvif->if_type) {
886		case IFT_IEEE1394:
887			arplen = sizeof(struct arphdr) +
888			    ar->ar_hln + 2 * ar->ar_pln;
889			break;
890		default:
891			arplen = sizeof(struct arphdr) +
892			    2 * ar->ar_hln + 2 * ar->ar_pln;
893			break;
894		}
895
896		if (/* XXX ntohs(ar->ar_hrd) == ARPHRD_ETHER && */
897		    m->m_len >= arplen)
898			switch (ntohs(ar->ar_pro)) {
899			case ETHERTYPE_IP:
900			case ETHERTYPE_IPTRAILERS:
901				in_arpinput(m);
902				continue;
903			default:
904				ARP_STATINC(ARP_STAT_RCVBADPROTO);
905			}
906		else {
907badlen:
908			ARP_STATINC(ARP_STAT_RCVBADLEN);
909		}
910		m_freem(m);
911	}
912	KERNEL_UNLOCK_ONE(NULL);
913	mutex_exit(softnet_lock);
914}
915
916/*
917 * ARP for Internet protocols on 10 Mb/s Ethernet.
918 * Algorithm is that given in RFC 826.
919 * In addition, a sanity check is performed on the sender
920 * protocol address, to catch impersonators.
921 * We no longer handle negotiations for use of trailer protocol:
922 * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
923 * along with IP replies if we wanted trailers sent to us,
924 * and also sent them in response to IP replies.
925 * This allowed either end to announce the desire to receive
926 * trailer packets.
927 * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
928 * but formerly didn't normally send requests.
929 */
930static void
931in_arpinput(struct mbuf *m)
932{
933	struct arphdr *ah;
934	struct ifnet *ifp = m->m_pkthdr.rcvif;
935	struct llinfo_arp *la = NULL;
936	struct rtentry  *rt;
937	struct in_ifaddr *ia;
938#if NBRIDGE > 0
939	struct in_ifaddr *bridge_ia = NULL;
940#endif
941#if NCARP > 0
942	u_int32_t count = 0, index = 0;
943#endif
944	struct sockaddr_dl *sdl;
945	struct sockaddr sa;
946	struct in_addr isaddr, itaddr, myaddr;
947	int op;
948	struct mbuf *mold;
949	void *tha;
950	int s;
951	uint64_t *arps;
952
953	if (__predict_false(m_makewritable(&m, 0, m->m_pkthdr.len, M_DONTWAIT)))
954		goto out;
955	ah = mtod(m, struct arphdr *);
956	op = ntohs(ah->ar_op);
957
958	/*
959	 * Fix up ah->ar_hrd if necessary, before using ar_tha() or
960	 * ar_tpa().
961	 */
962	switch (ifp->if_type) {
963	case IFT_IEEE1394:
964		if (ntohs(ah->ar_hrd) == ARPHRD_IEEE1394)
965			;
966		else {
967			/* XXX this is to make sure we compute ar_tha right */
968			/* XXX check ar_hrd more strictly? */
969			ah->ar_hrd = htons(ARPHRD_IEEE1394);
970		}
971		break;
972	default:
973		/* XXX check ar_hrd? */
974		break;
975	}
976
977	memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
978	memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
979
980	if (m->m_flags & (M_BCAST|M_MCAST))
981		ARP_STATINC(ARP_STAT_RCVMCAST);
982
983	/*
984	 * If the target IP address is zero, ignore the packet.
	 * This prevents the code below from trying to answer
986	 * when we are using IP address zero (booting).
987	 */
988	if (in_nullhost(itaddr)) {
989		ARP_STATINC(ARP_STAT_RCVZEROTPA);
990		goto out;
991	}
992
993
994	/*
995	 * Search for a matching interface address
996	 * or any address on the interface to use
997	 * as a dummy address in the rest of this function
998	 */
999
1000	INADDR_TO_IA(itaddr, ia);
1001	while (ia != NULL) {
1002#if NCARP > 0
1003		if (ia->ia_ifp->if_type == IFT_CARP &&
1004		    ((ia->ia_ifp->if_flags & (IFF_UP|IFF_RUNNING)) ==
1005		    (IFF_UP|IFF_RUNNING))) {
1006			index++;
1007			if (ia->ia_ifp == m->m_pkthdr.rcvif &&
1008			    carp_iamatch(ia, ar_sha(ah),
1009			    &count, index)) {
1010				break;
1011				}
1012		} else
1013#endif
1014			    if (ia->ia_ifp == m->m_pkthdr.rcvif)
1015				break;
1016#if NBRIDGE > 0
1017		/*
1018		 * If the interface we received the packet on
1019		 * is part of a bridge, check to see if we need
1020		 * to "bridge" the packet to ourselves at this
1021		 * layer.  Note we still prefer a perfect match,
1022		 * but allow this weaker match if necessary.
1023		 */
1024		if (m->m_pkthdr.rcvif->if_bridge != NULL &&
1025		    m->m_pkthdr.rcvif->if_bridge == ia->ia_ifp->if_bridge)
1026			bridge_ia = ia;
1027#endif /* NBRIDGE > 0 */
1028
1029		NEXT_IA_WITH_SAME_ADDR(ia);
1030	}
1031
1032#if NBRIDGE > 0
1033	if (ia == NULL && bridge_ia != NULL) {
1034		ia = bridge_ia;
1035		ifp = bridge_ia->ia_ifp;
1036	}
1037#endif
1038
1039	if (ia == NULL) {
1040		INADDR_TO_IA(isaddr, ia);
1041		while ((ia != NULL) && ia->ia_ifp != m->m_pkthdr.rcvif)
1042			NEXT_IA_WITH_SAME_ADDR(ia);
1043
1044		if (ia == NULL) {
1045			IFP_TO_IA(ifp, ia);
1046			if (ia == NULL) {
1047				ARP_STATINC(ARP_STAT_RCVNOINT);
1048				goto out;
1049			}
1050		}
1051	}
1052
1053	myaddr = ia->ia_addr.sin_addr;
1054
1055	/* XXX checks for bridge case? */
1056	if (!memcmp(ar_sha(ah), CLLADDR(ifp->if_sadl), ifp->if_addrlen)) {
1057		ARP_STATINC(ARP_STAT_RCVLOCALSHA);
1058		goto out;	/* it's from me, ignore it. */
1059	}
1060
1061	/* XXX checks for bridge case? */
1062	if (!memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
1063		ARP_STATINC(ARP_STAT_RCVBCASTSHA);
1064		log(LOG_ERR,
1065		    "%s: arp: link address is broadcast for IP address %s!\n",
1066		    ifp->if_xname, in_fmtaddr(isaddr));
1067		goto out;
1068	}
1069
1070	/*
1071	 * If the source IP address is zero, this is an RFC 5227 ARP probe
1072	 */
1073	if (in_nullhost(isaddr)) {
1074		ARP_STATINC(ARP_STAT_RCVZEROSPA);
1075		goto reply;
1076	}
1077
1078	if (in_hosteq(isaddr, myaddr)) {
1079		ARP_STATINC(ARP_STAT_RCVLOCALSPA);
1080		log(LOG_ERR,
1081		   "duplicate IP address %s sent from link address %s\n",
1082		   in_fmtaddr(isaddr), lla_snprintf(ar_sha(ah), ah->ar_hln));
1083		itaddr = myaddr;
1084		goto reply;
1085	}
1086	la = arplookup(m, &isaddr, in_hosteq(itaddr, myaddr), 0);
1087	if (la != NULL && (rt = la->la_rt) && (sdl = satosdl(rt->rt_gateway))) {
1088		if (sdl->sdl_alen &&
1089		    memcmp(ar_sha(ah), CLLADDR(sdl), sdl->sdl_alen)) {
1090			if (rt->rt_flags & RTF_STATIC) {
1091				ARP_STATINC(ARP_STAT_RCVOVERPERM);
1092				if (!log_permanent_modify)
1093					goto out;
1094				log(LOG_INFO,
1095				    "%s tried to overwrite permanent arp info"
1096				    " for %s\n",
1097				    lla_snprintf(ar_sha(ah), ah->ar_hln),
1098				    in_fmtaddr(isaddr));
1099				goto out;
1100			} else if (rt->rt_ifp != ifp) {
1101				ARP_STATINC(ARP_STAT_RCVOVERINT);
1102				if (!log_wrong_iface)
1103					goto out;
1104				log(LOG_INFO,
1105				    "%s on %s tried to overwrite "
1106				    "arp info for %s on %s\n",
1107				    lla_snprintf(ar_sha(ah), ah->ar_hln),
1108				    ifp->if_xname, in_fmtaddr(isaddr),
1109				    rt->rt_ifp->if_xname);
1110				    goto out;
1111			} else {
1112				ARP_STATINC(ARP_STAT_RCVOVER);
1113				if (log_movements)
1114					log(LOG_INFO, "arp info overwritten "
1115					    "for %s by %s\n",
1116					    in_fmtaddr(isaddr),
1117					    lla_snprintf(ar_sha(ah),
1118					    ah->ar_hln));
1119			}
1120		}
1121		/*
1122		 * sanity check for the address length.
1123		 * XXX this does not work for protocols with variable address
1124		 * length. -is
1125		 */
1126		if (sdl->sdl_alen &&
1127		    sdl->sdl_alen != ah->ar_hln) {
1128			ARP_STATINC(ARP_STAT_RCVLENCHG);
1129			log(LOG_WARNING,
1130			    "arp from %s: new addr len %d, was %d\n",
1131			    in_fmtaddr(isaddr), ah->ar_hln, sdl->sdl_alen);
1132		}
1133		if (ifp->if_addrlen != ah->ar_hln) {
1134			ARP_STATINC(ARP_STAT_RCVBADLEN);
1135			log(LOG_WARNING,
1136			    "arp from %s: addr len: new %d, i/f %d (ignored)\n",
1137			    in_fmtaddr(isaddr), ah->ar_hln,
1138			    ifp->if_addrlen);
1139			goto reply;
1140		}
1141#if NTOKEN > 0
1142		/*
1143		 * XXX uses m_data and assumes the complete answer including
1144		 * XXX token-ring headers is in the same buf
1145		 */
1146		if (ifp->if_type == IFT_ISO88025) {
1147			struct token_header *trh;
1148
1149			trh = (struct token_header *)M_TRHSTART(m);
1150			if (trh->token_shost[0] & TOKEN_RI_PRESENT) {
1151				struct token_rif	*rif;
1152				size_t	riflen;
1153
1154				rif = TOKEN_RIF(trh);
1155				riflen = (ntohs(rif->tr_rcf) &
1156				    TOKEN_RCF_LEN_MASK) >> 8;
1157
1158				if (riflen > 2 &&
1159				    riflen < sizeof(struct token_rif) &&
1160				    (riflen & 1) == 0) {
1161					rif->tr_rcf ^= htons(TOKEN_RCF_DIRECTION);
1162					rif->tr_rcf &= htons(~TOKEN_RCF_BROADCAST_MASK);
1163					memcpy(TOKEN_RIF(la), rif, riflen);
1164				}
1165			}
1166		}
1167#endif /* NTOKEN > 0 */
1168		(void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, ar_sha(ah),
1169		    ah->ar_hln);
1170		if (rt->rt_expire)
1171			rt->rt_expire = time_second + arpt_keep;
1172		rt->rt_flags &= ~RTF_REJECT;
1173		la->la_asked = 0;
1174
1175		s = splnet();
1176		mold = la->la_hold;
1177		la->la_hold = 0;
1178		splx(s);
1179
1180		if (mold) {
1181			ARP_STATINC(ARP_STAT_DFRSENT);
1182			(*ifp->if_output)(ifp, mold, rt_getkey(rt), rt);
1183		}
1184	}
1185reply:
1186	if (op != ARPOP_REQUEST) {
1187		if (op == ARPOP_REPLY)
1188			ARP_STATINC(ARP_STAT_RCVREPLY);
1189	out:
1190		m_freem(m);
1191		return;
1192	}
1193	ARP_STATINC(ARP_STAT_RCVREQUEST);
1194	if (in_hosteq(itaddr, myaddr)) {
1195		/* I am the target */
1196		tha = ar_tha(ah);
1197		if (tha)
1198			memcpy(tha, ar_sha(ah), ah->ar_hln);
1199		memcpy(ar_sha(ah), CLLADDR(ifp->if_sadl), ah->ar_hln);
1200	} else {
1201		la = arplookup(m, &itaddr, 0, SIN_PROXY);
1202		if (la == NULL)
1203			goto out;
1204		rt = la->la_rt;
1205		if (rt->rt_ifp->if_type == IFT_CARP &&
1206		    m->m_pkthdr.rcvif->if_type != IFT_CARP)
1207			goto out;
1208		tha = ar_tha(ah);
1209		if (tha)
1210			memcpy(tha, ar_sha(ah), ah->ar_hln);
1211		sdl = satosdl(rt->rt_gateway);
1212		memcpy(ar_sha(ah), CLLADDR(sdl), ah->ar_hln);
1213	}
1214
1215	memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
1216	memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
1217	ah->ar_op = htons(ARPOP_REPLY);
1218	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
1219	switch (ifp->if_type) {
1220	case IFT_IEEE1394:
1221		/*
1222		 * ieee1394 arp reply is broadcast
1223		 */
1224		m->m_flags &= ~M_MCAST;
1225		m->m_flags |= M_BCAST;
1226		m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + ah->ar_hln;
1227		break;
1228
1229	default:
1230		m->m_flags &= ~(M_BCAST|M_MCAST); /* never reply by broadcast */
1231		m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
1232		break;
1233	}
1234	m->m_pkthdr.len = m->m_len;
1235	sa.sa_family = AF_ARP;
1236	sa.sa_len = 2;
1237	arps = ARP_STAT_GETREF();
1238	arps[ARP_STAT_SNDTOTAL]++;
1239	arps[ARP_STAT_SNDREPLY]++;
1240	ARP_STAT_PUTREF();
1241	(*ifp->if_output)(ifp, m, &sa, NULL);
1242	return;
1243}
1244
1245/*
1246 * Free an arp entry.
1247 */
1248static void arptfree(struct llinfo_arp *la)
1249{
1250	struct rtentry *rt = la->la_rt;
1251	struct sockaddr_dl *sdl;
1252
1253	ARP_LOCK_CHECK();
1254
1255	if (rt == NULL)
1256		panic("arptfree");
1257	if (rt->rt_refcnt > 0 && (sdl = satosdl(rt->rt_gateway)) &&
1258	    sdl->sdl_family == AF_LINK) {
1259		sdl->sdl_alen = 0;
1260		la->la_asked = 0;
1261		rt->rt_flags &= ~RTF_REJECT;
1262		return;
1263	}
1264	rtrequest(RTM_DELETE, rt_getkey(rt), NULL, rt_mask(rt), 0, NULL);
1265}
1266
/*
 * Look up (or enter) an address in the ARP table when the caller has no
 * route entry to offer: forwards to arplookup1() with a NULL route.
 */
static struct llinfo_arp *
arplookup(struct mbuf *m, const struct in_addr *addr, int create, int proxy)
{
	struct llinfo_arp *la;

	la = arplookup1(m, addr, create, proxy, NULL);
	return la;
}
1272
1273/*
1274 * Lookup or enter a new address in arptab.
1275 */
1276static struct llinfo_arp *
1277arplookup1(struct mbuf *m, const struct in_addr *addr, int create, int proxy,
1278    struct rtentry *rt0)
1279{
1280	struct arphdr *ah;
1281	struct ifnet *ifp = m->m_pkthdr.rcvif;
1282	struct rtentry *rt;
1283	struct sockaddr_inarp sin;
1284	const char *why = NULL;
1285
1286	ah = mtod(m, struct arphdr *);
1287	if (rt0 == NULL) {
1288		memset(&sin, 0, sizeof(sin));
1289		sin.sin_len = sizeof(sin);
1290		sin.sin_family = AF_INET;
1291		sin.sin_addr = *addr;
1292		sin.sin_other = proxy ? SIN_PROXY : 0;
1293		rt = rtalloc1(sintosa(&sin), create);
1294		if (rt == NULL)
1295			return NULL;
1296		rt->rt_refcnt--;
1297	} else
1298		rt = rt0;
1299
1300#define	IS_LLINFO(__rt)							  \
1301	(((__rt)->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) == RTF_LLINFO && \
1302	 (__rt)->rt_gateway->sa_family == AF_LINK)
1303
1304
1305	if (IS_LLINFO(rt))
1306		return (struct llinfo_arp *)rt->rt_llinfo;
1307
1308	if (create) {
1309		if (rt->rt_flags & RTF_GATEWAY)
1310			why = "host is not on local network";
1311		else if ((rt->rt_flags & RTF_LLINFO) == 0) {
1312			ARP_STATINC(ARP_STAT_ALLOCFAIL);
1313			why = "could not allocate llinfo";
1314		} else
1315			why = "gateway route is not ours";
1316		log(LOG_DEBUG, "arplookup: unable to enter address"
1317		    " for %s@%s on %s (%s)\n",
1318		    in_fmtaddr(*addr), lla_snprintf(ar_sha(ah), ah->ar_hln),
1319		    (ifp) ? ifp->if_xname : "null", why);
1320		if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_CLONED) != 0) {
1321			rtrequest(RTM_DELETE, rt_getkey(rt),
1322		    	    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1323		}
1324	}
1325	return NULL;
1326}
1327
/*
 * ARP ioctl entry point.  No command is handled here: every request,
 * whatever its arguments, is answered with EOPNOTSUPP.
 */
int
arpioctl(u_long cmd, void *data)
{
	(void)cmd;
	(void)data;

	return EOPNOTSUPP;
}
1334
1335void
1336arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
1337{
1338	struct in_addr *ip;
1339
1340	/*
1341	 * Warn the user if another station has this IP address,
1342	 * but only if the interface IP address is not zero.
1343	 */
1344	ip = &IA_SIN(ifa)->sin_addr;
1345	if (!in_nullhost(*ip))
1346		arprequest(ifp, ip, ip, CLLADDR(ifp->if_sadl));
1347
1348	ifa->ifa_rtrequest = arp_rtrequest;
1349	ifa->ifa_flags |= RTF_CLONING;
1350}
1351
1352/*
1353 * Called from 10 Mb/s Ethernet interrupt handlers
1354 * when ether packet type ETHERTYPE_REVARP
1355 * is received.  Common length and type checks are done here,
1356 * then the protocol-specific routine is called.
1357 */
1358void
1359revarpinput(struct mbuf *m)
1360{
1361	struct arphdr *ar;
1362
1363	if (m->m_len < sizeof(struct arphdr))
1364		goto out;
1365	ar = mtod(m, struct arphdr *);
1366#if 0 /* XXX I don't think we need this... and it will prevent other LL */
1367	if (ntohs(ar->ar_hrd) != ARPHRD_ETHER)
1368		goto out;
1369#endif
1370	if (m->m_len < sizeof(struct arphdr) + 2 * (ar->ar_hln + ar->ar_pln))
1371		goto out;
1372	switch (ntohs(ar->ar_pro)) {
1373	case ETHERTYPE_IP:
1374	case ETHERTYPE_IPTRAILERS:
1375		in_revarpinput(m);
1376		return;
1377
1378	default:
1379		break;
1380	}
1381out:
1382	m_freem(m);
1383}
1384
1385/*
1386 * RARP for Internet protocols on 10 Mb/s Ethernet.
1387 * Algorithm is that given in RFC 903.
1388 * We are only using for bootstrap purposes to get an ip address for one of
1389 * our interfaces.  Thus we support no user-interface.
1390 *
1391 * Since the contents of the RARP reply are specific to the interface that
1392 * sent the request, this code must ensure that they are properly associated.
1393 *
1394 * Note: also supports ARP via RARP packets, per the RFC.
1395 */
1396void
1397in_revarpinput(struct mbuf *m)
1398{
1399	struct ifnet *ifp;
1400	struct arphdr *ah;
1401	void *tha;
1402	int op;
1403
1404	ah = mtod(m, struct arphdr *);
1405	op = ntohs(ah->ar_op);
1406
1407	switch (m->m_pkthdr.rcvif->if_type) {
1408	case IFT_IEEE1394:
1409		/* ARP without target hardware address is not supported */
1410		goto out;
1411	default:
1412		break;
1413	}
1414
1415	switch (op) {
1416	case ARPOP_REQUEST:
1417	case ARPOP_REPLY:	/* per RFC */
1418		in_arpinput(m);
1419		return;
1420	case ARPOP_REVREPLY:
1421		break;
1422	case ARPOP_REVREQUEST:	/* handled by rarpd(8) */
1423	default:
1424		goto out;
1425	}
1426	if (!revarp_in_progress)
1427		goto out;
1428	ifp = m->m_pkthdr.rcvif;
1429	if (ifp != myip_ifp) /* !same interface */
1430		goto out;
1431	if (myip_initialized)
1432		goto wake;
1433	tha = ar_tha(ah);
1434	if (tha == NULL)
1435		goto out;
1436	if (memcmp(tha, CLLADDR(ifp->if_sadl), ifp->if_sadl->sdl_alen))
1437		goto out;
1438	memcpy(&srv_ip, ar_spa(ah), sizeof(srv_ip));
1439	memcpy(&myip, ar_tpa(ah), sizeof(myip));
1440	myip_initialized = 1;
1441wake:	/* Do wakeup every time in case it was missed. */
1442	wakeup((void *)&myip);
1443
1444out:
1445	m_freem(m);
1446}
1447
1448/*
1449 * Send a RARP request for the ip address of the specified interface.
1450 * The request should be RFC 903-compliant.
1451 */
1452void
1453revarprequest(struct ifnet *ifp)
1454{
1455	struct sockaddr sa;
1456	struct mbuf *m;
1457	struct arphdr *ah;
1458	void *tha;
1459
1460	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
1461		return;
1462	MCLAIM(m, &arpdomain.dom_mowner);
1463	m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
1464	    2*ifp->if_addrlen;
1465	m->m_pkthdr.len = m->m_len;
1466	MH_ALIGN(m, m->m_len);
1467	ah = mtod(m, struct arphdr *);
1468	memset(ah, 0, m->m_len);
1469	ah->ar_pro = htons(ETHERTYPE_IP);
1470	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
1471	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
1472	ah->ar_op = htons(ARPOP_REVREQUEST);
1473
1474	memcpy(ar_sha(ah), CLLADDR(ifp->if_sadl), ah->ar_hln);
1475	tha = ar_tha(ah);
1476	if (tha == NULL)
1477		return;
1478	memcpy(tha, CLLADDR(ifp->if_sadl), ah->ar_hln);
1479
1480	sa.sa_family = AF_ARP;
1481	sa.sa_len = 2;
1482	m->m_flags |= M_BCAST;
1483
1484	KERNEL_LOCK(1, NULL);
1485	(*ifp->if_output)(ifp, m, &sa, NULL);
1486	KERNEL_UNLOCK_ONE(NULL);
1487}
1488
1489/*
1490 * RARP for the ip address of the specified interface, but also
1491 * save the ip address of the server that sent the answer.
1492 * Timeout if no response is received.
1493 */
1494int
1495revarpwhoarewe(struct ifnet *ifp, struct in_addr *serv_in,
1496    struct in_addr *clnt_in)
1497{
1498	int result, count = 20;
1499
1500	myip_initialized = 0;
1501	myip_ifp = ifp;
1502
1503	revarp_in_progress = 1;
1504	while (count--) {
1505		revarprequest(ifp);
1506		result = tsleep((void *)&myip, PSOCK, "revarp", hz/2);
1507		if (result != EWOULDBLOCK)
1508			break;
1509	}
1510	revarp_in_progress = 0;
1511
1512	if (!myip_initialized)
1513		return ENETUNREACH;
1514
1515	memcpy(serv_in, &srv_ip, sizeof(*serv_in));
1516	memcpy(clnt_in, &myip, sizeof(*clnt_in));
1517	return 0;
1518}
1519
1520
1521
1522#ifdef DDB
1523
1524#include <machine/db_machdep.h>
1525#include <ddb/db_interface.h>
1526#include <ddb/db_output.h>
1527
1528static void
1529db_print_sa(const struct sockaddr *sa)
1530{
1531	int len;
1532	const u_char *p;
1533
1534	if (sa == NULL) {
1535		db_printf("[NULL]");
1536		return;
1537	}
1538
1539	p = (const u_char *)sa;
1540	len = sa->sa_len;
1541	db_printf("[");
1542	while (len > 0) {
1543		db_printf("%d", *p);
1544		p++; len--;
1545		if (len) db_printf(",");
1546	}
1547	db_printf("]\n");
1548}
1549
1550static void
1551db_print_ifa(struct ifaddr *ifa)
1552{
1553	if (ifa == NULL)
1554		return;
1555	db_printf("  ifa_addr=");
1556	db_print_sa(ifa->ifa_addr);
1557	db_printf("  ifa_dsta=");
1558	db_print_sa(ifa->ifa_dstaddr);
1559	db_printf("  ifa_mask=");
1560	db_print_sa(ifa->ifa_netmask);
1561	db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
1562			  ifa->ifa_flags,
1563			  ifa->ifa_refcnt,
1564			  ifa->ifa_metric);
1565}
1566
1567static void
1568db_print_llinfo(void *li)
1569{
1570	struct llinfo_arp *la;
1571
1572	if (li == NULL)
1573		return;
1574	la = (struct llinfo_arp *)li;
1575	db_printf("  la_rt=%p la_hold=%p, la_asked=0x%lx\n",
1576			  la->la_rt, la->la_hold, la->la_asked);
1577}
1578
1579/*
1580 * Function to pass to rt_walktree().
1581 * Return non-zero error to abort walk.
1582 */
1583static int
1584db_show_rtentry(struct rtentry *rt, void *w)
1585{
1586	db_printf("rtentry=%p", rt);
1587
1588	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
1589			  rt->rt_flags, rt->rt_refcnt,
1590			  rt->rt_use, (uint64_t)rt->rt_expire);
1591
1592	db_printf(" key="); db_print_sa(rt_getkey(rt));
1593	db_printf(" mask="); db_print_sa(rt_mask(rt));
1594	db_printf(" gw="); db_print_sa(rt->rt_gateway);
1595
1596	db_printf(" ifp=%p ", rt->rt_ifp);
1597	if (rt->rt_ifp)
1598		db_printf("(%s)", rt->rt_ifp->if_xname);
1599	else
1600		db_printf("(NULL)");
1601
1602	db_printf(" ifa=%p\n", rt->rt_ifa);
1603	db_print_ifa(rt->rt_ifa);
1604
1605	db_printf(" gwroute=%p llinfo=%p\n",
1606			  rt->rt_gwroute, rt->rt_llinfo);
1607	db_print_llinfo(rt->rt_llinfo);
1608
1609	return 0;
1610}
1611
1612/*
1613 * Function to print all the route trees.
1614 * Use this from ddb:  "show arptab"
1615 */
1616void
1617db_show_arptab(db_expr_t addr, bool have_addr,
1618    db_expr_t count, const char *modif)
1619{
1620	rt_walktree(AF_INET, db_show_rtentry, NULL);
1621}
1622#endif
1623
1624static int
1625sysctl_net_inet_arp_stats(SYSCTLFN_ARGS)
1626{
1627
1628	return NETSTAT_SYSCTL(arpstat_percpu, ARP_NSTATS);
1629}
1630
1631static void
1632sysctl_net_inet_arp_setup(struct sysctllog **clog)
1633{
1634	const struct sysctlnode *node;
1635
1636	sysctl_createv(clog, 0, NULL, NULL,
1637			CTLFLAG_PERMANENT,
1638			CTLTYPE_NODE, "net", NULL,
1639			NULL, 0, NULL, 0,
1640			CTL_NET, CTL_EOL);
1641	sysctl_createv(clog, 0, NULL, NULL,
1642			CTLFLAG_PERMANENT,
1643			CTLTYPE_NODE, "inet", NULL,
1644			NULL, 0, NULL, 0,
1645			CTL_NET, PF_INET, CTL_EOL);
1646	sysctl_createv(clog, 0, NULL, &node,
1647			CTLFLAG_PERMANENT,
1648			CTLTYPE_NODE, "arp",
1649			SYSCTL_DESCR("Address Resolution Protocol"),
1650			NULL, 0, NULL, 0,
1651			CTL_NET, PF_INET, CTL_CREATE, CTL_EOL);
1652
1653	sysctl_createv(clog, 0, NULL, NULL,
1654			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1655			CTLTYPE_INT, "prune",
1656			SYSCTL_DESCR("ARP cache pruning interval in seconds"),
1657			NULL, 0, &arpt_prune, 0,
1658			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1659
1660	sysctl_createv(clog, 0, NULL, NULL,
1661			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1662			CTLTYPE_INT, "keep",
1663			SYSCTL_DESCR("Valid ARP entry lifetime in seconds"),
1664			NULL, 0, &arpt_keep, 0,
1665			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1666
1667	sysctl_createv(clog, 0, NULL, NULL,
1668			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1669			CTLTYPE_INT, "down",
1670			SYSCTL_DESCR("Failed ARP entry lifetime in seconds"),
1671			NULL, 0, &arpt_down, 0,
1672			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1673
1674	sysctl_createv(clog, 0, NULL, NULL,
1675			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1676			CTLTYPE_INT, "refresh",
1677			SYSCTL_DESCR("ARP entry refresh interval"),
1678			NULL, 0, &arpt_refresh, 0,
1679			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1680
1681	sysctl_createv(clog, 0, NULL, NULL,
1682			CTLFLAG_PERMANENT,
1683			CTLTYPE_STRUCT, "stats",
1684			SYSCTL_DESCR("ARP statistics"),
1685			sysctl_net_inet_arp_stats, 0, NULL, 0,
1686			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1687
1688	sysctl_createv(clog, 0, NULL, NULL,
1689			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1690			CTLTYPE_INT, "log_movements",
1691			SYSCTL_DESCR("log ARP replies from MACs different than"
1692			    " the one in the cache"),
1693			NULL, 0, &log_movements, 0,
1694			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1695
1696	sysctl_createv(clog, 0, NULL, NULL,
1697			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1698			CTLTYPE_INT, "log_permanent_modify",
1699			SYSCTL_DESCR("log ARP replies from MACs different than"
1700			    " the one in the permanent arp entry"),
1701			NULL, 0, &log_permanent_modify, 0,
1702			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1703
1704	sysctl_createv(clog, 0, NULL, NULL,
1705			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1706			CTLTYPE_INT, "log_wrong_iface",
1707			SYSCTL_DESCR("log ARP packets arriving on the wrong"
1708			    " interface"),
1709			NULL, 0, &log_wrong_iface, 0,
1710			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
1711}
1712
1713#endif /* INET */
1714