in.c revision 229621
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (C) 2001 WIDE Project.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 *    may be used to endorse or promote products derived from this software
16 *    without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 *	@(#)in.c	8.4 (Berkeley) 1/9/95
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/netinet/in.c 229621 2012-01-05 19:00:36Z jhb $");
35
36#include "opt_mpath.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/sockio.h>
41#include <sys/malloc.h>
42#include <sys/priv.h>
43#include <sys/socket.h>
44#include <sys/jail.h>
45#include <sys/kernel.h>
46#include <sys/proc.h>
47#include <sys/sysctl.h>
48#include <sys/syslog.h>
49
50#include <net/if.h>
51#include <net/if_var.h>
52#include <net/if_arp.h>
53#include <net/if_dl.h>
54#include <net/if_llatbl.h>
55#include <net/if_types.h>
56#include <net/route.h>
57#include <net/vnet.h>
58
59#include <netinet/if_ether.h>
60#include <netinet/in.h>
61#include <netinet/in_var.h>
62#include <netinet/in_pcb.h>
63#include <netinet/ip_var.h>
64#include <netinet/ip_carp.h>
65#include <netinet/igmp_var.h>
66#include <netinet/udp.h>
67#include <netinet/udp_var.h>
68
/*
 * Forward declarations of helpers private to this file.
 */
static int in_mask2len(struct in_addr *);
static void in_len2mask(struct in_addr *, int);
static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
	struct ifnet *, struct thread *);

static void	in_socktrim(struct sockaddr_in *);
static int	in_ifinit(struct ifnet *, struct in_ifaddr *,
		    struct sockaddr_in *, int, int, int);
static void	in_purgemaddrs(struct ifnet *);

/*
 * Per-VNET knob, exported read/write as the "no_same_prefix" sysctl.
 * When non-zero, in_addprefix() returns EEXIST rather than letting a second
 * address share a prefix route that another address already installed.
 */
static VNET_DEFINE(int, nosameprefix);
#define	V_nosameprefix			VNET(nosameprefix)
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
	&VNET_NAME(nosameprefix), 0,
	"Refuse to create same prefixes on different interfaces");

/*
 * Per-VNET variables that are only declared here; their definitions live
 * outside this file.
 */
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define	V_ripcbinfo			VNET(ripcbinfo)

VNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
#define	V_arpstat		VNET(arpstat)
90
91/*
92 * Return 1 if an internet address is for a ``local'' host
93 * (one to which we have a connection).
94 */
95int
96in_localaddr(struct in_addr in)
97{
98	register u_long i = ntohl(in.s_addr);
99	register struct in_ifaddr *ia;
100
101	IN_IFADDR_RLOCK();
102	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
103		if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
104			IN_IFADDR_RUNLOCK();
105			return (1);
106		}
107	}
108	IN_IFADDR_RUNLOCK();
109	return (0);
110}
111
112/*
113 * Return 1 if an internet address is for the local host and configured
114 * on one of its interfaces.
115 */
116int
117in_localip(struct in_addr in)
118{
119	struct in_ifaddr *ia;
120
121	IN_IFADDR_RLOCK();
122	LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
123		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
124			IN_IFADDR_RUNLOCK();
125			return (1);
126		}
127	}
128	IN_IFADDR_RUNLOCK();
129	return (0);
130}
131
132/*
133 * Determine whether an IP address is in a reserved set of addresses
134 * that may not be forwarded, or whether datagrams to that destination
135 * may be forwarded.
136 */
137int
138in_canforward(struct in_addr in)
139{
140	register u_long i = ntohl(in.s_addr);
141	register u_long net;
142
143	if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
144		return (0);
145	if (IN_CLASSA(i)) {
146		net = i & IN_CLASSA_NET;
147		if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
148			return (0);
149	}
150	return (1);
151}
152
153/*
154 * Trim a mask in a sockaddr
155 */
156static void
157in_socktrim(struct sockaddr_in *ap)
158{
159    register char *cplim = (char *) &ap->sin_addr;
160    register char *cp = (char *) (&ap->sin_addr + 1);
161
162    ap->sin_len = 0;
163    while (--cp >= cplim)
164	if (*cp) {
165	    (ap)->sin_len = cp - (char *) (ap) + 1;
166	    break;
167	}
168}
169
/*
 * Convert a netmask into a prefix length.
 *
 * The mask is read byte by byte in memory order: every leading 0xff byte
 * counts for 8 bits, then the leading one-bits of the first byte that is
 * not 0xff are added.  Anything after the first zero bit is ignored, so a
 * non-contiguous mask yields the length of its leading run of ones.
 *
 * Returns a value between 0 and 8 * sizeof(struct in_addr).
 */
static int
in_mask2len(struct in_addr *mask)
{
	const unsigned char *p = (const unsigned char *)mask;
	size_t nbytes;
	int nbits;

	/* Count the fully-set leading bytes. */
	for (nbytes = 0; nbytes < sizeof(*mask); nbytes++) {
		if (p[nbytes] != 0xff)
			break;
	}

	/* Count the leading one-bits of the first partial byte, if any. */
	nbits = 0;
	if (nbytes < sizeof(*mask)) {
		for (nbits = 0; nbits < 8; nbits++) {
			if ((p[nbytes] & (0x80 >> nbits)) == 0)
				break;
		}
	}
	return ((int)nbytes * 8 + nbits);
}
191
/*
 * Build a netmask from a prefix length.
 *
 * The first `len' bits of *mask, in memory (network) order, are set and
 * the remaining bits are cleared.
 *
 * len is clamped to the range [0, 8 * sizeof(struct in_addr)]: a larger
 * value would otherwise write past the end of the mask, and a negative one
 * would produce a negative shift count.  Values inside the range behave as
 * before.
 */
static void
in_len2mask(struct in_addr *mask, int len)
{
	unsigned char *p = (unsigned char *)mask;
	const int maxbits = (int)(sizeof(*mask) * 8);
	size_t i;

	if (len < 0)
		len = 0;
	else if (len > maxbits)
		len = maxbits;

	/* Every byte is written exactly once: full, partial or empty. */
	for (i = 0; i < sizeof(*mask); i++) {
		int remaining = len - (int)i * 8;

		if (remaining >= 8)
			p[i] = 0xff;
		else if (remaining > 0)
			p[i] = (0xff00 >> remaining) & 0xff;
		else
			p[i] = 0;
	}
}
205
/*
 * Generic internet control operations (ioctl's).
 *
 * ifp is NULL if not an interface-specific ioctl.
 *
 * The SIOC[GS]IFADDR, SIOC[GS]IFBRDADDR, SIOC[GS]IFDSTADDR,
 * SIOC[GS]IFNETMASK, SIOCAIFADDR (plus its OSIOCAIFADDR compat form) and
 * SIOCDIFADDR requests are implemented here.  SIOC[AGD]LIFADDR are handed
 * to in_lifaddr_ioctl(), and every other command is passed to the
 * interface's if_ioctl handler.
 *
 * data points at a struct ifreq for the SIOC[GS]IF* requests and at a
 * struct in_aliasreq for SIOCAIFADDR/SIOCDIFADDR.  td may be NULL, in
 * which case the privilege and jail checks are skipped.
 *
 * Returns 0 on success or an errno value.
 */
/* ARGSUSED */
int
in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
    struct thread *td)
{
	register struct ifreq *ifr = (struct ifreq *)data;
	register struct in_ifaddr *ia, *iap;
	register struct ifaddr *ifa;
	struct in_addr allhosts_addr;
	struct in_addr dst;
	struct in_ifinfo *ii;
	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
	struct sockaddr_in oldaddr;
	int error, hostIsNew, iaIsNew, maskIsNew;
	int iaIsFirst;
	u_long ocmd = cmd;	/* original command, before compat mapping */

	/*
	 * Pre-10.x compat: OSIOCAIFADDR passes a shorter
	 * struct in_aliasreq, without ifra_vhid.
	 * ocmd keeps the original value so that ifra_vhid is only read
	 * further down when the caller really supplied it.
	 */
	if (cmd == OSIOCAIFADDR)
		cmd = SIOCAIFADDR;

	ia = NULL;
	iaIsFirst = 0;
	iaIsNew = 0;
	allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);

	/*
	 * Filter out ioctls we implement directly; forward the rest on to
	 * in_lifaddr_ioctl() and ifp->if_ioctl().
	 * Argument validation for the requests handled here also happens
	 * in this switch.
	 */
	switch (cmd) {
	case SIOCGIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCGIFDSTADDR:
	case SIOCGIFNETMASK:
	case SIOCDIFADDR:
		break;
	case SIOCAIFADDR:
		/*
		 * ifra_addr must be present and be of INET family.
		 * ifra_broadaddr and ifra_mask are optional.
		 */
		if (ifra->ifra_addr.sin_len != sizeof(struct sockaddr_in) ||
		    ifra->ifra_addr.sin_family != AF_INET)
			return (EINVAL);
		if (ifra->ifra_broadaddr.sin_len != 0 &&
		    (ifra->ifra_broadaddr.sin_len !=
		    sizeof(struct sockaddr_in) ||
		    ifra->ifra_broadaddr.sin_family != AF_INET))
			return (EINVAL);
#if 0
		/*
		 * ifconfig(8) in pre-10.x doesn't set sin_family for the
		 * mask. The code is disabled for the 10.x timeline, to
		 * make SIOCAIFADDR compatible with 9.x ifconfig(8).
		 * The code should be enabled in 11.x
		 */
		if (ifra->ifra_mask.sin_len != 0 &&
		    (ifra->ifra_mask.sin_len != sizeof(struct sockaddr_in) ||
		    ifra->ifra_mask.sin_family != AF_INET))
			return (EINVAL);
#endif
		break;
	case SIOCSIFADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFNETMASK:
		if (ifr->ifr_addr.sa_family != AF_INET ||
		    ifr->ifr_addr.sa_len != sizeof(struct sockaddr_in))
			return (EINVAL);
		break;

	case SIOCALIFADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_ADDIFADDR);
			if (error)
				return (error);
		}
		if (ifp == NULL)
			return (EINVAL);
		return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCDLIFADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_DELIFADDR);
			if (error)
				return (error);
		}
		if (ifp == NULL)
			return (EINVAL);
		return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCGLIFADDR:
		if (ifp == NULL)
			return (EINVAL);
		return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	default:
		if (ifp == NULL || ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		return ((*ifp->if_ioctl)(ifp, cmd, data));
	}

	/* Everything handled below operates on a specific interface. */
	if (ifp == NULL)
		return (EADDRNOTAVAIL);

	/*
	 * Security checks before we get involved in any work.
	 */
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCSIFADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_ADDIFADDR);
			if (error)
				return (error);
		}
		break;

	case SIOCDIFADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_DELIFADDR);
			if (error)
				return (error);
		}
		break;
	}

	/*
	 * Find address for this interface, if it exists.
	 *
	 * If an alias address was specified, find that one instead of the
	 * first one on the interface, if possible.
	 *
	 * First pass: exact match through the address hash, restricted to
	 * this interface and subject to the jail check.  A reference is
	 * taken on the entry that is found.
	 */
	dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
	IN_IFADDR_RLOCK();
	LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
		if (iap->ia_ifp == ifp &&
		    iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
			if (td == NULL || prison_check_ip4(td->td_ucred,
			    &dst) == 0)
				ia = iap;
			break;
		}
	}
	if (ia != NULL)
		ifa_ref(&ia->ia_ifa);
	IN_IFADDR_RUNLOCK();
	/*
	 * Second pass: fall back to the first AF_INET address on the
	 * interface that passes the jail check.
	 */
	if (ia == NULL) {
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			iap = ifatoia(ifa);
			if (iap->ia_addr.sin_family == AF_INET) {
				if (td != NULL &&
				    prison_check_ip4(td->td_ucred,
				    &iap->ia_addr.sin_addr) != 0)
					continue;
				ia = iap;
				break;
			}
		}
		if (ia != NULL)
			ifa_ref(&ia->ia_ifa);
		IF_ADDR_RUNLOCK(ifp);
	}
	/* Neither pass found an address: any address added is the first. */
	if (ia == NULL)
		iaIsFirst = 1;

	/*
	 * Per-command preparation: pick the exact alias for add/delete,
	 * allocate a new in_ifaddr for the "set"/"add" requests when none
	 * exists, and reject requests that need an existing address.
	 */
	error = 0;
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCDIFADDR:
		if (ifra->ifra_addr.sin_family == AF_INET) {
			struct in_ifaddr *oia;

			/*
			 * Walk the global address list starting at ia,
			 * looking for the requested address on this
			 * interface, and move the held reference from the
			 * old entry (oia) to the one found, if different.
			 */
			IN_IFADDR_RLOCK();
			for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
				if (ia->ia_ifp == ifp  &&
				    ia->ia_addr.sin_addr.s_addr ==
				    ifra->ifra_addr.sin_addr.s_addr)
					break;
			}
			if (ia != NULL && ia != oia)
				ifa_ref(&ia->ia_ifa);
			if (oia != NULL && ia != oia)
				ifa_free(&oia->ia_ifa);
			IN_IFADDR_RUNLOCK();
			/* Adding to a point-to-point link needs a peer. */
			if ((ifp->if_flags & IFF_POINTOPOINT)
			    && (cmd == SIOCAIFADDR)
			    && (ifra->ifra_dstaddr.sin_addr.s_addr
				== INADDR_ANY)) {
				error = EDESTADDRREQ;
				goto out;
			}
		}
		if (cmd == SIOCDIFADDR && ia == NULL) {
			error = EADDRNOTAVAIL;
			goto out;
		}
		/* FALLTHROUGH */
	case SIOCSIFADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
		if (ia == NULL) {
			ia = (struct in_ifaddr *)
				malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
				    M_ZERO);
			if (ia == NULL) {
				error = ENOBUFS;
				goto out;
			}

			/*
			 * NOTE(review): the ifa_free() at "out" presumably
			 * balances a reference set up by ifa_init() --
			 * confirm against ifa_init().
			 */
			ifa = &ia->ia_ifa;
			ifa_init(ifa);
			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
			ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;

			ia->ia_sockmask.sin_len = 8;
			ia->ia_sockmask.sin_family = AF_INET;
			if (ifp->if_flags & IFF_BROADCAST) {
				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
				ia->ia_broadaddr.sin_family = AF_INET;
			}
			ia->ia_ifp = ifp;

			/* One reference per list the entry is linked on. */
			ifa_ref(ifa);			/* if_addrhead */
			IF_ADDR_WLOCK(ifp);
			TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
			IF_ADDR_WUNLOCK(ifp);
			ifa_ref(ifa);			/* in_ifaddrhead */
			IN_IFADDR_WLOCK();
			TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
			IN_IFADDR_WUNLOCK();
			iaIsNew = 1;
		}
		break;

	case SIOCSIFBRDADDR:
	case SIOCGIFADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCGIFBRDADDR:
		if (ia == NULL) {
			error = EADDRNOTAVAIL;
			goto out;
		}
		break;
	}

	/*
	 * Most paths in this switch return directly or via out.  Only paths
	 * that remove the address break in order to hit common removal code.
	 * Those are SIOCDIFADDR, and SIOCSIFADDR/SIOCAIFADDR when in_ifinit()
	 * fails on an address that was allocated by this call.
	 */
	switch (cmd) {
	case SIOCGIFADDR:
		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
		goto out;

	case SIOCGIFBRDADDR:
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EINVAL;
			goto out;
		}
		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
		goto out;

	case SIOCGIFDSTADDR:
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
			error = EINVAL;
			goto out;
		}
		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
		goto out;

	case SIOCGIFNETMASK:
		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
		goto out;

	case SIOCSIFDSTADDR:
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
			error = EINVAL;
			goto out;
		}
		/* Keep the old peer so it can be restored or unrouted. */
		oldaddr = ia->ia_dstaddr;
		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
		if (ifp->if_ioctl != NULL) {
			error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
			    (caddr_t)ia);
			if (error) {
				ia->ia_dstaddr = oldaddr;
				goto out;
			}
		}
		if (ia->ia_flags & IFA_ROUTE) {
			/*
			 * Delete the host route to the old peer, then add
			 * one to the new peer.  ifa_dstaddr is pointed at
			 * the saved copy only for the duration of the
			 * delete.
			 */
			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
			ia->ia_ifa.ifa_dstaddr =
					(struct sockaddr *)&ia->ia_dstaddr;
			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
		}
		goto out;

	case SIOCSIFBRDADDR:
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EINVAL;
			goto out;
		}
		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
		goto out;

	case SIOCSIFADDR:
		error = in_ifinit(ifp, ia,
		    (struct sockaddr_in *) &ifr->ifr_addr, 1, 0, 0);
		/* A freshly allocated entry that failed to init is removed. */
		if (error != 0 && iaIsNew)
			break;
		if (error == 0) {
			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
			if (iaIsFirst &&
			    (ifp->if_flags & IFF_MULTICAST) != 0) {
				error = in_joingroup(ifp, &allhosts_addr,
				    NULL, &ii->ii_allhosts);
			}
			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
		}
		/*
		 * NOTE(review): error is reset unconditionally, so a failure
		 * from in_ifinit() on an existing address or from
		 * in_joingroup() is not reported to the caller.
		 */
		error = 0;
		goto out;

	case SIOCSIFNETMASK:
		ia->ia_sockmask.sin_addr = ((struct sockaddr_in *)
		    &ifr->ifr_addr)->sin_addr;
		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
		goto out;

	case SIOCAIFADDR:
		maskIsNew = 0;
		hostIsNew = 1;
		error = 0;
		if (ifra->ifra_addr.sin_addr.s_addr ==
			    ia->ia_addr.sin_addr.s_addr)
			hostIsNew = 0;
		if (ifra->ifra_mask.sin_len) {
			/*
			 * QL: XXX
			 * Need to scrub the prefix here in case
			 * the issued command is SIOCAIFADDR with
			 * the same address, but with a different
			 * prefix length. And if the prefix length
			 * is the same as before, then the call is
			 * un-necessarily executed here.
			 */
			in_ifscrub(ifp, ia, LLE_STATIC);
			ia->ia_sockmask = ifra->ifra_mask;
			ia->ia_sockmask.sin_family = AF_INET;
			ia->ia_subnetmask =
			     ntohl(ia->ia_sockmask.sin_addr.s_addr);
			maskIsNew = 1;
		}
		if ((ifp->if_flags & IFF_POINTOPOINT) &&
		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
			in_ifscrub(ifp, ia, LLE_STATIC);
			ia->ia_dstaddr = ifra->ifra_dstaddr;
			maskIsNew  = 1; /* We lie; but the effect's the same */
		}
		/* ifra_vhid exists only in the non-compat request layout. */
		if (hostIsNew || maskIsNew)
			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0,
			    maskIsNew, (ocmd == cmd ? ifra->ifra_vhid : 0));
		/* A freshly allocated entry that failed to init is removed. */
		if (error != 0 && iaIsNew)
			break;

		if ((ifp->if_flags & IFF_BROADCAST) &&
		    ifra->ifra_broadaddr.sin_len)
			ia->ia_broadaddr = ifra->ifra_broadaddr;
		if (error == 0) {
			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
			if (iaIsFirst &&
			    (ifp->if_flags & IFF_MULTICAST) != 0) {
				error = in_joingroup(ifp, &allhosts_addr,
				    NULL, &ii->ii_allhosts);
			}
			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
		}
		goto out;

	case SIOCDIFADDR:
		/*
		 * in_ifscrub kills the interface route.
		 */
		in_ifscrub(ifp, ia, LLE_STATIC);

		/*
		 * in_ifadown gets rid of all the rest of
		 * the routes.  This is not quite the right
		 * thing to do, but at least if we are running
		 * a routing process they will come back.
		 */
		in_ifadown(&ia->ia_ifa, 1);
		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
		error = 0;
		break;

	default:
		panic("in_control: unsupported ioctl");
	}

	/*
	 * Common removal code: unlink ia from the interface list, the
	 * global list and the hash, dropping the reference held by each
	 * list.
	 */
	if (ia->ia_ifa.ifa_carp)
		(*carp_detach_p)(&ia->ia_ifa);

	IF_ADDR_WLOCK(ifp);
	/* Re-check that ia is still part of the list. */
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa == &ia->ia_ifa)
			break;
	}
	if (ifa == NULL) {
		/*
		 * If we lost the race with another thread, there is no need to
		 * try it again for the next loop as there is no other exit
		 * path between here and out.
		 */
		IF_ADDR_WUNLOCK(ifp);
		error = EADDRNOTAVAIL;
		goto out;
	}
	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
	IF_ADDR_WUNLOCK(ifp);
	ifa_free(&ia->ia_ifa);				/* if_addrhead */

	IN_IFADDR_WLOCK();
	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);

	LIST_REMOVE(ia, ia_hash);
	IN_IFADDR_WUNLOCK();
	/*
	 * If this is the last IPv4 address configured on this
	 * interface, leave the all-hosts group.
	 * No state-change report need be transmitted.
	 *
	 * NOTE(review): the ifa_free() in the else branch implies that
	 * IFP_TO_IA() hands back a referenced address -- confirm against
	 * the macro.
	 */
	IFP_TO_IA(ifp, iap);
	if (iap == NULL) {
		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
		IN_MULTI_LOCK();
		if (ii->ii_allhosts) {
			(void)in_leavegroup_locked(ii->ii_allhosts, NULL);
			ii->ii_allhosts = NULL;
		}
		IN_MULTI_UNLOCK();
	} else
		ifa_free(&iap->ia_ifa);

	ifa_free(&ia->ia_ifa);				/* in_ifaddrhead */
out:
	/* Drop the reference this function has been holding on ia. */
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	return (error);
}
673
674/*
675 * SIOC[GAD]LIFADDR.
676 *	SIOCGLIFADDR: get first address. (?!?)
677 *	SIOCGLIFADDR with IFLR_PREFIX:
678 *		get first address that matches the specified prefix.
679 *	SIOCALIFADDR: add the specified address.
680 *	SIOCALIFADDR with IFLR_PREFIX:
681 *		EINVAL since we can't deduce hostid part of the address.
682 *	SIOCDLIFADDR: delete the specified address.
683 *	SIOCDLIFADDR with IFLR_PREFIX:
684 *		delete the first address that matches the specified prefix.
685 * return values:
686 *	EINVAL on invalid parameters
687 *	EADDRNOTAVAIL on prefix match failed/specified address not found
688 *	other values may be returned from in_ioctl()
689 */
690static int
691in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
692    struct ifnet *ifp, struct thread *td)
693{
694	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
695	struct ifaddr *ifa;
696
697	/* sanity checks */
698	if (data == NULL || ifp == NULL) {
699		panic("invalid argument to in_lifaddr_ioctl");
700		/*NOTRECHED*/
701	}
702
703	switch (cmd) {
704	case SIOCGLIFADDR:
705		/* address must be specified on GET with IFLR_PREFIX */
706		if ((iflr->flags & IFLR_PREFIX) == 0)
707			break;
708		/*FALLTHROUGH*/
709	case SIOCALIFADDR:
710	case SIOCDLIFADDR:
711		/* address must be specified on ADD and DELETE */
712		if (iflr->addr.ss_family != AF_INET)
713			return (EINVAL);
714		if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
715			return (EINVAL);
716		/* XXX need improvement */
717		if (iflr->dstaddr.ss_family
718		 && iflr->dstaddr.ss_family != AF_INET)
719			return (EINVAL);
720		if (iflr->dstaddr.ss_family
721		 && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
722			return (EINVAL);
723		break;
724	default: /*shouldn't happen*/
725		return (EOPNOTSUPP);
726	}
727	if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
728		return (EINVAL);
729
730	switch (cmd) {
731	case SIOCALIFADDR:
732	    {
733		struct in_aliasreq ifra;
734
735		if (iflr->flags & IFLR_PREFIX)
736			return (EINVAL);
737
738		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
739		bzero(&ifra, sizeof(ifra));
740		bcopy(iflr->iflr_name, ifra.ifra_name,
741			sizeof(ifra.ifra_name));
742
743		bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
744
745		if (iflr->dstaddr.ss_family) {	/*XXX*/
746			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
747				iflr->dstaddr.ss_len);
748		}
749
750		ifra.ifra_mask.sin_family = AF_INET;
751		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
752		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
753
754		return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
755	    }
756	case SIOCGLIFADDR:
757	case SIOCDLIFADDR:
758	    {
759		struct in_ifaddr *ia;
760		struct in_addr mask, candidate, match;
761		struct sockaddr_in *sin;
762
763		bzero(&mask, sizeof(mask));
764		bzero(&match, sizeof(match));
765		if (iflr->flags & IFLR_PREFIX) {
766			/* lookup a prefix rather than address. */
767			in_len2mask(&mask, iflr->prefixlen);
768
769			sin = (struct sockaddr_in *)&iflr->addr;
770			match.s_addr = sin->sin_addr.s_addr;
771			match.s_addr &= mask.s_addr;
772
773			/* if you set extra bits, that's wrong */
774			if (match.s_addr != sin->sin_addr.s_addr)
775				return (EINVAL);
776
777		} else {
778			/* on getting an address, take the 1st match */
779			/* on deleting an address, do exact match */
780			if (cmd != SIOCGLIFADDR) {
781				in_len2mask(&mask, 32);
782				sin = (struct sockaddr_in *)&iflr->addr;
783				match.s_addr = sin->sin_addr.s_addr;
784			}
785		}
786
787		IF_ADDR_RLOCK(ifp);
788		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)	{
789			if (ifa->ifa_addr->sa_family != AF_INET)
790				continue;
791			if (match.s_addr == 0)
792				break;
793			sin = (struct sockaddr_in *)&ifa->ifa_addr;
794			candidate.s_addr = sin->sin_addr.s_addr;
795			candidate.s_addr &= mask.s_addr;
796			if (candidate.s_addr == match.s_addr)
797				break;
798		}
799		if (ifa != NULL)
800			ifa_ref(ifa);
801		IF_ADDR_RUNLOCK(ifp);
802		if (ifa == NULL)
803			return (EADDRNOTAVAIL);
804		ia = (struct in_ifaddr *)ifa;
805
806		if (cmd == SIOCGLIFADDR) {
807			/* fill in the if_laddrreq structure */
808			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
809
810			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
811				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
812					ia->ia_dstaddr.sin_len);
813			} else
814				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
815
816			iflr->prefixlen =
817				in_mask2len(&ia->ia_sockmask.sin_addr);
818
819			iflr->flags = 0;	/*XXX*/
820			ifa_free(ifa);
821
822			return (0);
823		} else {
824			struct in_aliasreq ifra;
825
826			/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
827			bzero(&ifra, sizeof(ifra));
828			bcopy(iflr->iflr_name, ifra.ifra_name,
829				sizeof(ifra.ifra_name));
830
831			bcopy(&ia->ia_addr, &ifra.ifra_addr,
832				ia->ia_addr.sin_len);
833			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
834				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
835					ia->ia_dstaddr.sin_len);
836			}
837			bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr,
838				ia->ia_sockmask.sin_len);
839			ifa_free(ifa);
840
841			return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
842			    ifp, td));
843		}
844	    }
845	}
846
847	return (EOPNOTSUPP);	/*just for safety*/
848}
849
850/*
851 * Delete any existing route for an interface.
852 */
853void
854in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
855{
856
857	in_scrubprefix(ia, flags);
858}
859
/*
 * Initialize an interface's internet address
 * and routing table entry.
 *
 * ifp:          interface the address belongs to.
 * ia:           address entry to (re)initialize.
 * sin:          new address; copied into ia->ia_addr.
 * scrub:        if non-zero, in_scrubprefix() is called on ia first.
 * masksupplied: if zero, a classful netmask is derived from the address.
 * vhid:         if greater than zero, the address is attached through
 *               carp_attach_p; a non-zero vhid also skips the prefix route
 *               and the loopback route to self.
 *
 * Returns 0 on success or an errno value.  On the error returns after the
 * hash insertion below, ia remains on the address hash.
 */
static int
in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
    int scrub, int masksupplied, int vhid)
{
	register u_long i = ntohl(sin->sin_addr.s_addr);
	int flags = RTF_UP, error = 0;

	if (scrub)
		in_scrubprefix(ia, LLE_STATIC);

	/*
	 * Re-hash the entry under its new address.  An entry whose family
	 * is already AF_INET has been hashed before and is unlinked first.
	 */
	IN_IFADDR_WLOCK();
	if (ia->ia_addr.sin_family == AF_INET)
		LIST_REMOVE(ia, ia_hash);
	ia->ia_addr = *sin;
	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
	    ia, ia_hash);
	IN_IFADDR_WUNLOCK();

	/* A vhid needs the CARP attach hook to be present. */
	if (vhid > 0) {
		if (carp_attach_p != NULL)
			error = (*carp_attach_p)(&ia->ia_ifa, vhid);
		else
			error = EPROTONOSUPPORT;
	}
	if (error)
		return (error);

	/*
	 * Give the interface a chance to initialize
	 * if this is its first address,
	 * and to validate the address if necessary.
	 */
	if (ifp->if_ioctl != NULL &&
	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia)) != 0)
			/* LIST_REMOVE(ia, ia_hash) is done in in_control */
			return (error);

	/*
	 * Be compatible with network classes, if netmask isn't supplied,
	 * guess it based on classes.
	 */
	if (!masksupplied) {
		if (IN_CLASSA(i))
			ia->ia_subnetmask = IN_CLASSA_NET;
		else if (IN_CLASSB(i))
			ia->ia_subnetmask = IN_CLASSB_NET;
		else
			ia->ia_subnetmask = IN_CLASSC_NET;
		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
	}
	ia->ia_subnet = i & ia->ia_subnetmask;
	in_socktrim(&ia->ia_sockmask);
	/*
	 * Add route for the network.
	 * Broadcast interfaces get their broadcast address computed here;
	 * loopback and point-to-point interfaces get a host route instead.
	 */
	ia->ia_ifa.ifa_metric = ifp->if_metric;
	if (ifp->if_flags & IFF_BROADCAST) {
		if (ia->ia_subnetmask == IN_RFC3021_MASK)
			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
		else
			ia->ia_broadaddr.sin_addr.s_addr =
			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
	} else if (ifp->if_flags & IFF_LOOPBACK) {
		ia->ia_dstaddr = ia->ia_addr;
		flags |= RTF_HOST;
	} else if (ifp->if_flags & IFF_POINTOPOINT) {
		/* No peer address yet: nothing to route to. */
		if (ia->ia_dstaddr.sin_family != AF_INET)
			return (0);
		flags |= RTF_HOST;
	}
	if (!vhid && (error = in_addprefix(ia, flags)) != 0)
		return (error);

	/* No loopback route for the unspecified address. */
	if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
		return (0);

	/* Nor for a point-to-point link whose peer is our own address. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
			return (0);

	/*
	 * add a loopback route to self
	 */
	if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
		struct route ia_ro;

		bzero(&ia_ro, sizeof(ia_ro));
		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
		rtalloc_ign_fib(&ia_ro, 0, 0);
		/*
		 * If the lookup already resolves through the loopback
		 * interface, only the route's reference count is adjusted;
		 * otherwise a new loopback route is added.
		 */
		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
			RT_LOCK(ia_ro.ro_rt);
			RT_ADDREF(ia_ro.ro_rt);
			RTFREE_LOCKED(ia_ro.ro_rt);
		} else
			error = ifa_add_loopback_route((struct ifaddr *)ia,
				       (struct sockaddr *)&ia->ia_addr);
		if (error == 0)
			ia->ia_flags |= IFA_RTSELF;
		if (ia_ro.ro_rt != NULL)
			RTFREE(ia_ro.ro_rt);
	}

	return (error);
}
969
/*
 * Route flags implied by the interface of address x: RTF_HOST when the
 * interface has IFF_LOOPBACK or IFF_POINTOPOINT set, 0 otherwise.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
	    ? RTF_HOST : 0)
973
974/*
975 * Generate a routing message when inserting or deleting
976 * an interface address alias.
977 */
978static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
979    struct in_ifaddr *target)
980{
981	struct route pfx_ro;
982	struct sockaddr_in *pfx_addr;
983	struct rtentry msg_rt;
984
985	/* QL: XXX
986	 * This is a bit questionable because there is no
987	 * additional route entry added/deleted for an address
988	 * alias. Therefore this route report is inaccurate.
989	 */
990	bzero(&pfx_ro, sizeof(pfx_ro));
991	pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
992	pfx_addr->sin_len = sizeof(*pfx_addr);
993	pfx_addr->sin_family = AF_INET;
994	pfx_addr->sin_addr = *prefix;
995	rtalloc_ign_fib(&pfx_ro, 0, 0);
996	if (pfx_ro.ro_rt != NULL) {
997		msg_rt = *pfx_ro.ro_rt;
998
999		/* QL: XXX
1000		 * Point the gateway to the new interface
1001		 * address as if a new prefix route entry has
1002		 * been added through the new address alias.
1003		 * All other parts of the rtentry is accurate,
1004		 * e.g., rt_key, rt_mask, rt_ifp etc.
1005		 */
1006		msg_rt.rt_gateway =
1007			(struct sockaddr *)&target->ia_addr;
1008		rt_newaddrmsg(cmd,
1009			      (struct ifaddr *)target,
1010			      0, &msg_rt);
1011		RTFREE(pfx_ro.ro_rt);
1012	}
1013	return;
1014}
1015
1016/*
1017 * Check if we have a route for the given prefix already or add one accordingly.
1018 */
1019int
1020in_addprefix(struct in_ifaddr *target, int flags)
1021{
1022	struct in_ifaddr *ia;
1023	struct in_addr prefix, mask, p, m;
1024	int error;
1025
1026	if ((flags & RTF_HOST) != 0) {
1027		prefix = target->ia_dstaddr.sin_addr;
1028		mask.s_addr = 0;
1029	} else {
1030		prefix = target->ia_addr.sin_addr;
1031		mask = target->ia_sockmask.sin_addr;
1032		prefix.s_addr &= mask.s_addr;
1033	}
1034
1035	IN_IFADDR_RLOCK();
1036	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1037		if (rtinitflags(ia)) {
1038			p = ia->ia_dstaddr.sin_addr;
1039
1040			if (prefix.s_addr != p.s_addr)
1041				continue;
1042		} else {
1043			p = ia->ia_addr.sin_addr;
1044			m = ia->ia_sockmask.sin_addr;
1045			p.s_addr &= m.s_addr;
1046
1047			if (prefix.s_addr != p.s_addr ||
1048			    mask.s_addr != m.s_addr)
1049				continue;
1050		}
1051
1052		/*
1053		 * If we got a matching prefix route inserted by other
1054		 * interface address, we are done here.
1055		 */
1056		if (ia->ia_flags & IFA_ROUTE) {
1057#ifdef RADIX_MPATH
1058			if (ia->ia_addr.sin_addr.s_addr ==
1059			    target->ia_addr.sin_addr.s_addr) {
1060				IN_IFADDR_RUNLOCK();
1061				return (EEXIST);
1062			} else
1063				break;
1064#endif
1065			if (V_nosameprefix) {
1066				IN_IFADDR_RUNLOCK();
1067				return (EEXIST);
1068			} else {
1069				in_addralias_rtmsg(RTM_ADD, &prefix, target);
1070				IN_IFADDR_RUNLOCK();
1071				return (0);
1072			}
1073		}
1074	}
1075	IN_IFADDR_RUNLOCK();
1076
1077	/*
1078	 * No-one seem to have this prefix route, so we try to insert it.
1079	 */
1080	error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
1081	if (!error)
1082		target->ia_flags |= IFA_ROUTE;
1083	return (error);
1084}
1085
1086/*
1087 * If there is no other address in the system that can serve a route to the
1088 * same prefix, remove the route.  Hand over the route to the new address
1089 * otherwise.
1090 */
1091int
1092in_scrubprefix(struct in_ifaddr *target, u_int flags)
1093{
1094	struct in_ifaddr *ia;
1095	struct in_addr prefix, mask, p, m;
1096	int error = 0;
1097	struct sockaddr_in prefix0, mask0;
1098
1099	/*
1100	 * Remove the loopback route to the interface address.
1101	 * The "useloopback" setting is not consulted because if the
1102	 * user configures an interface address, turns off this
1103	 * setting, and then tries to delete that interface address,
1104	 * checking the current setting of "useloopback" would leave
1105	 * that interface address loopback route untouched, which
1106	 * would be wrong. Therefore the interface address loopback route
1107	 * deletion is unconditional.
1108	 */
1109	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
1110	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
1111	    (target->ia_flags & IFA_RTSELF)) {
1112		struct route ia_ro;
1113		int freeit = 0;
1114
1115		bzero(&ia_ro, sizeof(ia_ro));
1116		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
1117		rtalloc_ign_fib(&ia_ro, 0, 0);
1118		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
1119		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
1120			RT_LOCK(ia_ro.ro_rt);
1121			if (ia_ro.ro_rt->rt_refcnt <= 1)
1122				freeit = 1;
1123			else if (flags & LLE_STATIC) {
1124				RT_REMREF(ia_ro.ro_rt);
1125				target->ia_flags &= ~IFA_RTSELF;
1126			}
1127			RTFREE_LOCKED(ia_ro.ro_rt);
1128		}
1129		if (freeit && (flags & LLE_STATIC)) {
1130			error = ifa_del_loopback_route((struct ifaddr *)target,
1131				       (struct sockaddr *)&target->ia_addr);
1132			if (error == 0)
1133				target->ia_flags &= ~IFA_RTSELF;
1134		}
1135		if ((flags & LLE_STATIC) &&
1136			!(target->ia_ifp->if_flags & IFF_NOARP))
1137			/* remove arp cache */
1138			arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
1139	}
1140
1141	if (rtinitflags(target)) {
1142		prefix = target->ia_dstaddr.sin_addr;
1143		mask.s_addr = 0;
1144	} else {
1145		prefix = target->ia_addr.sin_addr;
1146		mask = target->ia_sockmask.sin_addr;
1147		prefix.s_addr &= mask.s_addr;
1148	}
1149
1150	if ((target->ia_flags & IFA_ROUTE) == 0) {
1151		in_addralias_rtmsg(RTM_DELETE, &prefix, target);
1152		return (0);
1153	}
1154
1155	IN_IFADDR_RLOCK();
1156	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1157		if (rtinitflags(ia)) {
1158			p = ia->ia_dstaddr.sin_addr;
1159
1160			if (prefix.s_addr != p.s_addr)
1161				continue;
1162		} else {
1163			p = ia->ia_addr.sin_addr;
1164			m = ia->ia_sockmask.sin_addr;
1165			p.s_addr &= m.s_addr;
1166
1167			if (prefix.s_addr != p.s_addr ||
1168			    mask.s_addr != m.s_addr)
1169				continue;
1170		}
1171
1172		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1173			continue;
1174
1175		/*
1176		 * If we got a matching prefix address, move IFA_ROUTE and
1177		 * the route itself to it.  Make sure that routing daemons
1178		 * get a heads-up.
1179		 */
1180		if ((ia->ia_flags & IFA_ROUTE) == 0) {
1181			ifa_ref(&ia->ia_ifa);
1182			IN_IFADDR_RUNLOCK();
1183			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
1184			    rtinitflags(target));
1185			if (error == 0)
1186				target->ia_flags &= ~IFA_ROUTE;
1187			else
1188				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
1189					error);
1190			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
1191			    rtinitflags(ia) | RTF_UP);
1192			if (error == 0)
1193				ia->ia_flags |= IFA_ROUTE;
1194			else
1195				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
1196					error);
1197			ifa_free(&ia->ia_ifa);
1198			return (error);
1199		}
1200	}
1201	IN_IFADDR_RUNLOCK();
1202
1203	/*
1204	 * remove all L2 entries on the given prefix
1205	 */
1206	bzero(&prefix0, sizeof(prefix0));
1207	prefix0.sin_len = sizeof(prefix0);
1208	prefix0.sin_family = AF_INET;
1209	prefix0.sin_addr.s_addr = target->ia_subnet;
1210	bzero(&mask0, sizeof(mask0));
1211	mask0.sin_len = sizeof(mask0);
1212	mask0.sin_family = AF_INET;
1213	mask0.sin_addr.s_addr = target->ia_subnetmask;
1214	lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
1215			    (struct sockaddr *)&mask0, flags);
1216
1217	/*
1218	 * As no-one seem to have this prefix, we can remove the route.
1219	 */
1220	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
1221	if (error == 0)
1222		target->ia_flags &= ~IFA_ROUTE;
1223	else
1224		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
1225	return (error);
1226}
1227
1228#undef rtinitflags
1229
1230/*
1231 * Return 1 if the address might be a local broadcast address.
1232 */
1233int
1234in_broadcast(struct in_addr in, struct ifnet *ifp)
1235{
1236	register struct ifaddr *ifa;
1237	u_long t;
1238
1239	if (in.s_addr == INADDR_BROADCAST ||
1240	    in.s_addr == INADDR_ANY)
1241		return (1);
1242	if ((ifp->if_flags & IFF_BROADCAST) == 0)
1243		return (0);
1244	t = ntohl(in.s_addr);
1245	/*
1246	 * Look through the list of addresses for a match
1247	 * with a broadcast address.
1248	 */
1249#define ia ((struct in_ifaddr *)ifa)
1250	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1251		if (ifa->ifa_addr->sa_family == AF_INET &&
1252		    (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
1253		     /*
1254		      * Check for old-style (host 0) broadcast, but
1255		      * taking into account that RFC 3021 obsoletes it.
1256		      */
1257		     (ia->ia_subnetmask != IN_RFC3021_MASK &&
1258		     t == ia->ia_subnet)) &&
1259		     /*
1260		      * Check for an all one subnetmask. These
1261		      * only exist when an interface gets a secondary
1262		      * address.
1263		      */
1264		     ia->ia_subnetmask != (u_long)0xffffffff)
1265			    return (1);
1266	return (0);
1267#undef ia
1268}
1269
1270/*
1271 * On interface removal, clean up IPv4 data structures hung off of the ifnet.
1272 */
1273void
1274in_ifdetach(struct ifnet *ifp)
1275{
1276
1277	in_pcbpurgeif0(&V_ripcbinfo, ifp);
1278	in_pcbpurgeif0(&V_udbinfo, ifp);
1279	in_purgemaddrs(ifp);
1280}
1281
1282/*
1283 * Delete all IPv4 multicast address records, and associated link-layer
1284 * multicast address records, associated with ifp.
1285 * XXX It looks like domifdetach runs AFTER the link layer cleanup.
1286 * XXX This should not race with ifma_protospec being set during
1287 * a new allocation, if it does, we have bigger problems.
1288 */
1289static void
1290in_purgemaddrs(struct ifnet *ifp)
1291{
1292	LIST_HEAD(,in_multi) purgeinms;
1293	struct in_multi		*inm, *tinm;
1294	struct ifmultiaddr	*ifma;
1295
1296	LIST_INIT(&purgeinms);
1297	IN_MULTI_LOCK();
1298
1299	/*
1300	 * Extract list of in_multi associated with the detaching ifp
1301	 * which the PF_INET layer is about to release.
1302	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
1303	 * by code further down.
1304	 */
1305	IF_ADDR_RLOCK(ifp);
1306	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1307		if (ifma->ifma_addr->sa_family != AF_INET ||
1308		    ifma->ifma_protospec == NULL)
1309			continue;
1310#if 0
1311		KASSERT(ifma->ifma_protospec != NULL,
1312		    ("%s: ifma_protospec is NULL", __func__));
1313#endif
1314		inm = (struct in_multi *)ifma->ifma_protospec;
1315		LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
1316	}
1317	IF_ADDR_RUNLOCK(ifp);
1318
1319	LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
1320		LIST_REMOVE(inm, inm_link);
1321		inm_release_locked(inm);
1322	}
1323	igmp_ifdetach(ifp);
1324
1325	IN_MULTI_UNLOCK();
1326}
1327
1328struct in_llentry {
1329	struct llentry		base;
1330	struct sockaddr_in	l3_addr4;
1331};
1332
1333static struct llentry *
1334in_lltable_new(const struct sockaddr *l3addr, u_int flags)
1335{
1336	struct in_llentry *lle;
1337
1338	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
1339	if (lle == NULL)		/* NB: caller generates msg */
1340		return NULL;
1341
1342	callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
1343	/*
1344	 * For IPv4 this will trigger "arpresolve" to generate
1345	 * an ARP request.
1346	 */
1347	lle->base.la_expire = time_uptime; /* mark expired */
1348	lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
1349	lle->base.lle_refcnt = 1;
1350	LLE_LOCK_INIT(&lle->base);
1351	return &lle->base;
1352}
1353
1354/*
1355 * Deletes an address from the address table.
1356 * This function is called by the timer functions
1357 * such as arptimer() and nd6_llinfo_timer(), and
1358 * the caller does the locking.
1359 */
1360static void
1361in_lltable_free(struct lltable *llt, struct llentry *lle)
1362{
1363	LLE_WUNLOCK(lle);
1364	LLE_LOCK_DESTROY(lle);
1365	free(lle, M_LLTABLE);
1366}
1367
1368
/*
 * True when address "d" lies within prefix "a" under mask "m"; all three
 * are pointers to struct sockaddr_in.
 *
 * Note the mixed byte order: only "d" is passed through ntohl(), while
 * "a" and "m" are used as they are.  The prefix and mask built by
 * in_scrubprefix() come from ia_subnet/ia_subnetmask, which in_broadcast()
 * compares against an ntohl()-converted address.
 */
#define	IN_ARE_MASKED_ADDR_EQUAL(d, a, m)				\
	(((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) &	\
	    (m)->sin_addr.s_addr) == 0)
1371
1372static void
1373in_lltable_prefix_free(struct lltable *llt,
1374		       const struct sockaddr *prefix,
1375		       const struct sockaddr *mask,
1376		       u_int flags)
1377{
1378	const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
1379	const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
1380	struct llentry *lle, *next;
1381	register int i;
1382	size_t pkts_dropped;
1383
1384	for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
1385		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
1386
1387		        /*
1388			 * (flags & LLE_STATIC) means deleting all entries
1389			 * including static ARP entries
1390			 */
1391			if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle),
1392						     pfx, msk) &&
1393			    ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
1394				int canceled;
1395
1396				canceled = callout_drain(&lle->la_timer);
1397				LLE_WLOCK(lle);
1398				if (canceled)
1399					LLE_REMREF(lle);
1400				pkts_dropped = llentry_free(lle);
1401				ARPSTAT_ADD(dropped, pkts_dropped);
1402			}
1403		}
1404	}
1405}
1406
1407
1408static int
1409in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
1410{
1411	struct rtentry *rt;
1412
1413	KASSERT(l3addr->sa_family == AF_INET,
1414	    ("sin_family %d", l3addr->sa_family));
1415
1416	/* XXX rtalloc1 should take a const param */
1417	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
1418
1419	if (rt == NULL)
1420		return (EINVAL);
1421
1422	/*
1423	 * If the gateway for an existing host route matches the target L3
1424	 * address, which is a special route inserted by some implementation
1425	 * such as MANET, and the interface is of the correct type, then
1426	 * allow for ARP to proceed.
1427	 */
1428	if (rt->rt_flags & RTF_GATEWAY) {
1429		if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
1430			rt->rt_ifp->if_type != IFT_ETHER ||
1431			  (rt->rt_ifp->if_flags &
1432			   (IFF_NOARP | IFF_STATICARP)) != 0 ||
1433			  memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
1434				 sizeof(in_addr_t)) != 0) {
1435			RTFREE_LOCKED(rt);
1436			return (EINVAL);
1437		}
1438	}
1439
1440	/*
1441	 * Make sure that at least the destination address is covered
1442	 * by the route. This is for handling the case where 2 or more
1443	 * interfaces have the same prefix. An incoming packet arrives
1444	 * on one interface and the corresponding outgoing packet leaves
1445	 * another interface.
1446	 */
1447	if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
1448		const char *sa, *mask, *addr, *lim;
1449		int len;
1450
1451		mask = (const char *)rt_mask(rt);
1452		/*
1453		 * Just being extra cautious to avoid some custom
1454		 * code getting into trouble.
1455		 */
1456		if (mask == NULL) {
1457			RTFREE_LOCKED(rt);
1458			return (EINVAL);
1459		}
1460
1461		sa = (const char *)rt_key(rt);
1462		addr = (const char *)l3addr;
1463		len = ((const struct sockaddr_in *)l3addr)->sin_len;
1464		lim = addr + len;
1465
1466		for ( ; addr < lim; sa++, mask++, addr++) {
1467			if ((*sa ^ *addr) & *mask) {
1468#ifdef DIAGNOSTIC
1469				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
1470				    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
1471#endif
1472				RTFREE_LOCKED(rt);
1473				return (EINVAL);
1474			}
1475		}
1476	}
1477
1478	RTFREE_LOCKED(rt);
1479	return (0);
1480}
1481
1482/*
1483 * Return NULL if not found or marked for deletion.
1484 * If found return lle read locked.
1485 */
1486static struct llentry *
1487in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
1488{
1489	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
1490	struct ifnet *ifp = llt->llt_ifp;
1491	struct llentry *lle;
1492	struct llentries *lleh;
1493	u_int hashkey;
1494
1495	IF_AFDATA_LOCK_ASSERT(ifp);
1496	KASSERT(l3addr->sa_family == AF_INET,
1497	    ("sin_family %d", l3addr->sa_family));
1498
1499	hashkey = sin->sin_addr.s_addr;
1500	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
1501	LIST_FOREACH(lle, lleh, lle_next) {
1502		struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
1503		if (lle->la_flags & LLE_DELETED)
1504			continue;
1505		if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
1506			break;
1507	}
1508	if (lle == NULL) {
1509#ifdef DIAGNOSTIC
1510		if (flags & LLE_DELETE)
1511			log(LOG_INFO, "interface address is missing from cache = %p  in delete\n", lle);
1512#endif
1513		if (!(flags & LLE_CREATE))
1514			return (NULL);
1515		/*
1516		 * A route that covers the given address must have
1517		 * been installed 1st because we are doing a resolution,
1518		 * verify this.
1519		 */
1520		if (!(flags & LLE_IFADDR) &&
1521		    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
1522			goto done;
1523
1524		lle = in_lltable_new(l3addr, flags);
1525		if (lle == NULL) {
1526			log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
1527			goto done;
1528		}
1529		lle->la_flags = flags & ~LLE_CREATE;
1530		if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
1531			bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
1532			lle->la_flags |= (LLE_VALID | LLE_STATIC);
1533		}
1534
1535		lle->lle_tbl  = llt;
1536		lle->lle_head = lleh;
1537		LIST_INSERT_HEAD(lleh, lle, lle_next);
1538	} else if (flags & LLE_DELETE) {
1539		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
1540			LLE_WLOCK(lle);
1541			lle->la_flags = LLE_DELETED;
1542			EVENTHANDLER_INVOKE(arp_update_event, lle);
1543			LLE_WUNLOCK(lle);
1544#ifdef DIAGNOSTIC
1545			log(LOG_INFO, "ifaddr cache = %p  is deleted\n", lle);
1546#endif
1547		}
1548		lle = (void *)-1;
1549
1550	}
1551	if (LLE_IS_VALID(lle)) {
1552		if (flags & LLE_EXCLUSIVE)
1553			LLE_WLOCK(lle);
1554		else
1555			LLE_RLOCK(lle);
1556	}
1557done:
1558	return (lle);
1559}
1560
1561static int
1562in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
1563{
1564#define	SIN(lle)	((struct sockaddr_in *) L3_ADDR(lle))
1565	struct ifnet *ifp = llt->llt_ifp;
1566	struct llentry *lle;
1567	/* XXX stack use */
1568	struct {
1569		struct rt_msghdr	rtm;
1570		struct sockaddr_inarp	sin;
1571		struct sockaddr_dl	sdl;
1572	} arpc;
1573	int error, i;
1574
1575	LLTABLE_LOCK_ASSERT();
1576
1577	error = 0;
1578	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
1579		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
1580			struct sockaddr_dl *sdl;
1581
1582			/* skip deleted entries */
1583			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
1584				continue;
1585			/* Skip if jailed and not a valid IP of the prison. */
1586			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
1587				continue;
1588			/*
1589			 * produce a msg made of:
1590			 *  struct rt_msghdr;
1591			 *  struct sockaddr_inarp; (IPv4)
1592			 *  struct sockaddr_dl;
1593			 */
1594			bzero(&arpc, sizeof(arpc));
1595			arpc.rtm.rtm_msglen = sizeof(arpc);
1596			arpc.rtm.rtm_version = RTM_VERSION;
1597			arpc.rtm.rtm_type = RTM_GET;
1598			arpc.rtm.rtm_flags = RTF_UP;
1599			arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
1600			arpc.sin.sin_family = AF_INET;
1601			arpc.sin.sin_len = sizeof(arpc.sin);
1602			arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
1603
1604			/* publish */
1605			if (lle->la_flags & LLE_PUB) {
1606				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
1607				/* proxy only */
1608				if (lle->la_flags & LLE_PROXY)
1609					arpc.sin.sin_other = SIN_PROXY;
1610			}
1611
1612			sdl = &arpc.sdl;
1613			sdl->sdl_family = AF_LINK;
1614			sdl->sdl_len = sizeof(*sdl);
1615			sdl->sdl_index = ifp->if_index;
1616			sdl->sdl_type = ifp->if_type;
1617			if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
1618				sdl->sdl_alen = ifp->if_addrlen;
1619				bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
1620			} else {
1621				sdl->sdl_alen = 0;
1622				bzero(LLADDR(sdl), ifp->if_addrlen);
1623			}
1624
1625			arpc.rtm.rtm_rmx.rmx_expire =
1626			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
1627			arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
1628			if (lle->la_flags & LLE_STATIC)
1629				arpc.rtm.rtm_flags |= RTF_STATIC;
1630			arpc.rtm.rtm_index = ifp->if_index;
1631			error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
1632			if (error)
1633				break;
1634		}
1635	}
1636	return error;
1637#undef SIN
1638}
1639
1640void *
1641in_domifattach(struct ifnet *ifp)
1642{
1643	struct in_ifinfo *ii;
1644	struct lltable *llt;
1645
1646	ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
1647
1648	llt = lltable_init(ifp, AF_INET);
1649	if (llt != NULL) {
1650		llt->llt_free = in_lltable_free;
1651		llt->llt_prefix_free = in_lltable_prefix_free;
1652		llt->llt_lookup = in_lltable_lookup;
1653		llt->llt_dump = in_lltable_dump;
1654	}
1655	ii->ii_llt = llt;
1656
1657	ii->ii_igmp = igmp_domifattach(ifp);
1658
1659	return ii;
1660}
1661
1662void
1663in_domifdetach(struct ifnet *ifp, void *aux)
1664{
1665	struct in_ifinfo *ii = (struct in_ifinfo *)aux;
1666
1667	igmp_domifdetach(ifp);
1668	lltable_free(ii->ii_llt);
1669	free(ii, M_IFADDR);
1670}
1671