in.c revision 226713
1163516Simp/*-
2163516Simp * Copyright (c) 1982, 1986, 1991, 1993
3163516Simp *	The Regents of the University of California.  All rights reserved.
4163516Simp * Copyright (C) 2001 WIDE Project.  All rights reserved.
5163516Simp *
6163516Simp * Redistribution and use in source and binary forms, with or without
7163516Simp * modification, are permitted provided that the following conditions
8163516Simp * are met:
9163516Simp * 1. Redistributions of source code must retain the above copyright
10163516Simp *    notice, this list of conditions and the following disclaimer.
11163516Simp * 2. Redistributions in binary form must reproduce the above copyright
12163516Simp *    notice, this list of conditions and the following disclaimer in the
13163516Simp *    documentation and/or other materials provided with the distribution.
14163516Simp * 4. Neither the name of the University nor the names of its contributors
15163516Simp *    may be used to endorse or promote products derived from this software
16163516Simp *    without specific prior written permission.
17163516Simp *
18163516Simp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19163516Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20163516Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21163516Simp * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22163516Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23163516Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24170002Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25170002Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26170002Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27170002Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28170002Simp * SUCH DAMAGE.
29170002Simp *
30170002Simp *	@(#)in.c	8.4 (Berkeley) 1/9/95
31170002Simp */
32170002Simp
33170002Simp#include <sys/cdefs.h>
34170002Simp__FBSDID("$FreeBSD: head/sys/netinet/in.c 226713 2011-10-25 04:06:29Z qingli $");
35170002Simp
36170002Simp#include "opt_mpath.h"
37170002Simp
38170002Simp#include <sys/param.h>
39170002Simp#include <sys/systm.h>
40170002Simp#include <sys/sockio.h>
41170002Simp#include <sys/malloc.h>
42170002Simp#include <sys/priv.h>
43170002Simp#include <sys/socket.h>
44170002Simp#include <sys/jail.h>
45170002Simp#include <sys/kernel.h>
46170002Simp#include <sys/proc.h>
47170002Simp#include <sys/sysctl.h>
48170002Simp#include <sys/syslog.h>
49170002Simp
50170002Simp#include <net/if.h>
51163516Simp#include <net/if_var.h>
52163516Simp#include <net/if_arp.h>
53163516Simp#include <net/if_dl.h>
54163516Simp#include <net/if_llatbl.h>
55163516Simp#include <net/if_types.h>
56163516Simp#include <net/route.h>
57163516Simp#include <net/vnet.h>
58163516Simp
59163516Simp#include <netinet/in.h>
60163516Simp#include <netinet/in_var.h>
61163516Simp#include <netinet/in_pcb.h>
62163516Simp#include <netinet/ip_var.h>
63163516Simp#include <netinet/igmp_var.h>
64163516Simp#include <netinet/udp.h>
65163516Simp#include <netinet/udp_var.h>
66163516Simp
67163516Simpstatic int in_mask2len(struct in_addr *);
68163516Simpstatic void in_len2mask(struct in_addr *, int);
69163516Simpstatic int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
70163516Simp	struct ifnet *, struct thread *);
71163516Simp
72163516Simpstatic int	in_addprefix(struct in_ifaddr *, int);
73163516Simpstatic int	in_scrubprefix(struct in_ifaddr *, u_int);
74163516Simpstatic void	in_socktrim(struct sockaddr_in *);
75163516Simpstatic int	in_ifinit(struct ifnet *,
76163516Simp	    struct in_ifaddr *, struct sockaddr_in *, int);
77163516Simpstatic void	in_purgemaddrs(struct ifnet *);
78163516Simp
79163516Simpstatic VNET_DEFINE(int, sameprefixcarponly);
80169567Simp#define	V_sameprefixcarponly		VNET(sameprefixcarponly)
81163516SimpSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
82163516Simp	&VNET_NAME(sameprefixcarponly), 0,
83183480Simp	"Refuse to create same prefixes on different interfaces");
84163516Simp
85163516SimpVNET_DECLARE(struct inpcbinfo, ripcbinfo);
86163516Simp#define	V_ripcbinfo			VNET(ripcbinfo)
87163516Simp
88163516SimpVNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
89163516Simp#define	V_arpstat		VNET(arpstat)
90163516Simp
91163516Simp/*
92163516Simp * Return 1 if an internet address is for a ``local'' host
93163516Simp * (one to which we have a connection).
94163516Simp */
95163516Simpint
96183774Simpin_localaddr(struct in_addr in)
97183774Simp{
98183774Simp	register u_long i = ntohl(in.s_addr);
99163516Simp	register struct in_ifaddr *ia;
100163516Simp
101163516Simp	IN_IFADDR_RLOCK();
102163516Simp	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
103163516Simp		if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
104163516Simp			IN_IFADDR_RUNLOCK();
105163516Simp			return (1);
106163516Simp		}
107163516Simp	}
108163516Simp	IN_IFADDR_RUNLOCK();
109163516Simp	return (0);
110163516Simp}
111163516Simp
112183704Smav/*
113183480Simp * Return 1 if an internet address is for the local host and configured
114163516Simp * on one of its interfaces.
115163516Simp */
116163516Simpint
117163516Simpin_localip(struct in_addr in)
118163516Simp{
119163516Simp	struct in_ifaddr *ia;
120163516Simp
121183774Simp	IN_IFADDR_RLOCK();
122183774Simp	LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
123183774Simp		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
124163516Simp			IN_IFADDR_RUNLOCK();
125163516Simp			return (1);
126163516Simp		}
127163516Simp	}
128163516Simp	IN_IFADDR_RUNLOCK();
129183774Simp	return (0);
130183774Simp}
131183774Simp
132183774Simp/*
133183774Simp * Determine whether an IP address is in a reserved set of addresses
134183774Simp * that may not be forwarded, or whether datagrams to that destination
135183774Simp * may be forwarded.
136184033Smav */
137183774Simpint
138183774Simpin_canforward(struct in_addr in)
139183774Simp{
140184033Smav	register u_long i = ntohl(in.s_addr);
141183774Simp	register u_long net;
142183774Simp
143183774Simp	if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
144183774Simp		return (0);
145183774Simp	if (IN_CLASSA(i)) {
146183774Simp		net = i & IN_CLASSA_NET;
147183774Simp		if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
148183774Simp			return (0);
149183774Simp	}
150183774Simp	return (1);
151183774Simp}
152183805Smav
153183774Simp/*
154183774Simp * Trim a mask in a sockaddr
155183774Simp */
156183774Simpstatic void
157183774Simpin_socktrim(struct sockaddr_in *ap)
158183774Simp{
159183774Simp    register char *cplim = (char *) &ap->sin_addr;
160183774Simp    register char *cp = (char *) (&ap->sin_addr + 1);
161183774Simp
162163516Simp    ap->sin_len = 0;
163169567Simp    while (--cp >= cplim)
164169567Simp	if (*cp) {
165172836Sjulian	    (ap)->sin_len = cp - (char *) (ap) + 1;
166163516Simp	    break;
167163516Simp	}
168163516Simp}
169163516Simp
/*
 * Convert a netmask into a prefix length.
 *
 * The mask is read byte by byte in memory order: whole 0xff bytes are
 * counted first, then the leading one-bits of the first byte that is not
 * 0xff.  Counting stops at the first zero bit, so any bits set after it
 * are ignored.  The result is in the range 0..(8 * sizeof(*mask)).
 */
static int
in_mask2len(struct in_addr *mask)
{
	const unsigned char *p;
	size_t nbytes;
	int bits;

	p = (const unsigned char *)mask;

	/* Count the leading bytes that are entirely ones. */
	for (nbytes = 0; nbytes < sizeof(*mask); nbytes++) {
		if (p[nbytes] != 0xff)
			break;
	}

	/* Count the leading one-bits in the first partial byte, if any. */
	bits = 0;
	if (nbytes < sizeof(*mask)) {
		while (bits < 8 && (p[nbytes] & (0x80 >> bits)) != 0)
			bits++;
	}

	return ((int)nbytes * 8 + bits);
}
191169567Simp
/*
 * Build the netmask for a prefix length.
 *
 * The mask is written byte by byte in memory order: len / 8 leading bytes
 * of 0xff followed, when len is not a multiple of 8, by one byte holding
 * the remaining high-order one-bits.  All other bytes are zero.
 *
 * len is clamped to 0..(8 * sizeof(*mask)) so that an out-of-range value
 * can neither write past the end of *mask nor shift by a negative count.
 */
static void
in_len2mask(struct in_addr *mask, int len)
{
	const int maxbits = (int)sizeof(*mask) * 8;
	unsigned char *p;
	int i;

	if (len < 0)
		len = 0;
	else if (len > maxbits)
		len = maxbits;

	p = (unsigned char *)mask;
	bzero(mask, sizeof(*mask));
	for (i = 0; i < len / 8; i++)
		p[i] = 0xff;
	if (len % 8)
		p[i] = (0xff00 >> (len % 8)) & 0xff;
}
205183467Simp
206163516Simp/*
207163516Simp * Generic internet control operations (ioctl's).
208163516Simp *
209163516Simp * ifp is NULL if not an interface-specific ioctl.
210163516Simp */
211163516Simp/* ARGSUSED */
212163516Simpint
213163516Simpin_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
214163516Simp    struct thread *td)
215163516Simp{
216163516Simp	register struct ifreq *ifr = (struct ifreq *)data;
217163516Simp	register struct in_ifaddr *ia, *iap;
218163516Simp	register struct ifaddr *ifa;
219163516Simp	struct in_addr allhosts_addr;
220184033Smav	struct in_addr dst;
221184033Smav	struct in_ifinfo *ii;
222184033Smav	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
223184033Smav	struct sockaddr_in oldaddr;
224184033Smav	int error, hostIsNew, iaIsNew, maskIsNew;
225184033Smav	int iaIsFirst;
226184033Smav
227184033Smav	ia = NULL;
228184033Smav	iaIsFirst = 0;
229184033Smav	iaIsNew = 0;
230184033Smav	allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
231184033Smav
232184033Smav	/*
233184033Smav	 * Filter out ioctls we implement directly; forward the rest on to
234184033Smav	 * in_lifaddr_ioctl() and ifp->if_ioctl().
235184033Smav	 */
236184033Smav	switch (cmd) {
237184033Smav	case SIOCAIFADDR:
238184033Smav	case SIOCDIFADDR:
239184033Smav	case SIOCGIFADDR:
240184033Smav	case SIOCGIFBRDADDR:
241184033Smav	case SIOCGIFDSTADDR:
242184033Smav	case SIOCGIFNETMASK:
243184033Smav	case SIOCSIFADDR:
244184033Smav	case SIOCSIFBRDADDR:
245184033Smav	case SIOCSIFDSTADDR:
246184033Smav	case SIOCSIFNETMASK:
247184033Smav		break;
248184033Smav
249184033Smav	case SIOCALIFADDR:
250184033Smav		if (td != NULL) {
251184033Smav			error = priv_check(td, PRIV_NET_ADDIFADDR);
252184033Smav			if (error)
253184033Smav				return (error);
254184033Smav		}
255184033Smav		if (ifp == NULL)
256184033Smav			return (EINVAL);
257184033Smav		return in_lifaddr_ioctl(so, cmd, data, ifp, td);
258184033Smav
259184033Smav	case SIOCDLIFADDR:
260184033Smav		if (td != NULL) {
261184033Smav			error = priv_check(td, PRIV_NET_DELIFADDR);
262184033Smav			if (error)
263184033Smav				return (error);
264184033Smav		}
265184033Smav		if (ifp == NULL)
266184033Smav			return (EINVAL);
267184033Smav		return in_lifaddr_ioctl(so, cmd, data, ifp, td);
268184033Smav
269184033Smav	case SIOCGLIFADDR:
270184033Smav		if (ifp == NULL)
271184033Smav			return (EINVAL);
272184033Smav		return in_lifaddr_ioctl(so, cmd, data, ifp, td);
273184033Smav
274184033Smav	default:
275184033Smav		if (ifp == NULL || ifp->if_ioctl == NULL)
276184033Smav			return (EOPNOTSUPP);
277184033Smav		return ((*ifp->if_ioctl)(ifp, cmd, data));
278184033Smav	}
279184033Smav
280184033Smav	if (ifp == NULL)
281184033Smav		return (EADDRNOTAVAIL);
282184033Smav
283184033Smav	/*
284184033Smav	 * Security checks before we get involved in any work.
285184033Smav	 */
286184033Smav	switch (cmd) {
287184033Smav	case SIOCAIFADDR:
288184033Smav	case SIOCSIFADDR:
289184033Smav	case SIOCSIFBRDADDR:
290184033Smav	case SIOCSIFNETMASK:
291184033Smav	case SIOCSIFDSTADDR:
292184033Smav		if (td != NULL) {
293184033Smav			error = priv_check(td, PRIV_NET_ADDIFADDR);
294184033Smav			if (error)
295184033Smav				return (error);
296184033Smav		}
297184033Smav		break;
298184033Smav
299184033Smav	case SIOCDIFADDR:
300184033Smav		if (td != NULL) {
301184033Smav			error = priv_check(td, PRIV_NET_DELIFADDR);
302184033Smav			if (error)
303184033Smav				return (error);
304184033Smav		}
305184033Smav		break;
306184033Smav	}
307184033Smav
308184033Smav	/*
309184033Smav	 * Find address for this interface, if it exists.
310184033Smav	 *
311184033Smav	 * If an alias address was specified, find that one instead of the
312184033Smav	 * first one on the interface, if possible.
313184033Smav	 */
314184033Smav	dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
315184033Smav	IN_IFADDR_RLOCK();
316184033Smav	LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
317184033Smav		if (iap->ia_ifp == ifp &&
318184033Smav		    iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
319184033Smav			if (td == NULL || prison_check_ip4(td->td_ucred,
320184033Smav			    &dst) == 0)
321184033Smav				ia = iap;
322184033Smav			break;
323184033Smav		}
324184033Smav	}
325184033Smav	if (ia != NULL)
326184033Smav		ifa_ref(&ia->ia_ifa);
327184033Smav	IN_IFADDR_RUNLOCK();
328184033Smav	if (ia == NULL) {
329184033Smav		IF_ADDR_LOCK(ifp);
330184033Smav		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
331184033Smav			iap = ifatoia(ifa);
332184033Smav			if (iap->ia_addr.sin_family == AF_INET) {
333184033Smav				if (td != NULL &&
334184033Smav				    prison_check_ip4(td->td_ucred,
335184033Smav				    &iap->ia_addr.sin_addr) != 0)
336184033Smav					continue;
337184033Smav				ia = iap;
338184033Smav				break;
339184033Smav			}
340184033Smav		}
341184033Smav		if (ia != NULL)
342184033Smav			ifa_ref(&ia->ia_ifa);
343184033Smav		IF_ADDR_UNLOCK(ifp);
344184033Smav	}
345184033Smav	if (ia == NULL)
346184033Smav		iaIsFirst = 1;
347184033Smav
348184033Smav	error = 0;
349184033Smav	switch (cmd) {
350184033Smav	case SIOCAIFADDR:
351184033Smav	case SIOCDIFADDR:
352184033Smav		if (ifra->ifra_addr.sin_family == AF_INET) {
353184033Smav			struct in_ifaddr *oia;
354184033Smav
355184033Smav			IN_IFADDR_RLOCK();
356184033Smav			for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
357184033Smav				if (ia->ia_ifp == ifp  &&
358184033Smav				    ia->ia_addr.sin_addr.s_addr ==
359184033Smav				    ifra->ifra_addr.sin_addr.s_addr)
360184033Smav					break;
361184033Smav			}
362184033Smav			if (ia != NULL && ia != oia)
363184033Smav				ifa_ref(&ia->ia_ifa);
364184033Smav			if (oia != NULL && ia != oia)
365163516Simp				ifa_free(&oia->ia_ifa);
366163516Simp			IN_IFADDR_RUNLOCK();
367163516Simp			if ((ifp->if_flags & IFF_POINTOPOINT)
368163516Simp			    && (cmd == SIOCAIFADDR)
369163516Simp			    && (ifra->ifra_dstaddr.sin_addr.s_addr
370163516Simp				== INADDR_ANY)) {
371163516Simp				error = EDESTADDRREQ;
372163516Simp				goto out;
373163516Simp			}
374163516Simp		}
375169567Simp		if (cmd == SIOCDIFADDR && ia == NULL) {
376163516Simp			error = EADDRNOTAVAIL;
377163516Simp			goto out;
378163516Simp		}
379163516Simp		/* FALLTHROUGH */
380163516Simp	case SIOCSIFADDR:
381169567Simp	case SIOCSIFNETMASK:
382169567Simp	case SIOCSIFDSTADDR:
383169567Simp		if (ia == NULL) {
384163516Simp			ia = (struct in_ifaddr *)
385169567Simp				malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
386169567Simp				    M_ZERO);
387183448Simp			if (ia == NULL) {
388183448Simp				error = ENOBUFS;
389183448Simp				goto out;
390183448Simp			}
391183448Simp
392183448Simp			ifa = &ia->ia_ifa;
393183448Simp			ifa_init(ifa);
394163516Simp			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
395163516Simp			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
396184033Smav			ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
397163516Simp
398184033Smav			ia->ia_sockmask.sin_len = 8;
399184033Smav			ia->ia_sockmask.sin_family = AF_INET;
400184033Smav			if (ifp->if_flags & IFF_BROADCAST) {
401184033Smav				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
402184033Smav				ia->ia_broadaddr.sin_family = AF_INET;
403184033Smav			}
404184033Smav			ia->ia_ifp = ifp;
405163516Simp
406163516Simp			ifa_ref(ifa);			/* if_addrhead */
407183480Simp			IF_ADDR_LOCK(ifp);
408183480Simp			TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
409183480Simp			IF_ADDR_UNLOCK(ifp);
410183480Simp			ifa_ref(ifa);			/* in_ifaddrhead */
411183480Simp			IN_IFADDR_WLOCK();
412163516Simp			TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
413163516Simp			IN_IFADDR_WUNLOCK();
414169567Simp			iaIsNew = 1;
415169567Simp		}
416169567Simp		break;
417169567Simp
418169567Simp	case SIOCSIFBRDADDR:
419169567Simp	case SIOCGIFADDR:
420169567Simp	case SIOCGIFNETMASK:
421172836Sjulian	case SIOCGIFDSTADDR:
422163516Simp	case SIOCGIFBRDADDR:
423163516Simp		if (ia == NULL) {
424183774Simp			error = EADDRNOTAVAIL;
425183774Simp			goto out;
426183774Simp		}
427183774Simp		break;
428183774Simp	}
429183774Simp
430183774Simp	/*
431183774Simp	 * Most paths in this switch return directly or via out.  Only paths
432183774Simp	 * that remove the address break in order to hit common removal code.
433183774Simp	 */
434183774Simp	switch (cmd) {
435183774Simp	case SIOCGIFADDR:
436183774Simp		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
437183774Simp		goto out;
438183774Simp
439183774Simp	case SIOCGIFBRDADDR:
440183774Simp		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
441183774Simp			error = EINVAL;
442183774Simp			goto out;
443183774Simp		}
444163516Simp		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
445163516Simp		goto out;
446163516Simp
447163516Simp	case SIOCGIFDSTADDR:
448163516Simp		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
449163516Simp			error = EINVAL;
450163516Simp			goto out;
451163516Simp		}
452163516Simp		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
453163516Simp		goto out;
454163516Simp
455163516Simp	case SIOCGIFNETMASK:
456163516Simp		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
457163516Simp		goto out;
458163516Simp
459	case SIOCSIFDSTADDR:
460		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
461			error = EINVAL;
462			goto out;
463		}
464		oldaddr = ia->ia_dstaddr;
465		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
466		if (ifp->if_ioctl != NULL) {
467			error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
468			    (caddr_t)ia);
469			if (error) {
470				ia->ia_dstaddr = oldaddr;
471				goto out;
472			}
473		}
474		if (ia->ia_flags & IFA_ROUTE) {
475			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
476			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
477			ia->ia_ifa.ifa_dstaddr =
478					(struct sockaddr *)&ia->ia_dstaddr;
479			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
480		}
481		goto out;
482
483	case SIOCSIFBRDADDR:
484		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
485			error = EINVAL;
486			goto out;
487		}
488		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
489		goto out;
490
491	case SIOCSIFADDR:
492		error = in_ifinit(ifp, ia,
493		    (struct sockaddr_in *) &ifr->ifr_addr, 1);
494		if (error != 0 && iaIsNew)
495			break;
496		if (error == 0) {
497			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
498			if (iaIsFirst &&
499			    (ifp->if_flags & IFF_MULTICAST) != 0) {
500				error = in_joingroup(ifp, &allhosts_addr,
501				    NULL, &ii->ii_allhosts);
502			}
503			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
504		}
505		error = 0;
506		goto out;
507
508	case SIOCSIFNETMASK:
509		ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
510		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
511		goto out;
512
513	case SIOCAIFADDR:
514		maskIsNew = 0;
515		hostIsNew = 1;
516		error = 0;
517		if (ia->ia_addr.sin_family == AF_INET) {
518			if (ifra->ifra_addr.sin_len == 0) {
519				ifra->ifra_addr = ia->ia_addr;
520				hostIsNew = 0;
521			} else if (ifra->ifra_addr.sin_addr.s_addr ==
522					       ia->ia_addr.sin_addr.s_addr)
523				hostIsNew = 0;
524		}
525		if (ifra->ifra_mask.sin_len) {
526			/*
527			 * QL: XXX
528			 * Need to scrub the prefix here in case
529			 * the issued command is SIOCAIFADDR with
530			 * the same address, but with a different
531			 * prefix length. And if the prefix length
532			 * is the same as before, then the call is
533			 * un-necessarily executed here.
534			 */
535			in_ifscrub(ifp, ia, LLE_STATIC);
536			ia->ia_sockmask = ifra->ifra_mask;
537			ia->ia_sockmask.sin_family = AF_INET;
538			ia->ia_subnetmask =
539			     ntohl(ia->ia_sockmask.sin_addr.s_addr);
540			maskIsNew = 1;
541		}
542		if ((ifp->if_flags & IFF_POINTOPOINT) &&
543		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
544			in_ifscrub(ifp, ia, LLE_STATIC);
545			ia->ia_dstaddr = ifra->ifra_dstaddr;
546			maskIsNew  = 1; /* We lie; but the effect's the same */
547		}
548		if (ifra->ifra_addr.sin_family == AF_INET &&
549		    (hostIsNew || maskIsNew))
550			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
551		if (error != 0 && iaIsNew)
552			break;
553
554		if ((ifp->if_flags & IFF_BROADCAST) &&
555		    (ifra->ifra_broadaddr.sin_family == AF_INET))
556			ia->ia_broadaddr = ifra->ifra_broadaddr;
557		if (error == 0) {
558			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
559			if (iaIsFirst &&
560			    (ifp->if_flags & IFF_MULTICAST) != 0) {
561				error = in_joingroup(ifp, &allhosts_addr,
562				    NULL, &ii->ii_allhosts);
563			}
564			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
565		}
566		goto out;
567
568	case SIOCDIFADDR:
569		/*
570		 * in_ifscrub kills the interface route.
571		 */
572		in_ifscrub(ifp, ia, LLE_STATIC);
573
574		/*
575		 * in_ifadown gets rid of all the rest of
576		 * the routes.  This is not quite the right
577		 * thing to do, but at least if we are running
578		 * a routing process they will come back.
579		 */
580		in_ifadown(&ia->ia_ifa, 1);
581		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
582		error = 0;
583		break;
584
585	default:
586		panic("in_control: unsupported ioctl");
587	}
588
589	IF_ADDR_LOCK(ifp);
590	/* Re-check that ia is still part of the list. */
591	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
592		if (ifa == &ia->ia_ifa)
593			break;
594	}
595	if (ifa == NULL) {
596		/*
597		 * If we lost the race with another thread, there is no need to
598		 * try it again for the next loop as there is no other exit
599		 * path between here and out.
600		 */
601		IF_ADDR_UNLOCK(ifp);
602		error = EADDRNOTAVAIL;
603		goto out;
604	}
605	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
606	IF_ADDR_UNLOCK(ifp);
607	ifa_free(&ia->ia_ifa);				/* if_addrhead */
608
609	IN_IFADDR_WLOCK();
610	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
611	if (ia->ia_addr.sin_family == AF_INET) {
612		struct in_ifaddr *if_ia;
613
614		LIST_REMOVE(ia, ia_hash);
615		IN_IFADDR_WUNLOCK();
616		/*
617		 * If this is the last IPv4 address configured on this
618		 * interface, leave the all-hosts group.
619		 * No state-change report need be transmitted.
620		 */
621		if_ia = NULL;
622		IFP_TO_IA(ifp, if_ia);
623		if (if_ia == NULL) {
624			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
625			IN_MULTI_LOCK();
626			if (ii->ii_allhosts) {
627				(void)in_leavegroup_locked(ii->ii_allhosts,
628				    NULL);
629				ii->ii_allhosts = NULL;
630			}
631			IN_MULTI_UNLOCK();
632		} else
633			ifa_free(&if_ia->ia_ifa);
634	} else
635		IN_IFADDR_WUNLOCK();
636	ifa_free(&ia->ia_ifa);				/* in_ifaddrhead */
637out:
638	if (ia != NULL)
639		ifa_free(&ia->ia_ifa);
640	return (error);
641}
642
643/*
644 * SIOC[GAD]LIFADDR.
645 *	SIOCGLIFADDR: get first address. (?!?)
646 *	SIOCGLIFADDR with IFLR_PREFIX:
647 *		get first address that matches the specified prefix.
648 *	SIOCALIFADDR: add the specified address.
649 *	SIOCALIFADDR with IFLR_PREFIX:
650 *		EINVAL since we can't deduce hostid part of the address.
651 *	SIOCDLIFADDR: delete the specified address.
652 *	SIOCDLIFADDR with IFLR_PREFIX:
653 *		delete the first address that matches the specified prefix.
654 * return values:
655 *	EINVAL on invalid parameters
656 *	EADDRNOTAVAIL on prefix match failed/specified address not found
657 *	other values may be returned from in_ioctl()
658 */
659static int
660in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
661    struct ifnet *ifp, struct thread *td)
662{
663	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
664	struct ifaddr *ifa;
665
666	/* sanity checks */
667	if (data == NULL || ifp == NULL) {
668		panic("invalid argument to in_lifaddr_ioctl");
669		/*NOTRECHED*/
670	}
671
672	switch (cmd) {
673	case SIOCGLIFADDR:
674		/* address must be specified on GET with IFLR_PREFIX */
675		if ((iflr->flags & IFLR_PREFIX) == 0)
676			break;
677		/*FALLTHROUGH*/
678	case SIOCALIFADDR:
679	case SIOCDLIFADDR:
680		/* address must be specified on ADD and DELETE */
681		if (iflr->addr.ss_family != AF_INET)
682			return (EINVAL);
683		if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
684			return (EINVAL);
685		/* XXX need improvement */
686		if (iflr->dstaddr.ss_family
687		 && iflr->dstaddr.ss_family != AF_INET)
688			return (EINVAL);
689		if (iflr->dstaddr.ss_family
690		 && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
691			return (EINVAL);
692		break;
693	default: /*shouldn't happen*/
694		return (EOPNOTSUPP);
695	}
696	if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
697		return (EINVAL);
698
699	switch (cmd) {
700	case SIOCALIFADDR:
701	    {
702		struct in_aliasreq ifra;
703
704		if (iflr->flags & IFLR_PREFIX)
705			return (EINVAL);
706
707		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
708		bzero(&ifra, sizeof(ifra));
709		bcopy(iflr->iflr_name, ifra.ifra_name,
710			sizeof(ifra.ifra_name));
711
712		bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
713
714		if (iflr->dstaddr.ss_family) {	/*XXX*/
715			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
716				iflr->dstaddr.ss_len);
717		}
718
719		ifra.ifra_mask.sin_family = AF_INET;
720		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
721		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
722
723		return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
724	    }
725	case SIOCGLIFADDR:
726	case SIOCDLIFADDR:
727	    {
728		struct in_ifaddr *ia;
729		struct in_addr mask, candidate, match;
730		struct sockaddr_in *sin;
731
732		bzero(&mask, sizeof(mask));
733		bzero(&match, sizeof(match));
734		if (iflr->flags & IFLR_PREFIX) {
735			/* lookup a prefix rather than address. */
736			in_len2mask(&mask, iflr->prefixlen);
737
738			sin = (struct sockaddr_in *)&iflr->addr;
739			match.s_addr = sin->sin_addr.s_addr;
740			match.s_addr &= mask.s_addr;
741
742			/* if you set extra bits, that's wrong */
743			if (match.s_addr != sin->sin_addr.s_addr)
744				return (EINVAL);
745
746		} else {
747			/* on getting an address, take the 1st match */
748			/* on deleting an address, do exact match */
749			if (cmd != SIOCGLIFADDR) {
750				in_len2mask(&mask, 32);
751				sin = (struct sockaddr_in *)&iflr->addr;
752				match.s_addr = sin->sin_addr.s_addr;
753			}
754		}
755
756		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)	{
757			if (ifa->ifa_addr->sa_family != AF_INET6)
758				continue;
759			if (match.s_addr == 0)
760				break;
761			candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
762			candidate.s_addr &= mask.s_addr;
763			if (candidate.s_addr == match.s_addr)
764				break;
765		}
766		if (ifa == NULL)
767			return (EADDRNOTAVAIL);
768		ia = (struct in_ifaddr *)ifa;
769
770		if (cmd == SIOCGLIFADDR) {
771			/* fill in the if_laddrreq structure */
772			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
773
774			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
775				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
776					ia->ia_dstaddr.sin_len);
777			} else
778				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
779
780			iflr->prefixlen =
781				in_mask2len(&ia->ia_sockmask.sin_addr);
782
783			iflr->flags = 0;	/*XXX*/
784
785			return (0);
786		} else {
787			struct in_aliasreq ifra;
788
789			/* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
790			bzero(&ifra, sizeof(ifra));
791			bcopy(iflr->iflr_name, ifra.ifra_name,
792				sizeof(ifra.ifra_name));
793
794			bcopy(&ia->ia_addr, &ifra.ifra_addr,
795				ia->ia_addr.sin_len);
796			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
797				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
798					ia->ia_dstaddr.sin_len);
799			}
800			bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr,
801				ia->ia_sockmask.sin_len);
802
803			return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
804			    ifp, td));
805		}
806	    }
807	}
808
809	return (EOPNOTSUPP);	/*just for safety*/
810}
811
812/*
813 * Delete any existing route for an interface.
814 */
815void
816in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
817{
818
819	in_scrubprefix(ia, flags);
820}
821
822/*
823 * Initialize an interface's internet address
824 * and routing table entry.
825 */
826static int
827in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
828    int scrub)
829{
830	register u_long i = ntohl(sin->sin_addr.s_addr);
831	struct sockaddr_in oldaddr;
832	int flags = RTF_UP, error = 0;
833
834	oldaddr = ia->ia_addr;
835	if (oldaddr.sin_family == AF_INET)
836		LIST_REMOVE(ia, ia_hash);
837	ia->ia_addr = *sin;
838	if (ia->ia_addr.sin_family == AF_INET) {
839		IN_IFADDR_WLOCK();
840		LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
841		    ia, ia_hash);
842		IN_IFADDR_WUNLOCK();
843	}
844	/*
845	 * Give the interface a chance to initialize
846	 * if this is its first address,
847	 * and to validate the address if necessary.
848	 */
849	if (ifp->if_ioctl != NULL) {
850		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
851		if (error) {
852			/* LIST_REMOVE(ia, ia_hash) is done in in_control */
853			ia->ia_addr = oldaddr;
854			IN_IFADDR_WLOCK();
855			if (ia->ia_addr.sin_family == AF_INET)
856				LIST_INSERT_HEAD(INADDR_HASH(
857				    ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
858			else
859				/*
860				 * If oldaddr family is not AF_INET (e.g.
861				 * interface has been just created) in_control
862				 * does not call LIST_REMOVE, and we end up
863				 * with bogus ia entries in hash
864				 */
865				LIST_REMOVE(ia, ia_hash);
866			IN_IFADDR_WUNLOCK();
867			return (error);
868		}
869	}
870	if (scrub) {
871		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
872		in_ifscrub(ifp, ia, LLE_STATIC);
873		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
874	}
875	/*
876	 * Be compatible with network classes, if netmask isn't supplied,
877	 * guess it based on classes.
878	 */
879	if (ia->ia_subnetmask == 0) {
880		if (IN_CLASSA(i))
881			ia->ia_subnetmask = IN_CLASSA_NET;
882		else if (IN_CLASSB(i))
883			ia->ia_subnetmask = IN_CLASSB_NET;
884		else
885			ia->ia_subnetmask = IN_CLASSC_NET;
886		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
887	}
888	ia->ia_subnet = i & ia->ia_subnetmask;
889	in_socktrim(&ia->ia_sockmask);
890	/*
891	 * XXX: carp(4) does not have interface route
892	 */
893	if (ifp->if_type == IFT_CARP)
894		return (0);
895	/*
896	 * Add route for the network.
897	 */
898	ia->ia_ifa.ifa_metric = ifp->if_metric;
899	if (ifp->if_flags & IFF_BROADCAST) {
900		if (ia->ia_subnetmask == IN_RFC3021_MASK)
901			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
902		else
903			ia->ia_broadaddr.sin_addr.s_addr =
904			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
905	} else if (ifp->if_flags & IFF_LOOPBACK) {
906		ia->ia_dstaddr = ia->ia_addr;
907		flags |= RTF_HOST;
908	} else if (ifp->if_flags & IFF_POINTOPOINT) {
909		if (ia->ia_dstaddr.sin_family != AF_INET)
910			return (0);
911		flags |= RTF_HOST;
912	}
913	if ((error = in_addprefix(ia, flags)) != 0)
914		return (error);
915
916	if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
917		return (0);
918
919	if (ifp->if_flags & IFF_POINTOPOINT) {
920		if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
921			return (0);
922	}
923
924
925	/*
926	 * add a loopback route to self
927	 */
928	if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
929		struct route ia_ro;
930
931		bzero(&ia_ro, sizeof(ia_ro));
932		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
933		rtalloc_ign_fib(&ia_ro, 0, 0);
934		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
935		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
936			RT_LOCK(ia_ro.ro_rt);
937			RT_ADDREF(ia_ro.ro_rt);
938			RTFREE_LOCKED(ia_ro.ro_rt);
939		} else
940			error = ifa_add_loopback_route((struct ifaddr *)ia,
941				       (struct sockaddr *)&ia->ia_addr);
942		if (error == 0)
943			ia->ia_flags |= IFA_RTSELF;
944		if (ia_ro.ro_rt != NULL)
945			RTFREE(ia_ro.ro_rt);
946	}
947
948	return (error);
949}
950
/*
 * Evaluates to RTF_HOST when the interface owning in_ifaddr x has
 * IFF_LOOPBACK or IFF_POINTOPOINT set, and to 0 otherwise.
 */
#define	rtinitflags(x)							\
	(((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ?	\
	    RTF_HOST : 0)
954
955/*
956 * Generate a routing message when inserting or deleting
957 * an interface address alias.
958 */
959static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
960    struct in_ifaddr *target)
961{
962	struct route pfx_ro;
963	struct sockaddr_in *pfx_addr;
964	struct rtentry msg_rt;
965
966	/* QL: XXX
967	 * This is a bit questionable because there is no
968	 * additional route entry added/deleted for an address
969	 * alias. Therefore this route report is inaccurate.
970	 */
971	bzero(&pfx_ro, sizeof(pfx_ro));
972	pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
973	pfx_addr->sin_len = sizeof(*pfx_addr);
974	pfx_addr->sin_family = AF_INET;
975	pfx_addr->sin_addr = *prefix;
976	rtalloc_ign_fib(&pfx_ro, 0, 0);
977	if (pfx_ro.ro_rt != NULL) {
978		msg_rt = *pfx_ro.ro_rt;
979
980		/* QL: XXX
981		 * Point the gateway to the new interface
982		 * address as if a new prefix route entry has
983		 * been added through the new address alias.
984		 * All other parts of the rtentry is accurate,
985		 * e.g., rt_key, rt_mask, rt_ifp etc.
986		 */
987		msg_rt.rt_gateway =
988			(struct sockaddr *)&target->ia_addr;
989		rt_newaddrmsg(cmd,
990			      (struct ifaddr *)target,
991			      0, &msg_rt);
992		RTFREE(pfx_ro.ro_rt);
993	}
994	return;
995}
996
997/*
998 * Check if we have a route for the given prefix already or add one accordingly.
999 */
1000static int
1001in_addprefix(struct in_ifaddr *target, int flags)
1002{
1003	struct in_ifaddr *ia;
1004	struct in_addr prefix, mask, p, m;
1005	int error;
1006
1007	if ((flags & RTF_HOST) != 0) {
1008		prefix = target->ia_dstaddr.sin_addr;
1009		mask.s_addr = 0;
1010	} else {
1011		prefix = target->ia_addr.sin_addr;
1012		mask = target->ia_sockmask.sin_addr;
1013		prefix.s_addr &= mask.s_addr;
1014	}
1015
1016	IN_IFADDR_RLOCK();
1017	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1018		if (rtinitflags(ia)) {
1019			p = ia->ia_dstaddr.sin_addr;
1020
1021			if (prefix.s_addr != p.s_addr)
1022				continue;
1023		} else {
1024			p = ia->ia_addr.sin_addr;
1025			m = ia->ia_sockmask.sin_addr;
1026			p.s_addr &= m.s_addr;
1027
1028			if (prefix.s_addr != p.s_addr ||
1029			    mask.s_addr != m.s_addr)
1030				continue;
1031		}
1032
1033		/*
1034		 * If we got a matching prefix route inserted by other
1035		 * interface address, we are done here.
1036		 */
1037		if (ia->ia_flags & IFA_ROUTE) {
1038#ifdef RADIX_MPATH
1039			if (ia->ia_addr.sin_addr.s_addr ==
1040			    target->ia_addr.sin_addr.s_addr) {
1041				IN_IFADDR_RUNLOCK();
1042				return (EEXIST);
1043			} else
1044				break;
1045#endif
1046			if (V_sameprefixcarponly &&
1047			    target->ia_ifp->if_type != IFT_CARP &&
1048			    ia->ia_ifp->if_type != IFT_CARP) {
1049				IN_IFADDR_RUNLOCK();
1050				return (EEXIST);
1051			} else {
1052				in_addralias_rtmsg(RTM_ADD, &prefix, target);
1053				IN_IFADDR_RUNLOCK();
1054				return (0);
1055			}
1056		}
1057	}
1058	IN_IFADDR_RUNLOCK();
1059
1060	/*
1061	 * No-one seem to have this prefix route, so we try to insert it.
1062	 */
1063	error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
1064	if (!error)
1065		target->ia_flags |= IFA_ROUTE;
1066	return (error);
1067}
1068
/* Declared here rather than pulled in from a header. */
extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);

/*
 * If there is no other address in the system that can serve a route to the
 * same prefix, remove the route.  Hand over the route to the new address
 * otherwise.
 *
 * "target" is the address being scrubbed.  "flags" is tested against
 * LLE_STATIC to decide whether the loopback route reference and the ARP
 * entry of the address are dropped, and is also passed on to
 * lltable_prefix_free().
 *
 * Returns 0 when "target" does not own the prefix route (only a routing
 * message is sent).  Otherwise returns the result of the last rtinit()
 * call: the RTM_ADD for the address taking over the route, or the
 * RTM_DELETE when nobody takes over.  The result of
 * ifa_del_loopback_route() only decides whether IFA_RTSELF is cleared; it
 * is not propagated to the caller.
 */
static int
in_scrubprefix(struct in_ifaddr *target, u_int flags)
{
	struct in_ifaddr *ia;
	struct in_addr prefix, mask, p;
	int error = 0;
	struct sockaddr_in prefix0, mask0;

	/*
	 * Remove the loopback route to the interface address.
	 * The "useloopback" setting is not consulted because if the
	 * user configures an interface address, turns off this
	 * setting, and then tries to delete that interface address,
	 * checking the current setting of "useloopback" would leave
	 * that interface address loopback route untouched, which
	 * would be wrong. Therefore the interface address loopback route
	 * deletion is unconditional.
	 */
	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
	    (target->ia_flags & IFA_RTSELF)) {
		struct route ia_ro;
		int freeit = 0;

		bzero(&ia_ro, sizeof(ia_ro));
		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
		rtalloc_ign_fib(&ia_ro, 0, 0);
		/*
		 * Only a route that goes through the loopback interface is
		 * considered.  With at most one reference left the route is
		 * marked for deletion below; otherwise, for LLE_STATIC
		 * callers, one reference is dropped and IFA_RTSELF cleared.
		 */
		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
			RT_LOCK(ia_ro.ro_rt);
			if (ia_ro.ro_rt->rt_refcnt <= 1)
				freeit = 1;
			else if (flags & LLE_STATIC) {
				RT_REMREF(ia_ro.ro_rt);
				target->ia_flags &= ~IFA_RTSELF;
			}
			RTFREE_LOCKED(ia_ro.ro_rt);
		}
		if (freeit && (flags & LLE_STATIC)) {
			error = ifa_del_loopback_route((struct ifaddr *)target,
				       (struct sockaddr *)&target->ia_addr);
			if (error == 0)
				target->ia_flags &= ~IFA_RTSELF;
		}
		if ((flags & LLE_STATIC) &&
			!(target->ia_ifp->if_flags & IFF_NOARP))
			/* remove arp cache */
			arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
	}

	/*
	 * Compute the prefix the same way in_addprefix() does: the
	 * destination address for loopback/point-to-point interfaces, the
	 * masked local address otherwise.  "mask" is only used for that
	 * masking.
	 */
	if (rtinitflags(target))
		prefix = target->ia_dstaddr.sin_addr;
	else {
		prefix = target->ia_addr.sin_addr;
		mask = target->ia_sockmask.sin_addr;
		prefix.s_addr &= mask.s_addr;
	}

	/* "target" never owned the prefix route: just report the removal. */
	if ((target->ia_flags & IFA_ROUTE) == 0) {
		in_addralias_rtmsg(RTM_DELETE, &prefix, target);
		return (0);
	}

	IN_IFADDR_RLOCK();
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
		if (rtinitflags(ia))
			p = ia->ia_dstaddr.sin_addr;
		else {
			p = ia->ia_addr.sin_addr;
			p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
		}

		/* Candidates must share the prefix and be on an UP interface. */
		if ((prefix.s_addr != p.s_addr) ||
		    !(ia->ia_ifp->if_flags & IFF_UP))
			continue;

		/*
		 * If we got a matching prefix address, move IFA_ROUTE and
		 * the route itself to it.  Make sure that routing daemons
		 * get a heads-up.
		 *
		 * XXX: a special case for carp(4) interface - this should
		 *      be more generally specified as an interface that
		 *      doesn't support such action.
		 */
		if ((ia->ia_flags & IFA_ROUTE) == 0
		    && (ia->ia_ifp->if_type != IFT_CARP)) {
			/*
			 * Hold a reference on the new owner so that it can
			 * still be used after the address list lock is
			 * released for the rtinit() calls.
			 */
			ifa_ref(&ia->ia_ifa);
			IN_IFADDR_RUNLOCK();
			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
			    rtinitflags(target));
			if (error == 0)
				target->ia_flags &= ~IFA_ROUTE;
			else
				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
					error);
			/* The add is attempted even if the delete failed. */
			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
			    rtinitflags(ia) | RTF_UP);
			if (error == 0)
				ia->ia_flags |= IFA_ROUTE;
			else
				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
					error);
			ifa_free(&ia->ia_ifa);
			return (error);
		}
	}
	IN_IFADDR_RUNLOCK();

	/*
	 * remove all L2 entries on the given prefix
	 *
	 * NOTE(review): ia_subnet and ia_subnetmask are stored into the
	 * sockaddrs as-is; they look like host byte order values, which is
	 * what IN_ARE_MASKED_ADDR_EQUAL() expects - confirm.
	 */
	bzero(&prefix0, sizeof(prefix0));
	prefix0.sin_len = sizeof(prefix0);
	prefix0.sin_family = AF_INET;
	prefix0.sin_addr.s_addr = target->ia_subnet;
	bzero(&mask0, sizeof(mask0));
	mask0.sin_len = sizeof(mask0);
	mask0.sin_family = AF_INET;
	mask0.sin_addr.s_addr = target->ia_subnetmask;
	lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
			    (struct sockaddr *)&mask0, flags);

	/*
	 * As no-one seem to have this prefix, we can remove the route.
	 */
	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
	if (error == 0)
		target->ia_flags &= ~IFA_ROUTE;
	else
		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
	return (error);
}
1209
1210#undef rtinitflags
1211
1212/*
1213 * Return 1 if the address might be a local broadcast address.
1214 */
1215int
1216in_broadcast(struct in_addr in, struct ifnet *ifp)
1217{
1218	register struct ifaddr *ifa;
1219	u_long t;
1220
1221	if (in.s_addr == INADDR_BROADCAST ||
1222	    in.s_addr == INADDR_ANY)
1223		return (1);
1224	if ((ifp->if_flags & IFF_BROADCAST) == 0)
1225		return (0);
1226	t = ntohl(in.s_addr);
1227	/*
1228	 * Look through the list of addresses for a match
1229	 * with a broadcast address.
1230	 */
1231#define ia ((struct in_ifaddr *)ifa)
1232	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1233		if (ifa->ifa_addr->sa_family == AF_INET &&
1234		    (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
1235		     /*
1236		      * Check for old-style (host 0) broadcast, but
1237		      * taking into account that RFC 3021 obsoletes it.
1238		      */
1239		     (ia->ia_subnetmask != IN_RFC3021_MASK &&
1240		     t == ia->ia_subnet)) &&
1241		     /*
1242		      * Check for an all one subnetmask. These
1243		      * only exist when an interface gets a secondary
1244		      * address.
1245		      */
1246		     ia->ia_subnetmask != (u_long)0xffffffff)
1247			    return (1);
1248	return (0);
1249#undef ia
1250}
1251
/*
 * On interface removal, clean up IPv4 data structures hung off of the ifnet.
 *
 * Calls in_pcbpurgeif0() for the V_ripcbinfo and V_udbinfo pcb tables
 * (presumably raw IP and UDP - confirm against their definitions) and then
 * releases the interface's IPv4 multicast state via in_purgemaddrs().
 */
void
in_ifdetach(struct ifnet *ifp)
{

	in_pcbpurgeif0(&V_ripcbinfo, ifp);
	in_pcbpurgeif0(&V_udbinfo, ifp);
	in_purgemaddrs(ifp);
}
1263
1264/*
1265 * Delete all IPv4 multicast address records, and associated link-layer
1266 * multicast address records, associated with ifp.
1267 * XXX It looks like domifdetach runs AFTER the link layer cleanup.
1268 * XXX This should not race with ifma_protospec being set during
1269 * a new allocation, if it does, we have bigger problems.
1270 */
1271static void
1272in_purgemaddrs(struct ifnet *ifp)
1273{
1274	LIST_HEAD(,in_multi) purgeinms;
1275	struct in_multi		*inm, *tinm;
1276	struct ifmultiaddr	*ifma;
1277
1278	LIST_INIT(&purgeinms);
1279	IN_MULTI_LOCK();
1280
1281	/*
1282	 * Extract list of in_multi associated with the detaching ifp
1283	 * which the PF_INET layer is about to release.
1284	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
1285	 * by code further down.
1286	 */
1287	IF_ADDR_LOCK(ifp);
1288	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1289		if (ifma->ifma_addr->sa_family != AF_INET ||
1290		    ifma->ifma_protospec == NULL)
1291			continue;
1292#if 0
1293		KASSERT(ifma->ifma_protospec != NULL,
1294		    ("%s: ifma_protospec is NULL", __func__));
1295#endif
1296		inm = (struct in_multi *)ifma->ifma_protospec;
1297		LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
1298	}
1299	IF_ADDR_UNLOCK(ifp);
1300
1301	LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
1302		LIST_REMOVE(inm, inm_link);
1303		inm_release_locked(inm);
1304	}
1305	igmp_ifdetach(ifp);
1306
1307	IN_MULTI_UNLOCK();
1308}
1309
1310#include <net/if_dl.h>
1311#include <netinet/if_ether.h>
1312
/*
 * IPv4 link-layer table entry: the generic llentry followed by the IPv4
 * address it belongs to (filled in by in_lltable_new()).
 *
 * NOTE(review): presumably L3_ADDR(lle) relies on l3_addr4 being laid out
 * directly after "base" - confirm against the macro before reordering.
 */
struct in_llentry {
	struct llentry		base;		/* generic entry; must stay first */
	struct sockaddr_in	l3_addr4;	/* IPv4 address of the entry */
};
1317
1318static struct llentry *
1319in_lltable_new(const struct sockaddr *l3addr, u_int flags)
1320{
1321	struct in_llentry *lle;
1322
1323	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
1324	if (lle == NULL)		/* NB: caller generates msg */
1325		return NULL;
1326
1327	callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
1328	/*
1329	 * For IPv4 this will trigger "arpresolve" to generate
1330	 * an ARP request.
1331	 */
1332	lle->base.la_expire = time_uptime; /* mark expired */
1333	lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
1334	lle->base.lle_refcnt = 1;
1335	LLE_LOCK_INIT(&lle->base);
1336	return &lle->base;
1337}
1338
/*
 * Deletes an address from the address table.
 * This function is called by the timer functions
 * such as arptimer() and nd6_llinfo_timer(), and
 * the caller does the locking.
 *
 * The entry must be write-locked on entry: the lock is released and
 * destroyed here before the memory is returned to M_LLTABLE.  "llt" is
 * not used.
 */
static void
in_lltable_free(struct lltable *llt, struct llentry *lle)
{
	LLE_WUNLOCK(lle);
	LLE_LOCK_DESTROY(lle);
	free(lle, M_LLTABLE);
}
1352
1353
/*
 * True when address "d" falls inside prefix "a" under mask "m"; all three
 * are pointers to struct sockaddr_in.  Note the mixed byte orders: "d" is
 * converted with ntohl() before the comparison, while "a" and "m" are used
 * as stored and therefore have to hold host byte order values.
 */
#define	IN_ARE_MASKED_ADDR_EQUAL(d, a, m)				\
	(((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) &	\
	    (m)->sin_addr.s_addr) == 0)
1356
1357static void
1358in_lltable_prefix_free(struct lltable *llt,
1359		       const struct sockaddr *prefix,
1360		       const struct sockaddr *mask,
1361		       u_int flags)
1362{
1363	const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
1364	const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
1365	struct llentry *lle, *next;
1366	register int i;
1367	size_t pkts_dropped;
1368
1369	for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
1370		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
1371
1372		        /*
1373			 * (flags & LLE_STATIC) means deleting all entries
1374			 * including static ARP entries
1375			 */
1376			if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle),
1377						     pfx, msk) &&
1378			    ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
1379				int canceled;
1380
1381				canceled = callout_drain(&lle->la_timer);
1382				LLE_WLOCK(lle);
1383				if (canceled)
1384					LLE_REMREF(lle);
1385				pkts_dropped = llentry_free(lle);
1386				ARPSTAT_ADD(dropped, pkts_dropped);
1387			}
1388		}
1389	}
1390}
1391
1392
1393static int
1394in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
1395{
1396	struct rtentry *rt;
1397
1398	KASSERT(l3addr->sa_family == AF_INET,
1399	    ("sin_family %d", l3addr->sa_family));
1400
1401	/* XXX rtalloc1 should take a const param */
1402	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
1403
1404	if (rt == NULL)
1405		return (EINVAL);
1406
1407	/*
1408	 * If the gateway for an existing host route matches the target L3
1409	 * address, which is a special route inserted by some implementation
1410	 * such as MANET, and the interface is of the correct type, then
1411	 * allow for ARP to proceed.
1412	 */
1413	if (rt->rt_flags & RTF_GATEWAY) {
1414		if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
1415			rt->rt_ifp->if_type != IFT_ETHER ||
1416			  (rt->rt_ifp->if_flags &
1417			   (IFF_NOARP | IFF_STATICARP)) != 0 ||
1418			  memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
1419				 sizeof(in_addr_t)) != 0) {
1420			RTFREE_LOCKED(rt);
1421			return (EINVAL);
1422		}
1423	}
1424
1425	/*
1426	 * Make sure that at least the destination address is covered
1427	 * by the route. This is for handling the case where 2 or more
1428	 * interfaces have the same prefix. An incoming packet arrives
1429	 * on one interface and the corresponding outgoing packet leaves
1430	 * another interface.
1431	 */
1432	if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
1433		const char *sa, *mask, *addr, *lim;
1434		int len;
1435
1436		mask = (const char *)rt_mask(rt);
1437		/*
1438		 * Just being extra cautious to avoid some custom
1439		 * code getting into trouble.
1440		 */
1441		if (mask == NULL) {
1442			RTFREE_LOCKED(rt);
1443			return (EINVAL);
1444		}
1445
1446		sa = (const char *)rt_key(rt);
1447		addr = (const char *)l3addr;
1448		len = ((const struct sockaddr_in *)l3addr)->sin_len;
1449		lim = addr + len;
1450
1451		for ( ; addr < lim; sa++, mask++, addr++) {
1452			if ((*sa ^ *addr) & *mask) {
1453#ifdef DIAGNOSTIC
1454				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
1455				    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
1456#endif
1457				RTFREE_LOCKED(rt);
1458				return (EINVAL);
1459			}
1460		}
1461	}
1462
1463	RTFREE_LOCKED(rt);
1464	return (0);
1465}
1466
/*
 * Look up, and optionally create or delete, the entry for the IPv4
 * address "l3addr" in "llt".
 *
 * Return values:
 *  - NULL when no live entry exists and LLE_CREATE is not set, or when
 *    creation fails (route check rejected the address, or allocation
 *    failed);
 *  - (void *)-1 when LLE_DELETE is set and an entry was found;
 *  - otherwise the entry, write-locked when LLE_EXCLUSIVE is set in
 *    "flags" and read-locked when it is not.  This also applies to an
 *    entry that was just created.
 *
 * Entries marked LLE_DELETED are ignored by the search.
 */
static struct llentry *
in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
{
	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	struct llentries *lleh;
	u_int hashkey;

	IF_AFDATA_LOCK_ASSERT(ifp);
	KASSERT(l3addr->sa_family == AF_INET,
	    ("sin_family %d", l3addr->sa_family));

	/* The bucket is selected from the raw (network order) address. */
	hashkey = sin->sin_addr.s_addr;
	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
	LIST_FOREACH(lle, lleh, lle_next) {
		struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
		if (lle->la_flags & LLE_DELETED)
			continue;
		if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
			break;
	}
	/* No live entry matched in the bucket. */
	if (lle == NULL) {
#ifdef DIAGNOSTIC
		if (flags & LLE_DELETE)
			log(LOG_INFO, "interface address is missing from cache = %p  in delete\n", lle);
#endif
		if (!(flags & LLE_CREATE))
			return (NULL);
		/*
		 * A route that covers the given address must have
		 * been installed 1st because we are doing a resolution,
		 * verify this.
		 */
		if (!(flags & LLE_IFADDR) &&
		    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
			goto done;

		lle = in_lltable_new(l3addr, flags);
		if (lle == NULL) {
			log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
			goto done;
		}
		lle->la_flags = flags & ~LLE_CREATE;
		/*
		 * An entry created for an interface address is filled in
		 * with the interface's own link-layer address and marked
		 * valid and static right away.
		 */
		if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
			bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
			lle->la_flags |= (LLE_VALID | LLE_STATIC);
		}

		lle->lle_tbl  = llt;
		lle->lle_head = lleh;
		LIST_INSERT_HEAD(lleh, lle, lle_next);
	} else if (flags & LLE_DELETE) {
		/*
		 * An interface-address entry is only marked deleted when
		 * the caller also passes LLE_IFADDR; other entries are
		 * always marked.
		 */
		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
			LLE_WLOCK(lle);
			lle->la_flags = LLE_DELETED;
			EVENTHANDLER_INVOKE(arp_update_event, lle);
			LLE_WUNLOCK(lle);
#ifdef DIAGNOSTIC
			log(LOG_INFO, "ifaddr cache = %p  is deleted\n", lle);
#endif
		}
		/* Sentinel: an entry existed, but none is handed back. */
		lle = (void *)-1;

	}
	/*
	 * NOTE(review): LLE_IS_VALID() presumably rejects both NULL and the
	 * (void *)-1 sentinel so that only real entries get locked - confirm
	 * against its definition.
	 */
	if (LLE_IS_VALID(lle)) {
		if (flags & LLE_EXCLUSIVE)
			LLE_WLOCK(lle);
		else
			LLE_RLOCK(lle);
	}
done:
	return (lle);
}
1546static int
1547in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
1548{
1549#define	SIN(lle)	((struct sockaddr_in *) L3_ADDR(lle))
1550	struct ifnet *ifp = llt->llt_ifp;
1551	struct llentry *lle;
1552	/* XXX stack use */
1553	struct {
1554		struct rt_msghdr	rtm;
1555		struct sockaddr_inarp	sin;
1556		struct sockaddr_dl	sdl;
1557	} arpc;
1558	int error, i;
1559
1560	LLTABLE_LOCK_ASSERT();
1561
1562	error = 0;
1563	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
1564		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
1565			struct sockaddr_dl *sdl;
1566
1567			/* skip deleted entries */
1568			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
1569				continue;
1570			/* Skip if jailed and not a valid IP of the prison. */
1571			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
1572				continue;
1573			/*
1574			 * produce a msg made of:
1575			 *  struct rt_msghdr;
1576			 *  struct sockaddr_inarp; (IPv4)
1577			 *  struct sockaddr_dl;
1578			 */
1579			bzero(&arpc, sizeof(arpc));
1580			arpc.rtm.rtm_msglen = sizeof(arpc);
1581			arpc.rtm.rtm_version = RTM_VERSION;
1582			arpc.rtm.rtm_type = RTM_GET;
1583			arpc.rtm.rtm_flags = RTF_UP;
1584			arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
1585			arpc.sin.sin_family = AF_INET;
1586			arpc.sin.sin_len = sizeof(arpc.sin);
1587			arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
1588
1589			/* publish */
1590			if (lle->la_flags & LLE_PUB) {
1591				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
1592				/* proxy only */
1593				if (lle->la_flags & LLE_PROXY)
1594					arpc.sin.sin_other = SIN_PROXY;
1595			}
1596
1597			sdl = &arpc.sdl;
1598			sdl->sdl_family = AF_LINK;
1599			sdl->sdl_len = sizeof(*sdl);
1600			sdl->sdl_index = ifp->if_index;
1601			sdl->sdl_type = ifp->if_type;
1602			if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
1603				sdl->sdl_alen = ifp->if_addrlen;
1604				bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
1605			} else {
1606				sdl->sdl_alen = 0;
1607				bzero(LLADDR(sdl), ifp->if_addrlen);
1608			}
1609
1610			arpc.rtm.rtm_rmx.rmx_expire =
1611			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
1612			arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
1613			if (lle->la_flags & LLE_STATIC)
1614				arpc.rtm.rtm_flags |= RTF_STATIC;
1615			arpc.rtm.rtm_index = ifp->if_index;
1616			error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
1617			if (error)
1618				break;
1619		}
1620	}
1621	return error;
1622#undef SIN
1623}
1624
1625void *
1626in_domifattach(struct ifnet *ifp)
1627{
1628	struct in_ifinfo *ii;
1629	struct lltable *llt;
1630
1631	ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
1632
1633	llt = lltable_init(ifp, AF_INET);
1634	if (llt != NULL) {
1635		llt->llt_free = in_lltable_free;
1636		llt->llt_prefix_free = in_lltable_prefix_free;
1637		llt->llt_lookup = in_lltable_lookup;
1638		llt->llt_dump = in_lltable_dump;
1639	}
1640	ii->ii_llt = llt;
1641
1642	ii->ii_igmp = igmp_domifattach(ifp);
1643
1644	return ii;
1645}
1646
1647void
1648in_domifdetach(struct ifnet *ifp, void *aux)
1649{
1650	struct in_ifinfo *ii = (struct in_ifinfo *)aux;
1651
1652	igmp_domifdetach(ifp);
1653	lltable_free(ii->ii_llt);
1654	free(ii, M_IFADDR);
1655}
1656