in.c revision 226402
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (C) 2001 WIDE Project.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 *    may be used to endorse or promote products derived from this software
16 *    without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 *	@(#)in.c	8.4 (Berkeley) 1/9/95
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/netinet/in.c 226402 2011-10-15 18:41:25Z glebius $");
35
36#include "opt_mpath.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/sockio.h>
41#include <sys/malloc.h>
42#include <sys/priv.h>
43#include <sys/socket.h>
44#include <sys/jail.h>
45#include <sys/kernel.h>
46#include <sys/proc.h>
47#include <sys/sysctl.h>
48#include <sys/syslog.h>
49
50#include <net/if.h>
51#include <net/if_var.h>
52#include <net/if_arp.h>
53#include <net/if_dl.h>
54#include <net/if_llatbl.h>
55#include <net/if_types.h>
56#include <net/route.h>
57#include <net/vnet.h>
58
59#include <netinet/in.h>
60#include <netinet/in_var.h>
61#include <netinet/in_pcb.h>
62#include <netinet/ip_var.h>
63#include <netinet/igmp_var.h>
64#include <netinet/udp.h>
65#include <netinet/udp_var.h>
66
/*
 * Prototypes for file-local (static) helpers used by the ioctl and
 * address-initialization code in this file.
 */
67static int in_mask2len(struct in_addr *);
68static void in_len2mask(struct in_addr *, int);
69static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
70	struct ifnet *, struct thread *);
71
72static int	in_addprefix(struct in_ifaddr *, int);
73static int	in_scrubprefix(struct in_ifaddr *, u_int);
74static void	in_socktrim(struct sockaddr_in *);
75static int	in_ifinit(struct ifnet *,
76	    struct in_ifaddr *, struct sockaddr_in *, int);
77static void	in_purgemaddrs(struct ifnet *);
78
/*
 * Per-vnet knob exported read/write under the _net_inet_ip sysctl node as
 * "same_prefix_carp_only".  It is consulted by in_addprefix(): when set,
 * adding a prefix that already has a route owned by another address fails
 * with EEXIST unless one of the two interfaces is of type IFT_CARP.
 */
79static VNET_DEFINE(int, sameprefixcarponly);
80#define	V_sameprefixcarponly		VNET(sameprefixcarponly)
81SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
82	&VNET_NAME(sameprefixcarponly), 0,
83	"Refuse to create same prefixes on different interfaces");
84
/* Declared here, defined elsewhere; not referenced in the visible part of this file. */
85VNET_DECLARE(struct inpcbinfo, ripcbinfo);
86#define	V_ripcbinfo			VNET(ripcbinfo)
87
88VNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
89#define	V_arpstat		VNET(arpstat)
90
91/*
92 * Return 1 if an internet address is for a ``local'' host
93 * (one to which we have a connection).
94 */
95int
96in_localaddr(struct in_addr in)
97{
98	register u_long i = ntohl(in.s_addr);
99	register struct in_ifaddr *ia;
100
101	IN_IFADDR_RLOCK();
102	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
103		if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
104			IN_IFADDR_RUNLOCK();
105			return (1);
106		}
107	}
108	IN_IFADDR_RUNLOCK();
109	return (0);
110}
111
112/*
113 * Return 1 if an internet address is for the local host and configured
114 * on one of its interfaces.
115 */
116int
117in_localip(struct in_addr in)
118{
119	struct in_ifaddr *ia;
120
121	IN_IFADDR_RLOCK();
122	LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
123		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
124			IN_IFADDR_RUNLOCK();
125			return (1);
126		}
127	}
128	IN_IFADDR_RUNLOCK();
129	return (0);
130}
131
132/*
133 * Determine whether an IP address is in a reserved set of addresses
134 * that may not be forwarded, or whether datagrams to that destination
135 * may be forwarded.
136 */
137int
138in_canforward(struct in_addr in)
139{
140	register u_long i = ntohl(in.s_addr);
141	register u_long net;
142
143	if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
144		return (0);
145	if (IN_CLASSA(i)) {
146		net = i & IN_CLASSA_NET;
147		if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
148			return (0);
149	}
150	return (1);
151}
152
153/*
154 * Trim a mask in a sockaddr
155 */
156static void
157in_socktrim(struct sockaddr_in *ap)
158{
159    register char *cplim = (char *) &ap->sin_addr;
160    register char *cp = (char *) (&ap->sin_addr + 1);
161
162    ap->sin_len = 0;
163    while (--cp >= cplim)
164	if (*cp) {
165	    (ap)->sin_len = cp - (char *) (ap) + 1;
166	    break;
167	}
168}
169
/*
 * Convert a netmask into a prefix length.
 *
 * The mask is examined byte by byte in memory order (i.e. network byte
 * order): whole 0xff bytes count for eight bits each, then the leading one
 * bits of the first byte that is not 0xff are added.  Any bits after the
 * first zero bit are ignored, so a non-contiguous mask yields the length of
 * its leading run of ones.
 *
 * Returns a value between 0 and 32 inclusive.
 */
static int
in_mask2len(struct in_addr *mask)
{
	const unsigned char *bytes = (const unsigned char *)mask;
	const size_t nbytes = sizeof(*mask);
	size_t full;
	int bits;

	/* Count the leading all-ones bytes. */
	for (full = 0; full < nbytes; full++) {
		if (bytes[full] != 0xff)
			break;
	}

	/* Count the leading one bits of the first partial byte, if any. */
	bits = 0;
	if (full < nbytes) {
		while (bits < 8 && (bytes[full] & (0x80 >> bits)) != 0)
			bits++;
	}

	return ((int)(full * 8) + bits);
}
191
/*
 * Build the netmask (in network byte order) that corresponds to a prefix
 * length.
 *
 * len is clamped to the range [0, 32]: a non-positive length produces an
 * all-zero mask and anything above 32 produces an all-ones mask.  Without
 * the clamp an oversized length would write past the end of *mask and a
 * negative one would shift by a negative amount.
 */
static void
in_len2mask(struct in_addr *mask, int len)
{
	const int maxlen = (int)(sizeof(*mask) * 8);

	if (len <= 0) {
		mask->s_addr = 0;
		return;
	}
	if (len > maxlen)
		len = maxlen;

	/* The shift count is in [0, 31] here, so the shift is well defined. */
	mask->s_addr = htonl(~(uint32_t)0 << (maxlen - len));
}
205
206/*
207 * Generic internet control operations (ioctl's).
208 *
209 * ifp is NULL if not an interface-specific ioctl.
 *
 * data is interpreted either as a struct ifreq or as a struct in_aliasreq,
 * depending on cmd (both views are set up at the top of the function).
 *
 * SIOC[ADG]LIFADDR are handed to in_lifaddr_ioctl(); commands not listed
 * in the first switch go to the interface's own if_ioctl handler, or fail
 * with EOPNOTSUPP when there is no interface or no handler.  For the
 * commands handled here a NULL ifp yields EADDRNOTAVAIL.  When td is not
 * NULL, set/add commands require PRIV_NET_ADDIFADDR and SIOCDIFADDR
 * requires PRIV_NET_DELIFADDR.
 *
 * Returns 0 on success or an errno value.
210 */
211/* ARGSUSED */
212int
213in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
214    struct thread *td)
215{
216	register struct ifreq *ifr = (struct ifreq *)data;
217	register struct in_ifaddr *ia, *iap;
218	register struct ifaddr *ifa;
219	struct in_addr allhosts_addr;
220	struct in_addr dst;
221	struct in_ifinfo *ii;
222	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
223	struct sockaddr_in oldaddr;
224	int error, hostIsNew, iaIsNew, maskIsNew;
225	int iaIsFirst;
226
227	ia = NULL;
228	iaIsFirst = 0;
229	iaIsNew = 0;
230	allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
231
232	/*
233	 * Filter out ioctls we implement directly; forward the rest on to
234	 * in_lifaddr_ioctl() and ifp->if_ioctl().
235	 */
236	switch (cmd) {
237	case SIOCAIFADDR:
238	case SIOCDIFADDR:
239	case SIOCGIFADDR:
240	case SIOCGIFBRDADDR:
241	case SIOCGIFDSTADDR:
242	case SIOCGIFNETMASK:
243	case SIOCSIFADDR:
244	case SIOCSIFBRDADDR:
245	case SIOCSIFDSTADDR:
246	case SIOCSIFNETMASK:
247		break;
248
249	case SIOCALIFADDR:
250		if (td != NULL) {
251			error = priv_check(td, PRIV_NET_ADDIFADDR);
252			if (error)
253				return (error);
254		}
255		if (ifp == NULL)
256			return (EINVAL);
257		return in_lifaddr_ioctl(so, cmd, data, ifp, td);
258
259	case SIOCDLIFADDR:
260		if (td != NULL) {
261			error = priv_check(td, PRIV_NET_DELIFADDR);
262			if (error)
263				return (error);
264		}
265		if (ifp == NULL)
266			return (EINVAL);
267		return in_lifaddr_ioctl(so, cmd, data, ifp, td);
268
269	case SIOCGLIFADDR:
270		if (ifp == NULL)
271			return (EINVAL);
272		return in_lifaddr_ioctl(so, cmd, data, ifp, td);
273
274	default:
275		if (ifp == NULL || ifp->if_ioctl == NULL)
276			return (EOPNOTSUPP);
277		return ((*ifp->if_ioctl)(ifp, cmd, data));
278	}
279
	/* Everything handled below operates on a specific interface. */
280	if (ifp == NULL)
281		return (EADDRNOTAVAIL);
282
283	/*
284	 * Security checks before we get involved in any work.
285	 */
286	switch (cmd) {
287	case SIOCAIFADDR:
288	case SIOCSIFADDR:
289	case SIOCSIFBRDADDR:
290	case SIOCSIFNETMASK:
291	case SIOCSIFDSTADDR:
292		if (td != NULL) {
293			error = priv_check(td, PRIV_NET_ADDIFADDR);
294			if (error)
295				return (error);
296		}
297		break;
298
299	case SIOCDIFADDR:
300		if (td != NULL) {
301			error = priv_check(td, PRIV_NET_DELIFADDR);
302			if (error)
303				return (error);
304		}
305		break;
306	}
307
308	/*
309	 * Find address for this interface, if it exists.
310	 *
311	 * If an alias address was specified, find that one instead of the
312	 * first one on the interface, if possible.
313	 */
314	dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
315	IN_IFADDR_RLOCK();
316	LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
317		if (iap->ia_ifp == ifp &&
318		    iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
319			if (td == NULL || prison_check_ip4(td->td_ucred,
320			    &dst) == 0)
321				ia = iap;
322			break;
323		}
324	}
	/* Take the reference while the lock still pins the entry. */
325	if (ia != NULL)
326		ifa_ref(&ia->ia_ifa);
327	IN_IFADDR_RUNLOCK();
	/*
	 * No exact match: fall back to the first AF_INET address on the
	 * interface that passes the jail check (when td is given).
	 */
328	if (ia == NULL) {
329		IF_ADDR_LOCK(ifp);
330		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
331			iap = ifatoia(ifa);
332			if (iap->ia_addr.sin_family == AF_INET) {
333				if (td != NULL &&
334				    prison_check_ip4(td->td_ucred,
335				    &iap->ia_addr.sin_addr) != 0)
336					continue;
337				ia = iap;
338				break;
339			}
340		}
341		if (ia != NULL)
342			ifa_ref(&ia->ia_ifa);
343		IF_ADDR_UNLOCK(ifp);
344	}
	/* Neither lookup found a usable IPv4 address on this interface. */
345	if (ia == NULL)
346		iaIsFirst = 1;
347
348	error = 0;
349	switch (cmd) {
350	case SIOCAIFADDR:
351	case SIOCDIFADDR:
352		if (ifra->ifra_addr.sin_family == AF_INET) {
353			struct in_ifaddr *oia;
354
			/*
			 * Starting at the address found above, walk the
			 * in_ifaddr list for an entry on ifp carrying exactly
			 * the requested address.  The reference held on the
			 * starting entry is moved to the match, or dropped
			 * when there is none.
			 */
355			IN_IFADDR_RLOCK();
356			for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
357				if (ia->ia_ifp == ifp  &&
358				    ia->ia_addr.sin_addr.s_addr ==
359				    ifra->ifra_addr.sin_addr.s_addr)
360					break;
361			}
362			if (ia != NULL && ia != oia)
363				ifa_ref(&ia->ia_ifa);
364			if (oia != NULL && ia != oia)
365				ifa_free(&oia->ia_ifa);
366			IN_IFADDR_RUNLOCK();
367			if ((ifp->if_flags & IFF_POINTOPOINT)
368			    && (cmd == SIOCAIFADDR)
369			    && (ifra->ifra_dstaddr.sin_addr.s_addr
370				== INADDR_ANY)) {
371				error = EDESTADDRREQ;
372				goto out;
373			}
374		}
375		if (cmd == SIOCDIFADDR && ia == NULL) {
376			error = EADDRNOTAVAIL;
377			goto out;
378		}
379		/* FALLTHROUGH */
380	case SIOCSIFADDR:
381	case SIOCSIFNETMASK:
382	case SIOCSIFDSTADDR:
		/*
		 * Nothing to operate on yet: allocate a zeroed in_ifaddr and
		 * link it on both the interface list and the in_ifaddr list,
		 * taking one reference for each list.
		 */
383		if (ia == NULL) {
384			ia = (struct in_ifaddr *)
385				malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
386				    M_ZERO);
387			if (ia == NULL) {
388				error = ENOBUFS;
389				goto out;
390			}
391
392			ifa = &ia->ia_ifa;
393			ifa_init(ifa);
394			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
395			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
396			ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
397
398			ia->ia_sockmask.sin_len = 8;
399			ia->ia_sockmask.sin_family = AF_INET;
400			if (ifp->if_flags & IFF_BROADCAST) {
401				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
402				ia->ia_broadaddr.sin_family = AF_INET;
403			}
404			ia->ia_ifp = ifp;
405
406			ifa_ref(ifa);			/* if_addrhead */
407			IF_ADDR_LOCK(ifp);
408			TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
409			IF_ADDR_UNLOCK(ifp);
410			ifa_ref(ifa);			/* in_ifaddrhead */
411			IN_IFADDR_WLOCK();
412			TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
413			IN_IFADDR_WUNLOCK();
414			iaIsNew = 1;
415		}
416		break;
417
418	case SIOCSIFBRDADDR:
419	case SIOCGIFADDR:
420	case SIOCGIFNETMASK:
421	case SIOCGIFDSTADDR:
422	case SIOCGIFBRDADDR:
423		if (ia == NULL) {
424			error = EADDRNOTAVAIL;
425			goto out;
426		}
427		break;
428	}
429
430	/*
431	 * Most paths in this switch return directly or via out.  Only paths
432	 * that remove the address break in order to hit common removal code.
433	 */
434	switch (cmd) {
435	case SIOCGIFADDR:
436		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
437		goto out;
438
439	case SIOCGIFBRDADDR:
440		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
441			error = EINVAL;
442			goto out;
443		}
444		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
445		goto out;
446
447	case SIOCGIFDSTADDR:
448		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
449			error = EINVAL;
450			goto out;
451		}
452		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
453		goto out;
454
455	case SIOCGIFNETMASK:
456		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
457		goto out;
458
459	case SIOCSIFDSTADDR:
460		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
461			error = EINVAL;
462			goto out;
463		}
464		oldaddr = ia->ia_dstaddr;
465		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
466		if (ifp->if_ioctl != NULL) {
467			error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
468			    (caddr_t)ia);
469			if (error) {
				/* Driver refused: restore the old destination. */
470				ia->ia_dstaddr = oldaddr;
471				goto out;
472			}
473		}
474		if (ia->ia_flags & IFA_ROUTE) {
			/*
			 * Delete the host route via the old destination, then
			 * add one via the new destination.
			 */
475			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
476			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
477			ia->ia_ifa.ifa_dstaddr =
478					(struct sockaddr *)&ia->ia_dstaddr;
479			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
480		}
481		goto out;
482
483	case SIOCSIFBRDADDR:
484		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
485			error = EINVAL;
486			goto out;
487		}
488		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
489		goto out;
490
491	case SIOCSIFADDR:
492		error = in_ifinit(ifp, ia,
493		    (struct sockaddr_in *) &ifr->ifr_addr, 1);
		/* A freshly allocated address that failed to initialize is removed. */
494		if (error != 0 && iaIsNew)
495			break;
496		if (error == 0) {
497			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
498			if (iaIsFirst &&
499			    (ifp->if_flags & IFF_MULTICAST) != 0) {
500				error = in_joingroup(ifp, &allhosts_addr,
501				    NULL, &ii->ii_allhosts);
502			}
503			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
504		}
		/*
		 * NOTE(review): any error from in_ifinit() (for an existing
		 * address) or from in_joingroup() is discarded here and
		 * success is returned -- confirm this is intended.
		 */
505		error = 0;
506		goto out;
507
508	case SIOCSIFNETMASK:
509		ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
510		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
511		goto out;
512
513	case SIOCAIFADDR:
514		maskIsNew = 0;
515		hostIsNew = 1;
516		error = 0;
517		if (ia->ia_addr.sin_family == AF_INET) {
518			if (ifra->ifra_addr.sin_len == 0) {
519				ifra->ifra_addr = ia->ia_addr;
520				hostIsNew = 0;
521			} else if (ifra->ifra_addr.sin_addr.s_addr ==
522					       ia->ia_addr.sin_addr.s_addr)
523				hostIsNew = 0;
524		}
525		if (ifra->ifra_mask.sin_len) {
526			/*
527			 * QL: XXX
528			 * Need to scrub the prefix here in case
529			 * the issued command is SIOCAIFADDR with
530			 * the same address, but with a different
531			 * prefix length. And if the prefix length
532			 * is the same as before, then the call is
533			 * un-necessarily executed here.
534			 */
535			in_ifscrub(ifp, ia, LLE_STATIC);
536			ia->ia_sockmask = ifra->ifra_mask;
537			ia->ia_sockmask.sin_family = AF_INET;
538			ia->ia_subnetmask =
539			     ntohl(ia->ia_sockmask.sin_addr.s_addr);
540			maskIsNew = 1;
541		}
542		if ((ifp->if_flags & IFF_POINTOPOINT) &&
543		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
544			in_ifscrub(ifp, ia, LLE_STATIC);
545			ia->ia_dstaddr = ifra->ifra_dstaddr;
546			maskIsNew  = 1; /* We lie; but the effect's the same */
547		}
548		if (ifra->ifra_addr.sin_family == AF_INET &&
549		    (hostIsNew || maskIsNew))
550			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
		/* A freshly allocated address that failed to initialize is removed. */
551		if (error != 0 && iaIsNew)
552			break;
553
554		if ((ifp->if_flags & IFF_BROADCAST) &&
555		    (ifra->ifra_broadaddr.sin_family == AF_INET))
556			ia->ia_broadaddr = ifra->ifra_broadaddr;
557		if (error == 0) {
558			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
559			if (iaIsFirst &&
560			    (ifp->if_flags & IFF_MULTICAST) != 0) {
561				error = in_joingroup(ifp, &allhosts_addr,
562				    NULL, &ii->ii_allhosts);
563			}
564			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
565		}
566		goto out;
567
568	case SIOCDIFADDR:
569		/*
570		 * in_ifscrub kills the interface route.
571		 */
572		in_ifscrub(ifp, ia, LLE_STATIC);
573
574		/*
575		 * in_ifadown gets rid of all the rest of
576		 * the routes.  This is not quite the right
577		 * thing to do, but at least if we are running
578		 * a routing process they will come back.
579		 */
580		in_ifadown(&ia->ia_ifa, 1);
581		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
582		error = 0;
583		break;
584
585	default:
586		panic("in_control: unsupported ioctl");
587	}
588
	/*
	 * Common removal path.  It is reached by the "break"s above: from
	 * SIOCDIFADDR, and from SIOCSIFADDR/SIOCAIFADDR when in_ifinit()
	 * failed on a newly allocated address.  error keeps the value set
	 * before the break unless the re-check below fails.
	 */
589	IF_ADDR_LOCK(ifp);
590	/* Re-check that ia is still part of the list. */
591	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
592		if (ifa == &ia->ia_ifa)
593			break;
594	}
595	if (ifa == NULL) {
596		/*
597		 * If we lost the race with another thread, there is no need to
598		 * try it again for the next loop as there is no other exit
599		 * path between here and out.
600		 */
601		IF_ADDR_UNLOCK(ifp);
602		error = EADDRNOTAVAIL;
603		goto out;
604	}
605	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
606	IF_ADDR_UNLOCK(ifp);
607	ifa_free(&ia->ia_ifa);				/* if_addrhead */
608
609	IN_IFADDR_WLOCK();
610	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
	/* Only entries with an AF_INET address are unlinked from the hash. */
611	if (ia->ia_addr.sin_family == AF_INET) {
612		struct in_ifaddr *if_ia;
613
614		LIST_REMOVE(ia, ia_hash);
615		IN_IFADDR_WUNLOCK();
616		/*
617		 * If this is the last IPv4 address configured on this
618		 * interface, leave the all-hosts group.
619		 * No state-change report need be transmitted.
620		 */
621		if_ia = NULL;
622		IFP_TO_IA(ifp, if_ia);
623		if (if_ia == NULL) {
624			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
625			IN_MULTI_LOCK();
626			if (ii->ii_allhosts) {
627				(void)in_leavegroup_locked(ii->ii_allhosts,
628				    NULL);
629				ii->ii_allhosts = NULL;
630			}
631			IN_MULTI_UNLOCK();
632		} else
			/* presumably IFP_TO_IA() returned a referenced entry -- confirm */
633			ifa_free(&if_ia->ia_ifa);
634	} else
635		IN_IFADDR_WUNLOCK();
636	ifa_free(&ia->ia_ifa);				/* in_ifaddrhead */
637out:
	/*
	 * Drop the reference taken during lookup; for a newly allocated
	 * address this is presumably the initial reference set up by
	 * ifa_init() -- confirm.
	 */
638	if (ia != NULL)
639		ifa_free(&ia->ia_ifa);
640	return (error);
641}
642
643/*
644 * SIOC[GAD]LIFADDR.
645 *	SIOCGLIFADDR: get first address. (?!?)
646 *	SIOCGLIFADDR with IFLR_PREFIX:
647 *		get first address that matches the specified prefix.
648 *	SIOCALIFADDR: add the specified address.
649 *	SIOCALIFADDR with IFLR_PREFIX:
650 *		EINVAL since we can't deduce hostid part of the address.
651 *	SIOCDLIFADDR: delete the specified address.
652 *	SIOCDLIFADDR with IFLR_PREFIX:
653 *		delete the first address that matches the specified prefix.
654 * return values:
655 *	EINVAL on invalid parameters
656 *	EADDRNOTAVAIL on prefix match failed/specified address not found
657 *	other values may be returned from in_ioctl()
658 */
659static int
660in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
661    struct ifnet *ifp, struct thread *td)
662{
663	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
664	struct ifaddr *ifa;
665
666	/* sanity checks */
667	if (data == NULL || ifp == NULL) {
668		panic("invalid argument to in_lifaddr_ioctl");
669		/*NOTRECHED*/
670	}
671
672	switch (cmd) {
673	case SIOCGLIFADDR:
674		/* address must be specified on GET with IFLR_PREFIX */
675		if ((iflr->flags & IFLR_PREFIX) == 0)
676			break;
677		/*FALLTHROUGH*/
678	case SIOCALIFADDR:
679	case SIOCDLIFADDR:
680		/* address must be specified on ADD and DELETE */
681		if (iflr->addr.ss_family != AF_INET)
682			return (EINVAL);
683		if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
684			return (EINVAL);
685		/* XXX need improvement */
686		if (iflr->dstaddr.ss_family
687		 && iflr->dstaddr.ss_family != AF_INET)
688			return (EINVAL);
689		if (iflr->dstaddr.ss_family
690		 && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
691			return (EINVAL);
692		break;
693	default: /*shouldn't happen*/
694		return (EOPNOTSUPP);
695	}
696	if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
697		return (EINVAL);
698
699	switch (cmd) {
700	case SIOCALIFADDR:
701	    {
702		struct in_aliasreq ifra;
703
704		if (iflr->flags & IFLR_PREFIX)
705			return (EINVAL);
706
707		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
708		bzero(&ifra, sizeof(ifra));
709		bcopy(iflr->iflr_name, ifra.ifra_name,
710			sizeof(ifra.ifra_name));
711
712		bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
713
714		if (iflr->dstaddr.ss_family) {	/*XXX*/
715			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
716				iflr->dstaddr.ss_len);
717		}
718
719		ifra.ifra_mask.sin_family = AF_INET;
720		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
721		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
722
723		return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
724	    }
725	case SIOCGLIFADDR:
726	case SIOCDLIFADDR:
727	    {
728		struct in_ifaddr *ia;
729		struct in_addr mask, candidate, match;
730		struct sockaddr_in *sin;
731
732		bzero(&mask, sizeof(mask));
733		bzero(&match, sizeof(match));
734		if (iflr->flags & IFLR_PREFIX) {
735			/* lookup a prefix rather than address. */
736			in_len2mask(&mask, iflr->prefixlen);
737
738			sin = (struct sockaddr_in *)&iflr->addr;
739			match.s_addr = sin->sin_addr.s_addr;
740			match.s_addr &= mask.s_addr;
741
742			/* if you set extra bits, that's wrong */
743			if (match.s_addr != sin->sin_addr.s_addr)
744				return (EINVAL);
745
746		} else {
747			/* on getting an address, take the 1st match */
748			/* on deleting an address, do exact match */
749			if (cmd != SIOCGLIFADDR) {
750				in_len2mask(&mask, 32);
751				sin = (struct sockaddr_in *)&iflr->addr;
752				match.s_addr = sin->sin_addr.s_addr;
753			}
754		}
755
756		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)	{
757			if (ifa->ifa_addr->sa_family != AF_INET6)
758				continue;
759			if (match.s_addr == 0)
760				break;
761			candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
762			candidate.s_addr &= mask.s_addr;
763			if (candidate.s_addr == match.s_addr)
764				break;
765		}
766		if (ifa == NULL)
767			return (EADDRNOTAVAIL);
768		ia = (struct in_ifaddr *)ifa;
769
770		if (cmd == SIOCGLIFADDR) {
771			/* fill in the if_laddrreq structure */
772			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
773
774			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
775				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
776					ia->ia_dstaddr.sin_len);
777			} else
778				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
779
780			iflr->prefixlen =
781				in_mask2len(&ia->ia_sockmask.sin_addr);
782
783			iflr->flags = 0;	/*XXX*/
784
785			return (0);
786		} else {
787			struct in_aliasreq ifra;
788
789			/* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
790			bzero(&ifra, sizeof(ifra));
791			bcopy(iflr->iflr_name, ifra.ifra_name,
792				sizeof(ifra.ifra_name));
793
794			bcopy(&ia->ia_addr, &ifra.ifra_addr,
795				ia->ia_addr.sin_len);
796			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
797				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
798					ia->ia_dstaddr.sin_len);
799			}
800			bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr,
801				ia->ia_sockmask.sin_len);
802
803			return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
804			    ifp, td));
805		}
806	    }
807	}
808
809	return (EOPNOTSUPP);	/*just for safety*/
810}
811
812/*
813 * Delete any existing route for an interface.
814 */
815void
816in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
817{
818
819	in_scrubprefix(ia, flags);
820}
821
822/*
823 * Initialize an interface's internet address
824 * and routing table entry.
825 */
826static int
827in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
828    int scrub)
829{
830	register u_long i = ntohl(sin->sin_addr.s_addr);
831	struct sockaddr_in oldaddr;
832	int flags = RTF_UP, error = 0;
833
834	oldaddr = ia->ia_addr;
835	if (oldaddr.sin_family == AF_INET)
836		LIST_REMOVE(ia, ia_hash);
837	ia->ia_addr = *sin;
838	if (ia->ia_addr.sin_family == AF_INET) {
839		IN_IFADDR_WLOCK();
840		LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
841		    ia, ia_hash);
842		IN_IFADDR_WUNLOCK();
843	}
844	/*
845	 * Give the interface a chance to initialize
846	 * if this is its first address,
847	 * and to validate the address if necessary.
848	 */
849	if (ifp->if_ioctl != NULL) {
850		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
851		if (error) {
852			/* LIST_REMOVE(ia, ia_hash) is done in in_control */
853			ia->ia_addr = oldaddr;
854			IN_IFADDR_WLOCK();
855			if (ia->ia_addr.sin_family == AF_INET)
856				LIST_INSERT_HEAD(INADDR_HASH(
857				    ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
858			else
859				/*
860				 * If oldaddr family is not AF_INET (e.g.
861				 * interface has been just created) in_control
862				 * does not call LIST_REMOVE, and we end up
863				 * with bogus ia entries in hash
864				 */
865				LIST_REMOVE(ia, ia_hash);
866			IN_IFADDR_WUNLOCK();
867			return (error);
868		}
869	}
870	if (scrub) {
871		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
872		in_ifscrub(ifp, ia, LLE_STATIC);
873		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
874	}
875	/*
876	 * Be compatible with network classes, if netmask isn't supplied,
877	 * guess it based on classes.
878	 */
879	if (ia->ia_subnetmask == 0) {
880		if (IN_CLASSA(i))
881			ia->ia_subnetmask = IN_CLASSA_NET;
882		else if (IN_CLASSB(i))
883			ia->ia_subnetmask = IN_CLASSB_NET;
884		else
885			ia->ia_subnetmask = IN_CLASSC_NET;
886		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
887	}
888	ia->ia_subnet = i & ia->ia_subnetmask;
889	in_socktrim(&ia->ia_sockmask);
890	/*
891	 * XXX: carp(4) does not have interface route
892	 */
893	if (ifp->if_type == IFT_CARP)
894		return (0);
895	/*
896	 * Add route for the network.
897	 */
898	ia->ia_ifa.ifa_metric = ifp->if_metric;
899	if (ifp->if_flags & IFF_BROADCAST) {
900		if (ia->ia_subnetmask == IN_RFC3021_MASK)
901			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
902		else
903			ia->ia_broadaddr.sin_addr.s_addr =
904			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
905	} else if (ifp->if_flags & IFF_LOOPBACK) {
906		ia->ia_dstaddr = ia->ia_addr;
907		flags |= RTF_HOST;
908	} else if (ifp->if_flags & IFF_POINTOPOINT) {
909		if (ia->ia_dstaddr.sin_family != AF_INET)
910			return (0);
911		flags |= RTF_HOST;
912	}
913	if ((error = in_addprefix(ia, flags)) != 0)
914		return (error);
915
916	if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
917		return (0);
918
919	if (ifp->if_flags & IFF_POINTOPOINT) {
920		if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
921			return (0);
922	}
923
924
925	/*
926	 * add a loopback route to self
927	 */
928	if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
929		struct route ia_ro;
930
931		bzero(&ia_ro, sizeof(ia_ro));
932		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
933		rtalloc_ign_fib(&ia_ro, 0, 0);
934		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
935		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
936			RT_LOCK(ia_ro.ro_rt);
937			RT_ADDREF(ia_ro.ro_rt);
938			RTFREE_LOCKED(ia_ro.ro_rt);
939		} else
940			error = ifa_add_loopback_route((struct ifaddr *)ia,
941				       (struct sockaddr *)&ia->ia_addr);
942		if (error == 0)
943			ia->ia_flags |= IFA_RTSELF;
944		if (ia_ro.ro_rt != NULL)
945			RTFREE(ia_ro.ro_rt);
946	}
947
948	return (error);
949}
950
/*
 * Route flags implied by the interface an address lives on: RTF_HOST when
 * the interface is a loopback or point-to-point interface, 0 otherwise.
 */
#define	rtinitflags(x)							\
	(((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ?	\
	    RTF_HOST : 0)
954
955/*
956 * Generate a routing message when inserting or deleting
957 * an interface address alias.
958 */
959static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
960    struct in_ifaddr *target)
961{
962	struct route pfx_ro;
963	struct sockaddr_in *pfx_addr;
964	struct rtentry msg_rt;
965
966	/* QL: XXX
967	 * This is a bit questionable because there is no
968	 * additional route entry added/deleted for an address
969	 * alias. Therefore this route report is inaccurate.
970	 */
971	bzero(&pfx_ro, sizeof(pfx_ro));
972	pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
973	pfx_addr->sin_len = sizeof(*pfx_addr);
974	pfx_addr->sin_family = AF_INET;
975	pfx_addr->sin_addr = *prefix;
976	rtalloc_ign_fib(&pfx_ro, 0, 0);
977	if (pfx_ro.ro_rt != NULL) {
978		msg_rt = *pfx_ro.ro_rt;
979
980		/* QL: XXX
981		 * Point the gateway to the new interface
982		 * address as if a new prefix route entry has
983		 * been added through the new address alias.
984		 * All other parts of the rtentry is accurate,
985		 * e.g., rt_key, rt_mask, rt_ifp etc.
986		 */
987		msg_rt.rt_gateway =
988			(struct sockaddr *)&target->ia_addr;
989		rt_newaddrmsg(cmd,
990			      (struct ifaddr *)target,
991			      0, &msg_rt);
992		RTFREE(pfx_ro.ro_rt);
993	}
994	return;
995}
996
997/*
998 * Check if we have a route for the given prefix already or add one accordingly.
999 */
1000static int
1001in_addprefix(struct in_ifaddr *target, int flags)
1002{
1003	struct in_ifaddr *ia;
1004	struct in_addr prefix, mask, p, m;
1005	int error;
1006
1007	if ((flags & RTF_HOST) != 0) {
1008		prefix = target->ia_dstaddr.sin_addr;
1009		mask.s_addr = 0;
1010	} else {
1011		prefix = target->ia_addr.sin_addr;
1012		mask = target->ia_sockmask.sin_addr;
1013		prefix.s_addr &= mask.s_addr;
1014	}
1015
1016	IN_IFADDR_RLOCK();
1017	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1018		if (rtinitflags(ia)) {
1019			p = ia->ia_dstaddr.sin_addr;
1020
1021			if (prefix.s_addr != p.s_addr)
1022				continue;
1023		} else {
1024			p = ia->ia_addr.sin_addr;
1025			m = ia->ia_sockmask.sin_addr;
1026			p.s_addr &= m.s_addr;
1027
1028			if (prefix.s_addr != p.s_addr ||
1029			    mask.s_addr != m.s_addr)
1030				continue;
1031		}
1032
1033		/*
1034		 * If we got a matching prefix route inserted by other
1035		 * interface address, we are done here.
1036		 */
1037		if (ia->ia_flags & IFA_ROUTE) {
1038#ifdef RADIX_MPATH
1039			if (ia->ia_addr.sin_addr.s_addr ==
1040			    target->ia_addr.sin_addr.s_addr) {
1041				IN_IFADDR_RUNLOCK();
1042				return (EEXIST);
1043			} else
1044				break;
1045#endif
1046			if (V_sameprefixcarponly &&
1047			    target->ia_ifp->if_type != IFT_CARP &&
1048			    ia->ia_ifp->if_type != IFT_CARP) {
1049				IN_IFADDR_RUNLOCK();
1050				return (EEXIST);
1051			} else {
1052				in_addralias_rtmsg(RTM_ADD, &prefix, target);
1053				IN_IFADDR_RUNLOCK();
1054				return (0);
1055			}
1056		}
1057	}
1058	IN_IFADDR_RUNLOCK();
1059
1060	/*
1061	 * No-one seem to have this prefix route, so we try to insert it.
1062	 */
1063	error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
1064	if (!error)
1065		target->ia_flags |= IFA_ROUTE;
1066	return (error);
1067}
1068
1069extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
1070
1071/*
1072 * If there is no other address in the system that can serve a route to the
1073 * same prefix, remove the route.  Hand over the route to the new address
1074 * otherwise.
1075 */
1076static int
1077in_scrubprefix(struct in_ifaddr *target, u_int flags)
1078{
1079	struct in_ifaddr *ia;
1080	struct in_addr prefix, mask, p;
1081	int error = 0;
1082	struct sockaddr_in prefix0, mask0;
1083
1084	/*
1085	 * Remove the loopback route to the interface address.
1086	 * The "useloopback" setting is not consulted because if the
1087	 * user configures an interface address, turns off this
1088	 * setting, and then tries to delete that interface address,
1089	 * checking the current setting of "useloopback" would leave
1090	 * that interface address loopback route untouched, which
1091	 * would be wrong. Therefore the interface address loopback route
1092	 * deletion is unconditional.
1093	 */
1094	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
1095	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
1096	    (target->ia_flags & IFA_RTSELF)) {
1097		struct route ia_ro;
1098		int freeit = 0;
1099
1100		bzero(&ia_ro, sizeof(ia_ro));
1101		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
1102		rtalloc_ign_fib(&ia_ro, 0, 0);
1103		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
1104		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
1105			RT_LOCK(ia_ro.ro_rt);
1106			if (ia_ro.ro_rt->rt_refcnt <= 1)
1107				freeit = 1;
1108			else if (flags & LLE_STATIC) {
1109				RT_REMREF(ia_ro.ro_rt);
1110				target->ia_flags &= ~IFA_RTSELF;
1111			}
1112			RTFREE_LOCKED(ia_ro.ro_rt);
1113		}
1114		if (freeit && (flags & LLE_STATIC)) {
1115			error = ifa_del_loopback_route((struct ifaddr *)target,
1116				       (struct sockaddr *)&target->ia_addr);
1117			if (error == 0)
1118				target->ia_flags &= ~IFA_RTSELF;
1119		}
1120		if ((flags & LLE_STATIC) &&
1121			!(target->ia_ifp->if_flags & IFF_NOARP))
1122			/* remove arp cache */
1123			arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
1124	}
1125
1126	if (rtinitflags(target))
1127		prefix = target->ia_dstaddr.sin_addr;
1128	else {
1129		prefix = target->ia_addr.sin_addr;
1130		mask = target->ia_sockmask.sin_addr;
1131		prefix.s_addr &= mask.s_addr;
1132	}
1133
1134	if ((target->ia_flags & IFA_ROUTE) == 0) {
1135		in_addralias_rtmsg(RTM_DELETE, &prefix, target);
1136		return (0);
1137	}
1138
1139	IN_IFADDR_RLOCK();
1140	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1141		if (rtinitflags(ia))
1142			p = ia->ia_dstaddr.sin_addr;
1143		else {
1144			p = ia->ia_addr.sin_addr;
1145			p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
1146		}
1147
1148		if ((prefix.s_addr != p.s_addr) ||
1149		    !(ia->ia_ifp->if_flags & IFF_UP))
1150			continue;
1151
1152		/*
1153		 * If we got a matching prefix address, move IFA_ROUTE and
1154		 * the route itself to it.  Make sure that routing daemons
1155		 * get a heads-up.
1156		 *
1157		 * XXX: a special case for carp(4) interface - this should
1158		 *      be more generally specified as an interface that
1159		 *      doesn't support such action.
1160		 */
1161		if ((ia->ia_flags & IFA_ROUTE) == 0
1162		    && (ia->ia_ifp->if_type != IFT_CARP)) {
1163			ifa_ref(&ia->ia_ifa);
1164			IN_IFADDR_RUNLOCK();
1165			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
1166			    rtinitflags(target));
1167			if (error == 0)
1168				target->ia_flags &= ~IFA_ROUTE;
1169			else
1170				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
1171					error);
1172			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
1173			    rtinitflags(ia) | RTF_UP);
1174			if (error == 0)
1175				ia->ia_flags |= IFA_ROUTE;
1176			else
1177				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
1178					error);
1179			ifa_free(&ia->ia_ifa);
1180			return (error);
1181		}
1182	}
1183	IN_IFADDR_RUNLOCK();
1184
1185	/*
1186	 * remove all L2 entries on the given prefix
1187	 */
1188	bzero(&prefix0, sizeof(prefix0));
1189	prefix0.sin_len = sizeof(prefix0);
1190	prefix0.sin_family = AF_INET;
1191	prefix0.sin_addr.s_addr = target->ia_subnet;
1192	bzero(&mask0, sizeof(mask0));
1193	mask0.sin_len = sizeof(mask0);
1194	mask0.sin_family = AF_INET;
1195	mask0.sin_addr.s_addr = target->ia_subnetmask;
1196	lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
1197			    (struct sockaddr *)&mask0, flags);
1198
1199	/*
1200	 * As no-one seem to have this prefix, we can remove the route.
1201	 */
1202	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
1203	if (error == 0)
1204		target->ia_flags &= ~IFA_ROUTE;
1205	else
1206		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
1207	return (error);
1208}
1209
1210#undef rtinitflags
1211
1212/*
1213 * Return 1 if the address might be a local broadcast address.
1214 */
1215int
1216in_broadcast(struct in_addr in, struct ifnet *ifp)
1217{
1218	register struct ifaddr *ifa;
1219	u_long t;
1220
1221	if (in.s_addr == INADDR_BROADCAST ||
1222	    in.s_addr == INADDR_ANY)
1223		return (1);
1224	if ((ifp->if_flags & IFF_BROADCAST) == 0)
1225		return (0);
1226	t = ntohl(in.s_addr);
1227	/*
1228	 * Look through the list of addresses for a match
1229	 * with a broadcast address.
1230	 */
1231#define ia ((struct in_ifaddr *)ifa)
1232	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1233		if (ifa->ifa_addr->sa_family == AF_INET &&
1234		    (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
1235		     /*
1236		      * Check for old-style (host 0) broadcast, but
1237		      * taking into account that RFC 3021 obsoletes it.
1238		      */
1239		     (ia->ia_subnetmask != IN_RFC3021_MASK &&
1240		     t == ia->ia_subnet)) &&
1241		     /*
1242		      * Check for an all one subnetmask. These
1243		      * only exist when an interface gets a secondary
1244		      * address.
1245		      */
1246		     ia->ia_subnetmask != (u_long)0xffffffff)
1247			    return (1);
1248	return (0);
1249#undef ia
1250}
1251
1252/*
1253 * On interface removal, clean up IPv4 data structures hung off of the ifnet.
1254 */
1255void
1256in_ifdetach(struct ifnet *ifp)
1257{
1258
1259	in_pcbpurgeif0(&V_ripcbinfo, ifp);
1260	in_pcbpurgeif0(&V_udbinfo, ifp);
1261	in_purgemaddrs(ifp);
1262}
1263
1264/*
1265 * Delete all IPv4 multicast address records, and associated link-layer
1266 * multicast address records, associated with ifp.
1267 * XXX It looks like domifdetach runs AFTER the link layer cleanup.
1268 * XXX This should not race with ifma_protospec being set during
1269 * a new allocation, if it does, we have bigger problems.
1270 */
1271static void
1272in_purgemaddrs(struct ifnet *ifp)
1273{
1274	LIST_HEAD(,in_multi) purgeinms;
1275	struct in_multi		*inm, *tinm;
1276	struct ifmultiaddr	*ifma;
1277
1278	LIST_INIT(&purgeinms);
1279	IN_MULTI_LOCK();
1280
1281	/*
1282	 * Extract list of in_multi associated with the detaching ifp
1283	 * which the PF_INET layer is about to release.
1284	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
1285	 * by code further down.
1286	 */
1287	IF_ADDR_LOCK(ifp);
1288	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1289		if (ifma->ifma_addr->sa_family != AF_INET ||
1290		    ifma->ifma_protospec == NULL)
1291			continue;
1292#if 0
1293		KASSERT(ifma->ifma_protospec != NULL,
1294		    ("%s: ifma_protospec is NULL", __func__));
1295#endif
1296		inm = (struct in_multi *)ifma->ifma_protospec;
1297		LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
1298	}
1299	IF_ADDR_UNLOCK(ifp);
1300
1301	LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
1302		LIST_REMOVE(inm, inm_link);
1303		inm_release_locked(inm);
1304	}
1305	igmp_ifdetach(ifp);
1306
1307	IN_MULTI_UNLOCK();
1308}
1309
1310#include <net/if_dl.h>
1311#include <netinet/if_ether.h>
1312
1313struct in_llentry {
1314	struct llentry		base;
1315	struct sockaddr_in	l3_addr4;
1316};
1317
1318static struct llentry *
1319in_lltable_new(const struct sockaddr *l3addr, u_int flags)
1320{
1321	struct in_llentry *lle;
1322
1323	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
1324	if (lle == NULL)		/* NB: caller generates msg */
1325		return NULL;
1326
1327	callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
1328	/*
1329	 * For IPv4 this will trigger "arpresolve" to generate
1330	 * an ARP request.
1331	 */
1332	lle->base.la_expire = time_uptime; /* mark expired */
1333	lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
1334	lle->base.lle_refcnt = 1;
1335	LLE_LOCK_INIT(&lle->base);
1336	return &lle->base;
1337}
1338
1339/*
1340 * Deletes an address from the address table.
1341 * This function is called by the timer functions
1342 * such as arptimer() and nd6_llinfo_timer(), and
1343 * the caller does the locking.
1344 */
1345static void
1346in_lltable_free(struct lltable *llt, struct llentry *lle)
1347{
1348	LLE_WUNLOCK(lle);
1349	LLE_LOCK_DESTROY(lle);
1350	free(lle, M_LLTABLE);
1351}
1352
1353
1354#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)	(			\
1355	    (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
1356
1357static void
1358in_lltable_prefix_free(struct lltable *llt,
1359		       const struct sockaddr *prefix,
1360		       const struct sockaddr *mask,
1361		       u_int flags)
1362{
1363	const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
1364	const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
1365	struct llentry *lle, *next;
1366	register int i;
1367	size_t pkts_dropped;
1368
1369	for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
1370		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
1371
1372		        /*
1373			 * (flags & LLE_STATIC) means deleting all entries
1374			 * including static ARP entries
1375			 */
1376			if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle),
1377						     pfx, msk) &&
1378			    ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
1379				int canceled;
1380
1381				canceled = callout_drain(&lle->la_timer);
1382				LLE_WLOCK(lle);
1383				if (canceled)
1384					LLE_REMREF(lle);
1385				pkts_dropped = llentry_free(lle);
1386				ARPSTAT_ADD(dropped, pkts_dropped);
1387			}
1388		}
1389	}
1390}
1391
1392
1393static int
1394in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
1395{
1396	struct rtentry *rt;
1397
1398	KASSERT(l3addr->sa_family == AF_INET,
1399	    ("sin_family %d", l3addr->sa_family));
1400
1401	/* XXX rtalloc1 should take a const param */
1402	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
1403
1404	if (rt == NULL)
1405		return (EINVAL);
1406
1407	/*
1408	 * If the gateway for an existing host route matches the target L3
1409	 * address, which is a special route inserted by some implementation
1410	 * such as MANET, and the interface is of the correct type, then
1411	 * allow for ARP to proceed.
1412	 */
1413	if (rt->rt_flags & RTF_GATEWAY) {
1414		if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
1415			rt->rt_ifp->if_type != IFT_ETHER ||
1416			  (rt->rt_ifp->if_flags &
1417			   (IFF_NOARP | IFF_STATICARP)) != 0 ||
1418			  memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
1419				 sizeof(in_addr_t)) != 0) {
1420			RTFREE_LOCKED(rt);
1421			return (EINVAL);
1422		}
1423	}
1424
1425	/*
1426	 * Make sure that at least the destination address is covered
1427	 * by the route. This is for handling the case where 2 or more
1428	 * interfaces have the same prefix. An incoming packet arrives
1429	 * on one interface and the corresponding outgoing packet leaves
1430	 * another interface.
1431	 */
1432	if (rt->rt_ifp != ifp) {
1433		const char *sa, *mask, *addr, *lim;
1434		int len;
1435
1436		sa = (const char *)rt_key(rt);
1437		mask = (const char *)rt_mask(rt);
1438		addr = (const char *)l3addr;
1439		len = ((const struct sockaddr_in *)l3addr)->sin_len;
1440		lim = addr + len;
1441
1442		for ( ; addr < lim; sa++, mask++, addr++) {
1443			if ((*sa ^ *addr) & *mask) {
1444#ifdef DIAGNOSTIC
1445				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
1446				    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
1447#endif
1448				RTFREE_LOCKED(rt);
1449				return (EINVAL);
1450			}
1451		}
1452	}
1453
1454	RTFREE_LOCKED(rt);
1455	return (0);
1456}
1457
1458/*
1459 * Return NULL if not found or marked for deletion.
1460 * If found return lle read locked.
1461 */
1462static struct llentry *
1463in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
1464{
1465	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
1466	struct ifnet *ifp = llt->llt_ifp;
1467	struct llentry *lle;
1468	struct llentries *lleh;
1469	u_int hashkey;
1470
1471	IF_AFDATA_LOCK_ASSERT(ifp);
1472	KASSERT(l3addr->sa_family == AF_INET,
1473	    ("sin_family %d", l3addr->sa_family));
1474
1475	hashkey = sin->sin_addr.s_addr;
1476	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
1477	LIST_FOREACH(lle, lleh, lle_next) {
1478		struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
1479		if (lle->la_flags & LLE_DELETED)
1480			continue;
1481		if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
1482			break;
1483	}
1484	if (lle == NULL) {
1485#ifdef DIAGNOSTIC
1486		if (flags & LLE_DELETE)
1487			log(LOG_INFO, "interface address is missing from cache = %p  in delete\n", lle);
1488#endif
1489		if (!(flags & LLE_CREATE))
1490			return (NULL);
1491		/*
1492		 * A route that covers the given address must have
1493		 * been installed 1st because we are doing a resolution,
1494		 * verify this.
1495		 */
1496		if (!(flags & LLE_IFADDR) &&
1497		    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
1498			goto done;
1499
1500		lle = in_lltable_new(l3addr, flags);
1501		if (lle == NULL) {
1502			log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
1503			goto done;
1504		}
1505		lle->la_flags = flags & ~LLE_CREATE;
1506		if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
1507			bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
1508			lle->la_flags |= (LLE_VALID | LLE_STATIC);
1509		}
1510
1511		lle->lle_tbl  = llt;
1512		lle->lle_head = lleh;
1513		LIST_INSERT_HEAD(lleh, lle, lle_next);
1514	} else if (flags & LLE_DELETE) {
1515		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
1516			LLE_WLOCK(lle);
1517			lle->la_flags = LLE_DELETED;
1518			EVENTHANDLER_INVOKE(arp_update_event, lle);
1519			LLE_WUNLOCK(lle);
1520#ifdef DIAGNOSTIC
1521			log(LOG_INFO, "ifaddr cache = %p  is deleted\n", lle);
1522#endif
1523		}
1524		lle = (void *)-1;
1525
1526	}
1527	if (LLE_IS_VALID(lle)) {
1528		if (flags & LLE_EXCLUSIVE)
1529			LLE_WLOCK(lle);
1530		else
1531			LLE_RLOCK(lle);
1532	}
1533done:
1534	return (lle);
1535}
1536
1537static int
1538in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
1539{
1540#define	SIN(lle)	((struct sockaddr_in *) L3_ADDR(lle))
1541	struct ifnet *ifp = llt->llt_ifp;
1542	struct llentry *lle;
1543	/* XXX stack use */
1544	struct {
1545		struct rt_msghdr	rtm;
1546		struct sockaddr_inarp	sin;
1547		struct sockaddr_dl	sdl;
1548	} arpc;
1549	int error, i;
1550
1551	LLTABLE_LOCK_ASSERT();
1552
1553	error = 0;
1554	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
1555		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
1556			struct sockaddr_dl *sdl;
1557
1558			/* skip deleted entries */
1559			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
1560				continue;
1561			/* Skip if jailed and not a valid IP of the prison. */
1562			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
1563				continue;
1564			/*
1565			 * produce a msg made of:
1566			 *  struct rt_msghdr;
1567			 *  struct sockaddr_inarp; (IPv4)
1568			 *  struct sockaddr_dl;
1569			 */
1570			bzero(&arpc, sizeof(arpc));
1571			arpc.rtm.rtm_msglen = sizeof(arpc);
1572			arpc.rtm.rtm_version = RTM_VERSION;
1573			arpc.rtm.rtm_type = RTM_GET;
1574			arpc.rtm.rtm_flags = RTF_UP;
1575			arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
1576			arpc.sin.sin_family = AF_INET;
1577			arpc.sin.sin_len = sizeof(arpc.sin);
1578			arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
1579
1580			/* publish */
1581			if (lle->la_flags & LLE_PUB) {
1582				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
1583				/* proxy only */
1584				if (lle->la_flags & LLE_PROXY)
1585					arpc.sin.sin_other = SIN_PROXY;
1586			}
1587
1588			sdl = &arpc.sdl;
1589			sdl->sdl_family = AF_LINK;
1590			sdl->sdl_len = sizeof(*sdl);
1591			sdl->sdl_index = ifp->if_index;
1592			sdl->sdl_type = ifp->if_type;
1593			if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
1594				sdl->sdl_alen = ifp->if_addrlen;
1595				bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
1596			} else {
1597				sdl->sdl_alen = 0;
1598				bzero(LLADDR(sdl), ifp->if_addrlen);
1599			}
1600
1601			arpc.rtm.rtm_rmx.rmx_expire =
1602			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
1603			arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
1604			if (lle->la_flags & LLE_STATIC)
1605				arpc.rtm.rtm_flags |= RTF_STATIC;
1606			arpc.rtm.rtm_index = ifp->if_index;
1607			error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
1608			if (error)
1609				break;
1610		}
1611	}
1612	return error;
1613#undef SIN
1614}
1615
1616void *
1617in_domifattach(struct ifnet *ifp)
1618{
1619	struct in_ifinfo *ii;
1620	struct lltable *llt;
1621
1622	ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
1623
1624	llt = lltable_init(ifp, AF_INET);
1625	if (llt != NULL) {
1626		llt->llt_free = in_lltable_free;
1627		llt->llt_prefix_free = in_lltable_prefix_free;
1628		llt->llt_lookup = in_lltable_lookup;
1629		llt->llt_dump = in_lltable_dump;
1630	}
1631	ii->ii_llt = llt;
1632
1633	ii->ii_igmp = igmp_domifattach(ifp);
1634
1635	return ii;
1636}
1637
1638void
1639in_domifdetach(struct ifnet *ifp, void *aux)
1640{
1641	struct in_ifinfo *ii = (struct in_ifinfo *)aux;
1642
1643	igmp_domifdetach(ifp);
1644	lltable_free(ii->ii_llt);
1645	free(ii, M_IFADDR);
1646}
1647