1/*	$NetBSD: in.c,v 1.247 2022/11/25 08:39:32 knakahara Exp $	*/
2
3/*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix").  It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61
62/*
63 * Copyright (c) 1982, 1986, 1991, 1993
64 *	The Regents of the University of California.  All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 *    notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 *    notice, this list of conditions and the following disclaimer in the
73 *    documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 *    may be used to endorse or promote products derived from this software
76 *    without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 *	@(#)in.c	8.4 (Berkeley) 1/9/95
91 */
92
93#include <sys/cdefs.h>
94__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.247 2022/11/25 08:39:32 knakahara Exp $");
95
96#include "arp.h"
97
98#ifdef _KERNEL_OPT
99#include "opt_inet.h"
100#include "opt_inet_conf.h"
101#include "opt_mrouting.h"
102#include "opt_net_mpsafe.h"
103#endif
104
105#include <sys/param.h>
106#include <sys/ioctl.h>
107#include <sys/errno.h>
108#include <sys/kernel.h>
109#include <sys/malloc.h>
110#include <sys/socket.h>
111#include <sys/socketvar.h>
112#include <sys/sysctl.h>
113#include <sys/systm.h>
114#include <sys/proc.h>
115#include <sys/syslog.h>
116#include <sys/kauth.h>
117#include <sys/kmem.h>
118
119#include <sys/cprng.h>
120
121#include <net/if.h>
122#include <net/route.h>
123#include <net/pfil.h>
124
125#include <net/if_arp.h>
126#include <net/if_ether.h>
127#include <net/if_types.h>
128#include <net/if_llatbl.h>
129#include <net/if_dl.h>
130
131#include <netinet/in_systm.h>
132#include <netinet/in.h>
133#include <netinet/in_var.h>
134#include <netinet/ip.h>
135#include <netinet/ip_var.h>
136#include <netinet/in_ifattach.h>
137#include <netinet/in_pcb.h>
138#include <netinet/in_selsrc.h>
139#include <netinet/if_inarp.h>
140#include <netinet/ip_mroute.h>
141#include <netinet/igmp_var.h>
142
143#ifdef IPSELSRC
144#include <netinet/in_selsrc.h>
145#endif
146
/* Forward declarations of file-local helpers. */
static u_int	in_mask2len(struct in_addr *);
static int	in_lifaddr_ioctl(struct socket *, u_long, void *,
	struct ifnet *);

/* Address-hash maintenance; both assert that in_ifaddr_lock is held. */
static void	in_addrhash_insert_locked(struct in_ifaddr *);
static void	in_addrhash_remove_locked(struct in_ifaddr *);

static int	in_addprefix(struct in_ifaddr *, int);
static void	in_scrubaddr(struct in_ifaddr *);
static int	in_scrubprefix(struct in_ifaddr *);
static void	in_sysctl_init(struct sysctllog **);

/* Compile-time default for subnetsarelocal (see in_localaddr()). */
#ifndef SUBNETSARELOCAL
#define	SUBNETSARELOCAL	1
#endif

/* Compile-time default for hostzeroisbroadcast. */
#ifndef HOSTZEROBROADCAST
#define HOSTZEROBROADCAST 0
#endif

/* Note: 61, 127, 251, 509, 1021, 2039 are good. */
#ifndef IN_MULTI_HASH_SIZE
#define IN_MULTI_HASH_SIZE	509
#endif

/*
 * When non-zero, in_localaddr() and in_direct() match against the
 * ia_net/ia_netmask pair of each address; otherwise they match against
 * ia_subnet/ia_subnetmask.
 */
static int			subnetsarelocal = SUBNETSARELOCAL;
/* NOTE(review): not referenced in this part of the file; semantics defined by its users. */
static int			hostzeroisbroadcast = HOSTZEROBROADCAST;

/*
 * This list is used to keep track of in_multi chains which belong to
 * deleted interface addresses.  We use in_ifaddr so that a chain head
 * won't be deallocated until all multicast address records are deleted.
 *
 * NOTE(review): the declarations below describe a hash table of in_multi
 * records rather than a list of in_ifaddr; this comment may be stale --
 * confirm.
 */

LIST_HEAD(in_multihashhead, in_multi);		/* Type of the hash head */

/* Multicast bookkeeping; pool, table and lock are set up in in_init(). */
static struct pool		inmulti_pool;
static u_int			in_multientries;
static struct in_multihashhead *in_multihashtbl;
static u_long			in_multihash;
static krwlock_t		in_multilock;

/*
 * Select the in_multihashtbl bucket for address x on interface ifp.
 * The index is taken modulo IN_MULTI_HASH_SIZE, so the table must have
 * at least that many buckets.
 */
#define IN_MULTI_HASH(x, ifp) \
    (in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE])

/* XXX DEPRECATED. Keep them to avoid breaking kvm(3) users. */
struct in_ifaddrhashhead *	in_ifaddrhashtbl;
u_long				in_ifaddrhash;
struct in_ifaddrhead		in_ifaddrhead;
/* Held around insertions into and removals from the address list and hash. */
static kmutex_t			in_ifaddr_lock;

/* pserialize/pslist counterparts of the address hash and list above. */
pserialize_t			in_ifaddrhash_psz;
struct pslist_head *		in_ifaddrhashtbl_pslist;
u_long				in_ifaddrhash_pslist;
struct pslist_head		in_ifaddrhead_pslist;
202
203void
204in_init(void)
205{
206	pool_init(&inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl",
207	    NULL, IPL_SOFTNET);
208	TAILQ_INIT(&in_ifaddrhead);
209	PSLIST_INIT(&in_ifaddrhead_pslist);
210
211	in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
212	    &in_ifaddrhash);
213
214	in_ifaddrhash_psz = pserialize_create();
215	in_ifaddrhashtbl_pslist = hashinit(IN_IFADDR_HASH_SIZE, HASH_PSLIST,
216	    true, &in_ifaddrhash_pslist);
217	mutex_init(&in_ifaddr_lock, MUTEX_DEFAULT, IPL_NONE);
218
219	in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
220	    &in_multihash);
221	rw_init(&in_multilock);
222
223	in_sysctl_init(NULL);
224}
225
226/*
227 * Return 1 if an internet address is for a ``local'' host
228 * (one to which we have a connection).  If subnetsarelocal
229 * is true, this includes other subnets of the local net.
230 * Otherwise, it includes only the directly-connected (sub)nets.
231 */
232int
233in_localaddr(struct in_addr in)
234{
235	struct in_ifaddr *ia;
236	int localaddr = 0;
237	int s = pserialize_read_enter();
238
239	if (subnetsarelocal) {
240		IN_ADDRLIST_READER_FOREACH(ia) {
241			if ((in.s_addr & ia->ia_netmask) == ia->ia_net) {
242				localaddr = 1;
243				break;
244			}
245		}
246	} else {
247		IN_ADDRLIST_READER_FOREACH(ia) {
248			if ((in.s_addr & ia->ia_subnetmask) == ia->ia_subnet) {
249				localaddr = 1;
250				break;
251			}
252		}
253	}
254	pserialize_read_exit(s);
255
256	return localaddr;
257}
258
259/*
260 * like in_localaddr() but can specify ifp.
261 */
262int
263in_direct(struct in_addr in, struct ifnet *ifp)
264{
265	struct ifaddr *ifa;
266	int localaddr = 0;
267	int s;
268
269	KASSERT(ifp != NULL);
270
271#define ia (ifatoia(ifa))
272	s = pserialize_read_enter();
273	if (subnetsarelocal) {
274		IFADDR_READER_FOREACH(ifa, ifp) {
275			if (ifa->ifa_addr->sa_family == AF_INET &&
276			    ((in.s_addr & ia->ia_netmask) == ia->ia_net)) {
277				localaddr = 1;
278				break;
279			}
280		}
281	} else {
282		IFADDR_READER_FOREACH(ifa, ifp) {
283			if (ifa->ifa_addr->sa_family == AF_INET &&
284			    (in.s_addr & ia->ia_subnetmask) == ia->ia_subnet) {
285				localaddr = 1;
286				break;
287			}
288		}
289	}
290	pserialize_read_exit(s);
291
292	return localaddr;
293#undef ia
294}
295
/*
 * Decide whether datagrams addressed to `in' may be forwarded.
 *
 * Returns 0 for destinations in a reserved set (experimental, multicast,
 * class A network zero, class A loopback network) and 1 for everything
 * else.
 */
int
in_canforward(struct in_addr in)
{
	const u_int32_t addr = in.s_addr;
	u_int32_t classa_net;

	/* Experimental and multicast destinations are never forwarded. */
	if (IN_EXPERIMENTAL(addr) || IN_MULTICAST(addr))
		return 0;

	/* Only class A contains the remaining reserved networks. */
	if (!IN_CLASSA(addr))
		return 1;

	classa_net = addr & IN_CLASSA_NET;
	if (classa_net == 0)
		return 0;
	if (classa_net == htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
		return 0;

	return 1;
}
315
316/*
317 * Trim a mask in a sockaddr
318 */
319void
320in_socktrim(struct sockaddr_in *ap)
321{
322	char *cplim = (char *) &ap->sin_addr;
323	char *cp = (char *) (&ap->sin_addr + 1);
324
325	ap->sin_len = 0;
326	while (--cp >= cplim)
327		if (*cp) {
328			(ap)->sin_len = cp - (char *) (ap) + 1;
329			break;
330		}
331}
332
333/*
334 * Maintain the "in_maxmtu" variable, which is the largest
335 * mtu for non-local interfaces with AF_INET addresses assigned
336 * to them that are up.
337 */
338unsigned long in_maxmtu;
339
340void
341in_setmaxmtu(void)
342{
343	struct in_ifaddr *ia;
344	struct ifnet *ifp;
345	unsigned long maxmtu = 0;
346	int s = pserialize_read_enter();
347
348	IN_ADDRLIST_READER_FOREACH(ia) {
349		if ((ifp = ia->ia_ifp) == 0)
350			continue;
351		if ((ifp->if_flags & (IFF_UP|IFF_LOOPBACK)) != IFF_UP)
352			continue;
353		if (ifp->if_mtu > maxmtu)
354			maxmtu = ifp->if_mtu;
355	}
356	if (maxmtu)
357		in_maxmtu = maxmtu;
358	pserialize_read_exit(s);
359}
360
/*
 * Convert a netmask to a prefix length by counting its leading one bits,
 * reading the mask byte by byte in memory order.  Counting stops at the
 * first zero bit; any bits after it are ignored.
 */
static u_int
in_mask2len(struct in_addr *mask)
{
	const u_char *bytes = (const u_char *)mask;
	u_int nbytes = 0;
	u_int nbits = 0;

	/* Leading bytes that consist entirely of ones. */
	while (nbytes < sizeof(*mask) && bytes[nbytes] == 0xff)
		nbytes++;

	/* Leading one bits of the first byte that is not all ones, if any. */
	if (nbytes < sizeof(*mask)) {
		while (nbits < NBBY &&
		    (bytes[nbytes] & (0x80 >> nbits)) != 0)
			nbits++;
	}

	return nbytes * NBBY + nbits;
}
381
/*
 * Build the netmask that corresponds to a prefix length.
 *
 * mask: receives the mask; its bytes are filled in memory order, most
 *       significant prefix bits first.
 * len:  prefix length in bits.  Values larger than the width of *mask
 *       are clamped to that width, so the function never writes outside
 *       *mask.
 */
void
in_len2mask(struct in_addr *mask, u_int len)
{
	const u_int maxlen = sizeof(*mask) * NBBY;
	u_char *p = (u_char *)mask;
	u_int i;

	memset(mask, 0, sizeof(*mask));

	/* An over-long prefix would otherwise store past the end of *mask. */
	if (len > maxlen)
		len = maxlen;

	/* Whole bytes first ... */
	for (i = 0; i < len / NBBY; i++)
		p[i] = 0xff;
	/* ... then the high-order bits of the following byte. */
	if (len % NBBY)
		p[i] = (0xff00 >> (len % NBBY)) & 0xff;
}
395
/*
 * Generic internet control operations (ioctl's).
 * Ifp is 0 if not an interface-specific ioctl.
 *
 * Worker for in_control().  The function proceeds in three stages:
 *  1. Requests that are handed to another handler (SIOC[ADG]LIFADDR,
 *     SIOC[GS]IFADDRPREF) or answered directly (SIOCGNBRINFO when
 *     NARP > 0) return immediately.
 *  2. The next switch validates the request and settles which in_ifaddr
 *     it refers to, held through `psref'.  For SIOCAIFADDR and
 *     SIOCSIFADDR a fresh in_ifaddr is allocated when none was found
 *     (newifaddr is set).
 *  3. The last switch carries out the request.  Afterwards a freshly
 *     allocated address is linked into the interface and global lists
 *     and, if the operation failed, purged again.
 *
 * Returns 0 on success or an errno value.
 */
/* ARGSUSED */
static int
in_control0(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
{
	struct ifreq *ifr = (struct ifreq *)data;
	struct in_ifaddr *ia = NULL;
	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
	struct sockaddr_in oldaddr, *new_dstaddr;
	int error, hostIsNew, maskIsNew;
	int newifaddr = 0;
	bool run_hook = false;
	bool need_reinsert = false;
	struct psref psref;
	int bound;

	/* Stage 1: requests that do not go through the lookup below. */
	switch (cmd) {
	case SIOCALIFADDR:
	case SIOCDLIFADDR:
	case SIOCGLIFADDR:
		if (ifp == NULL)
			return EINVAL;
		return in_lifaddr_ioctl(so, cmd, data, ifp);
	case SIOCGIFADDRPREF:
	case SIOCSIFADDRPREF:
		if (ifp == NULL)
			return EINVAL;
		return ifaddrpref_ioctl(so, cmd, data, ifp);
#if NARP > 0
	case SIOCGNBRINFO:
	{
		struct in_nbrinfo *nbi = (struct in_nbrinfo *)data;
		struct llentry *ln;
		struct in_addr nb_addr = nbi->addr; /* make local for safety */

		/*
		 * NOTE(review): unlike the cases above, ifp is not checked
		 * for NULL before being passed on -- confirm that callers
		 * or arplookup() cope with that.
		 */
		ln = arplookup(ifp, &nb_addr, NULL, 0);
		if (ln == NULL)
			return EINVAL;
		nbi->state = ln->ln_state;
		nbi->asked = ln->ln_asked;
		nbi->expire = ln->ln_expire ?
		    time_mono_to_wall(ln->ln_expire) : 0;
		LLE_RUNLOCK(ln);
		return 0;
	}
#endif
	}

	bound = curlwp_bind();
	/*
	 * Find address for this interface, if it exists.
	 */
	if (ifp != NULL)
		ia = in_get_ia_from_ifp_psref(ifp, &psref);

	/* Stage 2: validate the request and settle on the target address. */
	hostIsNew = 1;		/* moved here to appease gcc */
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCDIFADDR:
	case SIOCGIFALIAS:
	case SIOCGIFAFLAG_IN:
		/*
		 * These requests name an address.  When it is an AF_INET
		 * one, drop the address found above and search the hash
		 * for an entry on this interface with exactly that address.
		 */
		if (ifra->ifra_addr.sin_family == AF_INET) {
			int s;

			if (ia != NULL)
				ia4_release(ia, &psref);
			s = pserialize_read_enter();
			IN_ADDRHASH_READER_FOREACH(ia,
			    ifra->ifra_addr.sin_addr.s_addr) {
				if (ia->ia_ifp == ifp &&
				    in_hosteq(ia->ia_addr.sin_addr,
				    ifra->ifra_addr.sin_addr))
					break;
			}
			if (ia != NULL)
				ia4_acquire(ia, &psref);
			pserialize_read_exit(s);
		}
		/* Only SIOCAIFADDR may proceed without an existing address. */
		if ((cmd == SIOCDIFADDR ||
		    cmd == SIOCGIFALIAS ||
		    cmd == SIOCGIFAFLAG_IN) &&
		    ia == NULL) {
			error = EADDRNOTAVAIL;
			goto out;
		}

		/* A delete request with an unspecified family is taken as AF_INET. */
		if (cmd == SIOCDIFADDR &&
		    ifra->ifra_addr.sin_family == AF_UNSPEC) {
			ifra->ifra_addr.sin_family = AF_INET;
		}
		/* FALLTHROUGH */
	case SIOCSIFADDR:
		/*
		 * Clear hostIsNew when the request carries no address (the
		 * existing one is copied into the request) or carries the
		 * address that is already configured.
		 */
		if (ia == NULL || ia->ia_addr.sin_family != AF_INET)
			;
		else if (ifra->ifra_addr.sin_len == 0) {
			ifra->ifra_addr = ia->ia_addr;
			hostIsNew = 0;
		} else if (in_hosteq(ia->ia_addr.sin_addr,
		           ifra->ifra_addr.sin_addr))
			hostIsNew = 0;
		if (ifra->ifra_addr.sin_family != AF_INET) {
			error = EAFNOSUPPORT;
			goto out;
		}
		/* FALLTHROUGH */
	case SIOCSIFDSTADDR:
		if (cmd == SIOCSIFDSTADDR &&
		    ifreq_getaddr(cmd, ifr)->sa_family != AF_INET) {
			error = EAFNOSUPPORT;
			goto out;
		}
		/* FALLTHROUGH */
	case SIOCSIFNETMASK:
		if (ifp == NULL)
			panic("in_control");

		/* The two read-only requests need nothing further here. */
		if (cmd == SIOCGIFALIAS || cmd == SIOCGIFAFLAG_IN)
			break;

		/* Netmask and destination can only be set on an existing address. */
		if (ia == NULL &&
		    (cmd == SIOCSIFNETMASK || cmd == SIOCSIFDSTADDR)) {
			error = EADDRNOTAVAIL;
			goto out;
		}

		if (kauth_authorize_network(kauth_cred_get(),
		    KAUTH_NETWORK_INTERFACE,
		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
		    NULL) != 0) {
			error = EPERM;
			goto out;
		}

		/* No existing address: allocate and pre-initialise a new one. */
		if (ia == NULL) {
			ia = malloc(sizeof(*ia), M_IFADDR, M_WAITOK|M_ZERO);
			/*
			 * NOTE(review): presumably unreachable with
			 * M_WAITOK; kept as a defensive check -- confirm.
			 */
			if (ia == NULL) {
				error = ENOBUFS;
				goto out;
			}
			ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
			ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
			ia->ia_ifa.ifa_netmask = sintosa(&ia->ia_sockmask);
#ifdef IPSELSRC
			ia->ia_ifa.ifa_getifa = in_getifa;
#else /* IPSELSRC */
			ia->ia_ifa.ifa_getifa = NULL;
#endif /* IPSELSRC */
			ia->ia_sockmask.sin_len = 8;
			ia->ia_sockmask.sin_family = AF_INET;
			if (ifp->if_flags & IFF_BROADCAST) {
				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
				ia->ia_broadaddr.sin_family = AF_INET;
			}
			ia->ia_ifp = ifp;
			ia->ia_idsalt = cprng_fast32() % 65535;
			LIST_INIT(&ia->ia_multiaddrs);
			IN_ADDRHASH_ENTRY_INIT(ia);
			IN_ADDRLIST_ENTRY_INIT(ia);
			ifa_psref_init(&ia->ia_ifa);
			/*
			 * We need a reference to make ia survive over in_ifinit
			 * that does ifaref and ifafree.
			 */
			ifaref(&ia->ia_ifa);

			newifaddr = 1;
		}
		break;

	case SIOCSIFBRDADDR:
		if (kauth_authorize_network(kauth_cred_get(),
		    KAUTH_NETWORK_INTERFACE,
		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
		    NULL) != 0) {
			error = EPERM;
			goto out;
		}
		/* FALLTHROUGH */

	case SIOCGIFADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCGIFBRDADDR:
		/* These all operate on the interface's existing address. */
		if (ia == NULL) {
			error = EADDRNOTAVAIL;
			goto out;
		}
		break;
	}
	/* Stage 3: carry out the request. */
	error = 0;
	switch (cmd) {

	case SIOCGIFADDR:
		ifreq_setaddr(cmd, ifr, sintocsa(&ia->ia_addr));
		break;

	case SIOCGIFBRDADDR:
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EINVAL;
			goto out;
		}
		ifreq_setdstaddr(cmd, ifr, sintocsa(&ia->ia_broadaddr));
		break;

	case SIOCGIFDSTADDR:
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
			error = EINVAL;
			goto out;
		}
		ifreq_setdstaddr(cmd, ifr, sintocsa(&ia->ia_dstaddr));
		break;

	case SIOCGIFNETMASK:
		/*
		 * We keep the number of trailing zero bytes in the sin_len
		 * field of ia_sockmask, so we fix this before we pass it
		 * back to userland.
		 */
		oldaddr = ia->ia_sockmask;
		oldaddr.sin_len = sizeof(struct sockaddr_in);
		ifreq_setaddr(cmd, ifr, (const void *)&oldaddr);
		break;

	case SIOCSIFDSTADDR:
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
			error = EINVAL;
			goto out;
		}
		/* Install the new destination; restore the old one on failure. */
		oldaddr = ia->ia_dstaddr;
		ia->ia_dstaddr = *satocsin(ifreq_getdstaddr(cmd, ifr));
		if ((error = if_addr_init(ifp, &ia->ia_ifa, false)) != 0) {
			ia->ia_dstaddr = oldaddr;
			goto out;
		}
		/*
		 * If a route was installed for this address, delete it using
		 * the old destination and add one for the new destination.
		 */
		if (ia->ia_flags & IFA_ROUTE) {
			ia->ia_ifa.ifa_dstaddr = sintosa(&oldaddr);
			rtinit(&ia->ia_ifa, RTM_DELETE, RTF_HOST);
			ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
			rtinit(&ia->ia_ifa, RTM_ADD, RTF_HOST|RTF_UP);
		}
		break;

	case SIOCSIFBRDADDR:
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EINVAL;
			goto out;
		}
		ia->ia_broadaddr = *satocsin(ifreq_getbroadaddr(cmd, ifr));
		break;

	case SIOCSIFADDR:
		/*
		 * An existing address is taken out of the address hash
		 * while in_ifinit() runs and put back after the switch.
		 */
		if (!newifaddr) {
			in_addrhash_remove(ia);
			need_reinsert = true;
		}
		error = in_ifinit(ifp, ia, satocsin(ifreq_getaddr(cmd, ifr)),
		    NULL, 1);

		run_hook = true;
		break;

	case SIOCSIFNETMASK:
		in_scrubprefix(ia);
		ia->ia_sockmask = *satocsin(ifreq_getaddr(cmd, ifr));
		ia->ia_subnetmask = ia->ia_sockmask.sin_addr.s_addr;
		if (!newifaddr) {
			in_addrhash_remove(ia);
			need_reinsert = true;
		}
		error = in_ifinit(ifp, ia, NULL, NULL, 0);
		break;

	case SIOCAIFADDR:
		maskIsNew = 0;
		if (ifra->ifra_mask.sin_len) {
			in_scrubprefix(ia);
			ia->ia_sockmask = ifra->ifra_mask;
			ia->ia_subnetmask = ia->ia_sockmask.sin_addr.s_addr;
			maskIsNew = 1;
		}
		if ((ifp->if_flags & IFF_POINTOPOINT) &&
		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
			new_dstaddr = &ifra->ifra_dstaddr;
			maskIsNew  = 1; /* We lie; but the effect's the same */
		} else
			new_dstaddr = NULL;
		/* Re-initialise only when address, mask or destination changed. */
		if (ifra->ifra_addr.sin_family == AF_INET &&
		    (hostIsNew || maskIsNew)) {
			if (!newifaddr) {
				in_addrhash_remove(ia);
				need_reinsert = true;
			}
			error = in_ifinit(ifp, ia, &ifra->ifra_addr,
			    new_dstaddr, 0);
		}
		if ((ifp->if_flags & IFF_BROADCAST) &&
		    (ifra->ifra_broadaddr.sin_family == AF_INET))
			ia->ia_broadaddr = ifra->ifra_broadaddr;
		run_hook = true;
		break;

	case SIOCGIFALIAS:
		ifra->ifra_mask = ia->ia_sockmask;
		if ((ifp->if_flags & IFF_POINTOPOINT) &&
		    (ia->ia_dstaddr.sin_family == AF_INET))
			ifra->ifra_dstaddr = ia->ia_dstaddr;
		else if ((ifp->if_flags & IFF_BROADCAST) &&
		    (ia->ia_broadaddr.sin_family == AF_INET))
			ifra->ifra_broadaddr = ia->ia_broadaddr;
		else
			memset(&ifra->ifra_broadaddr, 0,
			      sizeof(ifra->ifra_broadaddr));
		break;

	case SIOCGIFAFLAG_IN:
		ifr->ifr_addrflags = ia->ia4_flags;
		break;

	case SIOCDIFADDR:
		/*
		 * Drop the psref, then hold an ordinary reference across
		 * in_purgeaddr() -- presumably so that ia is still valid
		 * for the pfil hooks that run afterwards.
		 */
		ia4_release(ia, &psref);
		ifaref(&ia->ia_ifa);
		in_purgeaddr(&ia->ia_ifa);
		pfil_run_addrhooks(if_pfil, cmd, &ia->ia_ifa);
		ifafree(&ia->ia_ifa);
		ia = NULL;
		break;

#ifdef MROUTING
	case SIOCGETVIFCNT:
	case SIOCGETSGCNT:
		error = mrt_ioctl(so, cmd, data);
		break;
#endif /* MROUTING */

	default:
		error = ENOTTY;
		goto out;
	}

	/*
	 * XXX insert regardless of error to make in_purgeaddr below work.
	 * Need to improve.
	 */
	if (newifaddr) {
		ifaref(&ia->ia_ifa);
		ifa_insert(ifp, &ia->ia_ifa);

		mutex_enter(&in_ifaddr_lock);
		TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_list);
		IN_ADDRLIST_WRITER_INSERT_TAIL(ia);
		in_addrhash_insert_locked(ia);
		/* Release a reference that is held just after creation. */
		ifafree(&ia->ia_ifa);
		mutex_exit(&in_ifaddr_lock);
	} else if (need_reinsert) {
		in_addrhash_insert(ia);
	}

	if (error == 0) {
		if (run_hook)
			pfil_run_addrhooks(if_pfil, cmd, &ia->ia_ifa);
	} else if (newifaddr) {
		/* The operation failed: undo the insertion done just above. */
		KASSERT(ia != NULL);
		in_purgeaddr(&ia->ia_ifa);
		ia = NULL;
	}

out:
	/* A newly allocated address was never acquired through psref. */
	if (!newifaddr && ia != NULL)
		ia4_release(ia, &psref);
	curlwp_bindx(bound);
	return error;
}
772
773int
774in_control(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
775{
776	int error;
777
778#ifndef NET_MPSAFE
779	KASSERT(KERNEL_LOCKED_P());
780#endif
781	error = in_control0(so, cmd, data, ifp);
782
783	return error;
784}
785
786/* Add ownaddr as loopback rtentry. */
787static void
788in_ifaddlocal(struct ifaddr *ifa)
789{
790	struct in_ifaddr *ia;
791
792	ia = (struct in_ifaddr *)ifa;
793	if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED)) {
794		rt_addrmsg(RTM_NEWADDR, ifa);
795		return;
796	}
797	if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY ||
798	    (ia->ia_ifp->if_flags & IFF_POINTOPOINT &&
799	    in_hosteq(ia->ia_dstaddr.sin_addr, ia->ia_addr.sin_addr)))
800	{
801		rt_addrmsg(RTM_NEWADDR, ifa);
802		return;
803	}
804
805	rt_ifa_addlocal(ifa);
806}
807
808/* Remove loopback entry of ownaddr */
809static void
810in_ifremlocal(struct ifaddr *ifa)
811{
812	struct in_ifaddr *ia, *p;
813	struct ifaddr *alt_ifa = NULL;
814	int ia_count = 0;
815	int s;
816	struct psref psref;
817	int bound = curlwp_bind();
818
819	ia = (struct in_ifaddr *)ifa;
820	if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED)) {
821		rt_addrmsg(RTM_DELADDR, ifa);
822		goto out;
823	}
824	/* Delete the entry if exactly one ifaddr matches the
825	 * address, ifa->ifa_addr. */
826	s = pserialize_read_enter();
827	IN_ADDRLIST_READER_FOREACH(p) {
828		if ((p->ia_ifp->if_flags & IFF_UNNUMBERED))
829			continue;
830
831		if (!in_hosteq(p->ia_addr.sin_addr, ia->ia_addr.sin_addr))
832			continue;
833		if (p->ia_ifp != ia->ia_ifp)
834			alt_ifa = &p->ia_ifa;
835		if (++ia_count > 1 && alt_ifa != NULL)
836			break;
837	}
838	if (alt_ifa != NULL && ia_count > 1)
839		ifa_acquire(alt_ifa, &psref);
840	pserialize_read_exit(s);
841
842	if (ia_count == 0)
843		goto out;
844
845	rt_ifa_remlocal(ifa, ia_count == 1 ? NULL : alt_ifa);
846	if (alt_ifa != NULL && ia_count > 1)
847		ifa_release(alt_ifa, &psref);
848out:
849	curlwp_bindx(bound);
850}
851
852static void
853in_scrubaddr(struct in_ifaddr *ia)
854{
855
856	/* stop DAD processing */
857	if (ia->ia_dad_stop != NULL)
858		ia->ia_dad_stop(&ia->ia_ifa);
859
860	in_scrubprefix(ia);
861	in_ifremlocal(&ia->ia_ifa);
862
863	mutex_enter(&in_ifaddr_lock);
864	if (ia->ia_allhosts != NULL) {
865		in_delmulti(ia->ia_allhosts);
866		ia->ia_allhosts = NULL;
867	}
868	mutex_exit(&in_ifaddr_lock);
869}
870
/*
 * Tear down an IPv4 interface address: mark it IFA_DESTROYING, scrub it,
 * unlink it from the address hash, the global address lists and the
 * interface, drop a reference, and recompute in_maxmtu.
 *
 * This function relies on not being called concurrently.  That should be
 * guaranteed by ifa->ifa_ifp's ioctl lock.  The possible callers are
 * in_control and if_purgeaddrs; the former is called via ifa->ifa_ifp's
 * ioctl and the latter is called via ifa->ifa_ifp's if_detach.  These
 * functions are never executed concurrently.
 */
void
in_purgeaddr(struct ifaddr *ifa)
{
	struct in_ifaddr *ia = (void *) ifa;
	struct ifnet *ifp = ifa->ifa_ifp;

	/* KASSERT(!ifa_held(ifa)); XXX need ifa_not_held (psref_not_held) */

	ifa->ifa_flags |= IFA_DESTROYING;
	in_scrubaddr(ia);

	/* Unlink from every list while holding the address-list lock. */
	mutex_enter(&in_ifaddr_lock);
	in_addrhash_remove_locked(ia);
	TAILQ_REMOVE(&in_ifaddrhead, ia, ia_list);
	IN_ADDRLIST_WRITER_REMOVE(ia);
	ifa_remove(ifp, &ia->ia_ifa);
	/* Assume ifa_remove called pserialize_perform and psref_destroy */
	mutex_exit(&in_ifaddr_lock);
	IN_ADDRHASH_ENTRY_DESTROY(ia);
	IN_ADDRLIST_ENTRY_DESTROY(ia);
	ifafree(&ia->ia_ifa);
	in_setmaxmtu();
}
901
/*
 * Insert `ia' at the head of its bucket in both the legacy address hash
 * and the pslist address hash, keyed by ia->ia_addr.  The caller must
 * hold in_ifaddr_lock.
 */
static void
in_addrhash_insert_locked(struct in_ifaddr *ia)
{

	KASSERT(mutex_owned(&in_ifaddr_lock));

	LIST_INSERT_HEAD(&IN_IFADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia,
	    ia_hash);
	/* (Re)initialise the pslist entry before linking it in. */
	IN_ADDRHASH_ENTRY_INIT(ia);
	IN_ADDRHASH_WRITER_INSERT_HEAD(ia);
}
913
/*
 * Insert `ia' into the address hash, taking in_ifaddr_lock around the
 * operation.
 */
void
in_addrhash_insert(struct in_ifaddr *ia)
{

	mutex_enter(&in_ifaddr_lock);
	in_addrhash_insert_locked(ia);
	mutex_exit(&in_ifaddr_lock);
}
922
/*
 * Unlink `ia' from both the legacy address hash and the pslist address
 * hash.  The caller must hold in_ifaddr_lock.
 */
static void
in_addrhash_remove_locked(struct in_ifaddr *ia)
{

	KASSERT(mutex_owned(&in_ifaddr_lock));

	LIST_REMOVE(ia, ia_hash);
	IN_ADDRHASH_WRITER_REMOVE(ia);
}
932
/*
 * Remove `ia' from the address hash under in_ifaddr_lock and destroy its
 * hash entry.  With NET_MPSAFE, pserialize_perform() is called before
 * the lock is released and the entry is destroyed.
 */
void
in_addrhash_remove(struct in_ifaddr *ia)
{

	mutex_enter(&in_ifaddr_lock);
	in_addrhash_remove_locked(ia);
#ifdef NET_MPSAFE
	pserialize_perform(in_ifaddrhash_psz);
#endif
	mutex_exit(&in_ifaddr_lock);
	IN_ADDRHASH_ENTRY_DESTROY(ia);
}
945
/*
 * Purge IPv4 state from an interface: with the interface lock held,
 * remove every AF_INET address through in_purgeaddr(), purge IGMP state
 * and, when MROUTING is configured, detach the interface from the
 * multicast router.
 */
void
in_purgeif(struct ifnet *ifp)		/* MUST be called at splsoftnet() */
{

	IFNET_LOCK(ifp);
	if_purgeaddrs(ifp, AF_INET, in_purgeaddr);
	igmp_purgeif(ifp);		/* manipulates pools */
#ifdef MROUTING
	ip_mrouter_detach(ifp);
#endif
	IFNET_UNLOCK(ifp);
}
958
959/*
960 * SIOC[GAD]LIFADDR.
961 *	SIOCGLIFADDR: get first address. (???)
962 *	SIOCGLIFADDR with IFLR_PREFIX:
963 *		get first address that matches the specified prefix.
964 *	SIOCALIFADDR: add the specified address.
965 *	SIOCALIFADDR with IFLR_PREFIX:
966 *		EINVAL since we can't deduce hostid part of the address.
967 *	SIOCDLIFADDR: delete the specified address.
968 *	SIOCDLIFADDR with IFLR_PREFIX:
969 *		delete the first address that matches the specified prefix.
970 * return values:
971 *	EINVAL on invalid parameters
972 *	EADDRNOTAVAIL on prefix match failed/specified address not found
973 *	other values may be returned from in_ioctl()
974 */
975static int
976in_lifaddr_ioctl(struct socket *so, u_long cmd, void *data,
977    struct ifnet *ifp)
978{
979	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
980	struct ifaddr *ifa;
981	struct sockaddr *sa;
982
983	/* sanity checks */
984	if (data == NULL || ifp == NULL) {
985		panic("invalid argument to in_lifaddr_ioctl");
986		/*NOTRECHED*/
987	}
988
989	switch (cmd) {
990	case SIOCGLIFADDR:
991		/* address must be specified on GET with IFLR_PREFIX */
992		if ((iflr->flags & IFLR_PREFIX) == 0)
993			break;
994		/*FALLTHROUGH*/
995	case SIOCALIFADDR:
996	case SIOCDLIFADDR:
997		/* address must be specified on ADD and DELETE */
998		sa = (struct sockaddr *)&iflr->addr;
999		if (sa->sa_family != AF_INET)
1000			return EINVAL;
1001		if (sa->sa_len != sizeof(struct sockaddr_in))
1002			return EINVAL;
1003		/* XXX need improvement */
1004		sa = (struct sockaddr *)&iflr->dstaddr;
1005		if (sa->sa_family != AF_UNSPEC && sa->sa_family != AF_INET)
1006			return EINVAL;
1007		if (sa->sa_len != 0 && sa->sa_len != sizeof(struct sockaddr_in))
1008			return EINVAL;
1009		break;
1010	default: /*shouldn't happen*/
1011#if 0
1012		panic("invalid cmd to in_lifaddr_ioctl");
1013		/*NOTREACHED*/
1014#else
1015		return EOPNOTSUPP;
1016#endif
1017	}
1018	if (sizeof(struct in_addr) * NBBY < iflr->prefixlen)
1019		return EINVAL;
1020
1021	switch (cmd) {
1022	case SIOCALIFADDR:
1023	    {
1024		struct in_aliasreq ifra;
1025
1026		if (iflr->flags & IFLR_PREFIX)
1027			return EINVAL;
1028
1029		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
1030		memset(&ifra, 0, sizeof(ifra));
1031		memcpy(ifra.ifra_name, iflr->iflr_name,
1032			sizeof(ifra.ifra_name));
1033
1034		memcpy(&ifra.ifra_addr, &iflr->addr,
1035			((struct sockaddr *)&iflr->addr)->sa_len);
1036
1037		if (((struct sockaddr *)&iflr->dstaddr)->sa_family) {	/*XXX*/
1038			memcpy(&ifra.ifra_dstaddr, &iflr->dstaddr,
1039				((struct sockaddr *)&iflr->dstaddr)->sa_len);
1040		}
1041
1042		ifra.ifra_mask.sin_family = AF_INET;
1043		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
1044		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
1045
1046		return in_control(so, SIOCAIFADDR, &ifra, ifp);
1047	    }
1048	case SIOCGLIFADDR:
1049	case SIOCDLIFADDR:
1050	    {
1051		struct in_ifaddr *ia;
1052		struct in_addr mask, candidate, match;
1053		struct sockaddr_in *sin;
1054		int cmp, s;
1055
1056		memset(&mask, 0, sizeof(mask));
1057		memset(&match, 0, sizeof(match));	/* XXX gcc */
1058		if (iflr->flags & IFLR_PREFIX) {
1059			/* lookup a prefix rather than address. */
1060			in_len2mask(&mask, iflr->prefixlen);
1061
1062			sin = (struct sockaddr_in *)&iflr->addr;
1063			match.s_addr = sin->sin_addr.s_addr;
1064			match.s_addr &= mask.s_addr;
1065
1066			/* if you set extra bits, that's wrong */
1067			if (match.s_addr != sin->sin_addr.s_addr)
1068				return EINVAL;
1069
1070			cmp = 1;
1071		} else {
1072			if (cmd == SIOCGLIFADDR) {
1073				/* on getting an address, take the 1st match */
1074				cmp = 0;	/*XXX*/
1075			} else {
1076				/* on deleting an address, do exact match */
1077				in_len2mask(&mask, 32);
1078				sin = (struct sockaddr_in *)&iflr->addr;
1079				match.s_addr = sin->sin_addr.s_addr;
1080
1081				cmp = 1;
1082			}
1083		}
1084
1085		s = pserialize_read_enter();
1086		IFADDR_READER_FOREACH(ifa, ifp) {
1087			if (ifa->ifa_addr->sa_family != AF_INET)
1088				continue;
1089			if (cmp == 0)
1090				break;
1091			candidate.s_addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
1092			candidate.s_addr &= mask.s_addr;
1093			if (candidate.s_addr == match.s_addr)
1094				break;
1095		}
1096		if (ifa == NULL) {
1097			pserialize_read_exit(s);
1098			return EADDRNOTAVAIL;
1099		}
1100		ia = (struct in_ifaddr *)ifa;
1101
1102		if (cmd == SIOCGLIFADDR) {
1103			/* fill in the if_laddrreq structure */
1104			memcpy(&iflr->addr, &ia->ia_addr, ia->ia_addr.sin_len);
1105
1106			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
1107				memcpy(&iflr->dstaddr, &ia->ia_dstaddr,
1108					ia->ia_dstaddr.sin_len);
1109			} else
1110				memset(&iflr->dstaddr, 0, sizeof(iflr->dstaddr));
1111
1112			iflr->prefixlen =
1113				in_mask2len(&ia->ia_sockmask.sin_addr);
1114
1115			iflr->flags = 0;	/*XXX*/
1116			pserialize_read_exit(s);
1117
1118			return 0;
1119		} else {
1120			struct in_aliasreq ifra;
1121
1122			/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
1123			memset(&ifra, 0, sizeof(ifra));
1124			memcpy(ifra.ifra_name, iflr->iflr_name,
1125				sizeof(ifra.ifra_name));
1126
1127			memcpy(&ifra.ifra_addr, &ia->ia_addr,
1128				ia->ia_addr.sin_len);
1129			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
1130				memcpy(&ifra.ifra_dstaddr, &ia->ia_dstaddr,
1131					ia->ia_dstaddr.sin_len);
1132			}
1133			memcpy(&ifra.ifra_dstaddr, &ia->ia_sockmask,
1134				ia->ia_sockmask.sin_len);
1135			pserialize_read_exit(s);
1136
1137			return in_control(so, SIOCDIFADDR, &ifra, ifp);
1138		}
1139	    }
1140	}
1141
1142	return EOPNOTSUPP;	/*just for safety*/
1143}
1144
1145/*
1146 * Initialize an interface's internet address
1147 * and routing table entry.
1148 */
1149int
1150in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia,
1151    const struct sockaddr_in *sin, const struct sockaddr_in *dst, int scrub)
1152{
1153	u_int32_t i;
1154	struct sockaddr_in oldaddr, olddst;
1155	int s, oldflags, flags = RTF_UP, error, hostIsNew;
1156
1157	if (sin == NULL)
1158		sin = &ia->ia_addr;
1159	if (dst == NULL)
1160		dst = &ia->ia_dstaddr;
1161
1162	/*
1163	 * Set up new addresses.
1164	 */
1165	oldaddr = ia->ia_addr;
1166	olddst = ia->ia_dstaddr;
1167	oldflags = ia->ia4_flags;
1168	ia->ia_addr = *sin;
1169	ia->ia_dstaddr = *dst;
1170	hostIsNew = oldaddr.sin_family != AF_INET ||
1171	    !in_hosteq(ia->ia_addr.sin_addr, oldaddr.sin_addr);
1172	if (!scrub)
1173		scrub = oldaddr.sin_family != ia->ia_dstaddr.sin_family ||
1174		    !in_hosteq(ia->ia_dstaddr.sin_addr, olddst.sin_addr);
1175
1176	/*
1177	 * Configure address flags.
1178	 * We need to do this early because they may be adjusted
1179	 * by if_addr_init depending on the address.
1180	 */
1181	if (ia->ia4_flags & IN_IFF_DUPLICATED) {
1182		ia->ia4_flags &= ~IN_IFF_DUPLICATED;
1183		hostIsNew = 1;
1184	}
1185	if (ifp->if_link_state == LINK_STATE_DOWN) {
1186		ia->ia4_flags |= IN_IFF_DETACHED;
1187		ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1188	} else if (hostIsNew && if_do_dad(ifp) && ip_dad_enabled())
1189		ia->ia4_flags |= IN_IFF_TRYTENTATIVE;
1190
1191	/*
1192	 * Give the interface a chance to initialize
1193	 * if this is its first address,
1194	 * and to validate the address if necessary.
1195	 */
1196	s = splsoftnet();
1197	error = if_addr_init(ifp, &ia->ia_ifa, true);
1198	splx(s);
1199	/* Now clear the try tentative flag, its job is done. */
1200	ia->ia4_flags &= ~IN_IFF_TRYTENTATIVE;
1201	if (error != 0) {
1202		ia->ia_addr = oldaddr;
1203		ia->ia_dstaddr = olddst;
1204		ia->ia4_flags = oldflags;
1205		return error;
1206	}
1207
1208	/*
1209	 * The interface which does not have IPv4 address is not required
1210	 * to scrub old address.  So, skip scrub such cases.
1211	 */
1212	if (oldaddr.sin_family == AF_INET && (scrub || hostIsNew)) {
1213		int newflags = ia->ia4_flags;
1214
1215		ia->ia_ifa.ifa_addr = sintosa(&oldaddr);
1216		ia->ia_ifa.ifa_dstaddr = sintosa(&olddst);
1217		ia->ia4_flags = oldflags;
1218		if (hostIsNew)
1219			in_scrubaddr(ia);
1220		else if (scrub)
1221			in_scrubprefix(ia);
1222		ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
1223		ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
1224		ia->ia4_flags = newflags;
1225	}
1226
1227	i = ia->ia_addr.sin_addr.s_addr;
1228	if (ifp->if_flags & IFF_POINTOPOINT)
1229		ia->ia_netmask = INADDR_BROADCAST;	/* default to /32 */
1230	else if (IN_CLASSA(i))
1231		ia->ia_netmask = IN_CLASSA_NET;
1232	else if (IN_CLASSB(i))
1233		ia->ia_netmask = IN_CLASSB_NET;
1234	else
1235		ia->ia_netmask = IN_CLASSC_NET;
1236	/*
1237	 * The subnet mask usually includes at least the standard network part,
1238	 * but may may be smaller in the case of supernetting.
1239	 * If it is set, we believe it.
1240	 */
1241	if (ia->ia_subnetmask == 0) {
1242		ia->ia_subnetmask = ia->ia_netmask;
1243		ia->ia_sockmask.sin_addr.s_addr = ia->ia_subnetmask;
1244	} else
1245		ia->ia_netmask &= ia->ia_subnetmask;
1246
1247	ia->ia_net = i & ia->ia_netmask;
1248	ia->ia_subnet = i & ia->ia_subnetmask;
1249	in_socktrim(&ia->ia_sockmask);
1250
1251	/* re-calculate the "in_maxmtu" value */
1252	in_setmaxmtu();
1253
1254	ia->ia_ifa.ifa_metric = ifp->if_metric;
1255	if (ifp->if_flags & IFF_BROADCAST) {
1256		if (ia->ia_subnetmask == IN_RFC3021_MASK) {
1257			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
1258			ia->ia_netbroadcast.s_addr = INADDR_BROADCAST;
1259		} else {
1260			ia->ia_broadaddr.sin_addr.s_addr =
1261				ia->ia_subnet | ~ia->ia_subnetmask;
1262			ia->ia_netbroadcast.s_addr =
1263				ia->ia_net | ~ia->ia_netmask;
1264		}
1265	} else if (ifp->if_flags & IFF_LOOPBACK) {
1266		ia->ia_dstaddr = ia->ia_addr;
1267		flags |= RTF_HOST;
1268	} else if (ifp->if_flags & IFF_POINTOPOINT) {
1269		if (ia->ia_dstaddr.sin_family != AF_INET)
1270			return (0);
1271		flags |= RTF_HOST;
1272	}
1273
1274	/* Add the local route to the address */
1275	in_ifaddlocal(&ia->ia_ifa);
1276
1277	/* Add the prefix route for the address */
1278	error = in_addprefix(ia, flags);
1279
1280	/*
1281	 * If the interface supports multicast, join the "all hosts"
1282	 * multicast group on that interface.
1283	 */
1284	mutex_enter(&in_ifaddr_lock);
1285	if ((ifp->if_flags & IFF_MULTICAST) != 0 && ia->ia_allhosts == NULL) {
1286		struct in_addr addr;
1287
1288		addr.s_addr = INADDR_ALLHOSTS_GROUP;
1289		ia->ia_allhosts = in_addmulti(&addr, ifp);
1290	}
1291	mutex_exit(&in_ifaddr_lock);
1292
1293	if (hostIsNew &&
1294	    ia->ia4_flags & IN_IFF_TENTATIVE &&
1295	    if_do_dad(ifp))
1296		ia->ia_dad_start((struct ifaddr *)ia);
1297
1298	return error;
1299}
1300
/*
 * Route flags implied by the interface of in_ifaddr x: RTF_HOST when that
 * interface is loopback or point-to-point, 0 otherwise.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
	    ? RTF_HOST : 0)
1304
1305/*
1306 * add a route to prefix ("connected route" in cisco terminology).
1307 * does nothing if there's some interface address with the same prefix already.
1308 */
1309static int
1310in_addprefix(struct in_ifaddr *target, int flags)
1311{
1312	struct in_ifaddr *ia;
1313	struct in_addr prefix, mask, p;
1314	int error;
1315	int s;
1316
1317	if ((flags & RTF_HOST) != 0)
1318		prefix = target->ia_dstaddr.sin_addr;
1319	else {
1320		prefix = target->ia_addr.sin_addr;
1321		mask = target->ia_sockmask.sin_addr;
1322		prefix.s_addr &= mask.s_addr;
1323	}
1324
1325	s = pserialize_read_enter();
1326	IN_ADDRLIST_READER_FOREACH(ia) {
1327		if (rtinitflags(ia))
1328			p = ia->ia_dstaddr.sin_addr;
1329		else {
1330			p = ia->ia_addr.sin_addr;
1331			p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
1332		}
1333
1334		if (prefix.s_addr != p.s_addr)
1335			continue;
1336
1337		if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED))
1338			continue;
1339
1340		/*
1341		 * if we got a matching prefix route inserted by other
1342		 * interface address, we don't need to bother
1343		 *
1344		 * XXX RADIX_MPATH implications here? -dyoung
1345		 */
1346		if (ia->ia_flags & IFA_ROUTE) {
1347			pserialize_read_exit(s);
1348			return 0;
1349		}
1350	}
1351	pserialize_read_exit(s);
1352
1353	/*
1354	 * noone seem to have prefix route.  insert it.
1355	 */
1356	if (target->ia_ifa.ifa_ifp->if_flags & IFF_UNNUMBERED) {
1357		error = 0;
1358	} else {
1359		error = rtinit(&target->ia_ifa, RTM_ADD, flags);
1360		if (error == 0)
1361			target->ia_flags |= IFA_ROUTE;
1362		else if (error == EEXIST) {
1363			/*
1364			 * the fact the route already exists is not an error.
1365			 */
1366			error = 0;
1367		}
1368	}
1369	return error;
1370}
1371
1372static int
1373in_rt_ifa_matcher(struct rtentry *rt, void *v)
1374{
1375	struct ifaddr *ifa = v;
1376
1377	if (rt->rt_ifa == ifa)
1378		return 1;
1379	else
1380		return 0;
1381}
1382
1383/*
1384 * remove a route to prefix ("connected route" in cisco terminology).
1385 * re-installs the route by using another interface address, if there's one
1386 * with the same prefix (otherwise we lose the route mistakenly).
1387 */
1388static int
1389in_scrubprefix(struct in_ifaddr *target)
1390{
1391	struct in_ifaddr *ia;
1392	struct in_addr prefix, mask, p;
1393	int error;
1394	int s;
1395
1396	/* If we don't have IFA_ROUTE we have nothing to do */
1397	if ((target->ia_flags & IFA_ROUTE) == 0)
1398		return 0;
1399
1400	if (rtinitflags(target))
1401		prefix = target->ia_dstaddr.sin_addr;
1402	else {
1403		prefix = target->ia_addr.sin_addr;
1404		mask = target->ia_sockmask.sin_addr;
1405		prefix.s_addr &= mask.s_addr;
1406	}
1407
1408	s = pserialize_read_enter();
1409	IN_ADDRLIST_READER_FOREACH(ia) {
1410		if (rtinitflags(ia))
1411			p = ia->ia_dstaddr.sin_addr;
1412		else {
1413			p = ia->ia_addr.sin_addr;
1414			p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
1415		}
1416
1417		if (prefix.s_addr != p.s_addr)
1418			continue;
1419
1420		if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED))
1421			continue;
1422
1423		/*
1424		 * if we got a matching prefix route, move IFA_ROUTE to him
1425		 */
1426		if ((ia->ia_flags & IFA_ROUTE) == 0) {
1427			struct psref psref;
1428			int bound = curlwp_bind();
1429
1430			ia4_acquire(ia, &psref);
1431			pserialize_read_exit(s);
1432
1433			rtinit(&target->ia_ifa, RTM_DELETE,
1434			    rtinitflags(target));
1435			target->ia_flags &= ~IFA_ROUTE;
1436
1437			error = rtinit(&ia->ia_ifa, RTM_ADD,
1438			    rtinitflags(ia) | RTF_UP);
1439			if (error == 0)
1440				ia->ia_flags |= IFA_ROUTE;
1441
1442			if (!ISSET(target->ia_ifa.ifa_flags, IFA_DESTROYING))
1443				goto skip;
1444			/*
1445			 * Replace rt_ifa of routes that have the removing address
1446			 * with the new address.
1447			 */
1448			rt_replace_ifa_matched_entries(AF_INET,
1449			    in_rt_ifa_matcher, &target->ia_ifa, &ia->ia_ifa);
1450
1451		skip:
1452			ia4_release(ia, &psref);
1453			curlwp_bindx(bound);
1454
1455			return error;
1456		}
1457	}
1458	pserialize_read_exit(s);
1459
1460	/*
1461	 * noone seem to have prefix route.  remove it.
1462	 */
1463	rtinit(&target->ia_ifa, RTM_DELETE, rtinitflags(target));
1464	target->ia_flags &= ~IFA_ROUTE;
1465
1466	if (ISSET(target->ia_ifa.ifa_flags, IFA_DESTROYING)) {
1467		/* Remove routes that have the removing address as rt_ifa. */
1468		rt_delete_matched_entries(AF_INET, in_rt_ifa_matcher,
1469		    &target->ia_ifa, true);
1470	}
1471
1472	return 0;
1473}
1474
1475#undef rtinitflags
1476
1477/*
1478 * Return 1 if the address might be a local broadcast address.
1479 */
1480int
1481in_broadcast(struct in_addr in, struct ifnet *ifp)
1482{
1483	struct ifaddr *ifa;
1484	int s;
1485
1486	KASSERT(ifp != NULL);
1487
1488	if (in.s_addr == INADDR_BROADCAST ||
1489	    in_nullhost(in))
1490		return 1;
1491	if ((ifp->if_flags & IFF_BROADCAST) == 0)
1492		return 0;
1493	/*
1494	 * Look through the list of addresses for a match
1495	 * with a broadcast address.
1496	 */
1497#define ia (ifatoia(ifa))
1498	s = pserialize_read_enter();
1499	IFADDR_READER_FOREACH(ifa, ifp) {
1500		if (ifa->ifa_addr->sa_family == AF_INET &&
1501		    !in_hosteq(in, ia->ia_addr.sin_addr) &&
1502		    (in_hosteq(in, ia->ia_broadaddr.sin_addr) ||
1503		     in_hosteq(in, ia->ia_netbroadcast) ||
1504		     (hostzeroisbroadcast &&
1505		      /*
1506		       * Check for old-style (host 0) broadcast, but
1507		       * taking into account that RFC 3021 obsoletes it.
1508		       */
1509		      ia->ia_subnetmask != IN_RFC3021_MASK &&
1510		      (in.s_addr == ia->ia_subnet ||
1511		       in.s_addr == ia->ia_net)))) {
1512			pserialize_read_exit(s);
1513			return 1;
1514		}
1515	}
1516	pserialize_read_exit(s);
1517	return (0);
1518#undef ia
1519}
1520
1521/*
1522 * perform DAD when interface becomes IFF_UP.
1523 */
1524void
1525in_if_link_up(struct ifnet *ifp)
1526{
1527	struct ifaddr *ifa;
1528	struct in_ifaddr *ia;
1529	int s, bound;
1530
1531	/* Ensure it's sane to run DAD */
1532	if (ifp->if_link_state == LINK_STATE_DOWN)
1533		return;
1534	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
1535		return;
1536
1537	bound = curlwp_bind();
1538	s = pserialize_read_enter();
1539	IFADDR_READER_FOREACH(ifa, ifp) {
1540		struct psref psref;
1541
1542		if (ifa->ifa_addr->sa_family != AF_INET)
1543			continue;
1544		ifa_acquire(ifa, &psref);
1545		pserialize_read_exit(s);
1546
1547		ia = (struct in_ifaddr *)ifa;
1548
1549		/* If detached then mark as tentative */
1550		if (ia->ia4_flags & IN_IFF_DETACHED) {
1551			ia->ia4_flags &= ~IN_IFF_DETACHED;
1552			if (ip_dad_enabled() && if_do_dad(ifp) &&
1553			    ia->ia_dad_start != NULL)
1554				ia->ia4_flags |= IN_IFF_TENTATIVE;
1555			else if ((ia->ia4_flags & IN_IFF_TENTATIVE) == 0)
1556				rt_addrmsg(RTM_NEWADDR, ifa);
1557		}
1558
1559		if (ia->ia4_flags & IN_IFF_TENTATIVE) {
1560			/* Clear the duplicated flag as we're starting DAD. */
1561			ia->ia4_flags &= ~IN_IFF_DUPLICATED;
1562			ia->ia_dad_start(ifa);
1563		}
1564
1565		s = pserialize_read_enter();
1566		ifa_release(ifa, &psref);
1567	}
1568	pserialize_read_exit(s);
1569	curlwp_bindx(bound);
1570}
1571
/*
 * Interface was brought up: run the same processing as for a link-up
 * event by calling in_if_link_up().
 */
void
in_if_up(struct ifnet *ifp)
{

	/* interface may not support link state, so bring it up also */
	in_if_link_up(ifp);
}
1579
1580/*
1581 * Mark all addresses as detached.
1582 */
1583void
1584in_if_link_down(struct ifnet *ifp)
1585{
1586	struct ifaddr *ifa;
1587	struct in_ifaddr *ia;
1588	int s, bound;
1589
1590	bound = curlwp_bind();
1591	s = pserialize_read_enter();
1592	IFADDR_READER_FOREACH(ifa, ifp) {
1593		struct psref psref;
1594
1595		if (ifa->ifa_addr->sa_family != AF_INET)
1596			continue;
1597		ifa_acquire(ifa, &psref);
1598		pserialize_read_exit(s);
1599
1600		ia = (struct in_ifaddr *)ifa;
1601
1602		/* Stop DAD processing */
1603		if (ia->ia_dad_stop != NULL)
1604			ia->ia_dad_stop(ifa);
1605
1606		/*
1607		 * Mark the address as detached.
1608		 */
1609		if (!(ia->ia4_flags & IN_IFF_DETACHED)) {
1610			ia->ia4_flags |= IN_IFF_DETACHED;
1611			ia->ia4_flags &=
1612			    ~(IN_IFF_TENTATIVE | IN_IFF_DUPLICATED);
1613			rt_addrmsg(RTM_NEWADDR, ifa);
1614		}
1615
1616		s = pserialize_read_enter();
1617		ifa_release(ifa, &psref);
1618	}
1619	pserialize_read_exit(s);
1620	curlwp_bindx(bound);
1621}
1622
/*
 * Interface was brought down: run the link-down processing through
 * in_if_link_down() and, when the kernel is built with ARP (NARP > 0),
 * purge the entries of the interface's link-layer table.
 */
void
in_if_down(struct ifnet *ifp)
{

	in_if_link_down(ifp);
#if NARP > 0
	lltable_purge_entries(LLTABLE(ifp));
#endif
}
1632
1633void
1634in_if_link_state_change(struct ifnet *ifp, int link_state)
1635{
1636
1637	/*
1638	 * Treat LINK_STATE_UNKNOWN as UP.
1639	 * LINK_STATE_UNKNOWN transitions to LINK_STATE_DOWN when
1640	 * if_link_state_change() transitions to LINK_STATE_UP.
1641	 */
1642	if (link_state == LINK_STATE_DOWN)
1643		in_if_link_down(ifp);
1644	else
1645		in_if_link_up(ifp);
1646}
1647
1648/*
1649 * in_lookup_multi: look up the in_multi record for a given IP
1650 * multicast address on a given interface.  If no matching record is
1651 * found, return NULL.
1652 */
1653struct in_multi *
1654in_lookup_multi(struct in_addr addr, ifnet_t *ifp)
1655{
1656	struct in_multi *inm;
1657
1658	KASSERT(rw_lock_held(&in_multilock));
1659
1660	LIST_FOREACH(inm, &IN_MULTI_HASH(addr.s_addr, ifp), inm_list) {
1661		if (in_hosteq(inm->inm_addr, addr) && inm->inm_ifp == ifp)
1662			break;
1663	}
1664	return inm;
1665}
1666
1667/*
1668 * in_multi_group: check whether the address belongs to an IP multicast
1669 * group we are joined on this interface.  Returns true or false.
1670 */
1671bool
1672in_multi_group(struct in_addr addr, ifnet_t *ifp, int flags)
1673{
1674	bool ingroup;
1675
1676	if (__predict_true(flags & IP_IGMP_MCAST) == 0) {
1677		rw_enter(&in_multilock, RW_READER);
1678		ingroup = in_lookup_multi(addr, ifp) != NULL;
1679		rw_exit(&in_multilock);
1680	} else {
1681		/* XXX Recursive call from ip_output(). */
1682		KASSERT(rw_lock_held(&in_multilock));
1683		ingroup = in_lookup_multi(addr, ifp) != NULL;
1684	}
1685	return ingroup;
1686}
1687
1688/*
1689 * Add an address to the list of IP multicast addresses for a given interface.
1690 */
1691struct in_multi *
1692in_addmulti(struct in_addr *ap, ifnet_t *ifp)
1693{
1694	struct sockaddr_in sin;
1695	struct in_multi *inm;
1696
1697	/*
1698	 * See if address already in list.
1699	 */
1700	rw_enter(&in_multilock, RW_WRITER);
1701	inm = in_lookup_multi(*ap, ifp);
1702	if (inm != NULL) {
1703		/*
1704		 * Found it; just increment the reference count.
1705		 */
1706		inm->inm_refcount++;
1707		rw_exit(&in_multilock);
1708		return inm;
1709	}
1710
1711	/*
1712	 * New address; allocate a new multicast record.
1713	 */
1714	inm = pool_get(&inmulti_pool, PR_NOWAIT);
1715	if (inm == NULL) {
1716		rw_exit(&in_multilock);
1717		return NULL;
1718	}
1719	inm->inm_addr = *ap;
1720	inm->inm_ifp = ifp;
1721	inm->inm_refcount = 1;
1722
1723	/*
1724	 * Ask the network driver to update its multicast reception
1725	 * filter appropriately for the new address.
1726	 */
1727	sockaddr_in_init(&sin, ap, 0);
1728	if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) {
1729		rw_exit(&in_multilock);
1730		pool_put(&inmulti_pool, inm);
1731		return NULL;
1732	}
1733
1734	/*
1735	 * Let IGMP know that we have joined a new IP multicast group.
1736	 */
1737	if (igmp_joingroup(inm) != 0) {
1738		rw_exit(&in_multilock);
1739		pool_put(&inmulti_pool, inm);
1740		return NULL;
1741	}
1742	LIST_INSERT_HEAD(
1743	    &IN_MULTI_HASH(inm->inm_addr.s_addr, ifp),
1744	    inm, inm_list);
1745	in_multientries++;
1746	rw_exit(&in_multilock);
1747
1748	return inm;
1749}
1750
1751/*
1752 * Delete a multicast address record.
1753 */
1754void
1755in_delmulti(struct in_multi *inm)
1756{
1757	struct sockaddr_in sin;
1758
1759	rw_enter(&in_multilock, RW_WRITER);
1760	if (--inm->inm_refcount > 0) {
1761		rw_exit(&in_multilock);
1762		return;
1763	}
1764
1765	/*
1766	 * No remaining claims to this record; let IGMP know that
1767	 * we are leaving the multicast group.
1768	 */
1769	igmp_leavegroup(inm);
1770
1771	/*
1772	 * Notify the network driver to update its multicast reception
1773	 * filter.
1774	 */
1775	sockaddr_in_init(&sin, &inm->inm_addr, 0);
1776	if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin));
1777
1778	/*
1779	 * Unlink from list.
1780	 */
1781	LIST_REMOVE(inm, inm_list);
1782	in_multientries--;
1783	rw_exit(&in_multilock);
1784
1785	pool_put(&inmulti_pool, inm);
1786}
1787
1788/*
1789 * in_next_multi: step through all of the in_multi records, one at a time.
1790 * The current position is remembered in "step", which the caller must
1791 * provide.  in_first_multi(), below, must be called to initialize "step"
1792 * and get the first record.  Both macros return a NULL "inm" when there
1793 * are no remaining records.
1794 */
1795struct in_multi *
1796in_next_multi(struct in_multistep *step)
1797{
1798	struct in_multi *inm;
1799
1800	KASSERT(rw_lock_held(&in_multilock));
1801
1802	while (step->i_inm == NULL && step->i_n < IN_MULTI_HASH_SIZE) {
1803		step->i_inm = LIST_FIRST(&in_multihashtbl[++step->i_n]);
1804	}
1805	if ((inm = step->i_inm) != NULL) {
1806		step->i_inm = LIST_NEXT(inm, inm_list);
1807	}
1808	return inm;
1809}
1810
/*
 * in_first_multi: point "step" at the head of hash bucket 0 and return the
 * first record produced by in_next_multi(), or NULL when there is none.
 * in_multilock must be held (asserted).
 */
struct in_multi *
in_first_multi(struct in_multistep *step)
{
	KASSERT(rw_lock_held(&in_multilock));

	step->i_n = 0;
	step->i_inm = LIST_FIRST(&in_multihashtbl[0]);
	return in_next_multi(step);
}
1820
/*
 * Acquire in_multilock; op is handed unchanged to rw_enter().
 */
void
in_multi_lock(int op)
{
	rw_enter(&in_multilock, op);
}
1826
/*
 * Release in_multilock.
 */
void
in_multi_unlock(void)
{
	rw_exit(&in_multilock);
}
1832
/*
 * Return the result of rw_lock_held() on in_multilock.
 */
int
in_multi_lock_held(void)
{
	return rw_lock_held(&in_multilock);
}
1838
/*
 * Select the local IPv4 address to use as source towards *sin.
 *
 * Candidates are tried in this order:
 *  1. the address attached to the route found through ro (not looked up
 *     with SO_DONTROUTE, and ignored when the route's interface is
 *     loopback);
 *  2. the result of ifa_ifwithladdr_psref() on the destination;
 *  3. the first address whose interface is not loopback.
 * For a multicast destination with imo_multicast_if_index set in mopts,
 * the address of that interface replaces the choice made so far.  Last,
 * the ifa_getifa hook of the chosen address, when set, may substitute
 * another address.
 *
 * The returned address is referenced through psref.  On failure NULL is
 * returned and *errorp is set to EADDRNOTAVAIL.  The calling LWP must have
 * LP_BOUND set (asserted).
 */
struct in_ifaddr *
in_selectsrc(struct sockaddr_in *sin, struct route *ro,
    int soopts, struct ip_moptions *mopts, int *errorp, struct psref *psref)
{
	struct rtentry *rt = NULL;
	struct in_ifaddr *ia = NULL;

	KASSERT(ISSET(curlwp->l_pflag, LP_BOUND));
	/*
	 * If route is known or can be allocated now, take the
	 * source address from the interface.  Otherwise, punt.
	 */
	if ((soopts & SO_DONTROUTE) != 0)
		rtcache_free(ro);
	else {
		union {
			struct sockaddr		dst;
			struct sockaddr_in	dst4;
		} u;

		sockaddr_in_init(&u.dst4, &sin->sin_addr, 0);
		rt = rtcache_lookup(ro, &u.dst);
	}
	/*
	 * If we found a route, use the address
	 * corresponding to the outgoing interface
	 * unless it is the loopback (in case a route
	 * to our address on another net goes to loopback).
	 *
	 * XXX Is this still true?  Do we care?
	 */
	if (rt != NULL && (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
		int s;
		struct ifaddr *ifa;
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		/* Only use rt_ifa if it is still on the interface's list. */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);

		ia = ifatoia(ifa);
	}
	if (ia == NULL) {
		in_port_t fport = sin->sin_port;
		struct ifaddr *ifa;
		int s;

		/* Look the destination up with the port cleared. */
		sin->sin_port = 0;
		ifa = ifa_ifwithladdr_psref(sintosa(sin), psref);
		sin->sin_port = fport;
		if (ifa == NULL) {
			/* Find 1st non-loopback AF_INET address */
			s = pserialize_read_enter();
			IN_ADDRLIST_READER_FOREACH(ia) {
				if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK))
					break;
			}
			if (ia != NULL)
				ia4_acquire(ia, psref);
			pserialize_read_exit(s);
		} else {
			/* ia is already referenced by psref */
			ia = ifatoia(ifa);
		}
		if (ia == NULL) {
			*errorp = EADDRNOTAVAIL;
			goto out;
		}
	}
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
		struct ip_moptions *imo;

		imo = mopts;
		if (imo->imo_multicast_if_index != 0) {
			struct ifnet *ifp;
			int s;

			/* Drop the earlier choice before reusing psref. */
			if (ia != NULL)
				ia4_release(ia, psref);
			s = pserialize_read_enter();
			ifp = if_byindex(imo->imo_multicast_if_index);
			if (ifp != NULL) {
				/* XXX */
				ia = in_get_ia_from_ifp_psref(ifp, psref);
			} else
				ia = NULL;
			if (ia == NULL || ia->ia4_flags & IN_IFF_NOTREADY) {
				pserialize_read_exit(s);
				if (ia != NULL)
					ia4_release(ia, psref);
				*errorp = EADDRNOTAVAIL;
				ia = NULL;
				goto out;
			}
			pserialize_read_exit(s);
		}
	}
	/* Let the address's own hook pick a replacement, if it has one. */
	if (ia->ia_ifa.ifa_getifa != NULL) {
		ia = ifatoia((*ia->ia_ifa.ifa_getifa)(&ia->ia_ifa,
		                                      sintosa(sin)));
		if (ia == NULL) {
			*errorp = EADDRNOTAVAIL;
			goto out;
		}
		/* FIXME NOMPSAFE */
		ia4_acquire(ia, psref);
	}
#ifdef GETIFA_DEBUG
	else
		printf("%s: missing ifa_getifa\n", __func__);
#endif
out:
	rtcache_unref(rt, ro);
	return ia;
}
1966
1967int
1968in_tunnel_validate(const struct ip *ip, struct in_addr src, struct in_addr dst)
1969{
1970	struct in_ifaddr *ia4;
1971	int s;
1972
1973	/* check for address match */
1974	if (src.s_addr != ip->ip_dst.s_addr ||
1975	    dst.s_addr != ip->ip_src.s_addr)
1976		return 0;
1977
1978	/* martian filters on outer source - NOT done in ip_input! */
1979	if (IN_MULTICAST(ip->ip_src.s_addr))
1980		return 0;
1981	switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
1982	case 0:
1983	case 127:
1984	case 255:
1985		return 0;
1986	}
1987	/* reject packets with broadcast on source */
1988	s = pserialize_read_enter();
1989	IN_ADDRLIST_READER_FOREACH(ia4) {
1990		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
1991			continue;
1992		if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
1993			pserialize_read_exit(s);
1994			return 0;
1995		}
1996	}
1997	pserialize_read_exit(s);
1998
1999	/* NOTE: packet may dropped by uRPF */
2000
2001	/* return valid bytes length */
2002	return sizeof(src) + sizeof(dst);
2003}
2004
2005#if NARP > 0
2006
/* Default number of hash buckets of an IPv4 link-layer table. */
#define	IN_LLTBL_DEFAULT_HSIZE	32
/*
 * Fold key k by repeated 8-bit shifts and XORs, then mask it with h - 1.
 * Both arguments are parenthesized so that expressions can be passed;
 * k is evaluated several times and must be free of side effects.
 */
#define	IN_LLTBL_HASH(k, h) \
	((((((((k) >> 8) ^ (k)) >> 8) ^ (k)) >> 8) ^ (k)) & ((h) - 1))
2010
2011/*
2012 * Do actual deallocation of @lle.
2013 * Called by LLE_FREE_LOCKED when number of references
2014 * drops to zero.
2015 */
2016static void
2017in_lltable_destroy_lle(struct llentry *lle)
2018{
2019
2020	KASSERTMSG(lle->la_numheld == 0, "la_numheld=%d", lle->la_numheld);
2021
2022	LLE_WUNLOCK(lle);
2023	LLE_LOCK_DESTROY(lle);
2024	llentry_pool_put(lle);
2025}
2026
2027static struct llentry *
2028in_lltable_new(struct in_addr addr4, u_int flags)
2029{
2030	struct llentry *lle;
2031
2032	lle = llentry_pool_get(PR_NOWAIT);
2033	if (lle == NULL)		/* NB: caller generates msg */
2034		return NULL;
2035
2036	lle->r_l3addr.addr4 = addr4;
2037	lle->lle_refcnt = 1;
2038	lle->lle_free = in_lltable_destroy_lle;
2039	LLE_LOCK_INIT(lle);
2040	callout_init(&lle->la_timer, CALLOUT_MPSAFE);
2041
2042	return lle;
2043}
2044
/*
 * True when struct in_addr d, after ntohl(), agrees with the address of
 * sockaddr_in *a in every bit selected by the address of sockaddr_in *m.
 * Note that only d is passed through ntohl(); a and m are used as stored.
 */
#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)	(			\
	    (((ntohl((d).s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
2047
2048static int
2049in_lltable_match_prefix(const struct sockaddr *prefix,
2050    const struct sockaddr *mask, u_int flags, struct llentry *lle)
2051{
2052	const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
2053	const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
2054	struct in_addr lle_addr;
2055
2056	lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr);
2057
2058	/*
2059	 * (flags & LLE_STATIC) means deleting all entries
2060	 * including static ARP entries.
2061	 */
2062	if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, pfx, msk) &&
2063	    ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)))
2064		return (1);
2065
2066	return (0);
2067}
2068
/*
 * Free lle, which the caller must hold write-locked (asserted), and add
 * the count returned by llentry_free() to the ARP "deferred dropped"
 * statistic.
 */
static void
in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
{
	size_t pkts_dropped;

	LLE_WLOCK_ASSERT(lle);
	KASSERT(llt != NULL);

	/* Free first, then account, so the stat update sees a plain value. */
	pkts_dropped = llentry_free(lle);
	arp_stat_add(ARP_STAT_DFRDROPPED, (uint64_t)pkts_dropped);
}
2080
2081static int
2082in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr,
2083    const struct rtentry *rt)
2084{
2085	int error = EINVAL;
2086
2087	if (rt == NULL)
2088		return error;
2089
2090	/*
2091	 * If the gateway for an existing host route matches the target L3
2092	 * address, which is a special route inserted by some implementation
2093	 * such as MANET, and the interface is of the correct type, then
2094	 * allow for ARP to proceed.
2095	 */
2096	if (rt->rt_flags & RTF_GATEWAY) {
2097		if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
2098		    rt->rt_ifp->if_type != IFT_ETHER ||
2099		    (rt->rt_ifp->if_flags & IFF_NOARP) != 0 ||
2100		    memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
2101		    sizeof(in_addr_t)) != 0) {
2102			goto error;
2103		}
2104	}
2105
2106	/*
2107	 * Make sure that at least the destination address is covered
2108	 * by the route. This is for handling the case where 2 or more
2109	 * interfaces have the same prefix. An incoming packet arrives
2110	 * on one interface and the corresponding outgoing packet leaves
2111	 * another interface.
2112	 */
2113	if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
2114		const char *sa, *mask, *addr, *lim;
2115		int len;
2116
2117		mask = (const char *)rt_mask(rt);
2118		/*
2119		 * Just being extra cautious to avoid some custom
2120		 * code getting into trouble.
2121		 */
2122		if (mask == NULL)
2123			goto error;
2124
2125		sa = (const char *)rt_getkey(rt);
2126		addr = (const char *)l3addr;
2127		len = ((const struct sockaddr_in *)l3addr)->sin_len;
2128		lim = addr + len;
2129
2130		for ( ; addr < lim; sa++, mask++, addr++) {
2131			if ((*sa ^ *addr) & *mask) {
2132#ifdef DIAGNOSTIC
2133				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
2134				    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
2135#endif
2136				goto error;
2137			}
2138		}
2139	}
2140
2141	error = 0;
2142error:
2143	return error;
2144}
2145
/*
 * Hash bucket index for IPv4 address dst in a table of hsize buckets,
 * computed by IN_LLTBL_HASH() on dst.s_addr.  The macro masks with
 * hsize - 1, so hsize is presumably a power of two -- TODO confirm.
 */
static inline uint32_t
in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize)
{

	return (IN_LLTBL_HASH(dst.s_addr, hsize));
}
2152
/*
 * Hash bucket index of lle, derived from its IPv4 address through
 * in_lltable_hash_dst().
 */
static uint32_t
in_lltable_hash(const struct llentry *lle, uint32_t hsize)
{

	return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize));
}
2159
2160static void
2161in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
2162{
2163	struct sockaddr_in *sin;
2164
2165	sin = (struct sockaddr_in *)sa;
2166	memset(sin, 0, sizeof(*sin));
2167	sin->sin_family = AF_INET;
2168	sin->sin_len = sizeof(*sin);
2169	sin->sin_addr = lle->r_l3addr.addr4;
2170}
2171
2172static inline struct llentry *
2173in_lltable_find_dst(struct lltable *llt, struct in_addr dst)
2174{
2175	struct llentry *lle;
2176	struct llentries *lleh;
2177	u_int hashidx;
2178
2179	hashidx = in_lltable_hash_dst(dst, llt->llt_hsize);
2180	lleh = &llt->lle_head[hashidx];
2181	LIST_FOREACH(lle, lleh, lle_next) {
2182		if (lle->la_flags & LLE_DELETED)
2183			continue;
2184		if (lle->r_l3addr.addr4.s_addr == dst.s_addr)
2185			break;
2186	}
2187
2188	return (lle);
2189}
2190
2191static int
2192in_lltable_delete(struct lltable *llt, u_int flags,
2193    const struct sockaddr *l3addr)
2194{
2195	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
2196	struct ifnet *ifp __diagused = llt->llt_ifp;
2197	struct llentry *lle;
2198
2199	IF_AFDATA_WLOCK_ASSERT(ifp);
2200	KASSERTMSG(l3addr->sa_family == AF_INET,
2201	    "sin_family %d", l3addr->sa_family);
2202
2203	lle = in_lltable_find_dst(llt, sin->sin_addr);
2204	if (lle == NULL) {
2205#ifdef LLTABLE_DEBUG
2206		char buf[64];
2207		sockaddr_format(l3addr, buf, sizeof(buf));
2208		log(LOG_INFO, "%s: cache for %s is not found\n",
2209		    __func__, buf);
2210#endif
2211		return (ENOENT);
2212	}
2213
2214	LLE_WLOCK(lle);
2215#ifdef LLTABLE_DEBUG
2216	{
2217		char buf[64];
2218		sockaddr_format(l3addr, buf, sizeof(buf));
2219		log(LOG_INFO, "%s: cache for %s (%p) is deleted\n",
2220		    __func__, buf, lle);
2221	}
2222#endif
2223	llentry_free(lle);
2224
2225	return (0);
2226}
2227
2228static struct llentry *
2229in_lltable_create(struct lltable *llt, u_int flags, const struct sockaddr *l3addr,
2230    const struct rtentry *rt)
2231{
2232	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
2233	struct ifnet *ifp = llt->llt_ifp;
2234	struct llentry *lle;
2235
2236	IF_AFDATA_WLOCK_ASSERT(ifp);
2237	KASSERTMSG(l3addr->sa_family == AF_INET,
2238	    "sin_family %d", l3addr->sa_family);
2239
2240	lle = in_lltable_find_dst(llt, sin->sin_addr);
2241
2242	if (lle != NULL) {
2243		LLE_WLOCK(lle);
2244		return (lle);
2245	}
2246
2247	/* no existing record, we need to create new one */
2248
2249	/*
2250	 * A route that covers the given address must have
2251	 * been installed 1st because we are doing a resolution,
2252	 * verify this.
2253	 */
2254	if (!(flags & LLE_IFADDR) &&
2255	    in_lltable_rtcheck(ifp, flags, l3addr, rt) != 0)
2256		return (NULL);
2257
2258	lle = in_lltable_new(sin->sin_addr, flags);
2259	if (lle == NULL) {
2260		log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
2261		return (NULL);
2262	}
2263	lle->la_flags = flags;
2264	if ((flags & LLE_IFADDR) == LLE_IFADDR) {
2265		memcpy(&lle->ll_addr, CLLADDR(ifp->if_sadl), ifp->if_addrlen);
2266		lle->la_flags |= (LLE_VALID | LLE_STATIC);
2267	}
2268
2269	lltable_link_entry(llt, lle);
2270	LLE_WLOCK(lle);
2271
2272	return (lle);
2273}
2274
2275/*
2276 * Return NULL if not found or marked for deletion.
2277 * If found return lle read locked.
2278 */
2279static struct llentry *
2280in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
2281{
2282	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
2283	struct llentry *lle;
2284
2285	IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
2286	KASSERTMSG(l3addr->sa_family == AF_INET,
2287	    "sin_family %d", l3addr->sa_family);
2288
2289	lle = in_lltable_find_dst(llt, sin->sin_addr);
2290
2291	if (lle == NULL)
2292		return NULL;
2293
2294	if (flags & LLE_EXCLUSIVE)
2295		LLE_WLOCK(lle);
2296	else
2297		LLE_RLOCK(lle);
2298
2299	return lle;
2300}
2301
2302static int
2303in_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
2304    struct rt_walkarg *w)
2305{
2306	struct sockaddr_in sin;
2307
2308	LLTABLE_LOCK_ASSERT();
2309
2310	/* skip deleted entries */
2311	if (lle->la_flags & LLE_DELETED)
2312		return 0;
2313
2314	sockaddr_in_init(&sin, &lle->r_l3addr.addr4, 0);
2315
2316	return lltable_dump_entry(llt, lle, w, sintosa(&sin));
2317}
2318
2319#endif /* NARP > 0 */
2320
2321static int
2322in_multicast_sysctl(SYSCTLFN_ARGS)
2323{
2324	struct ifnet *ifp;
2325	struct ifaddr *ifa;
2326	struct in_ifaddr *ifa4;
2327	struct in_multi *inm;
2328	uint32_t tmp;
2329	int error;
2330	size_t written;
2331	struct psref psref;
2332	int bound;
2333
2334	if (namelen != 1)
2335		return EINVAL;
2336
2337	bound = curlwp_bind();
2338	ifp = if_get_byindex(name[0], &psref);
2339	if (ifp == NULL) {
2340		curlwp_bindx(bound);
2341		return ENODEV;
2342	}
2343
2344	if (oldp == NULL) {
2345		*oldlenp = 0;
2346		IFADDR_FOREACH(ifa, ifp) {
2347			if (ifa->ifa_addr->sa_family != AF_INET)
2348				continue;
2349			ifa4 = (void *)ifa;
2350			LIST_FOREACH(inm, &ifa4->ia_multiaddrs, inm_list) {
2351				*oldlenp += 2 * sizeof(struct in_addr) +
2352				    sizeof(uint32_t);
2353			}
2354		}
2355		if_put(ifp, &psref);
2356		curlwp_bindx(bound);
2357		return 0;
2358	}
2359
2360	error = 0;
2361	written = 0;
2362	IFADDR_FOREACH(ifa, ifp) {
2363		if (ifa->ifa_addr->sa_family != AF_INET)
2364			continue;
2365		ifa4 = (void *)ifa;
2366		LIST_FOREACH(inm, &ifa4->ia_multiaddrs, inm_list) {
2367			if (written + 2 * sizeof(struct in_addr) +
2368			    sizeof(uint32_t) > *oldlenp)
2369				goto done;
2370			error = sysctl_copyout(l, &ifa4->ia_addr.sin_addr,
2371			    oldp, sizeof(struct in_addr));
2372			if (error)
2373				goto done;
2374			oldp = (char *)oldp + sizeof(struct in_addr);
2375			written += sizeof(struct in_addr);
2376			error = sysctl_copyout(l, &inm->inm_addr,
2377			    oldp, sizeof(struct in_addr));
2378			if (error)
2379				goto done;
2380			oldp = (char *)oldp + sizeof(struct in_addr);
2381			written += sizeof(struct in_addr);
2382			tmp = inm->inm_refcount;
2383			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
2384			if (error)
2385				goto done;
2386			oldp = (char *)oldp + sizeof(tmp);
2387			written += sizeof(tmp);
2388		}
2389	}
2390done:
2391	if_put(ifp, &psref);
2392	curlwp_bindx(bound);
2393	*oldlenp = written;
2394	return error;
2395}
2396
2397static void
2398in_sysctl_init(struct sysctllog **clog)
2399{
2400	sysctl_createv(clog, 0, NULL, NULL,
2401		       CTLFLAG_PERMANENT,
2402		       CTLTYPE_NODE, "inet",
2403		       SYSCTL_DESCR("PF_INET related settings"),
2404		       NULL, 0, NULL, 0,
2405		       CTL_NET, PF_INET, CTL_EOL);
2406	sysctl_createv(clog, 0, NULL, NULL,
2407		       CTLFLAG_PERMANENT,
2408		       CTLTYPE_NODE, "multicast",
2409		       SYSCTL_DESCR("Multicast information"),
2410		       in_multicast_sysctl, 0, NULL, 0,
2411		       CTL_NET, PF_INET, CTL_CREATE, CTL_EOL);
2412	sysctl_createv(clog, 0, NULL, NULL,
2413		       CTLFLAG_PERMANENT,
2414		       CTLTYPE_NODE, "ip",
2415		       SYSCTL_DESCR("IPv4 related settings"),
2416		       NULL, 0, NULL, 0,
2417		       CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);
2418
2419	sysctl_createv(clog, 0, NULL, NULL,
2420		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2421		       CTLTYPE_INT, "subnetsarelocal",
2422		       SYSCTL_DESCR("Whether logical subnets are considered "
2423				    "local"),
2424		       NULL, 0, &subnetsarelocal, 0,
2425		       CTL_NET, PF_INET, IPPROTO_IP,
2426		       IPCTL_SUBNETSARELOCAL, CTL_EOL);
2427	sysctl_createv(clog, 0, NULL, NULL,
2428		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2429		       CTLTYPE_INT, "hostzerobroadcast",
2430		       SYSCTL_DESCR("All zeroes address is broadcast address"),
2431		       NULL, 0, &hostzeroisbroadcast, 0,
2432		       CTL_NET, PF_INET, IPPROTO_IP,
2433		       IPCTL_HOSTZEROBROADCAST, CTL_EOL);
2434}
2435
2436#if NARP > 0
2437
2438static struct lltable *
2439in_lltattach(struct ifnet *ifp, struct in_ifinfo *ii)
2440{
2441	struct lltable *llt;
2442
2443	llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE);
2444	llt->llt_af = AF_INET;
2445	llt->llt_ifp = ifp;
2446
2447	llt->llt_lookup = in_lltable_lookup;
2448	llt->llt_create = in_lltable_create;
2449	llt->llt_delete = in_lltable_delete;
2450	llt->llt_dump_entry = in_lltable_dump_entry;
2451	llt->llt_hash = in_lltable_hash;
2452	llt->llt_fill_sa_entry = in_lltable_fill_sa_entry;
2453	llt->llt_free_entry = in_lltable_free_entry;
2454	llt->llt_match_prefix = in_lltable_match_prefix;
2455#ifdef MBUFTRACE
2456	struct mowner *mowner = &ii->ii_mowner;
2457	mowner_init_owner(mowner, ifp->if_xname, "arp");
2458	MOWNER_ATTACH(mowner);
2459	llt->llt_mowner = mowner;
2460#endif
2461	lltable_link(llt);
2462
2463	return (llt);
2464}
2465
2466#endif /* NARP > 0 */
2467
2468void *
2469in_domifattach(struct ifnet *ifp)
2470{
2471	struct in_ifinfo *ii;
2472
2473	ii = kmem_zalloc(sizeof(struct in_ifinfo), KM_SLEEP);
2474
2475#if NARP > 0
2476	ii->ii_llt = in_lltattach(ifp, ii);
2477#endif
2478
2479#ifdef IPSELSRC
2480	ii->ii_selsrc = in_selsrc_domifattach(ifp);
2481	KASSERT(ii->ii_selsrc != NULL);
2482#endif
2483
2484	return ii;
2485}
2486
2487void
2488in_domifdetach(struct ifnet *ifp, void *aux)
2489{
2490	struct in_ifinfo *ii = aux;
2491
2492#ifdef IPSELSRC
2493	in_selsrc_domifdetach(ifp, ii->ii_selsrc);
2494#endif
2495#if NARP > 0
2496	lltable_free(ii->ii_llt);
2497#ifdef MBUFTRACE
2498	MOWNER_DETACH(&ii->ii_mowner);
2499#endif
2500#endif
2501	kmem_free(ii, sizeof(struct in_ifinfo));
2502}
2503