1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1980, 1986, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)if.c	8.3 (Berkeley) 1/4/94
61 * $FreeBSD: src/sys/net/if.c,v 1.85.2.9 2001/07/24 19:10:17 brooks Exp $
62 */
63/*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
65 * support for mandatory and extensible security protections.  This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70#include <kern/locks.h>
71
72#include <sys/param.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/systm.h>
76#include <sys/proc.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/protosw.h>
80#include <sys/kernel.h>
81#include <sys/sockio.h>
82#include <sys/syslog.h>
83#include <sys/sysctl.h>
84#include <sys/mcache.h>
85#include <sys/kauth.h>
86#include <sys/priv.h>
87#include <kern/zalloc.h>
88
89#include <machine/endian.h>
90
91#include <pexpert/pexpert.h>
92
93#include <net/if.h>
94#include <net/if_arp.h>
95#include <net/if_dl.h>
96#include <net/if_types.h>
97#include <net/if_var.h>
98#include <net/if_ppp.h>
99#include <net/ethernet.h>
100
101#include <net/radix.h>
102#include <net/route.h>
103#include <net/dlil.h>
104#include <sys/domain.h>
105#include <libkern/OSAtomic.h>
106
107#if INET || INET6
108/*XXX*/
109#include <netinet/in.h>
110#include <netinet/in_var.h>
111#include <netinet/ip_var.h>
112#include <netinet/ip6.h>
113#include <netinet/ip_var.h>
114#include <netinet/tcp.h>
115#include <netinet/tcp_var.h>
116#include <netinet/udp.h>
117#include <netinet/udp_var.h>
118#if INET6
119#include <netinet6/in6_var.h>
120#include <netinet6/in6_ifattach.h>
121#include <netinet6/ip6_var.h>
122#include <netinet6/nd6.h>
123#endif /* INET6 */
124#endif /* INET || INET6 */
125
126#if CONFIG_MACF_NET
127#include <security/mac_framework.h>
128#endif
129
130#if PF_ALTQ
131#include <net/altq/if_altq.h>
132#endif /* !PF_ALTQ */
133
134/*
135 * System initialization
136 */
137
138extern char *proc_name_address(void *);
139
140/* Lock group and attribute for ifaddr lock */
141lck_attr_t	*ifa_mtx_attr;
142lck_grp_t	*ifa_mtx_grp;
143static lck_grp_attr_t	*ifa_mtx_grp_attr;
144
145static int ifioctl_ifreq(struct socket *, u_long, struct ifreq *,
146    struct proc *);
147static int ifioctl_ifconf(u_long, caddr_t);
148static int ifioctl_ifclone(u_long, caddr_t);
149static int ifioctl_ifdesc(struct ifnet *, u_long, caddr_t, struct proc *);
150static int ifioctl_linkparams(struct ifnet *, u_long, caddr_t, struct proc *);
151static int ifioctl_qstats(struct ifnet *, u_long, caddr_t);
152static int ifioctl_throttle(struct ifnet *, u_long, caddr_t, struct proc *);
153static int ifconf(u_long cmd, user_addr_t ifrp, int * ret_space);
154__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
155void if_rtproto_del(struct ifnet *ifp, int protocol);
156
157static int if_addmulti_common(struct ifnet *, const struct sockaddr *,
158    struct ifmultiaddr **, int);
159static int if_delmulti_common(struct ifmultiaddr *, struct ifnet *,
160    const struct sockaddr *, int);
161
162static int if_rtmtu(struct radix_node *, void *);
163static void if_rtmtu_update(struct ifnet *);
164
165static int if_clone_list(int, int *, user_addr_t);
166
167MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
168
169struct	ifnethead ifnet_head = TAILQ_HEAD_INITIALIZER(ifnet_head);
170
171static int	if_cloners_count;
172LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
173
174static struct ifaddr *ifa_ifwithnet_common(const struct sockaddr *,
175    unsigned int);
176static void if_attach_ifa_common(struct ifnet *, struct ifaddr *, int);
177static void if_detach_ifa_common(struct ifnet *, struct ifaddr *, int);
178
179static void if_attach_ifma(struct ifnet *, struct ifmultiaddr *, int);
180static int if_detach_ifma(struct ifnet *, struct ifmultiaddr *, int);
181
182static struct ifmultiaddr *ifma_alloc(int);
183static void ifma_free(struct ifmultiaddr *);
184static void ifma_trace(struct ifmultiaddr *, int);
185
186#if DEBUG
187static unsigned int ifma_debug = 1;	/* debugging (enabled) */
188#else
189static unsigned int ifma_debug;		/* debugging (disabled) */
190#endif /* !DEBUG */
191static unsigned int ifma_size;		/* size of zone element */
192static struct zone *ifma_zone;		/* zone for ifmultiaddr */
193
194#define	IFMA_TRACE_HIST_SIZE	32	/* size of trace history */
195
196/* For gdb */
197__private_extern__ unsigned int ifma_trace_hist_size = IFMA_TRACE_HIST_SIZE;
198
199struct ifmultiaddr_dbg {
200	struct ifmultiaddr	ifma;			/* ifmultiaddr */
201	u_int16_t		ifma_refhold_cnt;	/* # of ref */
202	u_int16_t		ifma_refrele_cnt;	/* # of rele */
203	/*
204	 * Circular lists of IFA_ADDREF and IFA_REMREF callers.
205	 */
206	ctrace_t		ifma_refhold[IFMA_TRACE_HIST_SIZE];
207	ctrace_t		ifma_refrele[IFMA_TRACE_HIST_SIZE];
208	/*
209	 * Trash list linkage
210	 */
211	TAILQ_ENTRY(ifmultiaddr_dbg) ifma_trash_link;
212};
213
214/* List of trash ifmultiaddr entries protected by ifma_trash_lock */
215static TAILQ_HEAD(, ifmultiaddr_dbg) ifma_trash_head;
216static decl_lck_mtx_data(, ifma_trash_lock);
217
218#define	IFMA_ZONE_MAX		64		/* maximum elements in zone */
219#define	IFMA_ZONE_NAME		"ifmultiaddr"	/* zone name */
220
221#if INET6
222/*
223 * XXX: declare here to avoid to include many inet6 related files..
224 * should be more generalized?
225 */
226extern void	nd6_setmtu(struct ifnet *);
227extern lck_mtx_t *nd6_mutex;
228#endif
229
230SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Link layers");
231SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
232	"Generic link-management");
233
234SYSCTL_DECL(_net_link_generic_system);
235
236static uint32_t if_verbose = 0;
237SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_verbose,
238    CTLFLAG_RW | CTLFLAG_LOCKED, &if_verbose, 0, "");
239
240void
241ifa_init(void)
242{
243	/* Setup lock group and attribute for ifaddr */
244	ifa_mtx_grp_attr = lck_grp_attr_alloc_init();
245	ifa_mtx_grp = lck_grp_alloc_init("ifaddr", ifa_mtx_grp_attr);
246	ifa_mtx_attr = lck_attr_alloc_init();
247
248	PE_parse_boot_argn("ifa_debug", &ifma_debug, sizeof (ifma_debug));
249
250	ifma_size = (ifma_debug == 0) ? sizeof (struct ifmultiaddr) :
251	    sizeof (struct ifmultiaddr_dbg);
252
253	ifma_zone = zinit(ifma_size, IFMA_ZONE_MAX * ifma_size, 0,
254	    IFMA_ZONE_NAME);
255	if (ifma_zone == NULL) {
256		panic("%s: failed allocating %s", __func__, IFMA_ZONE_NAME);
257		/* NOTREACHED */
258	}
259	zone_change(ifma_zone, Z_EXPAND, TRUE);
260	zone_change(ifma_zone, Z_CALLERACCT, FALSE);
261
262	lck_mtx_init(&ifma_trash_lock, ifa_mtx_grp, ifa_mtx_attr);
263	TAILQ_INIT(&ifma_trash_head);
264}
265
266/*
267 * Network interface utility routines.
268 *
269 * Routines with ifa_ifwith* names take sockaddr *'s as
270 * parameters.
271 */
272
273int if_index;
274struct ifaddr **ifnet_addrs;
275struct ifnet **ifindex2ifnet;
276
277__private_extern__ void
278if_attach_ifa(struct ifnet *ifp, struct ifaddr *ifa)
279{
280	if_attach_ifa_common(ifp, ifa, 0);
281}
282
283__private_extern__ void
284if_attach_link_ifa(struct ifnet *ifp, struct ifaddr *ifa)
285{
286	if_attach_ifa_common(ifp, ifa, 1);
287}
288
289static void
290if_attach_ifa_common(struct ifnet *ifp, struct ifaddr *ifa, int link)
291{
292	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
293	IFA_LOCK_ASSERT_HELD(ifa);
294
295	if (ifa->ifa_ifp != ifp) {
296		panic("%s: Mismatch ifa_ifp=%p != ifp=%p", __func__,
297		    ifa->ifa_ifp, ifp);
298		/* NOTREACHED */
299	} else if (ifa->ifa_debug & IFD_ATTACHED) {
300		panic("%s: Attempt to attach an already attached ifa=%p",
301		    __func__, ifa);
302		/* NOTREACHED */
303	} else if (link && !(ifa->ifa_debug & IFD_LINK)) {
304		panic("%s: Unexpected non-link address ifa=%p", __func__, ifa);
305		/* NOTREACHED */
306	} else if (!link && (ifa->ifa_debug & IFD_LINK)) {
307		panic("%s: Unexpected link address ifa=%p", __func__, ifa);
308		/* NOTREACHED */
309	}
310	IFA_ADDREF_LOCKED(ifa);
311	ifa->ifa_debug |= IFD_ATTACHED;
312	if (link)
313		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
314	else
315		TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
316
317	if (ifa->ifa_attached != NULL)
318		(*ifa->ifa_attached)(ifa);
319}
320
321__private_extern__ void
322if_detach_ifa(struct ifnet *ifp, struct ifaddr *ifa)
323{
324	if_detach_ifa_common(ifp, ifa, 0);
325}
326
327__private_extern__ void
328if_detach_link_ifa(struct ifnet *ifp, struct ifaddr *ifa)
329{
330	if_detach_ifa_common(ifp, ifa, 1);
331}
332
333static void
334if_detach_ifa_common(struct ifnet *ifp, struct ifaddr *ifa, int link)
335{
336	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
337	IFA_LOCK_ASSERT_HELD(ifa);
338
339	if (link && !(ifa->ifa_debug & IFD_LINK)) {
340		panic("%s: Unexpected non-link address ifa=%p", __func__, ifa);
341		/* NOTREACHED */
342	} else if (link && ifa != TAILQ_FIRST(&ifp->if_addrhead)) {
343		panic("%s: Link address ifa=%p not first", __func__, ifa);
344		/* NOTREACHED */
345	} else if (!link && (ifa->ifa_debug & IFD_LINK)) {
346		panic("%s: Unexpected link address ifa=%p", __func__, ifa);
347		/* NOTREACHED */
348	} else if (!(ifa->ifa_debug & IFD_ATTACHED)) {
349		panic("%s: Attempt to detach an unattached address ifa=%p",
350		    __func__, ifa);
351		/* NOTREACHED */
352	} else if (ifa->ifa_ifp != ifp) {
353		panic("%s: Mismatch ifa_ifp=%p, ifp=%p", __func__,
354		    ifa->ifa_ifp, ifp);
355		/* NOTREACHED */
356	} else if (ifa->ifa_debug & IFD_DEBUG) {
357		struct ifaddr *ifa2;
358		TAILQ_FOREACH(ifa2, &ifp->if_addrhead, ifa_link) {
359			if (ifa2 == ifa)
360				break;
361		}
362		if (ifa2 != ifa) {
363			panic("%s: Attempt to detach a stray address ifa=%p",
364			    __func__, ifa);
365			/* NOTREACHED */
366		}
367	}
368	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
369	/* This must not be the last reference to the ifaddr */
370	if (IFA_REMREF_LOCKED(ifa) == NULL) {
371		panic("%s: unexpected (missing) refcnt ifa=%p", __func__, ifa);
372		/* NOTREACHED */
373	}
374	ifa->ifa_debug &= ~IFD_ATTACHED;
375
376	if (ifa->ifa_detached != NULL)
377		(*ifa->ifa_detached)(ifa);
378}
379
380#define INITIAL_IF_INDEXLIM	8
381
382/*
383 * Function: if_next_index
384 * Purpose:
385 *   Return the next available interface index.
 *   Grow the ifnet_addrs[] and ifindex2ifnet[] arrays to accommodate the
387 *   added entry when necessary.
388 *
389 * Note:
390 *   ifnet_addrs[] is indexed by (if_index - 1), whereas
391 *   ifindex2ifnet[] is indexed by ifp->if_index.  That requires us to
392 *   always allocate one extra element to hold ifindex2ifnet[0], which
393 *   is unused.
394 */
395int if_next_index(void);
396
397__private_extern__ int
398if_next_index(void)
399{
400	static int 	if_indexlim = 0;
401	int		new_index;
402
403	new_index = ++if_index;
404	if (if_index > if_indexlim) {
405		unsigned 	n;
406		int		new_if_indexlim;
407		caddr_t		new_ifnet_addrs;
408		caddr_t		new_ifindex2ifnet;
409		caddr_t		old_ifnet_addrs;
410
411		old_ifnet_addrs = (caddr_t)ifnet_addrs;
412		if (ifnet_addrs == NULL) {
413			new_if_indexlim = INITIAL_IF_INDEXLIM;
414		} else {
415			new_if_indexlim = if_indexlim << 1;
416		}
417
418		/* allocate space for the larger arrays */
419		n = (2 * new_if_indexlim + 1) * sizeof(caddr_t);
420		new_ifnet_addrs = _MALLOC(n, M_IFADDR, M_WAITOK);
421		if (new_ifnet_addrs == NULL) {
422			--if_index;
423			return -1;
424		}
425
426		new_ifindex2ifnet = new_ifnet_addrs
427			+ new_if_indexlim * sizeof(caddr_t);
428		bzero(new_ifnet_addrs, n);
429		if (ifnet_addrs != NULL) {
430			/* copy the existing data */
431			bcopy((caddr_t)ifnet_addrs, new_ifnet_addrs,
432			      if_indexlim * sizeof(caddr_t));
433			bcopy((caddr_t)ifindex2ifnet,
434			      new_ifindex2ifnet,
435			      (if_indexlim + 1) * sizeof(caddr_t));
436		}
437
438		/* switch to the new tables and size */
439		ifnet_addrs = (struct ifaddr **)(void *)new_ifnet_addrs;
440		ifindex2ifnet = (struct ifnet **)(void *)new_ifindex2ifnet;
441		if_indexlim = new_if_indexlim;
442
443		/* release the old data */
444		if (old_ifnet_addrs != NULL) {
445			_FREE((caddr_t)old_ifnet_addrs, M_IFADDR);
446		}
447	}
448	return (new_index);
449}
450
451/*
452 * Create a clone network interface.
453 */
454static int
455if_clone_create(char *name, int len, void *params)
456{
457	struct if_clone *ifc;
458	char *dp;
459	int wildcard;
460	u_int32_t bytoff, bitoff;
461	u_int32_t unit;
462	int err;
463
464	ifc = if_clone_lookup(name, &unit);
465	if (ifc == NULL)
466		return (EINVAL);
467
468	if (ifunit(name) != NULL)
469		return (EEXIST);
470
471	bytoff = bitoff = 0;
472	wildcard = (unit == UINT32_MAX);
473	/*
474	 * Find a free unit if none was given.
475	 */
476	if (wildcard) {
477		while ((bytoff < ifc->ifc_bmlen)
478		    && (ifc->ifc_units[bytoff] == 0xff))
479			bytoff++;
480		if (bytoff >= ifc->ifc_bmlen)
481			return (ENOSPC);
482		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
483			bitoff++;
484		unit = (bytoff << 3) + bitoff;
485	}
486
487	if (unit > ifc->ifc_maxunit)
488		return (ENXIO);
489
490	err = (*ifc->ifc_create)(ifc, unit, params);
491	if (err != 0)
492		return (err);
493
494	if (!wildcard) {
495		bytoff = unit >> 3;
496		bitoff = unit - (bytoff << 3);
497	}
498
499	/*
500	 * Allocate the unit in the bitmap.
501	 */
502	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
503	    ("%s: bit is already set", __func__));
504	ifc->ifc_units[bytoff] |= (1 << bitoff);
505
506	/* In the wildcard case, we need to update the name. */
507	if (wildcard) {
508		for (dp = name; *dp != '\0'; dp++);
509		if (snprintf(dp, len - (dp-name), "%d", unit) >
510		    len - (dp-name) - 1) {
511			/*
512			 * This can only be a programmer error and
513			 * there's no straightforward way to recover if
514			 * it happens.
515			 */
516			panic("%s: interface name too long", __func__);
517			/* NOTREACHED */
518		}
519
520	}
521
522	return (0);
523}
524
525/*
526 * Destroy a clone network interface.
527 */
528static int
529if_clone_destroy(const char *name)
530{
531	struct if_clone *ifc;
532	struct ifnet *ifp;
533	int bytoff, bitoff;
534	u_int32_t unit;
535
536	ifc = if_clone_lookup(name, &unit);
537	if (ifc == NULL)
538		return (EINVAL);
539
540	if (unit < ifc->ifc_minifs)
541		return (EINVAL);
542
543	ifp = ifunit(name);
544	if (ifp == NULL)
545		return (ENXIO);
546
547	if (ifc->ifc_destroy == NULL)
548		return (EOPNOTSUPP);
549
550	(*ifc->ifc_destroy)(ifp);
551
552	/*
553	 * Compute offset in the bitmap and deallocate the unit.
554	 */
555	bytoff = unit >> 3;
556	bitoff = unit - (bytoff << 3);
557	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
558	    ("%s: bit is already cleared", __func__));
559	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
560	return (0);
561}
562
563/*
564 * Look up a network interface cloner.
565 */
566
567__private_extern__ struct if_clone *
568if_clone_lookup(const char *name, u_int32_t *unitp)
569{
570	struct if_clone *ifc;
571	const char *cp;
572	u_int32_t i;
573
574	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) {
575		for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) {
576			if (ifc->ifc_name[i] != *cp)
577				goto next_ifc;
578		}
579		goto found_name;
580 next_ifc:
581		ifc = LIST_NEXT(ifc, ifc_list);
582	}
583
584	/* No match. */
585	return ((struct if_clone *)NULL);
586
587 found_name:
588	if (*cp == '\0') {
589		i = UINT32_MAX;
590	} else {
591		for (i = 0; *cp != '\0'; cp++) {
592			if (*cp < '0' || *cp > '9') {
593				/* Bogus unit number. */
594				return (NULL);
595			}
596			i = (i * 10) + (*cp - '0');
597		}
598	}
599
600	if (unitp != NULL)
601		*unitp = i;
602	return (ifc);
603}
604
605/*
606 * Register a network interface cloner.
607 */
608int
609if_clone_attach(struct if_clone *ifc)
610{
611	int bytoff, bitoff;
612	int err;
613	int len, maxclone;
614	u_int32_t unit;
615
616	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
617	    ("%s: %s requested more units then allowed (%d > %d)",
618	    __func__, ifc->ifc_name, ifc->ifc_minifs,
619	    ifc->ifc_maxunit + 1));
620	/*
621	 * Compute bitmap size and allocate it.
622	 */
623	maxclone = ifc->ifc_maxunit + 1;
624	len = maxclone >> 3;
625	if ((len << 3) < maxclone)
626		len++;
627	ifc->ifc_units = _MALLOC(len, M_CLONE, M_WAITOK | M_ZERO);
628	if (ifc->ifc_units == NULL)
629		return ENOBUFS;
630	bzero(ifc->ifc_units, len);
631	ifc->ifc_bmlen = len;
632
633	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
634	if_cloners_count++;
635
636	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
637		err = (*ifc->ifc_create)(ifc, unit, NULL);
638		KASSERT(err == 0,
639		    ("%s: failed to create required interface %s%d",
640		    __func__, ifc->ifc_name, unit));
641
642		/* Allocate the unit in the bitmap. */
643		bytoff = unit >> 3;
644		bitoff = unit - (bytoff << 3);
645		ifc->ifc_units[bytoff] |= (1 << bitoff);
646	}
647
648	return 0;
649}
650
651/*
652 * Unregister a network interface cloner.
653 */
654void
655if_clone_detach(struct if_clone *ifc)
656{
657	LIST_REMOVE(ifc, ifc_list);
658	FREE(ifc->ifc_units, M_CLONE);
659	if_cloners_count--;
660}
661
662/*
663 * Provide list of interface cloners to userspace.
664 */
665static int
666if_clone_list(int count, int *ret_total, user_addr_t dst)
667{
668	char outbuf[IFNAMSIZ];
669	struct if_clone *ifc;
670	int error = 0;
671
672	*ret_total = if_cloners_count;
673	if (dst == USER_ADDR_NULL) {
674		/* Just asking how many there are. */
675		return (0);
676	}
677
678	if (count < 0)
679		return (EINVAL);
680
681	count = (if_cloners_count < count) ? if_cloners_count : count;
682
683	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
684	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
685		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
686		error = copyout(outbuf, dst, IFNAMSIZ);
687		if (error)
688			break;
689	}
690
691	return (error);
692}
693
694/*
695 * Similar to ifa_ifwithaddr, except that this is IPv4 specific
696 * and that it matches only the local (not broadcast) address.
697 */
698__private_extern__ struct in_ifaddr *
699ifa_foraddr(unsigned int addr)
700{
701	return (ifa_foraddr_scoped(addr, IFSCOPE_NONE));
702}
703
704/*
705 * Similar to ifa_foraddr, except with the added interface scope
706 * constraint (unless the caller passes in IFSCOPE_NONE in which
707 * case there is no scope restriction).
708 */
709__private_extern__ struct in_ifaddr *
710ifa_foraddr_scoped(unsigned int addr, unsigned int scope)
711{
712	struct in_ifaddr *ia = NULL;
713
714	lck_rw_lock_shared(in_ifaddr_rwlock);
715	TAILQ_FOREACH(ia, INADDR_HASH(addr), ia_hash) {
716		IFA_LOCK_SPIN(&ia->ia_ifa);
717		if (ia->ia_addr.sin_addr.s_addr == addr &&
718		    (scope == IFSCOPE_NONE || ia->ia_ifp->if_index == scope)) {
719			IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for caller */
720			IFA_UNLOCK(&ia->ia_ifa);
721			break;
722		}
723		IFA_UNLOCK(&ia->ia_ifa);
724	}
725	lck_rw_done(in_ifaddr_rwlock);
726	return (ia);
727}
728
729#if INET6
730/*
731 * Similar to ifa_foraddr, except that this for IPv6.
732 */
733__private_extern__ struct in6_ifaddr *
734ifa_foraddr6(struct in6_addr *addr6)
735{
736	return (ifa_foraddr6_scoped(addr6, IFSCOPE_NONE));
737}
738
739__private_extern__ struct in6_ifaddr *
740ifa_foraddr6_scoped(struct in6_addr *addr6, unsigned int scope)
741{
742	struct in6_ifaddr *ia = NULL;
743
744	lck_rw_lock_shared(&in6_ifaddr_rwlock);
745	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
746		IFA_LOCK(&ia->ia_ifa);
747		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, addr6) &&
748		    (scope == IFSCOPE_NONE || ia->ia_ifp->if_index == scope)) {
749			IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for caller */
750			IFA_UNLOCK(&ia->ia_ifa);
751			break;
752		}
753		IFA_UNLOCK(&ia->ia_ifa);
754	}
755	lck_rw_done(&in6_ifaddr_rwlock);
756
757	return (ia);
758}
759#endif /* INET6 */
760
761/*
762 * Return the first (primary) address of a given family on an interface.
763 */
764__private_extern__ struct ifaddr *
765ifa_ifpgetprimary(struct ifnet *ifp, int family)
766{
767	struct ifaddr *ifa;
768
769	ifnet_lock_shared(ifp);
770	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
771		IFA_LOCK_SPIN(ifa);
772		if (ifa->ifa_addr->sa_family == family) {
773			IFA_ADDREF_LOCKED(ifa);	/* for caller */
774			IFA_UNLOCK(ifa);
775			break;
776		}
777		IFA_UNLOCK(ifa);
778	}
779	ifnet_lock_done(ifp);
780
781	return (ifa);
782}
783
784/*
785 * Locate an interface based on a complete address.
786 */
787/*ARGSUSED*/
788struct ifaddr *
789ifa_ifwithaddr(const struct sockaddr *addr)
790{
791	struct ifnet *ifp;
792	struct ifaddr *ifa;
793	struct ifaddr *result = NULL;
794
795#define	equal(a1, a2)							\
796	(bcmp((const void*)(a1), (const void*)(a2),			\
797	    ((const struct sockaddr *)(a1))->sa_len) == 0)
798
799	ifnet_head_lock_shared();
800	for (ifp = ifnet_head.tqh_first; ifp && !result;
801	    ifp = ifp->if_link.tqe_next) {
802		ifnet_lock_shared(ifp);
803		for (ifa = ifp->if_addrhead.tqh_first; ifa;
804		    ifa = ifa->ifa_link.tqe_next) {
805			IFA_LOCK_SPIN(ifa);
806			if (ifa->ifa_addr->sa_family != addr->sa_family) {
807				IFA_UNLOCK(ifa);
808				continue;
809			}
810			if (equal(addr, ifa->ifa_addr)) {
811				result = ifa;
812				IFA_ADDREF_LOCKED(ifa);	/* for caller */
813				IFA_UNLOCK(ifa);
814				break;
815			}
816			if ((ifp->if_flags & IFF_BROADCAST) &&
817			    ifa->ifa_broadaddr != NULL &&
818			    /* IP6 doesn't have broadcast */
819			    ifa->ifa_broadaddr->sa_len != 0 &&
820			    equal(ifa->ifa_broadaddr, addr)) {
821				result = ifa;
822				IFA_ADDREF_LOCKED(ifa);	/* for caller */
823				IFA_UNLOCK(ifa);
824				break;
825			}
826			IFA_UNLOCK(ifa);
827		}
828		ifnet_lock_done(ifp);
829	}
830	ifnet_head_done();
831
832	return (result);
833}
834/*
835 * Locate the point to point interface with a given destination address.
836 */
837/*ARGSUSED*/
838struct ifaddr *
839ifa_ifwithdstaddr(const struct sockaddr *addr)
840{
841	struct ifnet *ifp;
842	struct ifaddr *ifa;
843	struct ifaddr *result = NULL;
844
845	ifnet_head_lock_shared();
846	for (ifp = ifnet_head.tqh_first; ifp && !result;
847	    ifp = ifp->if_link.tqe_next) {
848	    if ((ifp->if_flags & IFF_POINTOPOINT)) {
849			ifnet_lock_shared(ifp);
850			for (ifa = ifp->if_addrhead.tqh_first; ifa;
851			    ifa = ifa->ifa_link.tqe_next) {
852				IFA_LOCK_SPIN(ifa);
853				if (ifa->ifa_addr->sa_family !=
854				    addr->sa_family) {
855					IFA_UNLOCK(ifa);
856					continue;
857				}
858				if (ifa->ifa_dstaddr &&
859				    equal(addr, ifa->ifa_dstaddr)) {
860					result = ifa;
861					IFA_ADDREF_LOCKED(ifa);	/* for caller */
862					IFA_UNLOCK(ifa);
863					break;
864				}
865				IFA_UNLOCK(ifa);
866			}
867			ifnet_lock_done(ifp);
868		}
869	}
870	ifnet_head_done();
871	return (result);
872}
873
874/*
875 * Locate the source address of an interface based on a complete address.
876 */
877struct ifaddr *
878ifa_ifwithaddr_scoped(const struct sockaddr *addr, unsigned int ifscope)
879{
880	struct ifaddr *result = NULL;
881	struct ifnet *ifp;
882
883	if (ifscope == IFSCOPE_NONE)
884		return (ifa_ifwithaddr(addr));
885
886	ifnet_head_lock_shared();
887	if (ifscope > (unsigned int)if_index) {
888		ifnet_head_done();
889		return (NULL);
890	}
891
892	ifp = ifindex2ifnet[ifscope];
893	if (ifp != NULL) {
894		struct ifaddr *ifa = NULL;
895
896		/*
897		 * This is suboptimal; there should be a better way
898		 * to search for a given address of an interface
899		 * for any given address family.
900		 */
901		ifnet_lock_shared(ifp);
902		for (ifa = ifp->if_addrhead.tqh_first; ifa != NULL;
903		    ifa = ifa->ifa_link.tqe_next) {
904			IFA_LOCK_SPIN(ifa);
905			if (ifa->ifa_addr->sa_family != addr->sa_family) {
906				IFA_UNLOCK(ifa);
907				continue;
908			}
909			if (equal(addr, ifa->ifa_addr)) {
910				result = ifa;
911				IFA_ADDREF_LOCKED(ifa);	/* for caller */
912				IFA_UNLOCK(ifa);
913				break;
914			}
915			if ((ifp->if_flags & IFF_BROADCAST) &&
916			    ifa->ifa_broadaddr != NULL &&
917			    /* IP6 doesn't have broadcast */
918			    ifa->ifa_broadaddr->sa_len != 0 &&
919			    equal(ifa->ifa_broadaddr, addr)) {
920				result = ifa;
921				IFA_ADDREF_LOCKED(ifa);	/* for caller */
922				IFA_UNLOCK(ifa);
923				break;
924			}
925			IFA_UNLOCK(ifa);
926		}
927		ifnet_lock_done(ifp);
928	}
929	ifnet_head_done();
930
931	return (result);
932}
933
934struct ifaddr *
935ifa_ifwithnet(const struct sockaddr *addr)
936{
937	return (ifa_ifwithnet_common(addr, IFSCOPE_NONE));
938}
939
940struct ifaddr *
941ifa_ifwithnet_scoped(const struct sockaddr *addr, unsigned int ifscope)
942{
943	return (ifa_ifwithnet_common(addr, ifscope));
944}
945
946/*
947 * Find an interface on a specific network.  If many, choice
948 * is most specific found.
949 */
950static struct ifaddr *
951ifa_ifwithnet_common(const struct sockaddr *addr, unsigned int ifscope)
952{
953	struct ifnet *ifp;
954	struct ifaddr *ifa = NULL;
955	struct ifaddr *ifa_maybe = NULL;
956	u_int af = addr->sa_family;
957	const char *addr_data = addr->sa_data, *cplim;
958
959#if INET6
960	if ((af != AF_INET && af != AF_INET6) ||
961	    (af == AF_INET && !ip_doscopedroute) ||
962	    (af == AF_INET6 && !ip6_doscopedroute))
963#else
964	if (af != AF_INET || !ip_doscopedroute)
965#endif /* !INET6 */
966		ifscope = IFSCOPE_NONE;
967
968	ifnet_head_lock_shared();
969	/*
970	 * AF_LINK addresses can be looked up directly by their index number,
971	 * so do that if we can.
972	 */
973	if (af == AF_LINK) {
974		const struct sockaddr_dl *sdl =
975		    (const struct sockaddr_dl *)(uintptr_t)(size_t)addr;
976		if (sdl->sdl_index && sdl->sdl_index <= if_index) {
977			ifa = ifnet_addrs[sdl->sdl_index - 1];
978			if (ifa != NULL)
979				IFA_ADDREF(ifa);
980
981			ifnet_head_done();
982			return (ifa);
983		}
984	}
985
986	/*
987	 * Scan though each interface, looking for ones that have
988	 * addresses in this address family.
989	 */
990	for (ifp = ifnet_head.tqh_first; ifp; ifp = ifp->if_link.tqe_next) {
991		ifnet_lock_shared(ifp);
992		for (ifa = ifp->if_addrhead.tqh_first; ifa;
993		     ifa = ifa->ifa_link.tqe_next) {
994			const char *cp, *cp2, *cp3;
995
996			IFA_LOCK(ifa);
997			if (ifa->ifa_addr == NULL ||
998			    ifa->ifa_addr->sa_family != af) {
999next:
1000				IFA_UNLOCK(ifa);
1001				continue;
1002			}
1003			/*
1004			 * If we're looking up with a scope,
1005			 * find using a matching interface.
1006			 */
1007			if (ifscope != IFSCOPE_NONE &&
1008			    ifp->if_index != ifscope) {
1009				IFA_UNLOCK(ifa);
1010				continue;
1011			}
1012
1013			/*
1014			 * Scan all the bits in the ifa's address.
1015			 * If a bit dissagrees with what we are
1016			 * looking for, mask it with the netmask
1017			 * to see if it really matters.
1018			 * (A byte at a time)
1019			 */
1020			if (ifa->ifa_netmask == 0) {
1021				IFA_UNLOCK(ifa);
1022				continue;
1023			}
1024			cp = addr_data;
1025			cp2 = ifa->ifa_addr->sa_data;
1026			cp3 = ifa->ifa_netmask->sa_data;
1027			cplim = ifa->ifa_netmask->sa_len +
1028			    (char *)ifa->ifa_netmask;
1029			while (cp3 < cplim)
1030				if ((*cp++ ^ *cp2++) & *cp3++)
1031					goto next; /* next address! */
1032			/*
1033			 * If the netmask of what we just found
1034			 * is more specific than what we had before
1035			 * (if we had one) then remember the new one
1036			 * before continuing to search
1037			 * for an even better one.
1038			 */
1039			if (ifa_maybe == NULL ||
1040			    rn_refines((caddr_t)ifa->ifa_netmask,
1041			    (caddr_t)ifa_maybe->ifa_netmask)) {
1042				IFA_ADDREF_LOCKED(ifa);	/* ifa_maybe */
1043				IFA_UNLOCK(ifa);
1044				if (ifa_maybe != NULL)
1045					IFA_REMREF(ifa_maybe);
1046				ifa_maybe = ifa;
1047			} else {
1048				IFA_UNLOCK(ifa);
1049			}
1050			IFA_LOCK_ASSERT_NOTHELD(ifa);
1051		}
1052		ifnet_lock_done(ifp);
1053
1054		if (ifa != NULL)
1055			break;
1056	}
1057	ifnet_head_done();
1058
1059	if (ifa == NULL)
1060		ifa = ifa_maybe;
1061	else if (ifa_maybe != NULL)
1062		IFA_REMREF(ifa_maybe);
1063
1064	return (ifa);
1065}
1066
1067/*
1068 * Find an interface address specific to an interface best matching
1069 * a given address.
1070 */
1071struct ifaddr *
1072ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
1073{
1074	struct ifaddr *ifa = NULL;
1075	const char *cp, *cp2, *cp3;
1076	char *cplim;
1077	struct ifaddr *ifa_maybe = NULL;
1078	struct ifaddr *better_ifa_maybe = NULL;
1079	u_int af = addr->sa_family;
1080
1081	if (af >= AF_MAX)
1082		return (NULL);
1083
1084	ifnet_lock_shared(ifp);
1085	for (ifa = ifp->if_addrhead.tqh_first; ifa;
1086	     ifa = ifa->ifa_link.tqe_next) {
1087		IFA_LOCK(ifa);
1088		if (ifa->ifa_addr->sa_family != af) {
1089			IFA_UNLOCK(ifa);
1090			continue;
1091		}
1092		if (ifa_maybe == NULL) {
1093			IFA_ADDREF_LOCKED(ifa);	/* for ifa_maybe */
1094			ifa_maybe = ifa;
1095		}
1096		if (ifa->ifa_netmask == 0) {
1097			if (equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr &&
1098			    equal(addr, ifa->ifa_dstaddr))) {
1099				IFA_ADDREF_LOCKED(ifa);	/* for caller */
1100				IFA_UNLOCK(ifa);
1101				break;
1102			}
1103			IFA_UNLOCK(ifa);
1104			continue;
1105		}
1106		if (ifp->if_flags & IFF_POINTOPOINT) {
1107			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)) {
1108				IFA_ADDREF_LOCKED(ifa);	/* for caller */
1109				IFA_UNLOCK(ifa);
1110				break;
1111			}
1112		} else {
1113			if (equal(addr, ifa->ifa_addr)) {
1114				/* exact match */
1115				IFA_ADDREF_LOCKED(ifa);	/* for caller */
1116				IFA_UNLOCK(ifa);
1117				break;
1118			}
1119			cp = addr->sa_data;
1120			cp2 = ifa->ifa_addr->sa_data;
1121			cp3 = ifa->ifa_netmask->sa_data;
1122			cplim = ifa->ifa_netmask->sa_len +
1123			    (char *)ifa->ifa_netmask;
1124			for (; cp3 < cplim; cp3++)
1125				if ((*cp++ ^ *cp2++) & *cp3)
1126					break;
1127			if (cp3 == cplim) {
1128				/* subnet match */
1129				if (better_ifa_maybe == NULL) {
1130					/* for better_ifa_maybe */
1131					IFA_ADDREF_LOCKED(ifa);
1132					better_ifa_maybe = ifa;
1133				}
1134			}
1135		}
1136		IFA_UNLOCK(ifa);
1137	}
1138
1139	if (ifa == NULL) {
1140		if (better_ifa_maybe != NULL) {
1141			ifa = better_ifa_maybe;
1142			better_ifa_maybe = NULL;
1143		} else {
1144			ifa = ifa_maybe;
1145			ifa_maybe = NULL;
1146		}
1147	}
1148
1149	ifnet_lock_done(ifp);
1150
1151	if (better_ifa_maybe != NULL)
1152		IFA_REMREF(better_ifa_maybe);
1153	if (ifa_maybe != NULL)
1154		IFA_REMREF(ifa_maybe);
1155
1156	return (ifa);
1157}
1158
1159#include <net/route.h>
1160
1161/*
1162 * Default action when installing a route with a Link Level gateway.
1163 * Lookup an appropriate real ifa to point to.
1164 * This should be moved to /sys/net/link.c eventually.
1165 */
1166void
1167link_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa)
1168{
1169	struct ifaddr *ifa;
1170	struct sockaddr *dst;
1171	struct ifnet *ifp;
1172	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);
1173
1174	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1175	RT_LOCK_ASSERT_HELD(rt);
1176
1177	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1178	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1179		return;
1180
1181	/* Become a regular mutex, just in case */
1182	RT_CONVERT_LOCK(rt);
1183
1184	ifa = ifaof_ifpforaddr(dst, ifp);
1185	if (ifa) {
1186		rtsetifa(rt, ifa);
1187		IFA_LOCK_SPIN(ifa);
1188		ifa_rtrequest = ifa->ifa_rtrequest;
1189		IFA_UNLOCK(ifa);
1190		if (ifa_rtrequest != NULL && ifa_rtrequest != link_rtrequest)
1191			ifa_rtrequest(cmd, rt, sa);
1192		IFA_REMREF(ifa);
1193	}
1194}
1195
/*
 * if_updown will set the interface up or down. It will
 * prevent other up/down events from occurring until this
 * up/down event has completed.
 *
 * Caller must hold the ifnet lock exclusively. This function
 * temporarily drops the lock while it notifies the addresses,
 * the routing socket and the transmit queues, and re-acquires
 * it exclusively before returning, so the lock is held again
 * on return. This allows ifnet_set_flags to set the rest of
 * the flags after we change the up/down state without having
 * to re-take the interface lock itself.
 */
1207__private_extern__ void
1208if_updown(
1209	struct ifnet	*ifp,
1210	int				up)
1211{
1212	int i;
1213	struct ifaddr **ifa;
1214	struct timespec	tv;
1215	struct ifclassq *ifq = &ifp->if_snd;
1216
1217	/* Wait until no one else is changing the up/down state */
1218	while ((ifp->if_eflags & IFEF_UPDOWNCHANGE) != 0) {
1219		tv.tv_sec = 0;
1220		tv.tv_nsec = NSEC_PER_SEC / 10;
1221		ifnet_lock_done(ifp);
1222		msleep(&ifp->if_eflags, NULL, 0, "if_updown", &tv);
1223		ifnet_lock_exclusive(ifp);
1224	}
1225
1226	/* Verify that the interface isn't already in the right state */
1227	if ((!up && (ifp->if_flags & IFF_UP) == 0) ||
1228		(up && (ifp->if_flags & IFF_UP) == IFF_UP)) {
1229		return;
1230	}
1231
1232	/* Indicate that the up/down state is changing */
1233	ifp->if_eflags |= IFEF_UPDOWNCHANGE;
1234
1235	/* Mark interface up or down */
1236	if (up) {
1237		ifp->if_flags |= IFF_UP;
1238	}
1239	else {
1240		ifp->if_flags &= ~IFF_UP;
1241	}
1242
1243	ifnet_touch_lastchange(ifp);
1244
1245	/* Drop the lock to notify addresses and route */
1246	ifnet_lock_done(ifp);
1247	if (ifnet_get_address_list(ifp, &ifa) == 0) {
1248		for (i = 0; ifa[i] != 0; i++) {
1249			pfctlinput(up ? PRC_IFUP : PRC_IFDOWN, ifa[i]->ifa_addr);
1250		}
1251		ifnet_free_address_list(ifa);
1252	}
1253	rt_ifmsg(ifp);
1254
1255	if (!up)
1256		if_qflush(ifp, 0);
1257
1258	/* Inform all transmit queues about the new link state */
1259	IFCQ_LOCK(ifq);
1260	ifnet_update_sndq(ifq, up ? CLASSQ_EV_LINK_UP : CLASSQ_EV_LINK_DOWN);
1261	IFCQ_UNLOCK(ifq);
1262
1263	/* Aquire the lock to clear the changing flag */
1264	ifnet_lock_exclusive(ifp);
1265	ifp->if_eflags &= ~IFEF_UPDOWNCHANGE;
1266	wakeup(&ifp->if_eflags);
1267}
1268
1269/*
1270 * Mark an interface down and notify protocols of
1271 * the transition.
1272 */
void
if_down(struct ifnet *ifp)
{
	/* if_updown() expects the ifnet lock held and returns with it held */
	ifnet_lock_exclusive(ifp);
	if_updown(ifp, 0);	/* 0 == bring the interface down */
	ifnet_lock_done(ifp);
}
1281
1282/*
1283 * Mark an interface up and notify protocols of
1284 * the transition.
1285 */
void
if_up(struct ifnet *ifp)
{
	/* if_updown() expects the ifnet lock held and returns with it held */
	ifnet_lock_exclusive(ifp);
	if_updown(ifp, 1);	/* 1 == bring the interface up */
	ifnet_lock_done(ifp);
}
1294
1295/*
1296 * Flush an interface queue.
1297 */
1298void
1299if_qflush(struct ifnet *ifp, int ifq_locked)
1300{
1301	struct ifclassq *ifq = &ifp->if_snd;
1302
1303	if (!ifq_locked)
1304		IFCQ_LOCK(ifq);
1305
1306	if (IFCQ_IS_ENABLED(ifq))
1307		IFCQ_PURGE(ifq);
1308#if PF_ALTQ
1309	if (IFCQ_IS_DRAINING(ifq))
1310		ifq->ifcq_drain = 0;
1311	if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
1312		ALTQ_PURGE(IFCQ_ALTQ(ifq));
1313#endif /* PF_ALTQ */
1314
1315	VERIFY(IFCQ_IS_EMPTY(ifq));
1316
1317	if (!ifq_locked)
1318		IFCQ_UNLOCK(ifq);
1319}
1320
1321void
1322if_qflush_sc(struct ifnet *ifp, mbuf_svc_class_t sc, u_int32_t flow,
1323    u_int32_t *packets, u_int32_t *bytes, int ifq_locked)
1324{
1325	struct ifclassq *ifq = &ifp->if_snd;
1326	u_int32_t cnt = 0, len = 0;
1327	u_int32_t a_cnt = 0, a_len = 0;
1328
1329	VERIFY(sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sc));
1330	VERIFY(flow != 0);
1331
1332	if (!ifq_locked)
1333		IFCQ_LOCK(ifq);
1334
1335	if (IFCQ_IS_ENABLED(ifq))
1336		IFCQ_PURGE_SC(ifq, sc, flow, cnt, len);
1337#if PF_ALTQ
1338	if (IFCQ_IS_DRAINING(ifq)) {
1339		VERIFY((signed)(ifq->ifcq_drain - cnt) >= 0);
1340		ifq->ifcq_drain -= cnt;
1341	}
1342	if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
1343		ALTQ_PURGE_SC(IFCQ_ALTQ(ifq), sc, flow, a_cnt, a_len);
1344#endif /* PF_ALTQ */
1345
1346	if (!ifq_locked)
1347		IFCQ_UNLOCK(ifq);
1348
1349	if (packets != NULL)
1350		*packets = cnt + a_cnt;
1351	if (bytes != NULL)
1352		*bytes = len + a_len;
1353}
1354
1355/*
1356 * Map interface name to
1357 * interface structure pointer.
1358 */
1359struct ifnet *
1360ifunit(const char *name)
1361{
1362	char namebuf[IFNAMSIZ + 1];
1363	const char *cp;
1364	struct ifnet *ifp;
1365	int unit;
1366	unsigned len, m;
1367	char c;
1368
1369	len = strlen(name);
1370	if (len < 2 || len > IFNAMSIZ)
1371		return (NULL);
1372	cp = name + len - 1;
1373	c = *cp;
1374	if (c < '0' || c > '9')
1375		return (NULL);		/* trailing garbage */
1376	unit = 0;
1377	m = 1;
1378	do {
1379		if (cp == name)
1380			return (NULL);	/* no interface name */
1381		unit += (c - '0') * m;
1382		if (unit > 1000000)
1383			return (NULL);	/* number is unreasonable */
1384		m *= 10;
1385		c = *--cp;
1386	} while (c >= '0' && c <= '9');
1387	len = cp - name + 1;
1388	bcopy(name, namebuf, len);
1389	namebuf[len] = '\0';
1390	/*
1391	 * Now search all the interfaces for this name/number
1392	 */
1393	ifnet_head_lock_shared();
1394	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1395		if (strncmp(ifp->if_name, namebuf, len))
1396			continue;
1397		if (unit == ifp->if_unit)
1398			break;
1399	}
1400	ifnet_head_done();
1401	return (ifp);
1402}
1403
1404
1405/*
1406 * Map interface name in a sockaddr_dl to
1407 * interface structure pointer.
1408 */
1409struct ifnet *
1410if_withname(struct sockaddr *sa)
1411{
1412	char ifname[IFNAMSIZ+1];
1413	struct sockaddr_dl *sdl = (struct sockaddr_dl *)(void *)sa;
1414
1415	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1416	     (sdl->sdl_nlen > IFNAMSIZ) )
1417		return (NULL);
1418
1419	/*
1420	 * ifunit wants a null-terminated name.  It may not be null-terminated
1421	 * in the sockaddr.  We don't want to change the caller's sockaddr,
1422	 * and there might not be room to put the trailing null anyway, so we
1423	 * make a local copy that we know we can null terminate safely.
1424	 */
1425
1426	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1427	ifname[sdl->sdl_nlen] = '\0';
1428	return (ifunit(ifname));
1429}
1430
1431static __attribute__((noinline)) int
1432ifioctl_ifconf(u_long cmd, caddr_t data)
1433{
1434	int error = 0;
1435
1436	switch (cmd) {
1437	case OSIOCGIFCONF32:			/* struct ifconf32 */
1438	case SIOCGIFCONF32: {			/* struct ifconf32 */
1439		struct ifconf32 ifc;
1440		bcopy(data, &ifc, sizeof (ifc));
1441		error = ifconf(cmd, CAST_USER_ADDR_T(ifc.ifc_req),
1442		    &ifc.ifc_len);
1443		bcopy(&ifc, data, sizeof (ifc));
1444		break;
1445	}
1446
1447	case SIOCGIFCONF64:			/* struct ifconf64 */
1448	case OSIOCGIFCONF64: {			/* struct ifconf64 */
1449		struct ifconf64 ifc;
1450		bcopy(data, &ifc, sizeof (ifc));
1451		error = ifconf(cmd, ifc.ifc_req, &ifc.ifc_len);
1452		bcopy(&ifc, data, sizeof (ifc));
1453		break;
1454	}
1455
1456	default:
1457		VERIFY(0);
1458		/* NOTREACHED */
1459	}
1460
1461	return (error);
1462}
1463
1464static __attribute__((noinline)) int
1465ifioctl_ifclone(u_long cmd, caddr_t data)
1466{
1467	int error = 0;
1468
1469	switch (cmd) {
1470	case SIOCIFGCLONERS32: {		/* struct if_clonereq32 */
1471		struct if_clonereq32 ifcr;
1472		bcopy(data, &ifcr, sizeof (ifcr));
1473		error = if_clone_list(ifcr.ifcr_count, &ifcr.ifcr_total,
1474		    CAST_USER_ADDR_T(ifcr.ifcru_buffer));
1475		bcopy(&ifcr, data, sizeof (ifcr));
1476		break;
1477	}
1478
1479	case SIOCIFGCLONERS64: {		/* struct if_clonereq64 */
1480		struct if_clonereq64 ifcr;
1481		bcopy(data, &ifcr, sizeof (ifcr));
1482		error = if_clone_list(ifcr.ifcr_count, &ifcr.ifcr_total,
1483		    ifcr.ifcru_buffer);
1484		bcopy(&ifcr, data, sizeof (ifcr));
1485		break;
1486	}
1487
1488	default:
1489		VERIFY(0);
1490		/* NOTREACHED */
1491	}
1492
1493	return (error);
1494}
1495
1496static __attribute__((noinline)) int
1497ifioctl_ifdesc(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p)
1498{
1499	struct if_descreq *ifdr = (struct if_descreq *)(void *)data;
1500	u_int32_t ifdr_len;
1501	int error = 0;
1502
1503	VERIFY(ifp != NULL);
1504
1505	switch (cmd) {
1506	case SIOCSIFDESC: {			/* struct if_descreq */
1507		if ((error = proc_suser(p)) != 0)
1508                        break;
1509
1510		ifnet_lock_exclusive(ifp);
1511		bcopy(&ifdr->ifdr_len, &ifdr_len, sizeof (ifdr_len));
1512		if (ifdr_len > sizeof (ifdr->ifdr_desc) ||
1513		    ifdr_len > ifp->if_desc.ifd_maxlen) {
1514			error = EINVAL;
1515			ifnet_lock_done(ifp);
1516			break;
1517		}
1518
1519		bzero(ifp->if_desc.ifd_desc, ifp->if_desc.ifd_maxlen);
1520		if ((ifp->if_desc.ifd_len = ifdr_len) > 0) {
1521			bcopy(ifdr->ifdr_desc, ifp->if_desc.ifd_desc,
1522			    MIN(ifdr_len, ifp->if_desc.ifd_maxlen));
1523		}
1524		ifnet_lock_done(ifp);
1525		break;
1526	}
1527
1528	case SIOCGIFDESC: {			/* struct if_descreq */
1529		ifnet_lock_shared(ifp);
1530		ifdr_len = MIN(ifp->if_desc.ifd_len, sizeof (ifdr->ifdr_desc));
1531		bcopy(&ifdr_len, &ifdr->ifdr_len, sizeof (ifdr_len));
1532		bzero(&ifdr->ifdr_desc, sizeof (ifdr->ifdr_desc));
1533		if (ifdr_len > 0) {
1534			bcopy(ifp->if_desc.ifd_desc, ifdr->ifdr_desc, ifdr_len);
1535		}
1536		ifnet_lock_done(ifp);
1537		break;
1538	}
1539
1540	default:
1541		VERIFY(0);
1542		/* NOTREACHED */
1543	}
1544
1545	return (error);
1546}
1547
1548static __attribute__((noinline)) int
1549ifioctl_linkparams(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p)
1550{
1551	struct if_linkparamsreq *iflpr =
1552	    (struct if_linkparamsreq *)(void *)data;
1553	struct ifclassq *ifq;
1554	int error = 0;
1555
1556	VERIFY(ifp != NULL);
1557	ifq = &ifp->if_snd;
1558
1559	switch (cmd) {
1560	case SIOCSIFLINKPARAMS: {		/* struct if_linkparamsreq */
1561		struct tb_profile tb = { 0, 0, 0 };
1562
1563		if ((error = proc_suser(p)) != 0)
1564                        break;
1565
1566		IFCQ_LOCK(ifq);
1567		if (!IFCQ_IS_READY(ifq)) {
1568			error = ENXIO;
1569			IFCQ_UNLOCK(ifq);
1570			break;
1571		}
1572		bcopy(&iflpr->iflpr_output_tbr_rate, &tb.rate,
1573		    sizeof (tb.rate));
1574		bcopy(&iflpr->iflpr_output_tbr_percent, &tb.percent,
1575		    sizeof (tb.percent));
1576		error = ifclassq_tbr_set(ifq, &tb, TRUE);
1577		IFCQ_UNLOCK(ifq);
1578		break;
1579	}
1580
1581	case SIOCGIFLINKPARAMS: {		/* struct if_linkparamsreq */
1582		u_int32_t sched_type = PKTSCHEDT_NONE, flags = 0;
1583		u_int64_t tbr_bw = 0, tbr_pct = 0;
1584
1585		IFCQ_LOCK(ifq);
1586#if PF_ALTQ
1587		if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) {
1588			sched_type = IFCQ_ALTQ(ifq)->altq_type;
1589			flags |= IFLPRF_ALTQ;
1590		} else
1591#endif /* PF_ALTQ */
1592		{
1593			if (IFCQ_IS_ENABLED(ifq))
1594				sched_type = ifq->ifcq_type;
1595		}
1596		bcopy(&sched_type, &iflpr->iflpr_output_sched,
1597		    sizeof (iflpr->iflpr_output_sched));
1598
1599		if (IFCQ_TBR_IS_ENABLED(ifq)) {
1600			tbr_bw = ifq->ifcq_tbr.tbr_rate_raw;
1601			tbr_pct = ifq->ifcq_tbr.tbr_percent;
1602		}
1603		bcopy(&tbr_bw, &iflpr->iflpr_output_tbr_rate,
1604		    sizeof (iflpr->iflpr_output_tbr_rate));
1605		bcopy(&tbr_pct, &iflpr->iflpr_output_tbr_percent,
1606		    sizeof (iflpr->iflpr_output_tbr_percent));
1607		IFCQ_UNLOCK(ifq);
1608
1609		if (ifp->if_output_sched_model ==
1610		    IFNET_SCHED_MODEL_DRIVER_MANAGED)
1611			flags |= IFLPRF_DRVMANAGED;
1612		bcopy(&flags, &iflpr->iflpr_flags, sizeof (iflpr->iflpr_flags));
1613		bcopy(&ifp->if_output_bw, &iflpr->iflpr_output_bw,
1614		    sizeof (iflpr->iflpr_output_bw));
1615		bcopy(&ifp->if_input_bw, &iflpr->iflpr_input_bw,
1616		    sizeof (iflpr->iflpr_input_bw));
1617		bcopy(&ifp->if_output_lt, &iflpr->iflpr_output_lt,
1618		    sizeof (iflpr->iflpr_output_lt));
1619		bcopy(&ifp->if_input_lt, &iflpr->iflpr_input_lt,
1620		    sizeof (iflpr->iflpr_input_lt));
1621		break;
1622	}
1623
1624	default:
1625		VERIFY(0);
1626		/* NOTREACHED */
1627	}
1628
1629	return (error);
1630}
1631
1632static __attribute__((noinline)) int
1633ifioctl_qstats(struct ifnet *ifp, u_long cmd, caddr_t data)
1634{
1635	struct if_qstatsreq *ifqr = (struct if_qstatsreq *)(void *)data;
1636	u_int32_t ifqr_len, ifqr_slot;
1637	int error = 0;
1638
1639	VERIFY(ifp != NULL);
1640
1641	switch (cmd) {
1642	case SIOCGIFQUEUESTATS: {		/* struct if_qstatsreq */
1643		bcopy(&ifqr->ifqr_slot, &ifqr_slot, sizeof (ifqr_slot));
1644		bcopy(&ifqr->ifqr_len, &ifqr_len, sizeof (ifqr_len));
1645		error = ifclassq_getqstats(&ifp->if_snd, ifqr_slot,
1646		    ifqr->ifqr_buf, &ifqr_len);
1647		if (error != 0)
1648			ifqr_len = 0;
1649		bcopy(&ifqr_len, &ifqr->ifqr_len, sizeof (ifqr_len));
1650		break;
1651	}
1652
1653	default:
1654		VERIFY(0);
1655		/* NOTREACHED */
1656	}
1657
1658	return (error);
1659}
1660
1661static __attribute__((noinline)) int
1662ifioctl_throttle(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p)
1663{
1664	struct if_throttlereq *ifthr = (struct if_throttlereq *)(void *)data;
1665	u_int32_t ifthr_level;
1666	int error = 0;
1667
1668	VERIFY(ifp != NULL);
1669
1670	switch (cmd) {
1671	case SIOCSIFTHROTTLE: {			/* struct if_throttlereq */
1672		/*
1673		 * XXX: Use priv_check_cred() instead of root check?
1674		 */
1675		if ((error = proc_suser(p)) != 0)
1676                        break;
1677
1678		bcopy(&ifthr->ifthr_level, &ifthr_level, sizeof (ifthr_level));
1679		error = ifnet_set_throttle(ifp, ifthr_level);
1680		if (error == EALREADY)
1681			error = 0;
1682		break;
1683	}
1684
1685	case SIOCGIFTHROTTLE: {			/* struct if_throttlereq */
1686		if ((error = ifnet_get_throttle(ifp, &ifthr_level)) == 0) {
1687			bcopy(&ifthr_level, &ifthr->ifthr_level,
1688			    sizeof (ifthr_level));
1689		}
1690		break;
1691	}
1692
1693	default:
1694		VERIFY(0);
1695		/* NOTREACHED */
1696	}
1697
1698	return (error);
1699}
1700
1701/*
1702 * Interface ioctls.
1703 *
1704 * Most of the routines called to handle the ioctls would end up being
1705 * tail-call optimized, which unfortunately causes this routine to
1706 * consume too much stack space; this is the reason for the "noinline"
1707 * attribute used on those routines.
1708 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	/*
	 * Top-level dispatcher for interface ioctls.  "data" is the request
	 * structure for "cmd" (its type is noted next to each case label).
	 * Processing happens in three stages:
	 *   1. commands that need no ifnet, or that take a struct ifreq,
	 *      are handed to a helper and finish at "done";
	 *   2. for everything else the interface name is pulled out of the
	 *      request and resolved to an ifnet (ENXIO if not found);
	 *   3. the command is dispatched with that ifnet.
	 * Returns 0 or an errno value.
	 */
	char ifname[IFNAMSIZ + 1];
	struct ifnet *ifp = NULL;
	struct ifstat *ifs = NULL;
	int error = 0;

	/*
	 * ifname is one byte larger than IFNAMSIZ and starts out all zero,
	 * so the IFNAMSIZ-byte copies below always leave it NUL-terminated.
	 */
	bzero(ifname, sizeof (ifname));

	/*
	 * ioctls which don't require ifp, or ifreq ioctls
	 */
	switch (cmd) {
	case OSIOCGIFCONF32:			/* struct ifconf32 */
	case SIOCGIFCONF32:			/* struct ifconf32 */
	case SIOCGIFCONF64:			/* struct ifconf64 */
	case OSIOCGIFCONF64:			/* struct ifconf64 */
		error = ifioctl_ifconf(cmd, data);
		goto done;

	case SIOCIFGCLONERS32:			/* struct if_clonereq32 */
	case SIOCIFGCLONERS64:			/* struct if_clonereq64 */
		error = ifioctl_ifclone(cmd, data);
		goto done;

	case SIOCSIFDSTADDR:			/* struct ifreq */
	case SIOCSIFADDR:			/* struct ifreq */
	case SIOCSIFBRDADDR:			/* struct ifreq */
	case SIOCSIFNETMASK:			/* struct ifreq */
	case OSIOCGIFADDR:			/* struct ifreq */
	case OSIOCGIFDSTADDR:			/* struct ifreq */
	case OSIOCGIFBRDADDR:			/* struct ifreq */
	case OSIOCGIFNETMASK:			/* struct ifreq */
	case SIOCSIFKPI:			/* struct ifreq */
		/* This group is refused on a socket without a protocol */
		if (so->so_proto == NULL) {
			error = EOPNOTSUPP;
			goto done;
		}
		/* FALLTHRU */
	case SIOCIFCREATE:			/* struct ifreq */
	case SIOCIFCREATE2:			/* struct ifreq */
	case SIOCIFDESTROY:			/* struct ifreq */
	case SIOCGIFFLAGS:			/* struct ifreq */
	case SIOCGIFEFLAGS:			/* struct ifreq */
	case SIOCGIFCAP:			/* struct ifreq */
#if CONFIG_MACF_NET
	case SIOCGIFMAC:			/* struct ifreq */
	case SIOCSIFMAC:			/* struct ifreq */
#endif /* CONFIG_MACF_NET */
	case SIOCGIFMETRIC:			/* struct ifreq */
	case SIOCGIFMTU:			/* struct ifreq */
	case SIOCGIFPHYS:			/* struct ifreq */
	case SIOCSIFFLAGS:			/* struct ifreq */
	case SIOCSIFCAP:			/* struct ifreq */
	case SIOCSIFMETRIC:			/* struct ifreq */
	case SIOCSIFPHYS:			/* struct ifreq */
	case SIOCSIFMTU:			/* struct ifreq */
	case SIOCADDMULTI:			/* struct ifreq */
	case SIOCDELMULTI:			/* struct ifreq */
	case SIOCDIFPHYADDR:			/* struct ifreq */
	case SIOCSIFMEDIA:			/* struct ifreq */
	case SIOCSIFGENERIC:			/* struct ifreq */
	case SIOCSIFLLADDR:			/* struct ifreq */
	case SIOCSIFALTMTU:			/* struct ifreq */
	case SIOCSIFVLAN:			/* struct ifreq */
	case SIOCSIFBOND:			/* struct ifreq */
	case SIOCGIFLLADDR:			/* struct ifreq */
	case SIOCGIFTYPE:			/* struct ifreq */
	case SIOCGIFPSRCADDR:			/* struct ifreq */
	case SIOCGIFPDSTADDR:			/* struct ifreq */
	case SIOCGIFGENERIC:			/* struct ifreq */
	case SIOCGIFDEVMTU:			/* struct ifreq */
	case SIOCGIFVLAN:			/* struct ifreq */
	case SIOCGIFBOND:			/* struct ifreq */
	case SIOCGIFWAKEFLAGS:			/* struct ifreq */
	case SIOCGIFGETRTREFCNT:		/* struct ifreq */
	case SIOCSIFOPPORTUNISTIC:		/* struct ifreq */
	case SIOCGIFOPPORTUNISTIC:		/* struct ifreq */
	case SIOCGIFLINKQUALITYMETRIC:		/* struct ifreq */
	case SIOCSIFLOG:			/* struct ifreq */
	case SIOCGIFLOG:			/* struct ifreq */
	case SIOCGIFDELEGATE: 			/* struct ifreq */
	case SIOCGIFEXPENSIVE:			/* struct ifreq */
	case SIOCSIFEXPENSIVE: 			/* struct ifreq */
	case SIOCSIF2KCL:			/* struct ifreq */
	case SIOCGIF2KCL: {			/* struct ifreq */
		/*
		 * Work on a local copy of the request with a forcibly
		 * NUL-terminated name; the copy is written back to "data"
		 * whether or not the handler succeeded.
		 */
		struct ifreq ifr;
		bcopy(data, &ifr, sizeof (ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		bcopy(&ifr.ifr_name, ifname, IFNAMSIZ);
		error = ifioctl_ifreq(so, cmd, &ifr, p);
		bcopy(&ifr, data, sizeof (ifr));
		goto done;
	}
	}

	/*
	 * ioctls which require ifp.  Note that we acquire dlil_ifnet_lock
	 * here to ensure that the ifnet, if found, has been fully attached.
	 */
	dlil_if_lock();
	/* Each case knows where the interface name sits in its request type */
	switch (cmd) {
	case SIOCSIFPHYADDR:			/* struct {if,in_}aliasreq */
		bcopy(((struct in_aliasreq *)(void *)data)->ifra_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

#if INET6
	case SIOCSIFPHYADDR_IN6_32:		/* struct in6_aliasreq_32 */
		bcopy(((struct in6_aliasreq_32 *)(void *)data)->ifra_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	case SIOCSIFPHYADDR_IN6_64:		/* struct in6_aliasreq_64 */
		bcopy(((struct in6_aliasreq_64 *)(void *)data)->ifra_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;
#endif /* INET6 */

	case SIOCGIFSTATUS:			/* struct ifstat */
		/*
		 * The request is processed in a heap copy, which is
		 * released at "done" on every path.
		 */
		ifs = _MALLOC(sizeof (*ifs), M_DEVBUF, M_WAITOK);
		if (ifs == NULL) {
			error = ENOMEM;
			dlil_if_unlock();
			goto done;
		}
		bcopy(data, ifs, sizeof (*ifs));
		ifs->ifs_name[IFNAMSIZ - 1] = '\0';
		bcopy(ifs->ifs_name, ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	case SIOCGIFMEDIA32:			/* struct ifmediareq32 */
		bcopy(((struct ifmediareq32 *)(void *)data)->ifm_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	case SIOCGIFMEDIA64:			/* struct ifmediareq64 */
		bcopy(((struct ifmediareq64 *)(void *)data)->ifm_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	case SIOCSIFDESC:			/* struct if_descreq */
	case SIOCGIFDESC:			/* struct if_descreq */
		bcopy(((struct if_descreq *)(void *)data)->ifdr_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	case SIOCSIFLINKPARAMS:			/* struct if_linkparamsreq */
	case SIOCGIFLINKPARAMS:			/* struct if_linkparamsreq */
		bcopy(((struct if_linkparamsreq *)(void *)data)->iflpr_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	case SIOCGIFQUEUESTATS:			/* struct if_qstatsreq */
		bcopy(((struct if_qstatsreq *)(void *)data)->ifqr_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	case SIOCSIFTHROTTLE:			/* struct if_throttlereq */
	case SIOCGIFTHROTTLE:			/* struct if_throttlereq */
		bcopy(((struct if_throttlereq *)(void *)data)->ifthr_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;

	default:
		/*
		 * This is a bad assumption, but the code seems to
		 * have been doing this in the past; caveat emptor.
		 */
		bcopy(((struct ifreq *)(void *)data)->ifr_name,
		    ifname, IFNAMSIZ);
		ifp = ifunit(ifname);
		break;
	}
	dlil_if_unlock();

	/* No interface by that name */
	if (ifp == NULL) {
		error = ENXIO;
		goto done;
	}

	switch (cmd) {
	case SIOCSIFPHYADDR:			/* struct {if,in_}aliasreq */
#if INET6
	case SIOCSIFPHYADDR_IN6_32:		/* struct in6_aliasreq_32 */
	case SIOCSIFPHYADDR_IN6_64:		/* struct in6_aliasreq_64 */
#endif /* INET6 */
		/* Superuser only; lastchange is touched only on success */
		error = proc_suser(p);
		if (error != 0)
			break;

		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, data);
		if (error != 0)
			break;

		ifnet_touch_lastchange(ifp);
		break;

	case SIOCGIFSTATUS:			/* struct ifstat */
		VERIFY(ifs != NULL);
		/* Start from an empty status string */
		ifs->ascii[0] = '\0';

		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifs);

		/* Copied back to the caller even if the ioctl failed */
		bcopy(ifs, data, sizeof (*ifs));
		break;

	case SIOCGIFMEDIA32:			/* struct ifmediareq32 */
	case SIOCGIFMEDIA64:			/* struct ifmediareq64 */
		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, data);
		break;

	case SIOCSIFDESC:			/* struct if_descreq */
	case SIOCGIFDESC:			/* struct if_descreq */
		error = ifioctl_ifdesc(ifp, cmd, data, p);
		break;

	case SIOCSIFLINKPARAMS:			/* struct if_linkparamsreq */
	case SIOCGIFLINKPARAMS:			/* struct if_linkparamsreq */
		error = ifioctl_linkparams(ifp, cmd, data, p);
		break;

	case SIOCGIFQUEUESTATS:			/* struct if_qstatsreq */
		error = ifioctl_qstats(ifp, cmd, data);
		break;

	case SIOCSIFTHROTTLE:			/* struct if_throttlereq */
	case SIOCGIFTHROTTLE:			/* struct if_throttlereq */
		error = ifioctl_throttle(ifp, cmd, data, p);
		break;

	default:
		if (so->so_proto == NULL) {
			error = EOPNOTSUPP;
			break;
		}

		/*
		 * Offer the command to the socket's protocol first; if the
		 * protocol reports it as unsupported, fall back to the
		 * interface's own ioctl handler.
		 */
		socket_lock(so, 1);
		error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
		    data, ifp, p));
		socket_unlock(so, 1);

		if (error == EOPNOTSUPP || error == ENOTSUP) {
			error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, data);
		}
		break;
	}

done:
	/* Common exit: release the SIOCGIFSTATUS scratch copy, if any */
	if (ifs != NULL)
		_FREE(ifs, M_DEVBUF);

	/*
	 * Optional logging: failures are logged whenever if_verbose is
	 * non-zero, successes only when it is greater than 1.
	 */
	if (if_verbose) {
		/*
		 * Name to print: "NULL" if no name was ever extracted, the
		 * resolved interface's name if there is one, otherwise
		 * whatever name the request carried.
		 */
		if (ifname[0] == '\0')
			(void) snprintf(ifname, sizeof (ifname), "%s",
			    "NULL");
		else if (ifp != NULL)
			(void) snprintf(ifname, sizeof (ifname), "%s",
			    if_name(ifp));

		if (error != 0) {
			printf("%s[%s,%d]: ifp %s cmd 0x%08lx (%c%c [%lu] "
			    "%c %lu) error %d\n", __func__,
			    proc_name_address(p), proc_pid(p),
			    ifname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
			    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
			    (char)IOCGROUP(cmd), cmd & 0xff, error);
		} else if (if_verbose > 1) {
			printf("%s[%s,%d]: ifp %s cmd 0x%08lx (%c%c [%lu] "
			    "%c %lu) OK\n", __func__,
			    proc_name_address(p), proc_pid(p),
			    ifname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
			    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
			    (char)IOCGROUP(cmd), cmd & 0xff);
		}
	}

	return (error);
}
1999
2000static __attribute__((noinline)) int
2001ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p)
2002{
2003	struct ifnet *ifp;
2004	u_long ocmd = cmd;
2005	int error = 0;
2006	struct kev_msg ev_msg;
2007	struct net_event_data ev_data;
2008
2009	bzero(&ev_data, sizeof (struct net_event_data));
2010	bzero(&ev_msg, sizeof (struct kev_msg));
2011
2012	switch (cmd) {
2013	case SIOCIFCREATE:
2014	case SIOCIFCREATE2:
2015                error = proc_suser(p);
2016                if (error)
2017                        return (error);
2018                return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
2019		    cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
2020	case SIOCIFDESTROY:
2021		error = proc_suser(p);
2022		if (error)
2023			return (error);
2024		return (if_clone_destroy(ifr->ifr_name));
2025	}
2026
2027	/*
2028	 * ioctls which require ifp.  Note that we acquire dlil_ifnet_lock
2029	 * here to ensure that the ifnet, if found, has been fully attached.
2030	 */
2031	dlil_if_lock();
2032	ifp = ifunit(ifr->ifr_name);
2033	dlil_if_unlock();
2034
2035	if (ifp == NULL)
2036		return (ENXIO);
2037
2038	switch (cmd) {
2039	case SIOCGIFFLAGS:
2040		ifnet_lock_shared(ifp);
2041		ifr->ifr_flags = ifp->if_flags;
2042		ifnet_lock_done(ifp);
2043		break;
2044
2045	case SIOCGIFEFLAGS:
2046		ifnet_lock_shared(ifp);
2047		ifr->ifr_eflags = ifp->if_eflags;
2048		ifnet_lock_done(ifp);
2049		break;
2050
2051	case SIOCGIFCAP:
2052		ifnet_lock_shared(ifp);
2053		ifr->ifr_reqcap = ifp->if_capabilities;
2054		ifr->ifr_curcap = ifp->if_capenable;
2055		ifnet_lock_done(ifp);
2056		break;
2057
2058#if CONFIG_MACF_NET
2059	case SIOCGIFMAC:
2060		error = mac_ifnet_label_get(kauth_cred_get(), ifr, ifp);
2061		break;
2062
2063	case SIOCSIFMAC:
2064		error = mac_ifnet_label_set(kauth_cred_get(), ifr, ifp);
2065		break;
2066#endif /* CONFIG_MACF_NET */
2067
2068	case SIOCGIFMETRIC:
2069		ifnet_lock_shared(ifp);
2070		ifr->ifr_metric = ifp->if_metric;
2071		ifnet_lock_done(ifp);
2072		break;
2073
2074	case SIOCGIFMTU:
2075		ifnet_lock_shared(ifp);
2076		ifr->ifr_mtu = ifp->if_mtu;
2077		ifnet_lock_done(ifp);
2078		break;
2079
2080	case SIOCGIFPHYS:
2081		ifnet_lock_shared(ifp);
2082		ifr->ifr_phys = ifp->if_physical;
2083		ifnet_lock_done(ifp);
2084		break;
2085
2086	case SIOCSIFFLAGS:
2087		error = proc_suser(p);
2088		if (error != 0)
2089			break;
2090
2091		(void) ifnet_set_flags(ifp, ifr->ifr_flags,
2092		    (u_int16_t)~IFF_CANTCHANGE);
2093
2094		/*
2095		 * Note that we intentionally ignore any error from below
2096		 * for the SIOCSIFFLAGS case.
2097		 */
2098		(void) ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr);
2099
2100		/*
2101		 * Send the event even upon error from the driver because
2102		 * we changed the flags.
2103		 */
2104		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
2105		ev_msg.kev_class      = KEV_NETWORK_CLASS;
2106		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
2107
2108		ev_msg.event_code = KEV_DL_SIFFLAGS;
2109		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
2110		ev_data.if_family = ifp->if_family;
2111		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
2112		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
2113		ev_msg.dv[0].data_ptr    = &ev_data;
2114		ev_msg.dv[1].data_length = 0;
2115		kev_post_msg(&ev_msg);
2116
2117		ifnet_touch_lastchange(ifp);
2118		break;
2119
2120	case SIOCSIFCAP:
2121		error = proc_suser(p);
2122		if (error != 0)
2123			break;
2124
2125		if ((ifr->ifr_reqcap & ~ifp->if_capabilities)) {
2126			error = EINVAL;
2127			break;
2128		}
2129		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr);
2130
2131		ifnet_touch_lastchange(ifp);
2132		break;
2133
2134	case SIOCSIFMETRIC:
2135		error = proc_suser(p);
2136		if (error != 0)
2137			break;
2138
2139		ifp->if_metric = ifr->ifr_metric;
2140
2141		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
2142		ev_msg.kev_class      = KEV_NETWORK_CLASS;
2143		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
2144
2145		ev_msg.event_code = KEV_DL_SIFMETRICS;
2146		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
2147		ev_data.if_family = ifp->if_family;
2148		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
2149		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
2150		ev_msg.dv[0].data_ptr    = &ev_data;
2151
2152		ev_msg.dv[1].data_length = 0;
2153		kev_post_msg(&ev_msg);
2154
2155		ifnet_touch_lastchange(ifp);
2156		break;
2157
2158	case SIOCSIFPHYS:
2159		error = proc_suser(p);
2160		if (error != 0)
2161			break;
2162
2163		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr);
2164		if (error != 0)
2165			break;
2166
2167		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
2168		ev_msg.kev_class      = KEV_NETWORK_CLASS;
2169		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
2170
2171		ev_msg.event_code = KEV_DL_SIFPHYS;
2172		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
2173		ev_data.if_family = ifp->if_family;
2174		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
2175		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
2176		ev_msg.dv[0].data_ptr    = &ev_data;
2177		ev_msg.dv[1].data_length = 0;
2178		kev_post_msg(&ev_msg);
2179
2180		ifnet_touch_lastchange(ifp);
2181		break;
2182
2183	case SIOCSIFMTU: {
2184		u_int32_t oldmtu = ifp->if_mtu;
2185		struct ifclassq *ifq = &ifp->if_snd;
2186
2187		error = proc_suser(p);
2188		if (error != 0)
2189			break;
2190
2191		if (ifp->if_ioctl == NULL) {
2192			error = EOPNOTSUPP;
2193			break;
2194		}
2195		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
2196			error = EINVAL;
2197			break;
2198		}
2199		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr);
2200		if (error != 0)
2201			break;
2202
2203		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
2204		ev_msg.kev_class      = KEV_NETWORK_CLASS;
2205		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
2206
2207		ev_msg.event_code = KEV_DL_SIFMTU;
2208		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
2209		ev_data.if_family = ifp->if_family;
2210		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
2211		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
2212		ev_msg.dv[0].data_ptr    = &ev_data;
2213		ev_msg.dv[1].data_length = 0;
2214		kev_post_msg(&ev_msg);
2215
2216		ifnet_touch_lastchange(ifp);
2217		rt_ifmsg(ifp);
2218
2219		/*
2220		 * If the link MTU changed, do network layer specific procedure
2221		 * and update all route entries associated with the interface,
2222		 * so that their MTU metric gets updated.
2223		 */
2224		if (ifp->if_mtu != oldmtu) {
2225			if_rtmtu_update(ifp);
2226#if INET6
2227			nd6_setmtu(ifp);
2228#endif /* INET6 */
2229			/* Inform all transmit queues about the new MTU */
2230			IFCQ_LOCK(ifq);
2231			ifnet_update_sndq(ifq, CLASSQ_EV_LINK_MTU);
2232			IFCQ_UNLOCK(ifq);
2233		}
2234		break;
2235	}
2236
2237	case SIOCADDMULTI:
2238	case SIOCDELMULTI:
2239		error = proc_suser(p);
2240		if (error != 0)
2241			break;
2242
2243		/* Don't allow group membership on non-multicast interfaces. */
2244		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2245			error = EOPNOTSUPP;
2246			break;
2247		}
2248
2249		/* Don't let users screw up protocols' entries. */
2250		if (ifr->ifr_addr.sa_family != AF_UNSPEC &&
2251		    ifr->ifr_addr.sa_family != AF_LINK) {
2252			error = EINVAL;
2253			break;
2254		}
2255
2256		/*
2257		 * User is permitted to anonymously join a particular link
2258		 * multicast group via SIOCADDMULTI.  Subsequent join requested
2259		 * for the same record which has an outstanding refcnt from a
2260		 * past if_addmulti_anon() will not result in EADDRINUSE error
2261		 * (unlike other BSDs.)  Anonymously leaving a group is also
2262		 * allowed only as long as there is an outstanding refcnt held
2263		 * by a previous anonymous request, or else ENOENT (even if the
2264		 * link-layer multicast membership exists for a network-layer
2265		 * membership.)
2266		 */
2267		if (cmd == SIOCADDMULTI) {
2268			error = if_addmulti_anon(ifp, &ifr->ifr_addr, NULL);
2269			ev_msg.event_code = KEV_DL_ADDMULTI;
2270		} else {
2271			error = if_delmulti_anon(ifp, &ifr->ifr_addr);
2272			ev_msg.event_code = KEV_DL_DELMULTI;
2273		}
2274		if (error != 0)
2275			break;
2276
2277		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
2278		ev_msg.kev_class      = KEV_NETWORK_CLASS;
2279		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
2280		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
2281
2282		ev_data.if_family = ifp->if_family;
2283		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
2284		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
2285		ev_msg.dv[0].data_ptr    = &ev_data;
2286		ev_msg.dv[1].data_length = 0;
2287		kev_post_msg(&ev_msg);
2288
2289		ifnet_touch_lastchange(ifp);
2290		break;
2291
2292	case SIOCDIFPHYADDR:
2293	case SIOCSIFMEDIA:
2294	case SIOCSIFGENERIC:
2295	case SIOCSIFLLADDR:
2296	case SIOCSIFALTMTU:
2297	case SIOCSIFVLAN:
2298	case SIOCSIFBOND:
2299		error = proc_suser(p);
2300		if (error != 0)
2301			break;
2302
2303		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr);
2304		if (error != 0)
2305			break;
2306
2307		ifnet_touch_lastchange(ifp);
2308		break;
2309
2310	case SIOCGIFLLADDR: {
2311		struct sockaddr_dl *sdl = SDL(ifp->if_lladdr->ifa_addr);
2312
2313		if (sdl->sdl_alen == 0) {
2314			error = EADDRNOTAVAIL;
2315			break;
2316		}
2317		/* If larger than 14-bytes we'll need another mechanism */
2318		if (sdl->sdl_alen > sizeof (ifr->ifr_addr.sa_data)) {
2319			error = EMSGSIZE;
2320			break;
2321		}
2322		/* Follow the same convention used by SIOCSIFLLADDR */
2323		bzero(&ifr->ifr_addr, sizeof (ifr->ifr_addr));
2324		ifr->ifr_addr.sa_family = AF_LINK;
2325		ifr->ifr_addr.sa_len = sdl->sdl_alen;
2326		error = ifnet_guarded_lladdr_copy_bytes(ifp,
2327		    &ifr->ifr_addr.sa_data, sdl->sdl_alen);
2328		break;
2329	}
2330
2331	case SIOCGIFTYPE:
2332		ifr->ifr_type.ift_type = ifp->if_type;
2333		ifr->ifr_type.ift_family = ifp->if_family;
2334		ifr->ifr_type.ift_subfamily = ifp->if_subfamily;
2335		break;
2336
2337	case SIOCGIFPSRCADDR:
2338	case SIOCGIFPDSTADDR:
2339	case SIOCGIFGENERIC:
2340	case SIOCGIFDEVMTU:
2341	case SIOCGIFVLAN:
2342	case SIOCGIFBOND:
2343		error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr);
2344		break;
2345
2346	case SIOCGIFWAKEFLAGS:
2347		ifnet_lock_shared(ifp);
2348		ifr->ifr_wake_flags = ifnet_get_wake_flags(ifp);
2349		ifnet_lock_done(ifp);
2350		break;
2351
2352	case SIOCGIFGETRTREFCNT:
2353		ifnet_lock_shared(ifp);
2354		ifr->ifr_route_refcnt = ifp->if_route_refcnt;
2355		ifnet_lock_done(ifp);
2356		break;
2357
2358	case SIOCSIFOPPORTUNISTIC:
2359	case SIOCGIFOPPORTUNISTIC:
2360		error = ifnet_getset_opportunistic(ifp, cmd, ifr, p);
2361		break;
2362
2363	case SIOCGIFLINKQUALITYMETRIC:
2364		ifnet_lock_shared(ifp);
2365		ifr->ifr_link_quality_metric = ifp->if_lqm;
2366		ifnet_lock_done(ifp);
2367		break;
2368
2369	case SIOCSIFLOG:
2370	case SIOCGIFLOG:
2371		error = ifnet_getset_log(ifp, cmd, ifr, p);
2372		break;
2373
2374	case SIOCGIFDELEGATE:
2375		ifnet_lock_shared(ifp);
2376		ifr->ifr_delegated = ((ifp->if_delegated.ifp != NULL) ?
2377		    ifp->if_delegated.ifp->if_index : 0);
2378		ifnet_lock_done(ifp);
2379		break;
2380
2381	case SIOCGIFEXPENSIVE:
2382		ifnet_lock_shared(ifp);
2383		if (ifp->if_eflags & IFEF_EXPENSIVE)
2384			ifr->ifr_expensive = 1;
2385		else
2386			ifr->ifr_expensive = 0;
2387		ifnet_lock_done(ifp);
2388		break;
2389
2390	case SIOCSIFEXPENSIVE:
2391	{
2392		struct ifnet *difp;
2393
2394		if ((error = priv_check_cred(kauth_cred_get(),
2395		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
2396			return (error);
2397		ifnet_lock_exclusive(ifp);
2398		if (ifr->ifr_expensive)
2399			ifp->if_eflags |= IFEF_EXPENSIVE;
2400		else
2401			ifp->if_eflags &= ~IFEF_EXPENSIVE;
2402		ifnet_lock_done(ifp);
2403		/*
2404		 * Update the expensive bit in the delegated interface
2405		 * structure.
2406		 */
2407		ifnet_head_lock_shared();
2408		TAILQ_FOREACH(difp, &ifnet_head, if_link) {
2409			ifnet_lock_exclusive(difp);
2410			if (difp->if_delegated.ifp == ifp) {
2411				difp->if_delegated.expensive =
2412				    ifp->if_eflags & IFEF_EXPENSIVE ? 1 : 0;
2413
2414			}
2415			ifnet_lock_done(difp);
2416		}
2417		ifnet_head_done();
2418		break;
2419	}
2420
2421	case SIOCGIF2KCL:
2422		ifnet_lock_shared(ifp);
2423		if (ifp->if_eflags & IFEF_2KCL)
2424			ifr->ifr_2kcl = 1;
2425		else
2426			ifr->ifr_2kcl = 0;
2427		ifnet_lock_done(ifp);
2428		break;
2429
2430	case SIOCSIF2KCL:
2431		if ((error = priv_check_cred(kauth_cred_get(),
2432		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
2433			return (error);
2434		ifnet_lock_exclusive(ifp);
2435		if (ifr->ifr_2kcl)
2436			ifp->if_eflags |= IFEF_2KCL;
2437		else
2438			ifp->if_eflags &= ~IFEF_2KCL;
2439		ifnet_lock_done(ifp);
2440		break;
2441
2442	case SIOCSIFDSTADDR:
2443	case SIOCSIFADDR:
2444	case SIOCSIFBRDADDR:
2445	case SIOCSIFNETMASK:
2446	case OSIOCGIFADDR:
2447	case OSIOCGIFDSTADDR:
2448	case OSIOCGIFBRDADDR:
2449	case OSIOCGIFNETMASK:
2450	case SIOCSIFKPI:
2451		VERIFY(so->so_proto != NULL);
2452
2453		if (cmd == SIOCSIFDSTADDR || cmd == SIOCSIFADDR ||
2454		    cmd == SIOCSIFBRDADDR || cmd == SIOCSIFNETMASK) {
2455#if BYTE_ORDER != BIG_ENDIAN
2456			if (ifr->ifr_addr.sa_family == 0 &&
2457			    ifr->ifr_addr.sa_len < 16) {
2458				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
2459				ifr->ifr_addr.sa_len = 16;
2460			}
2461#else
2462			if (ifr->ifr_addr.sa_len == 0)
2463				ifr->ifr_addr.sa_len = 16;
2464#endif
2465		} else if (cmd == OSIOCGIFADDR) {
2466			cmd = SIOCGIFADDR;	/* struct ifreq */
2467		} else if (cmd == OSIOCGIFDSTADDR) {
2468			cmd = SIOCGIFDSTADDR;	/* struct ifreq */
2469		} else if (cmd == OSIOCGIFBRDADDR) {
2470			cmd = SIOCGIFBRDADDR;	/* struct ifreq */
2471		} else if (cmd == OSIOCGIFNETMASK) {
2472			cmd = SIOCGIFNETMASK;	/* struct ifreq */
2473		}
2474
2475		socket_lock(so, 1);
2476		error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
2477		    (caddr_t)ifr, ifp, p));
2478		socket_unlock(so, 1);
2479
2480		switch (ocmd) {
2481		case OSIOCGIFADDR:
2482		case OSIOCGIFDSTADDR:
2483		case OSIOCGIFBRDADDR:
2484		case OSIOCGIFNETMASK:
2485			bcopy(&ifr->ifr_addr.sa_family, &ifr->ifr_addr,
2486			    sizeof (u_short));
2487		}
2488
2489		if (cmd == SIOCSIFKPI) {
2490			int temperr = proc_suser(p);
2491			if (temperr != 0)
2492				error = temperr;
2493		}
2494
2495		if (error == EOPNOTSUPP || error == ENOTSUP) {
2496			error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd,
2497			    (caddr_t)ifr);
2498		}
2499		break;
2500
2501	default:
2502		VERIFY(0);
2503		/* NOTREACHED */
2504	}
2505
2506	return (error);
2507}
2508
2509int
2510ifioctllocked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
2511{
2512	int error;
2513
2514	socket_unlock(so, 0);
2515	error = ifioctl(so, cmd, data, p);
2516	socket_lock(so, 0);
2517	return(error);
2518}
2519
2520/*
2521 * Set/clear promiscuous mode on interface ifp based on the truth value
2522 * of pswitch.  The calls are reference counted so that only the first
2523 * "on" request actually has an effect, as does the final "off" request.
2524 * Results are undefined if the "off" and "on" requests are not matched.
2525 */
2526errno_t
2527ifnet_set_promiscuous(
2528	ifnet_t	ifp,
2529	int pswitch)
2530{
2531	int error = 0;
2532	int oldflags = 0;
2533	int newflags = 0;
2534
2535	ifnet_lock_exclusive(ifp);
2536	oldflags = ifp->if_flags;
2537	ifp->if_pcount += pswitch ? 1 : -1;
2538
2539	if (ifp->if_pcount > 0)
2540		ifp->if_flags |= IFF_PROMISC;
2541	else
2542		ifp->if_flags &= ~IFF_PROMISC;
2543
2544	newflags = ifp->if_flags;
2545	ifnet_lock_done(ifp);
2546
2547	if (newflags != oldflags && (newflags & IFF_UP) != 0) {
2548		error = ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
2549		if (error == 0) {
2550			rt_ifmsg(ifp);
2551		} else {
2552			ifnet_lock_exclusive(ifp);
2553			// revert the flags
2554			ifp->if_pcount -= pswitch ? 1 : -1;
2555			if (ifp->if_pcount > 0)
2556			    ifp->if_flags |= IFF_PROMISC;
2557			else
2558			    ifp->if_flags &= ~IFF_PROMISC;
2559			ifnet_lock_done(ifp);
2560		}
2561	}
2562
2563	if (newflags != oldflags) {
2564		log(LOG_INFO, "%s: promiscuous mode %s%s\n",
2565		    if_name(ifp),
2566		    (newflags & IFF_PROMISC) != 0 ? "enable" : "disable",
2567		    error != 0 ? " failed" : " succeeded");
2568	}
2569	return error;
2570}
2571
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * Walks every interface on ifnet_head and, for each of its addresses,
 * copies one record (interface name followed by the address) out to the
 * user buffer at ifrp.  An interface without any address still produces
 * a single record whose address is zeroed.
 *
 * cmd		selects the record layout: OSIOCGIFCONF32/OSIOCGIFCONF64
 *		always emit a fixed-size struct ifreq with the family
 *		stored through struct osockaddr; for any other value an
 *		address longer than struct sockaddr is copied out in full
 *		as a variable-length record.
 * ifrp		user-space address the records are copied to.
 * ret_space	in: number of bytes available at ifrp; out: the unused
 *		remainder is subtracted, leaving the bytes consumed.
 *
 * Returns 0, ENAMETOOLONG when an interface name does not fit in
 * ifr_name, or the error from copyout().  Running out of space is not
 * reported as an error; the walk just stops.
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, user_addr_t ifrp, int *ret_space)
{
	struct ifnet *ifp = NULL;
	struct ifaddr *ifa;
	struct ifreq ifr;
	int error = 0;
	size_t space;
	net_thread_marks_t marks;

	/*
	 * NOTE(review): presumably flags this thread so that the guarded
	 * link-layer address copy below applies its access check -- confirm
	 * against net_thread_marks_push().  Popped again before returning.
	 */
	marks = net_thread_marks_push(NET_THREAD_CKREQ_LLADDR);

	/*
	 * Zero the ifr buffer to make sure we don't
	 * disclose the contents of the stack.
	 */
	bzero(&ifr, sizeof (struct ifreq));

	/* 'space' tracks the bytes still unused in the user buffer */
	space = *ret_space;
	ifnet_head_lock_shared();
	/* Continue only while strictly more than one ifreq still fits */
	for (ifp = ifnet_head.tqh_first; space > sizeof (ifr) &&
	    ifp; ifp = ifp->if_link.tqe_next) {
		char workbuf[64];
		size_t ifnlen, addrs;

		/* Format the name first so that its length can be checked */
		ifnlen = snprintf(workbuf, sizeof (workbuf),
		    "%s", if_name(ifp));
		if (ifnlen + 1 > sizeof (ifr.ifr_name)) {
			error = ENAMETOOLONG;
			break;
		} else {
			strlcpy(ifr.ifr_name, workbuf, IFNAMSIZ);
		}

		ifnet_lock_shared(ifp);

		/* Number of addresses visited for this interface */
		addrs = 0;
		ifa = ifp->if_addrhead.tqh_first;
		for (; space > sizeof (ifr) && ifa;
		    ifa = ifa->ifa_link.tqe_next) {
			struct sockaddr *sa;
			/* Scratch copy used for the link-layer address */
			union {
				struct sockaddr sa;
				struct sockaddr_dl sdl;
				uint8_t buf[SOCK_MAXADDRLEN + 1];
			} u;

			/*
			 * Make sure to accommodate the largest possible
			 * size of SA(if_lladdr)->sa_len.
			 */
			_CASSERT(sizeof (u) == (SOCK_MAXADDRLEN + 1));

			IFA_LOCK(ifa);
			sa = ifa->ifa_addr;
			addrs++;

			if (ifa == ifp->if_lladdr) {
				/*
				 * Report the link-layer address from a private
				 * copy whose address bytes are filled in by
				 * ifnet_guarded_lladdr_copy_bytes(); its return
				 * value is not checked here.  The ifa lock is
				 * dropped around that call.
				 */
				VERIFY(sa->sa_family == AF_LINK);
				bcopy(sa, &u, sa->sa_len);
				IFA_UNLOCK(ifa);
				ifnet_guarded_lladdr_copy_bytes(ifp,
				    LLADDR(&u.sdl), u.sdl.sdl_alen);
				IFA_LOCK(ifa);
				sa = &u.sa;
			}

			if (cmd == OSIOCGIFCONF32 || cmd == OSIOCGIFCONF64) {
				/*
				 * Old-style request: fixed-size record, with
				 * the family rewritten through the osockaddr
				 * view of ifr_addr.
				 */
				struct osockaddr *osa =
				    (struct osockaddr *)(void *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				error = copyout((caddr_t)&ifr, ifrp,
				    sizeof (ifr));
				ifrp += sizeof (struct ifreq);
			} else if (sa->sa_len <= sizeof (*sa)) {
				/* Address fits in ifr_addr: fixed-size record */
				ifr.ifr_addr = *sa;
				error = copyout((caddr_t)&ifr, ifrp,
				    sizeof (ifr));
				ifrp += sizeof (struct ifreq);
			} else {
				/*
				 * Oversized address: emit the name followed by
				 * the whole sockaddr, provided the extra bytes
				 * beyond a plain ifreq still fit.
				 */
				if (space <
				    sizeof (ifr) + sa->sa_len - sizeof (*sa)) {
					IFA_UNLOCK(ifa);
					break;
				}
				/* Charge the excess now; the rest is charged below */
				space -= sa->sa_len - sizeof (*sa);
				error = copyout((caddr_t)&ifr, ifrp,
				    sizeof (ifr.ifr_name));
				if (error == 0) {
					error = copyout((caddr_t)sa, (ifrp +
					    offsetof(struct ifreq, ifr_addr)),
					    sa->sa_len);
				}
				ifrp += (sa->sa_len + offsetof(struct ifreq,
				    ifr_addr));
			}
			IFA_UNLOCK(ifa);
			if (error)
				break;
			space -= sizeof (ifr);
		}
		ifnet_lock_done(ifp);

		if (error)
			break;
		if (!addrs) {
			/* No address at all: emit the name with a zeroed address */
			bzero((caddr_t)&ifr.ifr_addr, sizeof (ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, ifrp, sizeof (ifr));
			if (error)
				break;
			space -= sizeof (ifr);
			ifrp += sizeof (struct ifreq);
		}
	}
	ifnet_head_done();
	/* Convert "bytes available" into "bytes consumed" for the caller */
	*ret_space -= space;
	net_thread_marks_pop(marks);
	return (error);
}
2699
2700/*
2701 * Just like if_promisc(), but for all-multicast-reception mode.
2702 */
2703int
2704if_allmulti(struct ifnet *ifp, int onswitch)
2705{
2706	int error = 0;
2707	int	modified = 0;
2708
2709	ifnet_lock_exclusive(ifp);
2710
2711	if (onswitch) {
2712		if (ifp->if_amcount++ == 0) {
2713			ifp->if_flags |= IFF_ALLMULTI;
2714			modified = 1;
2715		}
2716	} else {
2717		if (ifp->if_amcount > 1) {
2718			ifp->if_amcount--;
2719		} else {
2720			ifp->if_amcount = 0;
2721			ifp->if_flags &= ~IFF_ALLMULTI;
2722			modified = 1;
2723		}
2724	}
2725	ifnet_lock_done(ifp);
2726
2727	if (modified)
2728		error = ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
2729
2730	if (error == 0)
2731		rt_ifmsg(ifp);
2732	return error;
2733}
2734
2735static struct ifmultiaddr *
2736ifma_alloc(int how)
2737{
2738	struct ifmultiaddr *ifma;
2739
2740	ifma = (how == M_WAITOK) ? zalloc(ifma_zone) :
2741	    zalloc_noblock(ifma_zone);
2742
2743	if (ifma != NULL) {
2744		bzero(ifma, ifma_size);
2745		lck_mtx_init(&ifma->ifma_lock, ifa_mtx_grp, ifa_mtx_attr);
2746		ifma->ifma_debug |= IFD_ALLOC;
2747		if (ifma_debug != 0) {
2748			ifma->ifma_debug |= IFD_DEBUG;
2749			ifma->ifma_trace = ifma_trace;
2750		}
2751	}
2752	return (ifma);
2753}
2754
2755static void
2756ifma_free(struct ifmultiaddr *ifma)
2757{
2758	IFMA_LOCK(ifma);
2759
2760	if (ifma->ifma_protospec != NULL) {
2761		panic("%s: Protospec not NULL for ifma=%p", __func__, ifma);
2762		/* NOTREACHED */
2763	} else if ((ifma->ifma_flags & IFMAF_ANONYMOUS) ||
2764	    ifma->ifma_anoncnt != 0) {
2765		panic("%s: Freeing ifma=%p with outstanding anon req",
2766		    __func__, ifma);
2767		/* NOTREACHED */
2768	} else if (ifma->ifma_debug & IFD_ATTACHED) {
2769		panic("%s: ifma=%p attached to ifma_ifp=%p is being freed",
2770		    __func__, ifma, ifma->ifma_ifp);
2771		/* NOTREACHED */
2772	} else if (!(ifma->ifma_debug & IFD_ALLOC)) {
2773		panic("%s: ifma %p cannot be freed", __func__, ifma);
2774		/* NOTREACHED */
2775	} else if (ifma->ifma_refcount != 0) {
2776		panic("%s: non-zero refcount ifma=%p", __func__, ifma);
2777		/* NOTREACHED */
2778	} else if (ifma->ifma_reqcnt != 0) {
2779		panic("%s: non-zero reqcnt ifma=%p", __func__, ifma);
2780		/* NOTREACHED */
2781	} else if (ifma->ifma_ifp != NULL) {
2782		panic("%s: non-NULL ifma_ifp=%p for ifma=%p", __func__,
2783		    ifma->ifma_ifp, ifma);
2784		/* NOTREACHED */
2785	} else if (ifma->ifma_ll != NULL) {
2786		panic("%s: non-NULL ifma_ll=%p for ifma=%p", __func__,
2787		    ifma->ifma_ll, ifma);
2788		/* NOTREACHED */
2789	}
2790	ifma->ifma_debug &= ~IFD_ALLOC;
2791	if ((ifma->ifma_debug & (IFD_DEBUG | IFD_TRASHED)) ==
2792	    (IFD_DEBUG | IFD_TRASHED)) {
2793		lck_mtx_lock(&ifma_trash_lock);
2794		TAILQ_REMOVE(&ifma_trash_head, (struct ifmultiaddr_dbg *)ifma,
2795		    ifma_trash_link);
2796		lck_mtx_unlock(&ifma_trash_lock);
2797		ifma->ifma_debug &= ~IFD_TRASHED;
2798	}
2799	IFMA_UNLOCK(ifma);
2800
2801	if (ifma->ifma_addr != NULL) {
2802		FREE(ifma->ifma_addr, M_IFADDR);
2803		ifma->ifma_addr = NULL;
2804	}
2805	lck_mtx_destroy(&ifma->ifma_lock, ifa_mtx_grp);
2806	zfree(ifma_zone, ifma);
2807}
2808
2809static void
2810ifma_trace(struct ifmultiaddr *ifma, int refhold)
2811{
2812	struct ifmultiaddr_dbg *ifma_dbg = (struct ifmultiaddr_dbg *)ifma;
2813	ctrace_t *tr;
2814	u_int32_t idx;
2815	u_int16_t *cnt;
2816
2817	if (!(ifma->ifma_debug & IFD_DEBUG)) {
2818		panic("%s: ifma %p has no debug structure", __func__, ifma);
2819		/* NOTREACHED */
2820	}
2821	if (refhold) {
2822		cnt = &ifma_dbg->ifma_refhold_cnt;
2823		tr = ifma_dbg->ifma_refhold;
2824	} else {
2825		cnt = &ifma_dbg->ifma_refrele_cnt;
2826		tr = ifma_dbg->ifma_refrele;
2827	}
2828
2829	idx = atomic_add_16_ov(cnt, 1) % IFMA_TRACE_HIST_SIZE;
2830	ctrace_record(&tr[idx]);
2831}
2832
2833void
2834ifma_addref(struct ifmultiaddr *ifma, int locked)
2835{
2836	if (!locked)
2837		IFMA_LOCK(ifma);
2838	else
2839		IFMA_LOCK_ASSERT_HELD(ifma);
2840
2841	if (++ifma->ifma_refcount == 0) {
2842		panic("%s: ifma=%p wraparound refcnt", __func__, ifma);
2843		/* NOTREACHED */
2844	} else if (ifma->ifma_trace != NULL) {
2845		(*ifma->ifma_trace)(ifma, TRUE);
2846	}
2847	if (!locked)
2848		IFMA_UNLOCK(ifma);
2849}
2850
2851void
2852ifma_remref(struct ifmultiaddr *ifma)
2853{
2854	struct ifmultiaddr *ll;
2855
2856	IFMA_LOCK(ifma);
2857
2858	if (ifma->ifma_refcount == 0) {
2859		panic("%s: ifma=%p negative refcnt", __func__, ifma);
2860		/* NOTREACHED */
2861	} else if (ifma->ifma_trace != NULL) {
2862		(*ifma->ifma_trace)(ifma, FALSE);
2863	}
2864
2865	--ifma->ifma_refcount;
2866	if (ifma->ifma_refcount > 0) {
2867		IFMA_UNLOCK(ifma);
2868		return;
2869	}
2870
2871	ll = ifma->ifma_ll;
2872	ifma->ifma_ifp = NULL;
2873	ifma->ifma_ll = NULL;
2874	IFMA_UNLOCK(ifma);
2875	ifma_free(ifma);	/* deallocate it */
2876
2877	if (ll != NULL)
2878		IFMA_REMREF(ll);
2879}
2880
2881static void
2882if_attach_ifma(struct ifnet *ifp, struct ifmultiaddr *ifma, int anon)
2883{
2884	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
2885	IFMA_LOCK_ASSERT_HELD(ifma);
2886
2887	if (ifma->ifma_ifp != ifp) {
2888		panic("%s: Mismatch ifma_ifp=%p != ifp=%p", __func__,
2889		    ifma->ifma_ifp, ifp);
2890		/* NOTREACHED */
2891	} else if (ifma->ifma_debug & IFD_ATTACHED) {
2892		panic("%s: Attempt to attach an already attached ifma=%p",
2893		    __func__, ifma);
2894		/* NOTREACHED */
2895	} else if (anon && (ifma->ifma_flags & IFMAF_ANONYMOUS)) {
2896		panic("%s: ifma=%p unexpected IFMAF_ANONYMOUS", __func__, ifma);
2897		/* NOTREACHED */
2898	} else if (ifma->ifma_debug & IFD_TRASHED) {
2899		panic("%s: Attempt to reattach a detached ifma=%p",
2900		    __func__, ifma);
2901		/* NOTREACHED */
2902	}
2903
2904	ifma->ifma_reqcnt++;
2905	VERIFY(ifma->ifma_reqcnt == 1);
2906	IFMA_ADDREF_LOCKED(ifma);
2907	ifma->ifma_debug |= IFD_ATTACHED;
2908	if (anon) {
2909		ifma->ifma_anoncnt++;
2910		VERIFY(ifma->ifma_anoncnt == 1);
2911		ifma->ifma_flags |= IFMAF_ANONYMOUS;
2912	}
2913
2914	LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2915}
2916
2917static int
2918if_detach_ifma(struct ifnet *ifp, struct ifmultiaddr *ifma, int anon)
2919{
2920	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
2921	IFMA_LOCK_ASSERT_HELD(ifma);
2922
2923	if (ifma->ifma_reqcnt == 0) {
2924		panic("%s: ifma=%p negative reqcnt", __func__, ifma);
2925		/* NOTREACHED */
2926	} else if (anon && !(ifma->ifma_flags & IFMAF_ANONYMOUS)) {
2927		panic("%s: ifma=%p missing IFMAF_ANONYMOUS", __func__, ifma);
2928		/* NOTREACHED */
2929	} else if (anon && ifma->ifma_anoncnt == 0) {
2930		panic("%s: ifma=%p negative anonreqcnt", __func__, ifma);
2931		/* NOTREACHED */
2932	} else if (ifma->ifma_ifp != ifp) {
2933		panic("%s: Mismatch ifma_ifp=%p, ifp=%p", __func__,
2934		    ifma->ifma_ifp, ifp);
2935		/* NOTREACHED */
2936	}
2937
2938	if (anon) {
2939		--ifma->ifma_anoncnt;
2940		if (ifma->ifma_anoncnt > 0)
2941			return (0);
2942		ifma->ifma_flags &= ~IFMAF_ANONYMOUS;
2943	}
2944
2945	--ifma->ifma_reqcnt;
2946	if (ifma->ifma_reqcnt > 0)
2947		return (0);
2948
2949	if (ifma->ifma_protospec != NULL) {
2950		panic("%s: Protospec not NULL for ifma=%p", __func__, ifma);
2951		/* NOTREACHED */
2952	} else if ((ifma->ifma_flags & IFMAF_ANONYMOUS) ||
2953	    ifma->ifma_anoncnt != 0) {
2954		panic("%s: Detaching ifma=%p with outstanding anon req",
2955		    __func__, ifma);
2956		/* NOTREACHED */
2957	} else if (!(ifma->ifma_debug & IFD_ATTACHED)) {
2958		panic("%s: Attempt to detach an unattached address ifma=%p",
2959		    __func__, ifma);
2960		/* NOTREACHED */
2961	} else if (ifma->ifma_debug & IFD_TRASHED) {
2962		panic("%s: ifma %p is already in trash list", __func__, ifma);
2963		/* NOTREACHED */
2964	}
2965
2966	/*
2967	 * NOTE: Caller calls IFMA_REMREF
2968	 */
2969	ifma->ifma_debug &= ~IFD_ATTACHED;
2970	LIST_REMOVE(ifma, ifma_link);
2971	if (LIST_EMPTY(&ifp->if_multiaddrs))
2972		ifp->if_updatemcasts = 0;
2973
2974	if (ifma->ifma_debug & IFD_DEBUG) {
2975		/* Become a regular mutex, just in case */
2976		IFMA_CONVERT_LOCK(ifma);
2977		lck_mtx_lock(&ifma_trash_lock);
2978		TAILQ_INSERT_TAIL(&ifma_trash_head,
2979		    (struct ifmultiaddr_dbg *)ifma, ifma_trash_link);
2980		lck_mtx_unlock(&ifma_trash_lock);
2981		ifma->ifma_debug |= IFD_TRASHED;
2982	}
2983
2984	return (1);
2985}
2986
2987/*
2988 * Find an ifmultiaddr that matches a socket address on an interface.
2989 *
2990 * Caller is responsible for holding the ifnet_lock while calling
2991 * this function.
2992 */
2993static int
2994if_addmulti_doesexist(struct ifnet *ifp, const struct sockaddr *sa,
2995    struct ifmultiaddr **retifma, int anon)
2996{
2997	struct ifmultiaddr *ifma;
2998
2999	for (ifma = LIST_FIRST(&ifp->if_multiaddrs); ifma != NULL;
3000	     ifma = LIST_NEXT(ifma, ifma_link)) {
3001		IFMA_LOCK_SPIN(ifma);
3002		if (!equal(sa, ifma->ifma_addr)) {
3003			IFMA_UNLOCK(ifma);
3004			continue;
3005		}
3006		if (anon) {
3007			VERIFY(!(ifma->ifma_flags & IFMAF_ANONYMOUS) ||
3008			    ifma->ifma_anoncnt != 0);
3009			VERIFY((ifma->ifma_flags & IFMAF_ANONYMOUS) ||
3010			    ifma->ifma_anoncnt == 0);
3011			ifma->ifma_anoncnt++;
3012			if (!(ifma->ifma_flags & IFMAF_ANONYMOUS)) {
3013				VERIFY(ifma->ifma_anoncnt == 1);
3014				ifma->ifma_flags |= IFMAF_ANONYMOUS;
3015			}
3016		}
3017		if (!anon || ifma->ifma_anoncnt == 1) {
3018			ifma->ifma_reqcnt++;
3019			VERIFY(ifma->ifma_reqcnt > 1);
3020		}
3021		if (retifma != NULL) {
3022			*retifma = ifma;
3023			IFMA_ADDREF_LOCKED(ifma);
3024		}
3025		IFMA_UNLOCK(ifma);
3026		return (0);
3027	}
3028	return (ENOENT);
3029}
3030
3031/*
3032 * Radar 3642395, make sure all multicasts are in a standard format.
3033 */
3034static struct sockaddr*
3035copy_and_normalize(const struct sockaddr *original)
3036{
3037	int			alen = 0;
3038	const u_char		*aptr = NULL;
3039	struct sockaddr		*copy = NULL;
3040	struct sockaddr_dl	*sdl_new = NULL;
3041	int			len = 0;
3042
3043	if (original->sa_family != AF_LINK &&
3044	    original->sa_family != AF_UNSPEC) {
3045		/* Just make a copy */
3046		MALLOC(copy, struct sockaddr*, original->sa_len,
3047		    M_IFADDR, M_WAITOK);
3048		if (copy != NULL)
3049			bcopy(original, copy, original->sa_len);
3050		return (copy);
3051	}
3052
3053	switch (original->sa_family) {
3054		case AF_LINK: {
3055			const struct sockaddr_dl *sdl_original =
3056			    (struct sockaddr_dl*)(uintptr_t)(size_t)original;
3057
3058			if (sdl_original->sdl_nlen + sdl_original->sdl_alen +
3059			    sdl_original->sdl_slen +
3060			    offsetof(struct sockaddr_dl, sdl_data) >
3061			    sdl_original->sdl_len)
3062				return (NULL);
3063
3064			alen = sdl_original->sdl_alen;
3065			aptr = CONST_LLADDR(sdl_original);
3066		}
3067		break;
3068
3069		case AF_UNSPEC: {
3070			if (original->sa_len < ETHER_ADDR_LEN +
3071			    offsetof(struct sockaddr, sa_data)) {
3072				return (NULL);
3073			}
3074
3075			alen = ETHER_ADDR_LEN;
3076			aptr = (const u_char*)original->sa_data;
3077		}
3078		break;
3079	}
3080
3081	if (alen == 0 || aptr == NULL)
3082		return (NULL);
3083
3084	len = alen + offsetof(struct sockaddr_dl, sdl_data);
3085	MALLOC(sdl_new, struct sockaddr_dl*, len, M_IFADDR, M_WAITOK);
3086
3087	if (sdl_new != NULL) {
3088		bzero(sdl_new, len);
3089		sdl_new->sdl_len = len;
3090		sdl_new->sdl_family = AF_LINK;
3091		sdl_new->sdl_alen = alen;
3092		bcopy(aptr, LLADDR(sdl_new), alen);
3093	}
3094
3095	return ((struct sockaddr*)sdl_new);
3096}
3097
/*
 * Network-layer protocol domains which hold references to the underlying
 * link-layer record must use this routine.  It forwards to
 * if_addmulti_common() as a non-anonymous request and returns its result.
 */
int
if_addmulti(struct ifnet *ifp, const struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	int error;

	/* Last argument 0: not an anonymous request */
	error = if_addmulti_common(ifp, sa, retifma, 0);

	return (error);
}
3108
/*
 * Anything other than network-layer protocol domains which hold references
 * to the underlying link-layer record must use this routine: SIOCADDMULTI
 * ioctl, ifnet_add_multicast(), if_bond.  It forwards to
 * if_addmulti_common() as an anonymous request and returns its result.
 */
int
if_addmulti_anon(struct ifnet *ifp, const struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	int error;

	/* Last argument 1: anonymous request */
	error = if_addmulti_common(ifp, sa, retifma, 1);

	return (error);
}
3120
/*
 * Register an additional multicast address with a network interface.
 *
 * - If the address is already present, bump the reference count on the
 *   address and return.
 * - If the address is not link-layer, look up a link layer address.
 * - Allocate address structures for one or both addresses, and attach to the
 *   multicast address list on the interface.  If automatically adding a link
 *   layer address, the protocol address will own a reference to the link
 *   layer address, to be freed when it is freed.
 * - Notify the network device driver of an addition to the multicast address
 *   list.
 *
 * 'sa' points to caller-owned memory with the desired multicast address.
 *
 * 'retifma' will be used to return a pointer to the resulting multicast
 * address reference, if desired.
 *
 * 'anon' indicates a link-layer address with no protocol address reference
 * made to it.  Anything other than network-layer protocol domain requests
 * are considered as anonymous.
 *
 * Returns 0 on success (including the "already present" case), ENOMEM
 * when copy_and_normalize() or ifma_alloc() returns NULL, EADDRNOTAVAIL
 * when the interface lacks IFF_MULTICAST, or the error reported by
 * dlil_resolve_multi() (EOPNOTSUPP from it is treated as success).
 *
 * The 'cleanup' label releases whatever temporaries are still owned by
 * this routine and returns 'error'; it is also the exit used when the
 * address turned out to exist already, in which case 'error' is 0.
 */
static int
if_addmulti_common(struct ifnet *ifp, const struct sockaddr *sa,
    struct ifmultiaddr **retifma, int anon)
{
	struct sockaddr_storage storage;
	struct sockaddr *llsa = NULL;
	struct sockaddr *dupsa = NULL;
	int error = 0, ll_firstref = 0, lladdr;
	struct ifmultiaddr *ifma = NULL;
	struct ifmultiaddr *llifma = NULL;

	/* Only AF_UNSPEC/AF_LINK is allowed for an "anonymous" address */
	VERIFY(!anon || sa->sa_family == AF_UNSPEC ||
	    sa->sa_family == AF_LINK);

	/* If sa is an AF_LINK or AF_UNSPEC, duplicate and normalize it */
	if (sa->sa_family == AF_LINK || sa->sa_family == AF_UNSPEC) {
		dupsa = copy_and_normalize(sa);
		if (dupsa == NULL) {
			error = ENOMEM;
			goto cleanup;
		}
		/* From here on match and store the normalized form */
		sa = dupsa;
	}

	ifnet_lock_exclusive(ifp);
	if (!(ifp->if_flags & IFF_MULTICAST)) {
		error = EADDRNOTAVAIL;
		ifnet_lock_done(ifp);
		goto cleanup;
	}

	/* If the address is already present, return a new reference to it */
	error = if_addmulti_doesexist(ifp, sa, retifma, anon);
	ifnet_lock_done(ifp);
	if (error == 0)
		goto cleanup;

	/*
	 * The address isn't already present; give the link layer a chance
	 * to accept/reject it, and also find out which AF_LINK address this
	 * maps to, if it isn't one already.
	 */
	error = dlil_resolve_multi(ifp, sa, (struct sockaddr *)&storage,
	    sizeof (storage));
	if (error == 0 && storage.ss_len != 0) {
		/* A link-layer mapping came back; prepare a record for it */
		llsa = copy_and_normalize((struct sockaddr *)&storage);
		if (llsa == NULL) {
			error = ENOMEM;
			goto cleanup;
		}

		llifma = ifma_alloc(M_WAITOK);
		if (llifma == NULL) {
			error = ENOMEM;
			goto cleanup;
		}
	}

	/* to be similar to FreeBSD */
	if (error == EOPNOTSUPP)
		error = 0;
	else if (error != 0)
		goto cleanup;

	/* Allocate while we aren't holding any locks */
	if (dupsa == NULL) {
		dupsa = copy_and_normalize(sa);
		if (dupsa == NULL) {
			error = ENOMEM;
			goto cleanup;
		}
	}
	ifma = ifma_alloc(M_WAITOK);
	if (ifma == NULL) {
		error = ENOMEM;
		goto cleanup;
	}

	ifnet_lock_exclusive(ifp);
	/*
	 * Check again for the matching multicast.
	 * The ifnet lock was dropped after the first lookup, so the record
	 * may have been added in the meantime; if so, the temporaries
	 * allocated above are released at 'cleanup'.
	 */
	error = if_addmulti_doesexist(ifp, sa, retifma, anon);
	if (error == 0) {
		ifnet_lock_done(ifp);
		goto cleanup;
	}

	if (llifma != NULL) {
		VERIFY(!anon);	/* must not get here if "anonymous" */
		if (if_addmulti_doesexist(ifp, llsa, &ifma->ifma_ll, 0) == 0) {
			/*
			 * The link-layer record already exists and is now
			 * referenced through ifma_ll; the spare copy and
			 * record are no longer needed.
			 */
			FREE(llsa, M_IFADDR);
			llsa = NULL;
			ifma_free(llifma);
			llifma = NULL;
			VERIFY(ifma->ifma_ll->ifma_ifp == ifp);
		} else {
			/* First use of this link-layer address: attach it */
			ll_firstref = 1;
			llifma->ifma_addr = llsa;
			llifma->ifma_ifp = ifp;
			IFMA_LOCK(llifma);
			if_attach_ifma(ifp, llifma, 0);
			/* add extra refcnt for ifma */
			IFMA_ADDREF_LOCKED(llifma);
			IFMA_UNLOCK(llifma);
			ifma->ifma_ll = llifma;
		}
	}

	/* "anonymous" request should not result in network address */
	VERIFY(!anon || ifma->ifma_ll == NULL);

	/* The new record takes ownership of the duplicated address */
	ifma->ifma_addr = dupsa;
	ifma->ifma_ifp = ifp;
	IFMA_LOCK(ifma);
	if_attach_ifma(ifp, ifma, anon);
	IFMA_ADDREF_LOCKED(ifma);		/* for this routine */
	if (retifma != NULL) {
		*retifma = ifma;
		IFMA_ADDREF_LOCKED(*retifma);	/* for caller */
	}
	/* Remember whether the record just added is itself link-layer */
	lladdr = (ifma->ifma_addr->sa_family == AF_UNSPEC ||
	    ifma->ifma_addr->sa_family == AF_LINK);
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);

	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IFMA_REMREF(ifma);			/* for this routine */

	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.  Do this only for newly-
	 * added AF_LINK/AF_UNSPEC address in the if_multiaddrs set.
	 */
	if (lladdr || ll_firstref)
		(void) ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);

	if (ifp->if_updatemcasts > 0)
		ifp->if_updatemcasts = 0;

	return (0);

cleanup:
	/* Release only what was not handed over to an attached record */
	if (ifma != NULL)
		ifma_free(ifma);
	if (dupsa != NULL)
		FREE(dupsa, M_IFADDR);
	if (llifma != NULL)
		ifma_free(llifma);
	if (llsa != NULL)
		FREE(llsa, M_IFADDR);

	return (error);
}
3298
3299/*
3300 * Delete a multicast group membership by network-layer group address.
3301 * This routine is deprecated.
3302 */
3303int
3304if_delmulti(struct ifnet *ifp, const struct sockaddr *sa)
3305{
3306	return (if_delmulti_common(NULL, ifp, sa, 0));
3307}
3308
3309/*
3310 * Delete a multicast group membership by group membership pointer.
3311 * Network-layer protocol domains must use this routine.
3312 */
3313int
3314if_delmulti_ifma(struct ifmultiaddr *ifma)
3315{
3316	return (if_delmulti_common(ifma, NULL, NULL, 0));
3317}
3318
3319/*
3320 * Anything other than network-layer protocol domains which hold references
3321 * to the underlying link-layer record must use this routine: SIOCDELMULTI
3322 * ioctl, ifnet_remove_multicast(), if_bond.
3323 */
3324int
3325if_delmulti_anon(struct ifnet *ifp, const struct sockaddr *sa)
3326{
3327	return (if_delmulti_common(NULL, ifp, sa, 1));
3328}
3329
/*
 * Common back end for if_delmulti(), if_delmulti_ifma() and
 * if_delmulti_anon().
 *
 * The membership record is named either directly by `ifma' (the interface
 * is then taken from ifma->ifma_ifp), or by an (`ifp', `sa') pair that is
 * looked up on ifp->if_multiaddrs.  When `anon' is non-zero, only records
 * flagged IFMAF_ANONYMOUS are accepted by that lookup.
 *
 * The record is passed to if_detach_ifma().  When that reports the last
 * reference, the associated link-layer record (ifma_ll), if any, is
 * detached as well, an RTM_DELMADDR routing message is generated, and the
 * interface is sent SIOCDELMULTI if a link-layer address left the list.
 *
 * Returns 0 on success, ENOMEM if an AF_LINK/AF_UNSPEC address could not
 * be duplicated by copy_and_normalize(), or ENOENT if the entry could not
 * be found.
 */
static int
if_delmulti_common(struct ifmultiaddr *ifma, struct ifnet *ifp,
    const struct sockaddr *sa, int anon)
{
	struct sockaddr		*dupsa = NULL;
	int			lastref, ll_lastref = 0, lladdr;
	struct ifmultiaddr	*ll = NULL;

	/* sanity check for callers */
	VERIFY(ifma != NULL || (ifp != NULL && sa != NULL));

	/* a caller-supplied record determines the interface */
	if (ifma != NULL)
		ifp = ifma->ifma_ifp;

	/*
	 * For link-layer style addresses, do the lookup with the private
	 * copy returned by copy_and_normalize(); it is freed on every
	 * return path below.
	 */
	if (sa != NULL &&
	    (sa->sa_family == AF_LINK || sa->sa_family == AF_UNSPEC)) {
		dupsa = copy_and_normalize(sa);
		if (dupsa == NULL)
			return (ENOMEM);
		sa = dupsa;
	}

	ifnet_lock_exclusive(ifp);
	if (ifma == NULL) {
		/*
		 * Lookup by address: skip entries whose address differs,
		 * and non-anonymous entries when an anonymous removal was
		 * requested.  Each candidate is locked while inspected.
		 */
		for (ifma = LIST_FIRST(&ifp->if_multiaddrs); ifma != NULL;
		     ifma = LIST_NEXT(ifma, ifma_link)) {
			IFMA_LOCK(ifma);
			if (!equal(sa, ifma->ifma_addr) ||
			    (anon && !(ifma->ifma_flags & IFMAF_ANONYMOUS))) {
				VERIFY(!(ifma->ifma_flags & IFMAF_ANONYMOUS) ||
				    ifma->ifma_anoncnt != 0);
				IFMA_UNLOCK(ifma);
				continue;
			}
			/* found; keep it locked */
			break;
		}
		if (ifma == NULL) {
			if (dupsa != NULL)
				FREE(dupsa, M_IFADDR);
			ifnet_lock_done(ifp);
			return (ENOENT);
		}
	} else {
		/* caller-supplied record: just take its lock */
		IFMA_LOCK(ifma);
	}
	IFMA_LOCK_ASSERT_HELD(ifma);
	IFMA_ADDREF_LOCKED(ifma);	/* for this routine */
	lastref = if_detach_ifma(ifp, ifma, anon);
	VERIFY(!lastref || (!(ifma->ifma_debug & IFD_ATTACHED) &&
	    ifma->ifma_reqcnt == 0));
	/* an "anonymous" request must not carry a link-layer companion */
	VERIFY(!anon || ifma->ifma_ll == NULL);
	/* snapshot what is needed later while the ifma lock is still held */
	ll = ifma->ifma_ll;
	lladdr = (ifma->ifma_addr->sa_family == AF_UNSPEC ||
	    ifma->ifma_addr->sa_family == AF_LINK);
	IFMA_UNLOCK(ifma);
	/* last reference on the record: detach its link-layer record too */
	if (lastref && ll != NULL) {
		IFMA_LOCK(ll);
		ll_lastref = if_detach_ifma(ifp, ll, 0);
		IFMA_UNLOCK(ll);
	}
	ifnet_lock_done(ifp);

	/* the remaining work is done with the ifnet lock released */
	if (lastref)
		rt_newmaddrmsg(RTM_DELMADDR, ifma);

	if ((ll == NULL && lastref && lladdr) || ll_lastref) {
		/*
		 * Make sure the interface driver is notified in the
		 * case of a link layer mcast group being left.  Do
		 * this only for a AF_LINK/AF_UNSPEC address that has
		 * been removed from the if_multiaddrs set.
		 */
		ifnet_ioctl(ifp, 0, SIOCDELMULTI, NULL);
	}

	if (lastref)
		IFMA_REMREF(ifma);	/* for if_multiaddrs list */
	if (ll_lastref)
		IFMA_REMREF(ll);	/* for if_multiaddrs list */

	IFMA_REMREF(ifma);		/* for this routine */
	if (dupsa != NULL)
		FREE(dupsa, M_IFADDR);

	return (0);
}
3422
3423/*
3424 * Shutdown all network activity.  Used boot() when halting
3425 * system.
3426 */
3427int
3428if_down_all(void)
3429{
3430	struct ifnet **ifp;
3431	u_int32_t	count;
3432	u_int32_t	i;
3433
3434	if (ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp, &count) == 0) {
3435		for (i = 0; i < count; i++) {
3436			if_down(ifp[i]);
3437			dlil_proto_unplumb_all(ifp[i]);
3438		}
3439		ifnet_list_free(ifp);
3440	}
3441
3442	return 0;
3443}
3444
3445/*
3446 * Delete Routes for a Network Interface
3447 *
3448 * Called for each routing entry via the rnh->rnh_walktree() call above
3449 * to delete all route entries referencing a detaching network interface.
3450 *
3451 * Arguments:
3452 *	rn	pointer to node in the routing table
3453 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
3454 *
3455 * Returns:
3456 *	0	successful
3457 *	errno	failed - reason indicated
3458 *
3459 */
3460static int
3461if_rtdel(struct radix_node *rn, void *arg)
3462{
3463	struct rtentry	*rt = (struct rtentry *)rn;
3464	struct ifnet	*ifp = arg;
3465	int		err;
3466
3467	if (rt == NULL)
3468		return (0);
3469	/*
3470	 * Checking against RTF_UP protects against walktree
3471	 * recursion problems with cloned routes.
3472	 */
3473	RT_LOCK(rt);
3474	if (rt->rt_ifp == ifp && (rt->rt_flags & RTF_UP)) {
3475		/*
3476		 * Safe to drop rt_lock and use rt_key, rt_gateway,
3477		 * since holding rnh_lock here prevents another thread
3478		 * from calling rt_setgate() on this route.
3479		 */
3480		RT_UNLOCK(rt);
3481		err = rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway,
3482		    rt_mask(rt), rt->rt_flags, NULL);
3483		if (err) {
3484			log(LOG_WARNING, "if_rtdel: error %d\n", err);
3485		}
3486	} else {
3487		RT_UNLOCK(rt);
3488	}
3489	return (0);
3490}
3491
3492/*
3493 * Removes routing table reference to a given interface
3494 * for a given protocol family
3495 */
3496void
3497if_rtproto_del(struct ifnet *ifp, int protocol)
3498{
3499	struct radix_node_head  *rnh;
3500
3501	if ((protocol <= AF_MAX) && (protocol >= 0) &&
3502		((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) {
3503		lck_mtx_lock(rnh_lock);
3504		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
3505		lck_mtx_unlock(rnh_lock);
3506	}
3507}
3508
3509static int
3510if_rtmtu(struct radix_node *rn, void *arg)
3511{
3512	struct rtentry *rt = (struct rtentry *)rn;
3513	struct ifnet *ifp = arg;
3514
3515	RT_LOCK(rt);
3516	if (rt->rt_ifp == ifp) {
3517		/*
3518		 * Update the MTU of this entry only if the MTU
3519		 * has not been locked (RTV_MTU is not set) and
3520		 * if it was non-zero to begin with.
3521		 */
3522		if (!(rt->rt_rmx.rmx_locks & RTV_MTU) && rt->rt_rmx.rmx_mtu)
3523			rt->rt_rmx.rmx_mtu = ifp->if_mtu;
3524	}
3525	RT_UNLOCK(rt);
3526
3527	return (0);
3528}
3529
3530/*
3531 * Update the MTU metric of all route entries in all protocol tables
3532 * associated with a particular interface; this is called when the
3533 * MTU of that interface has changed.
3534 */
3535static
3536void if_rtmtu_update(struct ifnet *ifp)
3537{
3538	struct radix_node_head *rnh;
3539	int p;
3540
3541	for (p = 0; p < AF_MAX + 1; p++) {
3542		if ((rnh = rt_tables[p]) == NULL)
3543			continue;
3544
3545		lck_mtx_lock(rnh_lock);
3546		(void) rnh->rnh_walktree(rnh, if_rtmtu, ifp);
3547		lck_mtx_unlock(rnh_lock);
3548	}
3549	routegenid_update();
3550}
3551
3552__private_extern__ void
3553if_data_internal_to_if_data(struct ifnet *ifp,
3554    const struct if_data_internal *if_data_int, struct if_data *if_data)
3555{
3556#pragma unused(ifp)
3557#define COPYFIELD(fld)		if_data->fld = if_data_int->fld
3558#define COPYFIELD32(fld)	if_data->fld = (u_int32_t)(if_data_int->fld)
3559/* compiler will cast down to 32-bit */
3560#define	COPYFIELD32_ATOMIC(fld) do {					\
3561	atomic_get_64(if_data->fld,					\
3562	    (u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);		\
3563} while (0)
3564
3565	COPYFIELD(ifi_type);
3566	COPYFIELD(ifi_typelen);
3567	COPYFIELD(ifi_physical);
3568	COPYFIELD(ifi_addrlen);
3569	COPYFIELD(ifi_hdrlen);
3570	COPYFIELD(ifi_recvquota);
3571	COPYFIELD(ifi_xmitquota);
3572	if_data->ifi_unused1 = 0;
3573	COPYFIELD(ifi_mtu);
3574	COPYFIELD(ifi_metric);
3575	if (if_data_int->ifi_baudrate & 0xFFFFFFFF00000000LL) {
3576		if_data->ifi_baudrate = 0xFFFFFFFF;
3577	} else {
3578		COPYFIELD32(ifi_baudrate);
3579	}
3580
3581	COPYFIELD32_ATOMIC(ifi_ipackets);
3582	COPYFIELD32_ATOMIC(ifi_ierrors);
3583	COPYFIELD32_ATOMIC(ifi_opackets);
3584	COPYFIELD32_ATOMIC(ifi_oerrors);
3585	COPYFIELD32_ATOMIC(ifi_collisions);
3586	COPYFIELD32_ATOMIC(ifi_ibytes);
3587	COPYFIELD32_ATOMIC(ifi_obytes);
3588	COPYFIELD32_ATOMIC(ifi_imcasts);
3589	COPYFIELD32_ATOMIC(ifi_omcasts);
3590	COPYFIELD32_ATOMIC(ifi_iqdrops);
3591	COPYFIELD32_ATOMIC(ifi_noproto);
3592
3593	COPYFIELD(ifi_recvtiming);
3594	COPYFIELD(ifi_xmittiming);
3595
3596	if_data->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec;
3597	if_data->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec;
3598
3599	if_data->ifi_lastchange.tv_sec += boottime_sec();
3600
3601	if_data->ifi_unused2 = 0;
3602	COPYFIELD(ifi_hwassist);
3603	if_data->ifi_reserved1 = 0;
3604	if_data->ifi_reserved2 = 0;
3605#undef COPYFIELD32_ATOMIC
3606#undef COPYFIELD32
3607#undef COPYFIELD
3608}
3609
3610__private_extern__ void
3611if_data_internal_to_if_data64(struct ifnet *ifp,
3612    const struct if_data_internal *if_data_int,
3613    struct if_data64 *if_data64)
3614{
3615#pragma unused(ifp)
3616#define COPYFIELD64(fld)	if_data64->fld = if_data_int->fld
3617#define COPYFIELD64_ATOMIC(fld) do {					\
3618	atomic_get_64(if_data64->fld,					\
3619	    (u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);		\
3620} while (0)
3621
3622	COPYFIELD64(ifi_type);
3623	COPYFIELD64(ifi_typelen);
3624	COPYFIELD64(ifi_physical);
3625	COPYFIELD64(ifi_addrlen);
3626	COPYFIELD64(ifi_hdrlen);
3627	COPYFIELD64(ifi_recvquota);
3628	COPYFIELD64(ifi_xmitquota);
3629	if_data64->ifi_unused1 = 0;
3630	COPYFIELD64(ifi_mtu);
3631	COPYFIELD64(ifi_metric);
3632	COPYFIELD64(ifi_baudrate);
3633
3634	COPYFIELD64_ATOMIC(ifi_ipackets);
3635	COPYFIELD64_ATOMIC(ifi_ierrors);
3636	COPYFIELD64_ATOMIC(ifi_opackets);
3637	COPYFIELD64_ATOMIC(ifi_oerrors);
3638	COPYFIELD64_ATOMIC(ifi_collisions);
3639	COPYFIELD64_ATOMIC(ifi_ibytes);
3640	COPYFIELD64_ATOMIC(ifi_obytes);
3641	COPYFIELD64_ATOMIC(ifi_imcasts);
3642	COPYFIELD64_ATOMIC(ifi_omcasts);
3643	COPYFIELD64_ATOMIC(ifi_iqdrops);
3644	COPYFIELD64_ATOMIC(ifi_noproto);
3645
3646	/* Note these two fields are actually 32 bit, so doing COPYFIELD64_ATOMIC will
3647	 * cause them to be misaligned
3648	 */
3649	COPYFIELD64(ifi_recvtiming);
3650	COPYFIELD64(ifi_xmittiming);
3651
3652	if_data64->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec;
3653	if_data64->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec;
3654
3655	if_data64->ifi_lastchange.tv_sec += boottime_sec();
3656
3657#undef COPYFIELD64
3658}
3659
3660__private_extern__ void
3661if_copy_traffic_class(struct ifnet *ifp,
3662    struct if_traffic_class *if_tc)
3663{
3664#define COPY_IF_TC_FIELD64_ATOMIC(fld) do {			\
3665	atomic_get_64(if_tc->fld,				\
3666	    (u_int64_t *)(void *)(uintptr_t)&ifp->if_tc.fld);	\
3667} while (0)
3668
3669	bzero(if_tc, sizeof (*if_tc));
3670	COPY_IF_TC_FIELD64_ATOMIC(ifi_ibepackets);
3671	COPY_IF_TC_FIELD64_ATOMIC(ifi_ibebytes);
3672	COPY_IF_TC_FIELD64_ATOMIC(ifi_obepackets);
3673	COPY_IF_TC_FIELD64_ATOMIC(ifi_obebytes);
3674	COPY_IF_TC_FIELD64_ATOMIC(ifi_ibkpackets);
3675	COPY_IF_TC_FIELD64_ATOMIC(ifi_ibkbytes);
3676	COPY_IF_TC_FIELD64_ATOMIC(ifi_obkpackets);
3677	COPY_IF_TC_FIELD64_ATOMIC(ifi_obkbytes);
3678	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivipackets);
3679	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivibytes);
3680	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovipackets);
3681	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovibytes);
3682	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivopackets);
3683	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivobytes);
3684	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovopackets);
3685	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovobytes);
3686	COPY_IF_TC_FIELD64_ATOMIC(ifi_ipvpackets);
3687	COPY_IF_TC_FIELD64_ATOMIC(ifi_ipvbytes);
3688	COPY_IF_TC_FIELD64_ATOMIC(ifi_opvpackets);
3689	COPY_IF_TC_FIELD64_ATOMIC(ifi_opvbytes);
3690
3691#undef COPY_IF_TC_FIELD64_ATOMIC
3692}
3693
3694void
3695if_copy_data_extended(struct ifnet *ifp, struct if_data_extended *if_de)
3696{
3697#define COPY_IF_DE_FIELD64_ATOMIC(fld) do {			\
3698	atomic_get_64(if_de->fld,				\
3699	    (u_int64_t *)(void *)(uintptr_t)&ifp->if_data.fld);	\
3700} while (0)
3701
3702	bzero(if_de, sizeof (*if_de));
3703	COPY_IF_DE_FIELD64_ATOMIC(ifi_alignerrs);
3704	COPY_IF_DE_FIELD64_ATOMIC(ifi_dt_bytes);
3705	COPY_IF_DE_FIELD64_ATOMIC(ifi_fpackets);
3706	COPY_IF_DE_FIELD64_ATOMIC(ifi_fbytes);
3707
3708#undef COPY_IF_DE_FIELD64_ATOMIC
3709}
3710
3711void
3712if_copy_packet_stats(struct ifnet *ifp, struct if_packet_stats *if_ps)
3713{
3714#define COPY_IF_PS_TCP_FIELD64_ATOMIC(fld) do {				\
3715	atomic_get_64(if_ps->ifi_tcp_##fld,				\
3716	    (u_int64_t *)(void *)(uintptr_t)&ifp->if_tcp_stat->fld);	\
3717} while (0)
3718
3719#define COPY_IF_PS_UDP_FIELD64_ATOMIC(fld) do {				\
3720	atomic_get_64(if_ps->ifi_udp_##fld,				\
3721	    (u_int64_t *)(void *)(uintptr_t)&ifp->if_udp_stat->fld);	\
3722} while (0)
3723
3724	COPY_IF_PS_TCP_FIELD64_ATOMIC(badformat);
3725	COPY_IF_PS_TCP_FIELD64_ATOMIC(unspecv6);
3726	COPY_IF_PS_TCP_FIELD64_ATOMIC(synfin);
3727	COPY_IF_PS_TCP_FIELD64_ATOMIC(badformatipsec);
3728	COPY_IF_PS_TCP_FIELD64_ATOMIC(noconnnolist);
3729	COPY_IF_PS_TCP_FIELD64_ATOMIC(noconnlist);
3730	COPY_IF_PS_TCP_FIELD64_ATOMIC(listbadsyn);
3731	COPY_IF_PS_TCP_FIELD64_ATOMIC(icmp6unreach);
3732	COPY_IF_PS_TCP_FIELD64_ATOMIC(deprecate6);
3733	COPY_IF_PS_TCP_FIELD64_ATOMIC(ooopacket);
3734	COPY_IF_PS_TCP_FIELD64_ATOMIC(rstinsynrcv);
3735	COPY_IF_PS_TCP_FIELD64_ATOMIC(dospacket);
3736	COPY_IF_PS_TCP_FIELD64_ATOMIC(cleanup);
3737	COPY_IF_PS_TCP_FIELD64_ATOMIC(synwindow);
3738
3739	COPY_IF_PS_UDP_FIELD64_ATOMIC(port_unreach);
3740	COPY_IF_PS_UDP_FIELD64_ATOMIC(faithprefix);
3741	COPY_IF_PS_UDP_FIELD64_ATOMIC(port0);
3742	COPY_IF_PS_UDP_FIELD64_ATOMIC(badlength);
3743	COPY_IF_PS_UDP_FIELD64_ATOMIC(badchksum);
3744	COPY_IF_PS_UDP_FIELD64_ATOMIC(badmcast);
3745	COPY_IF_PS_UDP_FIELD64_ATOMIC(cleanup);
3746	COPY_IF_PS_UDP_FIELD64_ATOMIC(badipsec);
3747
3748#undef COPY_IF_PS_TCP_FIELD64_ATOMIC
3749#undef COPY_IF_PS_UDP_FIELD64_ATOMIC
3750}
3751
3752void
3753if_copy_rxpoll_stats(struct ifnet *ifp, struct if_rxpoll_stats *if_rs)
3754{
3755	bzero(if_rs, sizeof (*if_rs));
3756	if (!(ifp->if_eflags & IFEF_RXPOLL) || !ifnet_is_attached(ifp, 1))
3757		return;
3758
3759	/* by now, ifnet will stay attached so if_inp must be valid */
3760	VERIFY(ifp->if_inp != NULL);
3761	bcopy(&ifp->if_inp->pstats, if_rs, sizeof (*if_rs));
3762
3763	/* Release the IO refcnt */
3764	ifnet_decr_iorefcnt(ifp);
3765}
3766
3767struct ifaddr *
3768ifa_remref(struct ifaddr *ifa, int locked)
3769{
3770	if (!locked)
3771		IFA_LOCK_SPIN(ifa);
3772	else
3773		IFA_LOCK_ASSERT_HELD(ifa);
3774
3775	if (ifa->ifa_refcnt == 0)
3776		panic("%s: ifa %p negative refcnt\n", __func__, ifa);
3777	else if (ifa->ifa_trace != NULL)
3778		(*ifa->ifa_trace)(ifa, FALSE);
3779	if (--ifa->ifa_refcnt == 0) {
3780		if (ifa->ifa_debug & IFD_ATTACHED)
3781			panic("ifa %p attached to ifp is being freed\n", ifa);
3782		/*
3783		 * Some interface addresses are allocated either statically
3784		 * or carved out of a larger block.  Only free it if it was
3785		 * allocated via MALLOC or via the corresponding per-address
3786		 * family allocator.  Otherwise, leave it alone.
3787		 */
3788		if (ifa->ifa_debug & IFD_ALLOC) {
3789			if (ifa->ifa_free == NULL) {
3790				IFA_UNLOCK(ifa);
3791				FREE(ifa, M_IFADDR);
3792			} else {
3793				/* Become a regular mutex */
3794				IFA_CONVERT_LOCK(ifa);
3795				/* callee will unlock */
3796				(*ifa->ifa_free)(ifa);
3797			}
3798		} else {
3799			IFA_UNLOCK(ifa);
3800		}
3801		ifa = NULL;
3802	}
3803
3804	if (!locked && ifa != NULL)
3805		IFA_UNLOCK(ifa);
3806
3807	return (ifa);
3808}
3809
3810void
3811ifa_addref(struct ifaddr *ifa, int locked)
3812{
3813	if (!locked)
3814		IFA_LOCK_SPIN(ifa);
3815	else
3816		IFA_LOCK_ASSERT_HELD(ifa);
3817
3818	if (++ifa->ifa_refcnt == 0) {
3819		panic("%s: ifa %p wraparound refcnt\n", __func__, ifa);
3820		/* NOTREACHED */
3821	} else if (ifa->ifa_trace != NULL) {
3822		(*ifa->ifa_trace)(ifa, TRUE);
3823	}
3824	if (!locked)
3825		IFA_UNLOCK(ifa);
3826}
3827
/*
 * Initialize the mutex embedded in an interface address (ifa->ifa_lock)
 * with the ifa_mtx_grp lock group and the ifa_mtx_attr attributes.
 */
void
ifa_lock_init(struct ifaddr *ifa)
{
	lck_mtx_init(&ifa->ifa_lock, ifa_mtx_grp, ifa_mtx_attr);
}
3833
/*
 * Destroy the mutex embedded in an interface address (ifa->ifa_lock).
 * The lock must not be held; this is asserted before the mutex is
 * destroyed.
 */
void
ifa_lock_destroy(struct ifaddr *ifa)
{
	IFA_LOCK_ASSERT_NOTHELD(ifa);
	lck_mtx_destroy(&ifa->ifa_lock, ifa_mtx_grp);
}
3840
/*
 * 'i' group ioctls.
 *
 * The switch statement below does nothing at runtime; it serves as a
 * compile-time check that all of the socket 'i' ioctls (those in the
 * 'i' group going through soo_ioctl) made available by the networking
 * stack are unique.  Two case labels with the same value in one switch
 * are rejected by the compiler, so a duplicated ioctl value breaks the
 * build.  This works only as long as this routine is updated each time
 * a new interface ioctl is added.
 *
 * Any failure at compile time indicates duplicated ioctl values.
 */
static __attribute__((unused)) void
ifioctl_cassert(void)
{
	/*
	 * This is equivalent to _CASSERT() and the compiler wouldn't
	 * generate any instructions, thus for compile time only.
	 */
	switch ((u_long)0) {
	case 0:

	/* bsd/net/if_ppp.h */
	case SIOCGPPPSTATS:
	case SIOCGPPPCSTATS:

#if INET6
	/* bsd/netinet6/in6_var.h */
	case SIOCSIFADDR_IN6:
	case SIOCGIFADDR_IN6:
	case SIOCSIFDSTADDR_IN6:
	case SIOCSIFNETMASK_IN6:
	case SIOCGIFDSTADDR_IN6:
	case SIOCGIFNETMASK_IN6:
	case SIOCDIFADDR_IN6:
	case SIOCAIFADDR_IN6_32:
	case SIOCAIFADDR_IN6_64:
	case SIOCSIFPHYADDR_IN6_32:
	case SIOCSIFPHYADDR_IN6_64:
	case SIOCGIFPSRCADDR_IN6:
	case SIOCGIFPDSTADDR_IN6:
	case SIOCGIFAFLAG_IN6:
	case SIOCGDRLST_IN6_32:
	case SIOCGDRLST_IN6_64:
	case SIOCGPRLST_IN6_32:
	case SIOCGPRLST_IN6_64:
	case OSIOCGIFINFO_IN6:
	case SIOCGIFINFO_IN6:
	case SIOCSNDFLUSH_IN6:
	case SIOCGNBRINFO_IN6_32:
	case SIOCGNBRINFO_IN6_64:
	case SIOCSPFXFLUSH_IN6:
	case SIOCSRTRFLUSH_IN6:
	case SIOCGIFALIFETIME_IN6:
	case SIOCSIFALIFETIME_IN6:
	case SIOCGIFSTAT_IN6:
	case SIOCGIFSTAT_ICMP6:
	case SIOCSDEFIFACE_IN6_32:
	case SIOCSDEFIFACE_IN6_64:
	case SIOCGDEFIFACE_IN6_32:
	case SIOCGDEFIFACE_IN6_64:
	case SIOCSIFINFO_FLAGS:
	case SIOCSSCOPE6:
	case SIOCGSCOPE6:
	case SIOCGSCOPE6DEF:
	case SIOCSIFPREFIX_IN6:
	case SIOCGIFPREFIX_IN6:
	case SIOCDIFPREFIX_IN6:
	case SIOCAIFPREFIX_IN6:
	case SIOCCIFPREFIX_IN6:
	case SIOCSGIFPREFIX_IN6:
	case SIOCPROTOATTACH_IN6_32:
	case SIOCPROTOATTACH_IN6_64:
	case SIOCPROTODETACH_IN6:
	case SIOCLL_START_32:
	case SIOCLL_START_64:
	case SIOCLL_STOP:
	case SIOCAUTOCONF_START:
	case SIOCAUTOCONF_STOP:
	case SIOCSETROUTERMODE_IN6:
	case SIOCLL_CGASTART_32:
	case SIOCLL_CGASTART_64:
#endif /* INET6 */

	/* bsd/sys/sockio.h */
	case SIOCSIFADDR:
	case OSIOCGIFADDR:
	case SIOCSIFDSTADDR:
	case OSIOCGIFDSTADDR:
	case SIOCSIFFLAGS:
	case SIOCGIFFLAGS:
	case OSIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case OSIOCGIFCONF32:
	case OSIOCGIFCONF64:
	case OSIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFMETRIC:
	case SIOCSIFMETRIC:
	case SIOCDIFADDR:
	case SIOCAIFADDR:
	case SIOCGIFADDR:
	case SIOCGIFDSTADDR:
	case SIOCGIFBRDADDR:
	case SIOCGIFCONF32:
	case SIOCGIFCONF64:
	case SIOCGIFNETMASK:
	case SIOCAUTOADDR:
	case SIOCAUTONETMASK:
	case SIOCARPIPLL:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCGIFMTU:
	case SIOCSIFMTU:
	case SIOCGIFPHYS:
	case SIOCSIFPHYS:
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
	case SIOCSIFGENERIC:
	case SIOCGIFGENERIC:
	case SIOCRSLVMULTI:
	case SIOCSIFLLADDR:
	case SIOCGIFSTATUS:
	case SIOCSIFPHYADDR:
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCDIFPHYADDR:
	case SIOCGIFDEVMTU:
	case SIOCSIFALTMTU:
	case SIOCGIFALTMTU:
	case SIOCSIFBOND:
	case SIOCGIFBOND:
	case SIOCPROTOATTACH:
	case SIOCPROTODETACH:
	case SIOCSIFCAP:
	case SIOCGIFCAP:
	case SIOCIFCREATE:
	case SIOCIFDESTROY:
	case SIOCIFCREATE2:
	case SIOCSDRVSPEC32:
	case SIOCGDRVSPEC32:
	case SIOCSDRVSPEC64:
	case SIOCGDRVSPEC64:
	case SIOCSIFVLAN:
	case SIOCGIFVLAN:
	case SIOCIFGCLONERS32:
	case SIOCIFGCLONERS64:
	case SIOCGIFASYNCMAP:
	case SIOCSIFASYNCMAP:
#if CONFIG_MACF_NET
	case SIOCGIFMAC:
	case SIOCSIFMAC:
#endif /* CONFIG_MACF_NET */
	case SIOCSIFKPI:
	case SIOCGIFKPI:
	case SIOCGIFWAKEFLAGS:
	case SIOCGIFGETRTREFCNT:
	case SIOCGIFLINKQUALITYMETRIC:
	case SIOCSIFOPPORTUNISTIC:
	case SIOCGIFOPPORTUNISTIC:
	case SIOCSETROUTERMODE:
	case SIOCGIFEFLAGS:
	case SIOCSIFDESC:
	case SIOCGIFDESC:
	case SIOCSIFLINKPARAMS:
	case SIOCGIFLINKPARAMS:
	case SIOCGIFQUEUESTATS:
	case SIOCSIFTHROTTLE:
	case SIOCGIFTHROTTLE:
	case SIOCSIFLOG:
	case SIOCGIFLOG:
	case SIOCGIFDELEGATE:
	case SIOCGIFLLADDR:
	case SIOCGIFTYPE:
		/* empty statement: the labels above need something to label */
		;
	}
}
4018