if.c revision 352649
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: stable/11/sys/net/if.c 352649 2019-09-24 06:36:25Z kib $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36
37#include <sys/param.h>
38#include <sys/types.h>
39#include <sys/conf.h>
40#include <sys/malloc.h>
41#include <sys/sbuf.h>
42#include <sys/bus.h>
43#include <sys/mbuf.h>
44#include <sys/systm.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/protosw.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/refcount.h>
53#include <sys/module.h>
54#include <sys/rwlock.h>
55#include <sys/sockio.h>
56#include <sys/syslog.h>
57#include <sys/sysctl.h>
58#include <sys/sysent.h>
59#include <sys/taskqueue.h>
60#include <sys/domain.h>
61#include <sys/jail.h>
62#include <sys/priv.h>
63
64#include <machine/stdarg.h>
65#include <vm/uma.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_clone.h>
72#include <net/if_dl.h>
73#include <net/if_types.h>
74#include <net/if_var.h>
75#include <net/if_media.h>
76#include <net/if_vlan_var.h>
77#include <net/radix.h>
78#include <net/route.h>
79#include <net/vnet.h>
80
81#if defined(INET) || defined(INET6)
82#include <net/ethernet.h>
83#include <netinet/in.h>
84#include <netinet/in_var.h>
85#include <netinet/ip.h>
86#include <netinet/ip_carp.h>
87#ifdef INET
88#include <netinet/if_ether.h>
89#endif /* INET */
90#ifdef INET6
91#include <netinet6/in6_var.h>
92#include <netinet6/in6_ifattach.h>
93#endif /* INET6 */
94#endif /* INET || INET6 */
95
96#include <security/mac/mac_framework.h>
97
98#ifdef COMPAT_FREEBSD32
99#include <sys/mount.h>
100#include <compat/freebsd32/freebsd32.h>
101
/*
 * COMPAT_FREEBSD32 form of an ifreq buffer descriptor.  Both members are
 * fixed 32-bit slots; the per-field comments name the types they stand in
 * for.
 */
struct ifreq_buffer32 {
	uint32_t	length;		/* (size_t) */
	uint32_t	buffer;		/* (void *) */
};
106
/*
 * Interface request structure used for socket
 * ioctl's.  All interface ioctl's must have parameter
 * definitions which begin with ifr_name.  The
 * remainder may be interface specific.
 *
 * This is the COMPAT_FREEBSD32 variant: the buffer and data members use
 * 32-bit slots (struct ifreq_buffer32, uint32_t ifru_data).
 */
struct ifreq32 {
	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
	union {
		struct sockaddr	ifru_addr;
		struct sockaddr	ifru_dstaddr;
		struct sockaddr	ifru_broadaddr;
		struct ifreq_buffer32 ifru_buffer;
		short		ifru_flags[2];
		short		ifru_index;
		int		ifru_jid;
		int		ifru_metric;
		int		ifru_mtu;
		int		ifru_phys;
		int		ifru_media;
		uint32_t	ifru_data;
		int		ifru_cap[2];
		u_int		ifru_fib;
		u_char		ifru_vlan_pcp;
	} ifr_ifru;
};
/*
 * Compile-time checks: struct ifreq32 must have the same total size as
 * struct ifreq, and the ifr_ifru union must sit at the same offset.
 */
CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
    __offsetof(struct ifreq32, ifr_ifru));
136
/*
 * COMPAT_FREEBSD32 variant of the interface group request.
 * ifgru_groups is a 32-bit slot; presumably it carries a user-space
 * pointer as in struct ifgroupreq -- confirm against <net/if.h>.
 */
struct ifgroupreq32 {
	char	ifgr_name[IFNAMSIZ];
	u_int	ifgr_len;
	union {
		char		ifgru_group[IFNAMSIZ];
		uint32_t	ifgru_groups;
	} ifgr_ifgru;
};
145
/*
 * COMPAT_FREEBSD32 variant of the media request; the ifm_ulist pointer
 * is carried in a 32-bit slot.
 */
struct ifmediareq32 {
	char		ifm_name[IFNAMSIZ];
	int		ifm_current;
	int		ifm_mask;
	int		ifm_status;
	int		ifm_active;
	int		ifm_count;
	uint32_t	ifm_ulist;	/* (int *) */
};
/* Media ioctl commands re-derived for the 32-bit request structure. */
#define	SIOCGIFMEDIA32	_IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
#define	SIOCGIFXMEDIA32	_IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
157
/*
 * Expands to an extra case label for the struct ifgroupreq32 variant of
 * cmd; expands to nothing when COMPAT_FREEBSD32 is not defined.
 */
#define	_CASE_IOC_IFGROUPREQ_32(cmd)				\
    case _IOC_NEWTYPE((cmd), struct ifgroupreq32):
#else /* !COMPAT_FREEBSD32 */
#define _CASE_IOC_IFGROUPREQ_32(cmd)
#endif /* !COMPAT_FREEBSD32 */

/*
 * Case label(s) for a group-request ioctl: the native cmd and, under
 * COMPAT_FREEBSD32, its 32-bit variant.  The macro ends in "case (cmd)"
 * without a colon, so the user writes the trailing ':'.
 */
#define CASE_IOC_IFGROUPREQ(cmd)	\
    _CASE_IOC_IFGROUPREQ_32(cmd)	\
    case (cmd)
167
/*
 * Overlay of the native interface request and, when COMPAT_FREEBSD32 is
 * defined, its 32-bit counterpart.
 */
union ifreq_union {
	struct ifreq	ifr;
#ifdef COMPAT_FREEBSD32
	struct ifreq32	ifr32;
#endif
};

/*
 * Overlay of the native group request and, when COMPAT_FREEBSD32 is
 * defined, its 32-bit counterpart.
 */
union ifgroupreq_union {
	struct ifgroupreq ifgr;
#ifdef COMPAT_FREEBSD32
	struct ifgroupreq32 ifgr32;
#endif
};
181
/* sysctl nodes "link" under _net and "generic" under _net_link. */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Exposes ifqmaxlen, which is defined later in this file. */
SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
    &ifqmaxlen, 0, "max send queue size");

/* Log link state change events */
static int log_link_state_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

/* Log promiscuous mode change events */
static int log_promisc_mode_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
	&log_promisc_mode_change, 1,
	"log promiscuous mode change events");

/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
	&ifdescr_maxlen, 0,
	"administrative maximum length for interface description");

/* Malloc type used for ifp->if_description (freed in if_free_internal()). */
static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

/* global sx for non-critical path ifdescr */
static struct sx ifdescr_sx;
SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
213
/*
 * External hook pointers.  They are defined here without initializers,
 * so they start out NULL; presumably the corresponding modules (bridge,
 * netgraph, lagg, carp) fill them in -- verify against those modules.
 */
void	(*bridge_linkstate_p)(struct ifnet *ifp);
void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void	(*carp_linkstate_p)(struct ifnet *ifp);
void	(*carp_demote_adj_p)(int, char *);
int	(*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *sa);
int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
int	(*carp_attach_p)(struct ifaddr *, int);
void	(*carp_detach_p)(struct ifaddr *, bool);
#endif
#ifdef INET
int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
    const struct in6_addr *taddr);
#endif

/* Token bucket regulator dequeue hook; explicitly NULL until set. */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
239
/*
 * XXX: Style; these should be sorted alphabetically, and unprototyped
 * static functions should be prototyped. Currently they are sorted by
 * declaration order.
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t);
static void	if_freemulti(struct ifmultiaddr *);
static void	if_grow(void);
static void	if_input_default(struct ifnet *, struct mbuf *);
static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void	if_route(struct ifnet *, int flag, int fam);
static int	if_setflag(struct ifnet *, int, int, int *, int);
static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
static void	if_unroute(struct ifnet *, int flag, int fam);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void	do_link_state_change(void *, int);
static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
static int	if_getgroupmembers(struct ifgroupreq *);
static void	if_delgroups(struct ifnet *);
static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
#ifdef VIMAGE
static void	if_vmove(struct ifnet *, struct vnet *);
#endif

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related header files.
 * Should this be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif
275
/* ipsec helper hooks */
VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);

/*
 * Highest interface index currently in use; raised by ifindex_alloc()
 * and lowered by ifindex_free_locked().
 */
VNET_DEFINE(int, if_index);
/* Default send queue length, applied by ifq_init() when none is set. */
int	ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);

/*
 * Upper bound (exclusive) on usable indices.  if_grow() doubles it each
 * time it (re)allocates the table, so the first table has 16 slots.
 */
static VNET_DEFINE(int, if_indexlim) = 8;

/* Table of ifnet by index. */
VNET_DEFINE(struct ifnet **, ifindex_table);

#define	V_if_indexlim		VNET(if_indexlim)
#define	V_ifindex_table		VNET(ifindex_table)
292
/*
 * The global network interface list (V_ifnet) and related state (such as
 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
 * an rwlock.  Either may be acquired shared to stabilize the list, but both
 * must be acquired writable to modify the list.  This model allows us to
 * both stabilize the interface list during interrupt thread processing, but
 * also to stabilize it over long-running ioctls, without introducing priority
 * inversions and deadlocks.
 */
struct rwlock ifnet_rwlock;
RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
struct sx ifnet_sxlock;
SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
306
/*
 * The allocation of network interfaces is a rather non-atomic affair; we
 * need to select an index before we are ready to expose the interface for
 * use, so will use this pointer value to indicate reservation.
 * ifnet_byindex_locked() reports a slot holding this value as NULL.
 */
#define	IFNET_HOLD	(void *)(uintptr_t)(-1)

/*
 * Per-type layer 2 allocation and free routines, indexed by interface
 * type; if_alloc() and if_free_internal() call them when non-NULL.
 */
static	if_com_alloc_t *if_com_alloc[256];
static	if_com_free_t *if_com_free[256];

static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
319MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
320
321struct ifnet *
322ifnet_byindex_locked(u_short idx)
323{
324
325	if (idx > V_if_index)
326		return (NULL);
327	if (V_ifindex_table[idx] == IFNET_HOLD)
328		return (NULL);
329	return (V_ifindex_table[idx]);
330}
331
/*
 * Locked wrapper around ifnet_byindex_locked(): takes the ifnet read
 * lock for the duration of the lookup only.  No reference is taken on
 * the returned ifnet; use ifnet_byindex_ref() when one is needed.
 */
struct ifnet *
ifnet_byindex(u_short idx)
{
	struct ifnet *ifp;

	IFNET_RLOCK_NOSLEEP();
	ifp = ifnet_byindex_locked(idx);
	IFNET_RUNLOCK_NOSLEEP();
	return (ifp);
}
342
343struct ifnet *
344ifnet_byindex_ref(u_short idx)
345{
346	struct ifnet *ifp;
347
348	IFNET_RLOCK_NOSLEEP();
349	ifp = ifnet_byindex_locked(idx);
350	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
351		IFNET_RUNLOCK_NOSLEEP();
352		return (NULL);
353	}
354	if_ref(ifp);
355	IFNET_RUNLOCK_NOSLEEP();
356	return (ifp);
357}
358
359/*
360 * Allocate an ifindex array entry; return 0 on success or an error on
361 * failure.
362 */
363static u_short
364ifindex_alloc(void)
365{
366	u_short idx;
367
368	IFNET_WLOCK_ASSERT();
369retry:
370	/*
371	 * Try to find an empty slot below V_if_index.  If we fail, take the
372	 * next slot.
373	 */
374	for (idx = 1; idx <= V_if_index; idx++) {
375		if (V_ifindex_table[idx] == NULL)
376			break;
377	}
378
379	/* Catch if_index overflow. */
380	if (idx >= V_if_indexlim) {
381		if_grow();
382		goto retry;
383	}
384	if (idx > V_if_index)
385		V_if_index = idx;
386	return (idx);
387}
388
389static void
390ifindex_free_locked(u_short idx)
391{
392
393	IFNET_WLOCK_ASSERT();
394
395	V_ifindex_table[idx] = NULL;
396	while (V_if_index > 0 &&
397	    V_ifindex_table[V_if_index] == NULL)
398		V_if_index--;
399}
400
/*
 * Release index idx: takes the ifnet write lock around
 * ifindex_free_locked().
 */
static void
ifindex_free(u_short idx)
{

	IFNET_WLOCK();
	ifindex_free_locked(idx);
	IFNET_WUNLOCK();
}
409
/*
 * Store ifp (which may be IFNET_HOLD, as done by if_alloc()) in table
 * slot idx.  The ifnet write lock must be held; idx is not range-checked.
 */
static void
ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
{

	IFNET_WLOCK_ASSERT();

	V_ifindex_table[idx] = ifp;
}
418
/*
 * Store ifp in table slot idx, taking the ifnet write lock around
 * ifnet_setbyindex_locked().
 */
static void
ifnet_setbyindex(u_short idx, struct ifnet *ifp)
{

	IFNET_WLOCK();
	ifnet_setbyindex_locked(idx, ifp);
	IFNET_WUNLOCK();
}
427
428struct ifaddr *
429ifaddr_byindex(u_short idx)
430{
431	struct ifnet *ifp;
432	struct ifaddr *ifa = NULL;
433
434	IFNET_RLOCK_NOSLEEP();
435	ifp = ifnet_byindex_locked(idx);
436	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
437		ifa_ref(ifa);
438	IFNET_RUNLOCK_NOSLEEP();
439	return (ifa);
440}
441
442/*
443 * Network interface utility routines.
444 *
445 * Routines with ifa_ifwith* names take sockaddr *'s as
446 * parameters.
447 */
448
/*
 * Per-vnet initialization: set up the empty interface and interface
 * group lists, allocate the initial index table and initialize the
 * cloner state.  Registered through VNET_SYSINIT below.
 */
static void
vnet_if_init(const void *unused __unused)
{

	TAILQ_INIT(&V_ifnet);
	TAILQ_INIT(&V_ifg_head);
	/* if_grow() asserts the write lock, so take it around the call. */
	IFNET_WLOCK();
	if_grow();				/* create initial table */
	IFNET_WUNLOCK();
	vnet_if_clone_init();
}
VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
    NULL);
462
463#ifdef VIMAGE
/*
 * Per-vnet teardown (VIMAGE only): asserts that both the interface list
 * and the interface group list are empty, then frees the index table.
 */
static void
vnet_if_uninit(const void *unused __unused)
{

	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
	    "not empty", __func__, __LINE__, &V_ifnet));
	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
	    "not empty", __func__, __LINE__, &V_ifg_head));

	free((caddr_t)V_ifindex_table, M_IFNET);
}
VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
    vnet_if_uninit, NULL);
477
478static void
479vnet_if_return(const void *unused __unused)
480{
481	struct ifnet *ifp, *nifp;
482
483	/* Return all inherited interfaces to their parent vnets. */
484	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
485		if (ifp->if_home_vnet != ifp->if_vnet)
486			if_vmove(ifp, ifp->if_home_vnet);
487	}
488}
489VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
490    vnet_if_return, NULL);
491#endif
492
493static void
494if_grow(void)
495{
496	int oldlim;
497	u_int n;
498	struct ifnet **e;
499
500	IFNET_WLOCK_ASSERT();
501	oldlim = V_if_indexlim;
502	IFNET_WUNLOCK();
503	n = (oldlim << 1) * sizeof(*e);
504	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
505	IFNET_WLOCK();
506	if (V_if_indexlim != oldlim) {
507		free(e, M_IFNET);
508		return;
509	}
510	if (V_ifindex_table != NULL) {
511		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
512		free((caddr_t)V_ifindex_table, M_IFNET);
513	}
514	V_if_indexlim <<= 1;
515	V_ifindex_table = e;
516}
517
518/*
519 * Allocate a struct ifnet and an index for an interface.  A layer 2
520 * common structure will also be allocated if an allocation routine is
521 * registered for the passed type.
522 */
523struct ifnet *
524if_alloc(u_char type)
525{
526	struct ifnet *ifp;
527	u_short idx;
528
529	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
530	IFNET_WLOCK();
531	idx = ifindex_alloc();
532	ifnet_setbyindex_locked(idx, IFNET_HOLD);
533	IFNET_WUNLOCK();
534	ifp->if_index = idx;
535	ifp->if_type = type;
536	ifp->if_alloctype = type;
537#ifdef VIMAGE
538	ifp->if_vnet = curvnet;
539#endif
540	if (if_com_alloc[type] != NULL) {
541		ifp->if_l2com = if_com_alloc[type](type, ifp);
542		if (ifp->if_l2com == NULL) {
543			free(ifp, M_IFNET);
544			ifindex_free(idx);
545			return (NULL);
546		}
547	}
548
549	IF_ADDR_LOCK_INIT(ifp);
550	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
551	ifp->if_afdata_initialized = 0;
552	IF_AFDATA_LOCK_INIT(ifp);
553	TAILQ_INIT(&ifp->if_addrhead);
554	TAILQ_INIT(&ifp->if_multiaddrs);
555	TAILQ_INIT(&ifp->if_groups);
556#ifdef MAC
557	mac_ifnet_init(ifp);
558#endif
559	ifq_init(&ifp->if_snd, ifp);
560
561	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
562	for (int i = 0; i < IFCOUNTERS; i++)
563		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
564	ifp->if_get_counter = if_get_counter_default;
565	ifp->if_pcp = IFNET_PCP_NONE;
566	ifnet_setbyindex(ifp->if_index, ifp);
567	return (ifp);
568}
569
570/*
571 * Do the actual work of freeing a struct ifnet, and layer 2 common
572 * structure.  This call is made when the last reference to an
573 * interface is released.
574 */
575static void
576if_free_internal(struct ifnet *ifp)
577{
578
579	KASSERT((ifp->if_flags & IFF_DYING),
580	    ("if_free_internal: interface not dying"));
581
582	if (if_com_free[ifp->if_alloctype] != NULL)
583		if_com_free[ifp->if_alloctype](ifp->if_l2com,
584		    ifp->if_alloctype);
585
586#ifdef MAC
587	mac_ifnet_destroy(ifp);
588#endif /* MAC */
589	if (ifp->if_description != NULL)
590		free(ifp->if_description, M_IFDESCR);
591	IF_AFDATA_DESTROY(ifp);
592	IF_ADDR_LOCK_DESTROY(ifp);
593	ifq_delete(&ifp->if_snd);
594
595	for (int i = 0; i < IFCOUNTERS; i++)
596		counter_u64_free(ifp->if_counters[i]);
597
598	free(ifp, M_IFNET);
599}
600
/*
 * Deregister an interface and free the associated storage.
 *
 * Marks the interface IFF_DYING, releases its index table slot under the
 * ifnet write lock and drops the reference set up by if_alloc().  The
 * storage is only freed here if that was the last reference; otherwise
 * the final if_rele() frees it.
 */
void
if_free(struct ifnet *ifp)
{

	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */

	/* The index table is per-vnet, so switch to the ifnet's vnet. */
	CURVNET_SET_QUIET(ifp->if_vnet);
	IFNET_WLOCK();
	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
	    ("%s: freeing unallocated ifnet", ifp->if_xname));

	ifindex_free_locked(ifp->if_index);
	IFNET_WUNLOCK();

	if (refcount_release(&ifp->if_refcount))
		if_free_internal(ifp);
	CURVNET_RESTORE();
}
622
/*
 * Interfaces to keep an ifnet type-stable despite the possibility of the
 * driver calling if_free().  If there are additional references, we defer
 * freeing the underlying data structure.
 *
 * if_ref() takes one additional reference on ifp; pair it with if_rele().
 */
void
if_ref(struct ifnet *ifp)
{

	/* We don't assert the ifnet list lock here, but arguably should. */
	refcount_acquire(&ifp->if_refcount);
}
635
636void
637if_rele(struct ifnet *ifp)
638{
639
640	if (!refcount_release(&ifp->if_refcount))
641		return;
642	if_free_internal(ifp);
643}
644
645void
646ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
647{
648
649	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
650
651	if (ifq->ifq_maxlen == 0)
652		ifq->ifq_maxlen = ifqmaxlen;
653
654	ifq->altq_type = 0;
655	ifq->altq_disc = NULL;
656	ifq->altq_flags &= ALTQF_CANTCHANGE;
657	ifq->altq_tbr  = NULL;
658	ifq->altq_ifp  = ifp;
659}
660
/*
 * Counterpart of ifq_init(): destroys the queue mutex.  Nothing else is
 * released here.
 */
void
ifq_delete(struct ifaltq *ifq)
{
	mtx_destroy(&ifq->ifq_mtx);
}
666
667/*
668 * Perform generic interface initialization tasks and attach the interface
669 * to the list of "active" interfaces.  If vmove flag is set on entry
670 * to if_attach_internal(), perform only a limited subset of initialization
671 * tasks, given that we are moving from one vnet to another an ifnet which
672 * has already been fully initialized.
673 *
674 * Note that if_detach_internal() removes group membership unconditionally
675 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
676 * Thus, when if_vmove() is applied to a cloned interface, group membership
677 * is lost while a cloned one always joins a group whose name is
678 * ifc->ifc_name.  To recover this after if_detach_internal() and
679 * if_attach_internal(), the cloner should be specified to
680 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
681 * attempts to join a group whose name is ifc->ifc_name.
682 *
683 * XXX:
684 *  - The decision to return void and thus require this function to
685 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
688 */
/*
 * Public attach entry point: a full (non-vmove) attach with no cloner.
 */
void
if_attach(struct ifnet *ifp)
{

	if_attach_internal(ifp, 0, NULL);
}
695
696/*
697 * Compute the least common TSO limit.
698 */
699void
700if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
701{
702	/*
703	 * 1) If there is no limit currently, take the limit from
704	 * the network adapter.
705	 *
706	 * 2) If the network adapter has a limit below the current
707	 * limit, apply it.
708	 */
709	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
710	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
711		pmax->tsomaxbytes = ifp->if_hw_tsomax;
712	}
713	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
714	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
715		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
716	}
717	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
718	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
719		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
720	}
721}
722
723/*
724 * Update TSO limit of a network adapter.
725 *
726 * Returns zero if no change. Else non-zero.
727 */
728int
729if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
730{
731	int retval = 0;
732	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
733		ifp->if_hw_tsomax = pmax->tsomaxbytes;
734		retval++;
735	}
736	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
737		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
738		retval++;
739	}
740	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
741		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
742		retval++;
743	}
744	return (retval);
745}
746
/*
 * Common attach path.
 *
 * ifp   - interface obtained from if_alloc(); the function panics if its
 *         index is 0 or the index table does not point back at it.
 * vmove - zero for a regular attach (if_attach()); non-zero when an
 *         already initialized ifnet is being attached to another vnet,
 *         in which case the link-level address, MAC label and TSO
 *         defaults are not set up again.
 * ifc   - cloner whose group should be rejoined on a vmove, or NULL.
 */
static void
if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
		    ifp->if_xname);

#ifdef VIMAGE
	ifp->if_vnet = curvnet;
	/* The home vnet is recorded once, on the first attach. */
	if (ifp->if_home_vnet == NULL)
		ifp->if_home_vnet = curvnet;
#endif

	if_addgroup(ifp, IFG_ALL);

	/* Restore group membership for cloned interfaces. */
	if (vmove && ifc != NULL)
		if_clone_addgroup(ifp, ifc);

	getmicrotime(&ifp->if_lastchange);
	ifp->if_epoch = time_uptime;

	/* Install default methods for anything the driver left unset. */
	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	    ("transmit and qflush must both either be set or both be NULL"));
	if (ifp->if_transmit == NULL) {
		ifp->if_transmit = if_transmit;
		ifp->if_qflush = if_qflush;
	}
	if (ifp->if_input == NULL)
		ifp->if_input = if_input_default;

	if (ifp->if_requestencap == NULL)
		ifp->if_requestencap = if_requestencap_default;

	if (!vmove) {
#ifdef MAC
		mac_ifnet_create(ifp);
#endif

		/*
		 * Create a Link Level name for this device.
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we
		 * can do a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/*
		 * One allocation holds the ifaddr followed by two
		 * sockaddr_dl areas of socksize bytes each: the address
		 * and then the netmask.
		 */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = ifa_alloc(ifasize, M_WAITOK);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_rtrequest = link_rtrequest;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* Step to the second area and build the netmask there. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* The mask covers exactly the bytes of the interface name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		/* Reliably crash if used uninitialized. */
		ifp->if_broadcastaddr = NULL;

		if (ifp->if_type == IFT_ETHER) {
			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
			    M_WAITOK | M_ZERO);
		}

#if defined(INET) || defined(INET6)
		/* Use defaults for TSO, if nothing is set */
		if (ifp->if_hw_tsomax == 0 &&
		    ifp->if_hw_tsomaxsegcount == 0 &&
		    ifp->if_hw_tsomaxsegsize == 0) {
			/*
			 * The TSO defaults needs to be such that an
			 * NFS mbuf list of 35 mbufs totalling just
			 * below 64K works and that a chain of mbufs
			 * can be defragged into at most 32 segments:
			 */
			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			ifp->if_hw_tsomaxsegcount = 35;
			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */

			/* XXX some drivers set IFCAP_TSO after ethernet attach */
			if (ifp->if_capabilities & IFCAP_TSO) {
				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
				    ifp->if_hw_tsomax,
				    ifp->if_hw_tsomaxsegcount,
				    ifp->if_hw_tsomaxsegsize);
			}
		}
#endif
	}
#ifdef VIMAGE
	else {
		/*
		 * Update the interface index in the link layer address
		 * of the interface.
		 */
		for (ifa = ifp->if_addr; ifa != NULL;
		    ifa = TAILQ_NEXT(ifa, ifa_link)) {
			if (ifa->ifa_addr->sa_family == AF_LINK) {
				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
				sdl->sdl_index = ifp->if_index;
			}
		}
	}
#endif

	/* Make the interface visible on the per-vnet interface list. */
	IFNET_WLOCK();
	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
#ifdef VIMAGE
	curvnet->vnet_ifcnt++;
#endif
	IFNET_WUNLOCK();

	/*
	 * Attach per-domain data right away only once domain_init_status
	 * has reached 2; before that the if_attachdomain() SYSINIT walks
	 * the interface list and does it.
	 */
	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
891
/*
 * SYSINIT hook: run if_attachdomain1() on every interface currently on
 * the V_ifnet list.  The dummy argument is unused.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
		if_attachdomain1(ifp);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
    if_attachdomain, NULL);
902
903static void
904if_attachdomain1(struct ifnet *ifp)
905{
906	struct domain *dp;
907
908	/*
909	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
910	 * cannot lock ifp->if_afdata initialization, entirely.
911	 */
912	IF_AFDATA_LOCK(ifp);
913	if (ifp->if_afdata_initialized >= domain_init_status) {
914		IF_AFDATA_UNLOCK(ifp);
915		log(LOG_WARNING, "%s called more than once on %s\n",
916		    __func__, ifp->if_xname);
917		return;
918	}
919	ifp->if_afdata_initialized = domain_init_status;
920	IF_AFDATA_UNLOCK(ifp);
921
922	/* address family dependent data region */
923	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
924	for (dp = domains; dp; dp = dp->dom_next) {
925		if (dp->dom_ifattach)
926			ifp->if_afdata[dp->dom_family] =
927			    (*dp->dom_ifattach)(ifp);
928	}
929}
930
/*
 * Remove any unicast or broadcast network addresses from an interface.
 *
 * AF_LINK addresses are left in place.  AF_INET addresses are deleted
 * through in_control(SIOCDIFADDR) and AF_INET6 addresses through
 * in6_purgeaddr().  Every other address, and an AF_INET address for
 * which in_control() returned an error, is unlinked from if_addrhead
 * and released with ifa_free() directly.
 */
void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa, *next;

	/* XXX cannot hold IF_ADDR_WLOCK over called functions. */
	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/* On failure fall through to the generic removal. */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* The lock is held only around the list removal itself. */
		IF_ADDR_WLOCK(ifp);
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IF_ADDR_WUNLOCK(ifp);
		ifa_free(ifa);
	}
}
970
971/*
972 * Remove any multicast network addresses from an interface when an ifnet
973 * is going away.
974 */
975static void
976if_purgemaddrs(struct ifnet *ifp)
977{
978	struct ifmultiaddr *ifma;
979	struct ifmultiaddr *next;
980
981	IF_ADDR_WLOCK(ifp);
982	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
983		if_delmulti_locked(ifp, ifma, 1);
984	IF_ADDR_WUNLOCK(ifp);
985}
986
987/*
988 * Detach an interface, removing it from the list of "active" interfaces.
989 * If vmove flag is set on entry to if_detach_internal(), perform only a
990 * limited subset of cleanup tasks, given that we are moving an ifnet from
991 * one vnet to another, where it must be fully operational.
992 *
993 * XXXRW: There are some significant questions about event ordering, and
994 * how to prevent things from starting to use the interface during detach.
995 */
/*
 * Public detach entry point: a full (non-vmove) detach performed in the
 * interface's own vnet.  The return value of if_detach_internal() is
 * discarded.
 */
void
if_detach(struct ifnet *ifp)
{

	CURVNET_SET_QUIET(ifp->if_vnet);
	if_detach_internal(ifp, 0, NULL);
	CURVNET_RESTORE();
}
1004
1005/*
1006 * The vmove flag, if set, indicates that we are called from a callpath
1007 * that is moving an interface to a different vnet instance.
1008 *
1009 * The shutdown flag, if set, indicates that we are called in the
1010 * process of shutting down a vnet instance.  Currently only the
1011 * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
1012 * on a vnet instance shutdown without this flag being set, e.g., when
 * the cloned interfaces are destroyed as the first step of teardown.
1014 */
1015static int
1016if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
1017{
1018	struct ifaddr *ifa;
1019	int i;
1020	struct domain *dp;
1021 	struct ifnet *iter;
1022 	int found = 0;
1023#ifdef VIMAGE
1024	int shutdown;
1025
1026	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1027		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1028#endif
1029	IFNET_WLOCK();
1030	TAILQ_FOREACH(iter, &V_ifnet, if_link)
1031		if (iter == ifp) {
1032			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
1033			if (!vmove)
1034				ifp->if_flags |= IFF_DYING;
1035			found = 1;
1036			break;
1037		}
1038	IFNET_WUNLOCK();
1039	if (!found) {
1040		/*
1041		 * While we would want to panic here, we cannot
1042		 * guarantee that the interface is indeed still on
1043		 * the list given we don't hold locks all the way.
1044		 */
1045		return (ENOENT);
1046#if 0
1047		if (vmove)
1048			panic("%s: ifp=%p not on the ifnet tailq %p",
1049			    __func__, ifp, &V_ifnet);
1050		else
1051			return; /* XXX this should panic as well? */
1052#endif
1053	}
1054
1055	/*
1056	 * At this point we know the interface still was on the ifnet list
1057	 * and we removed it so we are in a stable state.
1058	 */
1059#ifdef VIMAGE
1060	curvnet->vnet_ifcnt--;
1061#endif
1062
1063	/*
1064	 * In any case (destroy or vmove) detach us from the groups
1065	 * and remove/wait for pending events on the taskq.
1066	 * XXX-BZ in theory an interface could still enqueue a taskq change?
1067	 */
1068	if_delgroups(ifp);
1069
1070	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
1071
1072	/*
1073	 * Check if this is a cloned interface or not. Must do even if
1074	 * shutting down as a if_vmove_reclaim() would move the ifp and
1075	 * the if_clone_addgroup() will have a corrupted string overwise
1076	 * from a gibberish pointer.
1077	 */
1078	if (vmove && ifcp != NULL)
1079		*ifcp = if_clone_findifc(ifp);
1080
1081	if_down(ifp);
1082
1083#ifdef VIMAGE
1084	/*
1085	 * On VNET shutdown abort here as the stack teardown will do all
1086	 * the work top-down for us.
1087	 */
1088	if (shutdown) {
1089		/* Give interface users the chance to clean up. */
1090		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1091
1092		/*
1093		 * In case of a vmove we are done here without error.
1094		 * If we would signal an error it would lead to the same
1095		 * abort as if we did not find the ifnet anymore.
1096		 * if_detach() calls us in void context and does not care
1097		 * about an early abort notification, so life is splendid :)
1098		 */
1099		goto finish_vnet_shutdown;
1100	}
1101#endif
1102
1103	/*
1104	 * At this point we are not tearing down a VNET and are either
1105	 * going to destroy or vmove the interface and have to cleanup
1106	 * accordingly.
1107	 */
1108
1109	/*
1110	 * Remove routes and flush queues.
1111	 */
1112#ifdef ALTQ
1113	if (ALTQ_IS_ENABLED(&ifp->if_snd))
1114		altq_disable(&ifp->if_snd);
1115	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1116		altq_detach(&ifp->if_snd);
1117#endif
1118
1119	if_purgeaddrs(ifp);
1120
1121#ifdef INET
1122	in_ifdetach(ifp);
1123#endif
1124
1125#ifdef INET6
1126	/*
1127	 * Remove all IPv6 kernel structs related to ifp.  This should be done
1128	 * before removing routing entries below, since IPv6 interface direct
1129	 * routes are expected to be removed by the IPv6-specific kernel API.
1130	 * Otherwise, the kernel will detect some inconsistency and bark it.
1131	 */
1132	in6_ifdetach(ifp);
1133#endif
1134	if_purgemaddrs(ifp);
1135
1136	/* Announce that the interface is gone. */
1137	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1138	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1139	if (IS_DEFAULT_VNET(curvnet))
1140		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
1141
1142	if (!vmove) {
1143		/*
1144		 * Prevent further calls into the device driver via ifnet.
1145		 */
1146		if_dead(ifp);
1147
1148		/*
1149		 * Remove link ifaddr pointer and maybe decrement if_index.
1150		 * Clean up all addresses.
1151		 */
1152		free(ifp->if_hw_addr, M_IFADDR);
1153		ifp->if_hw_addr = NULL;
1154		ifp->if_addr = NULL;
1155
1156		/* We can now free link ifaddr. */
1157		IF_ADDR_WLOCK(ifp);
1158		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
1159			ifa = TAILQ_FIRST(&ifp->if_addrhead);
1160			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
1161			IF_ADDR_WUNLOCK(ifp);
1162			ifa_free(ifa);
1163		} else
1164			IF_ADDR_WUNLOCK(ifp);
1165	}
1166
1167	rt_flushifroutes(ifp);
1168
1169#ifdef VIMAGE
1170finish_vnet_shutdown:
1171#endif
1172	/*
1173	 * We cannot hold the lock over dom_ifdetach calls as they might
1174	 * sleep, for example trying to drain a callout, thus open up the
1175	 * theoretical race with re-attaching.
1176	 */
1177	IF_AFDATA_LOCK(ifp);
1178	i = ifp->if_afdata_initialized;
1179	ifp->if_afdata_initialized = 0;
1180	IF_AFDATA_UNLOCK(ifp);
1181	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
1182		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
1183			(*dp->dom_ifdetach)(ifp,
1184			    ifp->if_afdata[dp->dom_family]);
1185			ifp->if_afdata[dp->dom_family] = NULL;
1186		}
1187	}
1188
1189	return (0);
1190}
1191
1192#ifdef VIMAGE
1193/*
1194 * if_vmove() performs a limited version of if_detach() in current
1195 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
1196 * An attempt is made to shrink if_index in current vnet, find an
1197 * unused if_index in target vnet and calls if_grow() if necessary,
1198 * and finally find an unused if_xname for the target vnet.
1199 */
1200static void
1201if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
1202{
1203	struct if_clone *ifc;
1204	u_int bif_dlt, bif_hdrlen;
1205	int rc;
1206
1207 	/*
1208	 * if_detach_internal() will call the eventhandler to notify
1209	 * interface departure.  That will detach if_bpf.  We need to
1210	 * safe the dlt and hdrlen so we can re-attach it later.
1211	 */
1212	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
1213
1214	/*
1215	 * Detach from current vnet, but preserve LLADDR info, do not
1216	 * mark as dead etc. so that the ifnet can be reattached later.
1217	 * If we cannot find it, we lost the race to someone else.
1218	 */
1219	rc = if_detach_internal(ifp, 1, &ifc);
1220	if (rc != 0)
1221		return;
1222
1223	/*
1224	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
1225	 * the if_index for that vnet if possible.
1226	 *
1227	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
1228	 * or we'd lock on one vnet and unlock on another.
1229	 */
1230	IFNET_WLOCK();
1231	ifindex_free_locked(ifp->if_index);
1232	IFNET_WUNLOCK();
1233
1234	/*
1235	 * Perform interface-specific reassignment tasks, if provided by
1236	 * the driver.
1237	 */
1238	if (ifp->if_reassign != NULL)
1239		ifp->if_reassign(ifp, new_vnet, NULL);
1240
1241	/*
1242	 * Switch to the context of the target vnet.
1243	 */
1244	CURVNET_SET_QUIET(new_vnet);
1245
1246	IFNET_WLOCK();
1247	ifp->if_index = ifindex_alloc();
1248	ifnet_setbyindex_locked(ifp->if_index, ifp);
1249	IFNET_WUNLOCK();
1250
1251	if_attach_internal(ifp, 1, ifc);
1252
1253	if (ifp->if_bpf == NULL)
1254		bpfattach(ifp, bif_dlt, bif_hdrlen);
1255
1256	CURVNET_RESTORE();
1257}
1258
1259/*
1260 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1261 */
1262static int
1263if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1264{
1265	struct prison *pr;
1266	struct ifnet *difp;
1267	int shutdown;
1268
1269	/* Try to find the prison within our visibility. */
1270	sx_slock(&allprison_lock);
1271	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1272	sx_sunlock(&allprison_lock);
1273	if (pr == NULL)
1274		return (ENXIO);
1275	prison_hold_locked(pr);
1276	mtx_unlock(&pr->pr_mtx);
1277
1278	/* Do not try to move the iface from and to the same prison. */
1279	if (pr->pr_vnet == ifp->if_vnet) {
1280		prison_free(pr);
1281		return (EEXIST);
1282	}
1283
1284	/* Make sure the named iface does not exists in the dst. prison/vnet. */
1285	/* XXX Lock interfaces to avoid races. */
1286	CURVNET_SET_QUIET(pr->pr_vnet);
1287	difp = ifunit(ifname);
1288	if (difp != NULL) {
1289		CURVNET_RESTORE();
1290		prison_free(pr);
1291		return (EEXIST);
1292	}
1293
1294	/* Make sure the VNET is stable. */
1295	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1296		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1297	if (shutdown) {
1298		CURVNET_RESTORE();
1299		prison_free(pr);
1300		return (EBUSY);
1301	}
1302	CURVNET_RESTORE();
1303
1304	/* Move the interface into the child jail/vnet. */
1305	if_vmove(ifp, pr->pr_vnet);
1306
1307	/* Report the new if_xname back to the userland. */
1308	sprintf(ifname, "%s", ifp->if_xname);
1309
1310	prison_free(pr);
1311	return (0);
1312}
1313
1314static int
1315if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1316{
1317	struct prison *pr;
1318	struct vnet *vnet_dst;
1319	struct ifnet *ifp;
1320 	int shutdown;
1321
1322	/* Try to find the prison within our visibility. */
1323	sx_slock(&allprison_lock);
1324	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1325	sx_sunlock(&allprison_lock);
1326	if (pr == NULL)
1327		return (ENXIO);
1328	prison_hold_locked(pr);
1329	mtx_unlock(&pr->pr_mtx);
1330
1331	/* Make sure the named iface exists in the source prison/vnet. */
1332	CURVNET_SET(pr->pr_vnet);
1333	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1334	if (ifp == NULL) {
1335		CURVNET_RESTORE();
1336		prison_free(pr);
1337		return (ENXIO);
1338	}
1339
1340	/* Do not try to move the iface from and to the same prison. */
1341	vnet_dst = TD_TO_VNET(td);
1342	if (vnet_dst == ifp->if_vnet) {
1343		CURVNET_RESTORE();
1344		prison_free(pr);
1345		return (EEXIST);
1346	}
1347
1348	/* Make sure the VNET is stable. */
1349	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1350		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1351	if (shutdown) {
1352		CURVNET_RESTORE();
1353		prison_free(pr);
1354		return (EBUSY);
1355	}
1356
1357	/* Get interface back from child jail/vnet. */
1358	if_vmove(ifp, vnet_dst);
1359	CURVNET_RESTORE();
1360
1361	/* Report the new if_xname back to the userland. */
1362	sprintf(ifname, "%s", ifp->if_xname);
1363
1364	prison_free(pr);
1365	return (0);
1366}
1367#endif /* VIMAGE */
1368
1369/*
1370 * Add a group to an interface
1371 */
1372int
1373if_addgroup(struct ifnet *ifp, const char *groupname)
1374{
1375	struct ifg_list		*ifgl;
1376	struct ifg_group	*ifg = NULL;
1377	struct ifg_member	*ifgm;
1378	int 			 new = 0;
1379
1380	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1381	    groupname[strlen(groupname) - 1] <= '9')
1382		return (EINVAL);
1383
1384	IFNET_WLOCK();
1385	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1386		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1387			IFNET_WUNLOCK();
1388			return (EEXIST);
1389		}
1390
1391	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1392	    M_NOWAIT)) == NULL) {
1393	    	IFNET_WUNLOCK();
1394		return (ENOMEM);
1395	}
1396
1397	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1398	    M_TEMP, M_NOWAIT)) == NULL) {
1399		free(ifgl, M_TEMP);
1400		IFNET_WUNLOCK();
1401		return (ENOMEM);
1402	}
1403
1404	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1405		if (!strcmp(ifg->ifg_group, groupname))
1406			break;
1407
1408	if (ifg == NULL) {
1409		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1410		    M_TEMP, M_NOWAIT)) == NULL) {
1411			free(ifgl, M_TEMP);
1412			free(ifgm, M_TEMP);
1413			IFNET_WUNLOCK();
1414			return (ENOMEM);
1415		}
1416		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1417		ifg->ifg_refcnt = 0;
1418		TAILQ_INIT(&ifg->ifg_members);
1419		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1420		new = 1;
1421	}
1422
1423	ifg->ifg_refcnt++;
1424	ifgl->ifgl_group = ifg;
1425	ifgm->ifgm_ifp = ifp;
1426
1427	IF_ADDR_WLOCK(ifp);
1428	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1429	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1430	IF_ADDR_WUNLOCK(ifp);
1431
1432	IFNET_WUNLOCK();
1433
1434	if (new)
1435		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1436	EVENTHANDLER_INVOKE(group_change_event, groupname);
1437
1438	return (0);
1439}
1440
1441/*
1442 * Remove a group from an interface
1443 */
1444int
1445if_delgroup(struct ifnet *ifp, const char *groupname)
1446{
1447	struct ifg_list		*ifgl;
1448	struct ifg_member	*ifgm;
1449
1450	IFNET_WLOCK();
1451	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1452		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1453			break;
1454	if (ifgl == NULL) {
1455		IFNET_WUNLOCK();
1456		return (ENOENT);
1457	}
1458
1459	IF_ADDR_WLOCK(ifp);
1460	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1461	IF_ADDR_WUNLOCK(ifp);
1462
1463	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1464		if (ifgm->ifgm_ifp == ifp)
1465			break;
1466
1467	if (ifgm != NULL) {
1468		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1469		free(ifgm, M_TEMP);
1470	}
1471
1472	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1473		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1474		IFNET_WUNLOCK();
1475		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1476		free(ifgl->ifgl_group, M_TEMP);
1477	} else
1478		IFNET_WUNLOCK();
1479
1480	free(ifgl, M_TEMP);
1481
1482	EVENTHANDLER_INVOKE(group_change_event, groupname);
1483
1484	return (0);
1485}
1486
1487/*
1488 * Remove an interface from all groups
1489 */
1490static void
1491if_delgroups(struct ifnet *ifp)
1492{
1493	struct ifg_list		*ifgl;
1494	struct ifg_member	*ifgm;
1495	char groupname[IFNAMSIZ];
1496
1497	IFNET_WLOCK();
1498	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1499		ifgl = TAILQ_FIRST(&ifp->if_groups);
1500
1501		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1502
1503		IF_ADDR_WLOCK(ifp);
1504		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1505		IF_ADDR_WUNLOCK(ifp);
1506
1507		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1508			if (ifgm->ifgm_ifp == ifp)
1509				break;
1510
1511		if (ifgm != NULL) {
1512			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1513			    ifgm_next);
1514			free(ifgm, M_TEMP);
1515		}
1516
1517		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1518			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1519			IFNET_WUNLOCK();
1520			EVENTHANDLER_INVOKE(group_detach_event,
1521			    ifgl->ifgl_group);
1522			free(ifgl->ifgl_group, M_TEMP);
1523		} else
1524			IFNET_WUNLOCK();
1525
1526		free(ifgl, M_TEMP);
1527
1528		EVENTHANDLER_INVOKE(group_change_event, groupname);
1529
1530		IFNET_WLOCK();
1531	}
1532	IFNET_WUNLOCK();
1533}
1534
1535static char *
1536ifgr_group_get(void *ifgrp)
1537{
1538	union ifgroupreq_union *ifgrup;
1539
1540	ifgrup = ifgrp;
1541#ifdef COMPAT_FREEBSD32
1542	if (SV_CURPROC_FLAG(SV_ILP32))
1543		return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]);
1544#endif
1545	return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]);
1546}
1547
1548static struct ifg_req *
1549ifgr_groups_get(void *ifgrp)
1550{
1551	union ifgroupreq_union *ifgrup;
1552
1553	ifgrup = ifgrp;
1554#ifdef COMPAT_FREEBSD32
1555	if (SV_CURPROC_FLAG(SV_ILP32))
1556		return ((struct ifg_req *)(uintptr_t)
1557		    ifgrup->ifgr32.ifgr_ifgru.ifgru_groups);
1558#endif
1559	return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups);
1560}
1561
1562/*
1563 * Stores all groups from an interface in memory pointed to by ifgr.
1564 */
1565static int
1566if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
1567{
1568	int			 len, error;
1569	struct ifg_list		*ifgl;
1570	struct ifg_req		 ifgrq, *ifgp;
1571
1572	if (ifgr->ifgr_len == 0) {
1573		IF_ADDR_RLOCK(ifp);
1574		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1575			ifgr->ifgr_len += sizeof(struct ifg_req);
1576		IF_ADDR_RUNLOCK(ifp);
1577		return (0);
1578	}
1579
1580	len = ifgr->ifgr_len;
1581	ifgp = ifgr_groups_get(ifgr);
1582	/* XXX: wire */
1583	IF_ADDR_RLOCK(ifp);
1584	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1585		if (len < sizeof(ifgrq)) {
1586			IF_ADDR_RUNLOCK(ifp);
1587			return (EINVAL);
1588		}
1589		bzero(&ifgrq, sizeof ifgrq);
1590		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1591		    sizeof(ifgrq.ifgrq_group));
1592		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1593		    	IF_ADDR_RUNLOCK(ifp);
1594			return (error);
1595		}
1596		len -= sizeof(ifgrq);
1597		ifgp++;
1598	}
1599	IF_ADDR_RUNLOCK(ifp);
1600
1601	return (0);
1602}
1603
1604/*
1605 * Stores all members of a group in memory pointed to by igfr
1606 */
1607static int
1608if_getgroupmembers(struct ifgroupreq *ifgr)
1609{
1610	struct ifg_group	*ifg;
1611	struct ifg_member	*ifgm;
1612	struct ifg_req		 ifgrq, *ifgp;
1613	int			 len, error;
1614
1615	IFNET_RLOCK();
1616	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1617		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1618			break;
1619	if (ifg == NULL) {
1620		IFNET_RUNLOCK();
1621		return (ENOENT);
1622	}
1623
1624	if (ifgr->ifgr_len == 0) {
1625		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1626			ifgr->ifgr_len += sizeof(ifgrq);
1627		IFNET_RUNLOCK();
1628		return (0);
1629	}
1630
1631	len = ifgr->ifgr_len;
1632	ifgp = ifgr_groups_get(ifgr);
1633	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1634		if (len < sizeof(ifgrq)) {
1635			IFNET_RUNLOCK();
1636			return (EINVAL);
1637		}
1638		bzero(&ifgrq, sizeof ifgrq);
1639		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1640		    sizeof(ifgrq.ifgrq_member));
1641		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1642			IFNET_RUNLOCK();
1643			return (error);
1644		}
1645		len -= sizeof(ifgrq);
1646		ifgp++;
1647	}
1648	IFNET_RUNLOCK();
1649
1650	return (0);
1651}
1652
1653/*
1654 * Return counter values from counter(9)s stored in ifnet.
1655 */
1656uint64_t
1657if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
1658{
1659
1660	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1661
1662	return (counter_u64_fetch(ifp->if_counters[cnt]));
1663}
1664
1665/*
1666 * Increase an ifnet counter. Usually used for counters shared
1667 * between the stack and a driver, but function supports them all.
1668 */
1669void
1670if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
1671{
1672
1673	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1674
1675	counter_u64_add(ifp->if_counters[cnt], inc);
1676}
1677
1678/*
1679 * Copy data from ifnet to userland API structure if_data.
1680 */
1681void
1682if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1683{
1684
1685	ifd->ifi_type = ifp->if_type;
1686	ifd->ifi_physical = 0;
1687	ifd->ifi_addrlen = ifp->if_addrlen;
1688	ifd->ifi_hdrlen = ifp->if_hdrlen;
1689	ifd->ifi_link_state = ifp->if_link_state;
1690	ifd->ifi_vhid = 0;
1691	ifd->ifi_datalen = sizeof(struct if_data);
1692	ifd->ifi_mtu = ifp->if_mtu;
1693	ifd->ifi_metric = ifp->if_metric;
1694	ifd->ifi_baudrate = ifp->if_baudrate;
1695	ifd->ifi_hwassist = ifp->if_hwassist;
1696	ifd->ifi_epoch = ifp->if_epoch;
1697	ifd->ifi_lastchange = ifp->if_lastchange;
1698
1699	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1700	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1701	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1702	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1703	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1704	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1705	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1706	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1707	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1708	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1709	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1710	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1711}
1712
/*
 * Function wrappers around the struct ifnet address list locking macros.
 * Kernel modules use these so that they do not encode programming
 * interface or binary interface assumptions that may be violated when
 * the kernel-internal locking approach changes.
 */

/* Take the interface address list lock for reading. */
void
if_addr_rlock(struct ifnet *ifp)
{

	IF_ADDR_RLOCK(ifp);
}
1725
/* Release the read lock on the interface address list. */
void
if_addr_runlock(struct ifnet *ifp)
{

	IF_ADDR_RUNLOCK(ifp);
}
1732
1733void
1734if_maddr_rlock(if_t ifp)
1735{
1736
1737	IF_ADDR_RLOCK((struct ifnet *)ifp);
1738}
1739
1740void
1741if_maddr_runlock(if_t ifp)
1742{
1743
1744	IF_ADDR_RUNLOCK((struct ifnet *)ifp);
1745}
1746
1747/*
1748 * Initialization, destruction and refcounting functions for ifaddrs.
1749 */
1750struct ifaddr *
1751ifa_alloc(size_t size, int flags)
1752{
1753	struct ifaddr *ifa;
1754
1755	KASSERT(size >= sizeof(struct ifaddr),
1756	    ("%s: invalid size %zu", __func__, size));
1757
1758	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1759	if (ifa == NULL)
1760		return (NULL);
1761
1762	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1763		goto fail;
1764	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1765		goto fail;
1766	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1767		goto fail;
1768	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1769		goto fail;
1770
1771	refcount_init(&ifa->ifa_refcnt, 1);
1772
1773	return (ifa);
1774
1775fail:
1776	/* free(NULL) is okay */
1777	counter_u64_free(ifa->ifa_opackets);
1778	counter_u64_free(ifa->ifa_ipackets);
1779	counter_u64_free(ifa->ifa_obytes);
1780	counter_u64_free(ifa->ifa_ibytes);
1781	free(ifa, M_IFADDR);
1782
1783	return (NULL);
1784}
1785
1786void
1787ifa_ref(struct ifaddr *ifa)
1788{
1789
1790	refcount_acquire(&ifa->ifa_refcnt);
1791}
1792
1793void
1794ifa_free(struct ifaddr *ifa)
1795{
1796
1797	if (refcount_release(&ifa->ifa_refcnt)) {
1798		counter_u64_free(ifa->ifa_opackets);
1799		counter_u64_free(ifa->ifa_ipackets);
1800		counter_u64_free(ifa->ifa_obytes);
1801		counter_u64_free(ifa->ifa_ibytes);
1802		free(ifa, M_IFADDR);
1803	}
1804}
1805
1806static int
1807ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
1808    struct sockaddr *ia)
1809{
1810	int error;
1811	struct rt_addrinfo info;
1812	struct sockaddr_dl null_sdl;
1813	struct ifnet *ifp;
1814
1815	ifp = ifa->ifa_ifp;
1816
1817	bzero(&info, sizeof(info));
1818	if (cmd != RTM_DELETE)
1819		info.rti_ifp = V_loif;
1820	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
1821	info.rti_info[RTAX_DST] = ia;
1822	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
1823	link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
1824
1825	error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
1826
1827	if (error != 0)
1828		log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
1829		    __func__, otype, if_name(ifp), error);
1830
1831	return (error);
1832}
1833
1834int
1835ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1836{
1837
1838	return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
1839}
1840
1841int
1842ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1843{
1844
1845	return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
1846}
1847
1848int
1849ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1850{
1851
1852	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
1853}
1854
/*
 * XXX: sockaddr_dl has a deeper structure than the sockaddrs used for
 * other address families, so it needs its own comparison: two link-level
 * addresses are taken as equal when their sdl_len fields match and the
 * link-layer address bytes agree over the sdl_alen of the first argument.
 *
 * Both arguments are evaluated more than once.
 */

#define	sa_dl_equal(a1, a2)						\
	((((const struct sockaddr_dl *)(a1))->sdl_len ==		\
	    ((const struct sockaddr_dl *)(a2))->sdl_len) &&		\
	    (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	    CLLADDR((const struct sockaddr_dl *)(a2)),			\
	    ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
1867
1868/*
1869 * Locate an interface based on a complete address.
1870 */
1871/*ARGSUSED*/
1872static struct ifaddr *
1873ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
1874{
1875	struct ifnet *ifp;
1876	struct ifaddr *ifa;
1877
1878	IFNET_RLOCK_NOSLEEP();
1879	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1880		IF_ADDR_RLOCK(ifp);
1881		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1882			if (ifa->ifa_addr->sa_family != addr->sa_family)
1883				continue;
1884			if (sa_equal(addr, ifa->ifa_addr)) {
1885				if (getref)
1886					ifa_ref(ifa);
1887				IF_ADDR_RUNLOCK(ifp);
1888				goto done;
1889			}
1890			/* IP6 doesn't have broadcast */
1891			if ((ifp->if_flags & IFF_BROADCAST) &&
1892			    ifa->ifa_broadaddr &&
1893			    ifa->ifa_broadaddr->sa_len != 0 &&
1894			    sa_equal(ifa->ifa_broadaddr, addr)) {
1895				if (getref)
1896					ifa_ref(ifa);
1897				IF_ADDR_RUNLOCK(ifp);
1898				goto done;
1899			}
1900		}
1901		IF_ADDR_RUNLOCK(ifp);
1902	}
1903	ifa = NULL;
1904done:
1905	IFNET_RUNLOCK_NOSLEEP();
1906	return (ifa);
1907}
1908
/*
 * Look up the ifaddr matching addr and return it with a reference held,
 * or NULL if there is none.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr)
{

	return (ifa_ifwithaddr_internal(addr, 1));
}
1915
1916int
1917ifa_ifwithaddr_check(const struct sockaddr *addr)
1918{
1919
1920	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
1921}
1922
1923/*
1924 * Locate an interface based on the broadcast address.
1925 */
1926/* ARGSUSED */
1927struct ifaddr *
1928ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1929{
1930	struct ifnet *ifp;
1931	struct ifaddr *ifa;
1932
1933	IFNET_RLOCK_NOSLEEP();
1934	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1935		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1936			continue;
1937		IF_ADDR_RLOCK(ifp);
1938		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1939			if (ifa->ifa_addr->sa_family != addr->sa_family)
1940				continue;
1941			if ((ifp->if_flags & IFF_BROADCAST) &&
1942			    ifa->ifa_broadaddr &&
1943			    ifa->ifa_broadaddr->sa_len != 0 &&
1944			    sa_equal(ifa->ifa_broadaddr, addr)) {
1945				ifa_ref(ifa);
1946				IF_ADDR_RUNLOCK(ifp);
1947				goto done;
1948			}
1949		}
1950		IF_ADDR_RUNLOCK(ifp);
1951	}
1952	ifa = NULL;
1953done:
1954	IFNET_RUNLOCK_NOSLEEP();
1955	return (ifa);
1956}
1957
1958/*
1959 * Locate the point to point interface with a given destination address.
1960 */
1961/*ARGSUSED*/
1962struct ifaddr *
1963ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1964{
1965	struct ifnet *ifp;
1966	struct ifaddr *ifa;
1967
1968	IFNET_RLOCK_NOSLEEP();
1969	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1970		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1971			continue;
1972		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1973			continue;
1974		IF_ADDR_RLOCK(ifp);
1975		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1976			if (ifa->ifa_addr->sa_family != addr->sa_family)
1977				continue;
1978			if (ifa->ifa_dstaddr != NULL &&
1979			    sa_equal(addr, ifa->ifa_dstaddr)) {
1980				ifa_ref(ifa);
1981				IF_ADDR_RUNLOCK(ifp);
1982				goto done;
1983			}
1984		}
1985		IF_ADDR_RUNLOCK(ifp);
1986	}
1987	ifa = NULL;
1988done:
1989	IFNET_RUNLOCK_NOSLEEP();
1990	return (ifa);
1991}
1992
1993/*
1994 * Find an interface on a specific network.  If many, choice
1995 * is most specific found.
1996 */
1997struct ifaddr *
1998ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
1999{
2000	struct ifnet *ifp;
2001	struct ifaddr *ifa;
2002	struct ifaddr *ifa_maybe = NULL;
2003	u_int af = addr->sa_family;
2004	const char *addr_data = addr->sa_data, *cplim;
2005
2006	/*
2007	 * AF_LINK addresses can be looked up directly by their index number,
2008	 * so do that if we can.
2009	 */
2010	if (af == AF_LINK) {
2011	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
2012	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
2013		return (ifaddr_byindex(sdl->sdl_index));
2014	}
2015
2016	/*
2017	 * Scan though each interface, looking for ones that have addresses
2018	 * in this address family and the requested fib.  Maintain a reference
2019	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
2020	 * kept it stable when we move onto the next interface.
2021	 */
2022	IFNET_RLOCK_NOSLEEP();
2023	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2024		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
2025			continue;
2026		IF_ADDR_RLOCK(ifp);
2027		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2028			const char *cp, *cp2, *cp3;
2029
2030			if (ifa->ifa_addr->sa_family != af)
2031next:				continue;
2032			if (af == AF_INET &&
2033			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
2034				/*
2035				 * This is a bit broken as it doesn't
2036				 * take into account that the remote end may
2037				 * be a single node in the network we are
2038				 * looking for.
2039				 * The trouble is that we don't know the
2040				 * netmask for the remote end.
2041				 */
2042				if (ifa->ifa_dstaddr != NULL &&
2043				    sa_equal(addr, ifa->ifa_dstaddr)) {
2044					ifa_ref(ifa);
2045					IF_ADDR_RUNLOCK(ifp);
2046					goto done;
2047				}
2048			} else {
2049				/*
2050				 * Scan all the bits in the ifa's address.
2051				 * If a bit dissagrees with what we are
2052				 * looking for, mask it with the netmask
2053				 * to see if it really matters.
2054				 * (A byte at a time)
2055				 */
2056				if (ifa->ifa_netmask == 0)
2057					continue;
2058				cp = addr_data;
2059				cp2 = ifa->ifa_addr->sa_data;
2060				cp3 = ifa->ifa_netmask->sa_data;
2061				cplim = ifa->ifa_netmask->sa_len
2062					+ (char *)ifa->ifa_netmask;
2063				while (cp3 < cplim)
2064					if ((*cp++ ^ *cp2++) & *cp3++)
2065						goto next; /* next address! */
2066				/*
2067				 * If the netmask of what we just found
2068				 * is more specific than what we had before
2069				 * (if we had one), or if the virtual status
2070				 * of new prefix is better than of the old one,
2071				 * then remember the new one before continuing
2072				 * to search for an even better one.
2073				 */
2074				if (ifa_maybe == NULL ||
2075				    ifa_preferred(ifa_maybe, ifa) ||
2076				    rn_refines((caddr_t)ifa->ifa_netmask,
2077				    (caddr_t)ifa_maybe->ifa_netmask)) {
2078					if (ifa_maybe != NULL)
2079						ifa_free(ifa_maybe);
2080					ifa_maybe = ifa;
2081					ifa_ref(ifa_maybe);
2082				}
2083			}
2084		}
2085		IF_ADDR_RUNLOCK(ifp);
2086	}
2087	ifa = ifa_maybe;
2088	ifa_maybe = NULL;
2089done:
2090	IFNET_RUNLOCK_NOSLEEP();
2091	if (ifa_maybe != NULL)
2092		ifa_free(ifa_maybe);
2093	return (ifa);
2094}
2095
2096/*
2097 * Find an interface address specific to an interface best matching
2098 * a given address.
2099 */
2100struct ifaddr *
2101ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2102{
2103	struct ifaddr *ifa;
2104	const char *cp, *cp2, *cp3;
2105	char *cplim;
2106	struct ifaddr *ifa_maybe = NULL;
2107	u_int af = addr->sa_family;
2108
2109	if (af >= AF_MAX)
2110		return (NULL);
2111	IF_ADDR_RLOCK(ifp);
2112	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2113		if (ifa->ifa_addr->sa_family != af)
2114			continue;
2115		if (ifa_maybe == NULL)
2116			ifa_maybe = ifa;
2117		if (ifa->ifa_netmask == 0) {
2118			if (sa_equal(addr, ifa->ifa_addr) ||
2119			    (ifa->ifa_dstaddr &&
2120			    sa_equal(addr, ifa->ifa_dstaddr)))
2121				goto done;
2122			continue;
2123		}
2124		if (ifp->if_flags & IFF_POINTOPOINT) {
2125			if (sa_equal(addr, ifa->ifa_dstaddr))
2126				goto done;
2127		} else {
2128			cp = addr->sa_data;
2129			cp2 = ifa->ifa_addr->sa_data;
2130			cp3 = ifa->ifa_netmask->sa_data;
2131			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2132			for (; cp3 < cplim; cp3++)
2133				if ((*cp++ ^ *cp2++) & *cp3)
2134					break;
2135			if (cp3 == cplim)
2136				goto done;
2137		}
2138	}
2139	ifa = ifa_maybe;
2140done:
2141	if (ifa != NULL)
2142		ifa_ref(ifa);
2143	IF_ADDR_RUNLOCK(ifp);
2144	return (ifa);
2145}
2146
2147/*
2148 * See whether new ifa is better than current one:
2149 * 1) A non-virtual one is preferred over virtual.
2150 * 2) A virtual in master state preferred over any other state.
2151 *
2152 * Used in several address selecting functions.
2153 */
2154int
2155ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2156{
2157
2158	return (cur->ifa_carp && (!next->ifa_carp ||
2159	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2160}
2161
2162#include <net/if_llatbl.h>
2163
2164/*
2165 * Default action when installing a route with a Link Level gateway.
2166 * Lookup an appropriate real ifa to point to.
2167 * This should be moved to /sys/net/link.c eventually.
2168 */
2169static void
2170link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
2171{
2172	struct ifaddr *ifa, *oifa;
2173	struct sockaddr *dst;
2174	struct ifnet *ifp;
2175
2176	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
2177	    ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
2178		return;
2179	ifa = ifaof_ifpforaddr(dst, ifp);
2180	if (ifa) {
2181		oifa = rt->rt_ifa;
2182		rt->rt_ifa = ifa;
2183		ifa_free(oifa);
2184		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2185			ifa->ifa_rtrequest(cmd, rt, info);
2186	}
2187}
2188
2189struct sockaddr_dl *
2190link_alloc_sdl(size_t size, int flags)
2191{
2192
2193	return (malloc(size, M_TEMP, flags));
2194}
2195
2196void
2197link_free_sdl(struct sockaddr *sa)
2198{
2199	free(sa, M_TEMP);
2200}
2201
2202/*
2203 * Fills in given sdl with interface basic info.
2204 * Returns pointer to filled sdl.
2205 */
2206struct sockaddr_dl *
2207link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2208{
2209	struct sockaddr_dl *sdl;
2210
2211	sdl = (struct sockaddr_dl *)paddr;
2212	memset(sdl, 0, sizeof(struct sockaddr_dl));
2213	sdl->sdl_len = sizeof(struct sockaddr_dl);
2214	sdl->sdl_family = AF_LINK;
2215	sdl->sdl_index = ifp->if_index;
2216	sdl->sdl_type = iftype;
2217
2218	return (sdl);
2219}
2220
2221/*
2222 * Mark an interface down and notify protocols of
2223 * the transition.
2224 */
2225static void
2226if_unroute(struct ifnet *ifp, int flag, int fam)
2227{
2228	struct ifaddr *ifa;
2229
2230	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2231
2232	ifp->if_flags &= ~flag;
2233	getmicrotime(&ifp->if_lastchange);
2234	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2235		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2236			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2237	ifp->if_qflush(ifp);
2238
2239	if (ifp->if_carp)
2240		(*carp_linkstate_p)(ifp);
2241	rt_ifmsg(ifp);
2242}
2243
2244/*
2245 * Mark an interface up and notify protocols of
2246 * the transition.
2247 */
2248static void
2249if_route(struct ifnet *ifp, int flag, int fam)
2250{
2251	struct ifaddr *ifa;
2252
2253	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
2254
2255	ifp->if_flags |= flag;
2256	getmicrotime(&ifp->if_lastchange);
2257	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2258		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2259			pfctlinput(PRC_IFUP, ifa->ifa_addr);
2260	if (ifp->if_carp)
2261		(*carp_linkstate_p)(ifp);
2262	rt_ifmsg(ifp);
2263#ifdef INET6
2264	in6_if_up(ifp);
2265#endif
2266}
2267
2268void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
2269void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
2270struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
2271struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
2272int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
2273int	(*vlan_setcookie_p)(struct ifnet *, void *);
2274void	*(*vlan_cookie_p)(struct ifnet *);
2275
2276/*
2277 * Handle a change in the interface link state. To avoid LORs
2278 * between driver lock and upper layer locks, as well as possible
2279 * recursions, we post event to taskqueue, and all job
2280 * is done in static do_link_state_change().
2281 */
2282void
2283if_link_state_change(struct ifnet *ifp, int link_state)
2284{
2285	/* Return if state hasn't changed. */
2286	if (ifp->if_link_state == link_state)
2287		return;
2288
2289	ifp->if_link_state = link_state;
2290
2291	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2292}
2293
2294static void
2295do_link_state_change(void *arg, int pending)
2296{
2297	struct ifnet *ifp = (struct ifnet *)arg;
2298	int link_state = ifp->if_link_state;
2299	CURVNET_SET(ifp->if_vnet);
2300
2301	/* Notify that the link state has changed. */
2302	rt_ifmsg(ifp);
2303	if (ifp->if_vlantrunk != NULL)
2304		(*vlan_link_state_p)(ifp);
2305
2306	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2307	    ifp->if_l2com != NULL)
2308		(*ng_ether_link_state_p)(ifp, link_state);
2309	if (ifp->if_carp)
2310		(*carp_linkstate_p)(ifp);
2311	if (ifp->if_bridge)
2312		(*bridge_linkstate_p)(ifp);
2313	if (ifp->if_lagg)
2314		(*lagg_linkstate_p)(ifp, link_state);
2315
2316	if (IS_DEFAULT_VNET(curvnet))
2317		devctl_notify("IFNET", ifp->if_xname,
2318		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2319		    NULL);
2320	if (pending > 1)
2321		if_printf(ifp, "%d link states coalesced\n", pending);
2322	if (log_link_state_change)
2323		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
2324		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2325	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2326	CURVNET_RESTORE();
2327}
2328
2329/*
2330 * Mark an interface down and notify protocols of
2331 * the transition.
2332 */
2333void
2334if_down(struct ifnet *ifp)
2335{
2336
2337	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
2338	if_unroute(ifp, IFF_UP, AF_UNSPEC);
2339}
2340
2341/*
2342 * Mark an interface up and notify protocols of
2343 * the transition.
2344 */
2345void
2346if_up(struct ifnet *ifp)
2347{
2348
2349	if_route(ifp, IFF_UP, AF_UNSPEC);
2350	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
2351}
2352
2353/*
2354 * Flush an interface queue.
2355 */
2356void
2357if_qflush(struct ifnet *ifp)
2358{
2359	struct mbuf *m, *n;
2360	struct ifaltq *ifq;
2361
2362	ifq = &ifp->if_snd;
2363	IFQ_LOCK(ifq);
2364#ifdef ALTQ
2365	if (ALTQ_IS_ENABLED(ifq))
2366		ALTQ_PURGE(ifq);
2367#endif
2368	n = ifq->ifq_head;
2369	while ((m = n) != NULL) {
2370		n = m->m_nextpkt;
2371		m_freem(m);
2372	}
2373	ifq->ifq_head = 0;
2374	ifq->ifq_tail = 0;
2375	ifq->ifq_len = 0;
2376	IFQ_UNLOCK(ifq);
2377}
2378
2379/*
2380 * Map interface name to interface structure pointer, with or without
2381 * returning a reference.
2382 */
2383struct ifnet *
2384ifunit_ref(const char *name)
2385{
2386	struct ifnet *ifp;
2387
2388	IFNET_RLOCK_NOSLEEP();
2389	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2390		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2391		    !(ifp->if_flags & IFF_DYING))
2392			break;
2393	}
2394	if (ifp != NULL)
2395		if_ref(ifp);
2396	IFNET_RUNLOCK_NOSLEEP();
2397	return (ifp);
2398}
2399
2400struct ifnet *
2401ifunit(const char *name)
2402{
2403	struct ifnet *ifp;
2404
2405	IFNET_RLOCK_NOSLEEP();
2406	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2407		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2408			break;
2409	}
2410	IFNET_RUNLOCK_NOSLEEP();
2411	return (ifp);
2412}
2413
2414static void *
2415ifr_buffer_get_buffer(void *data)
2416{
2417	union ifreq_union *ifrup;
2418
2419	ifrup = data;
2420#ifdef COMPAT_FREEBSD32
2421	if (SV_CURPROC_FLAG(SV_ILP32))
2422		return ((void *)(uintptr_t)
2423		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
2424#endif
2425	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
2426}
2427
2428static void
2429ifr_buffer_set_buffer_null(void *data)
2430{
2431	union ifreq_union *ifrup;
2432
2433	ifrup = data;
2434#ifdef COMPAT_FREEBSD32
2435	if (SV_CURPROC_FLAG(SV_ILP32))
2436		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
2437	else
2438#endif
2439		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
2440}
2441
2442static size_t
2443ifr_buffer_get_length(void *data)
2444{
2445	union ifreq_union *ifrup;
2446
2447	ifrup = data;
2448#ifdef COMPAT_FREEBSD32
2449	if (SV_CURPROC_FLAG(SV_ILP32))
2450		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
2451#endif
2452	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
2453}
2454
2455static void
2456ifr_buffer_set_length(void *data, size_t len)
2457{
2458	union ifreq_union *ifrup;
2459
2460	ifrup = data;
2461#ifdef COMPAT_FREEBSD32
2462	if (SV_CURPROC_FLAG(SV_ILP32))
2463		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
2464	else
2465#endif
2466		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
2467}
2468
2469void *
2470ifr_data_get_ptr(void *ifrp)
2471{
2472	union ifreq_union *ifrup;
2473
2474	ifrup = ifrp;
2475#ifdef COMPAT_FREEBSD32
2476	if (SV_CURPROC_FLAG(SV_ILP32))
2477		return ((void *)(uintptr_t)
2478		    ifrup->ifr32.ifr_ifru.ifru_data);
2479#endif
2480		return (ifrup->ifr.ifr_ifru.ifru_data);
2481}
2482
2483/*
2484 * Hardware specific interface ioctls.
2485 */
2486int
2487ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
2488{
2489	struct ifreq *ifr;
2490	int error = 0, do_ifup = 0;
2491	int new_flags, temp_flags;
2492	size_t namelen, onamelen;
2493	size_t descrlen;
2494	char *descrbuf, *odescrbuf;
2495	char new_name[IFNAMSIZ];
2496	struct ifaddr *ifa;
2497	struct sockaddr_dl *sdl;
2498
2499	ifr = (struct ifreq *)data;
2500	switch (cmd) {
2501	case SIOCGIFINDEX:
2502		ifr->ifr_index = ifp->if_index;
2503		break;
2504
2505	case SIOCGIFFLAGS:
2506		temp_flags = ifp->if_flags | ifp->if_drv_flags;
2507		ifr->ifr_flags = temp_flags & 0xffff;
2508		ifr->ifr_flagshigh = temp_flags >> 16;
2509		break;
2510
2511	case SIOCGIFCAP:
2512		ifr->ifr_reqcap = ifp->if_capabilities;
2513		ifr->ifr_curcap = ifp->if_capenable;
2514		break;
2515
2516#ifdef MAC
2517	case SIOCGIFMAC:
2518		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2519		break;
2520#endif
2521
2522	case SIOCGIFMETRIC:
2523		ifr->ifr_metric = ifp->if_metric;
2524		break;
2525
2526	case SIOCGIFMTU:
2527		ifr->ifr_mtu = ifp->if_mtu;
2528		break;
2529
2530	case SIOCGIFPHYS:
2531		/* XXXGL: did this ever worked? */
2532		ifr->ifr_phys = 0;
2533		break;
2534
2535	case SIOCGIFDESCR:
2536		error = 0;
2537		sx_slock(&ifdescr_sx);
2538		if (ifp->if_description == NULL)
2539			error = ENOMSG;
2540		else {
2541			/* space for terminating nul */
2542			descrlen = strlen(ifp->if_description) + 1;
2543			if (ifr_buffer_get_length(ifr) < descrlen)
2544				ifr_buffer_set_buffer_null(ifr);
2545			else
2546				error = copyout(ifp->if_description,
2547				    ifr_buffer_get_buffer(ifr), descrlen);
2548			ifr_buffer_set_length(ifr, descrlen);
2549		}
2550		sx_sunlock(&ifdescr_sx);
2551		break;
2552
2553	case SIOCSIFDESCR:
2554		error = priv_check(td, PRIV_NET_SETIFDESCR);
2555		if (error)
2556			return (error);
2557
2558		/*
2559		 * Copy only (length-1) bytes to make sure that
2560		 * if_description is always nul terminated.  The
2561		 * length parameter is supposed to count the
2562		 * terminating nul in.
2563		 */
2564		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
2565			return (ENAMETOOLONG);
2566		else if (ifr_buffer_get_length(ifr) == 0)
2567			descrbuf = NULL;
2568		else {
2569			descrbuf = malloc(ifr_buffer_get_length(ifr),
2570			    M_IFDESCR, M_WAITOK | M_ZERO);
2571			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
2572			    ifr_buffer_get_length(ifr) - 1);
2573			if (error) {
2574				free(descrbuf, M_IFDESCR);
2575				break;
2576			}
2577		}
2578
2579		sx_xlock(&ifdescr_sx);
2580		odescrbuf = ifp->if_description;
2581		ifp->if_description = descrbuf;
2582		sx_xunlock(&ifdescr_sx);
2583
2584		getmicrotime(&ifp->if_lastchange);
2585		free(odescrbuf, M_IFDESCR);
2586		break;
2587
2588	case SIOCGIFFIB:
2589		ifr->ifr_fib = ifp->if_fib;
2590		break;
2591
2592	case SIOCSIFFIB:
2593		error = priv_check(td, PRIV_NET_SETIFFIB);
2594		if (error)
2595			return (error);
2596		if (ifr->ifr_fib >= rt_numfibs)
2597			return (EINVAL);
2598
2599		ifp->if_fib = ifr->ifr_fib;
2600		break;
2601
2602	case SIOCSIFFLAGS:
2603		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2604		if (error)
2605			return (error);
2606		/*
2607		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2608		 * check, so we don't need special handling here yet.
2609		 */
2610		new_flags = (ifr->ifr_flags & 0xffff) |
2611		    (ifr->ifr_flagshigh << 16);
2612		if (ifp->if_flags & IFF_UP &&
2613		    (new_flags & IFF_UP) == 0) {
2614			if_down(ifp);
2615		} else if (new_flags & IFF_UP &&
2616		    (ifp->if_flags & IFF_UP) == 0) {
2617			do_ifup = 1;
2618		}
2619		/* See if permanently promiscuous mode bit is about to flip */
2620		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2621			if (new_flags & IFF_PPROMISC)
2622				ifp->if_flags |= IFF_PROMISC;
2623			else if (ifp->if_pcount == 0)
2624				ifp->if_flags &= ~IFF_PROMISC;
2625			if (log_promisc_mode_change)
2626                                log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
2627                                    ifp->if_xname,
2628                                    ((new_flags & IFF_PPROMISC) ?
2629                                     "enabled" : "disabled"));
2630		}
2631		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2632			(new_flags &~ IFF_CANTCHANGE);
2633		if (ifp->if_ioctl) {
2634			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2635		}
2636		if (do_ifup)
2637			if_up(ifp);
2638		getmicrotime(&ifp->if_lastchange);
2639		break;
2640
2641	case SIOCSIFCAP:
2642		error = priv_check(td, PRIV_NET_SETIFCAP);
2643		if (error)
2644			return (error);
2645		if (ifp->if_ioctl == NULL)
2646			return (EOPNOTSUPP);
2647		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2648			return (EINVAL);
2649		error = (*ifp->if_ioctl)(ifp, cmd, data);
2650		if (error == 0)
2651			getmicrotime(&ifp->if_lastchange);
2652		break;
2653
2654#ifdef MAC
2655	case SIOCSIFMAC:
2656		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2657		break;
2658#endif
2659
2660	case SIOCSIFNAME:
2661		error = priv_check(td, PRIV_NET_SETIFNAME);
2662		if (error)
2663			return (error);
2664		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
2665		    NULL);
2666		if (error != 0)
2667			return (error);
2668		if (new_name[0] == '\0')
2669			return (EINVAL);
2670		if (new_name[IFNAMSIZ-1] != '\0') {
2671			new_name[IFNAMSIZ-1] = '\0';
2672			if (strlen(new_name) == IFNAMSIZ-1)
2673				return (EINVAL);
2674		}
2675		if (strcmp(new_name, ifp->if_xname) == 0)
2676			break;
2677		if (ifunit(new_name) != NULL)
2678			return (EEXIST);
2679
2680		/*
2681		 * XXX: Locking.  Nothing else seems to lock if_flags,
2682		 * and there are numerous other races with the
2683		 * ifunit() checks not being atomic with namespace
2684		 * changes (renames, vmoves, if_attach, etc).
2685		 */
2686		ifp->if_flags |= IFF_RENAMING;
2687
2688		/* Announce the departure of the interface. */
2689		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2690		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2691
2692		log(LOG_INFO, "%s: changing name to '%s'\n",
2693		    ifp->if_xname, new_name);
2694
2695		IF_ADDR_WLOCK(ifp);
2696		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
2697		ifa = ifp->if_addr;
2698		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2699		namelen = strlen(new_name);
2700		onamelen = sdl->sdl_nlen;
2701		/*
2702		 * Move the address if needed.  This is safe because we
2703		 * allocate space for a name of length IFNAMSIZ when we
2704		 * create this in if_attach().
2705		 */
2706		if (namelen != onamelen) {
2707			bcopy(sdl->sdl_data + onamelen,
2708			    sdl->sdl_data + namelen, sdl->sdl_alen);
2709		}
2710		bcopy(new_name, sdl->sdl_data, namelen);
2711		sdl->sdl_nlen = namelen;
2712		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2713		bzero(sdl->sdl_data, onamelen);
2714		while (namelen != 0)
2715			sdl->sdl_data[--namelen] = 0xff;
2716		IF_ADDR_WUNLOCK(ifp);
2717
2718		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2719		/* Announce the return of the interface. */
2720		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2721
2722		ifp->if_flags &= ~IFF_RENAMING;
2723		break;
2724
2725#ifdef VIMAGE
2726	case SIOCSIFVNET:
2727		error = priv_check(td, PRIV_NET_SETIFVNET);
2728		if (error)
2729			return (error);
2730		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2731		break;
2732#endif
2733
2734	case SIOCSIFMETRIC:
2735		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2736		if (error)
2737			return (error);
2738		ifp->if_metric = ifr->ifr_metric;
2739		getmicrotime(&ifp->if_lastchange);
2740		break;
2741
2742	case SIOCSIFPHYS:
2743		error = priv_check(td, PRIV_NET_SETIFPHYS);
2744		if (error)
2745			return (error);
2746		if (ifp->if_ioctl == NULL)
2747			return (EOPNOTSUPP);
2748		error = (*ifp->if_ioctl)(ifp, cmd, data);
2749		if (error == 0)
2750			getmicrotime(&ifp->if_lastchange);
2751		break;
2752
2753	case SIOCSIFMTU:
2754	{
2755		u_long oldmtu = ifp->if_mtu;
2756
2757		error = priv_check(td, PRIV_NET_SETIFMTU);
2758		if (error)
2759			return (error);
2760		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2761			return (EINVAL);
2762		if (ifp->if_ioctl == NULL)
2763			return (EOPNOTSUPP);
2764		error = (*ifp->if_ioctl)(ifp, cmd, data);
2765		if (error == 0) {
2766			getmicrotime(&ifp->if_lastchange);
2767			rt_ifmsg(ifp);
2768		}
2769		/*
2770		 * If the link MTU changed, do network layer specific procedure.
2771		 */
2772		if (ifp->if_mtu != oldmtu) {
2773#ifdef INET6
2774			nd6_setmtu(ifp);
2775#endif
2776			rt_updatemtu(ifp);
2777		}
2778		break;
2779	}
2780
2781	case SIOCADDMULTI:
2782	case SIOCDELMULTI:
2783		if (cmd == SIOCADDMULTI)
2784			error = priv_check(td, PRIV_NET_ADDMULTI);
2785		else
2786			error = priv_check(td, PRIV_NET_DELMULTI);
2787		if (error)
2788			return (error);
2789
2790		/* Don't allow group membership on non-multicast interfaces. */
2791		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2792			return (EOPNOTSUPP);
2793
2794		/* Don't let users screw up protocols' entries. */
2795		if (ifr->ifr_addr.sa_family != AF_LINK)
2796			return (EINVAL);
2797
2798		if (cmd == SIOCADDMULTI) {
2799			struct ifmultiaddr *ifma;
2800
2801			/*
2802			 * Userland is only permitted to join groups once
2803			 * via the if_addmulti() KPI, because it cannot hold
2804			 * struct ifmultiaddr * between calls. It may also
2805			 * lose a race while we check if the membership
2806			 * already exists.
2807			 */
2808			IF_ADDR_RLOCK(ifp);
2809			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2810			IF_ADDR_RUNLOCK(ifp);
2811			if (ifma != NULL)
2812				error = EADDRINUSE;
2813			else
2814				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2815		} else {
2816			error = if_delmulti(ifp, &ifr->ifr_addr);
2817		}
2818		if (error == 0)
2819			getmicrotime(&ifp->if_lastchange);
2820		break;
2821
2822	case SIOCSIFPHYADDR:
2823	case SIOCDIFPHYADDR:
2824#ifdef INET6
2825	case SIOCSIFPHYADDR_IN6:
2826#endif
2827	case SIOCSIFMEDIA:
2828	case SIOCSIFGENERIC:
2829		error = priv_check(td, PRIV_NET_HWIOCTL);
2830		if (error)
2831			return (error);
2832		if (ifp->if_ioctl == NULL)
2833			return (EOPNOTSUPP);
2834		error = (*ifp->if_ioctl)(ifp, cmd, data);
2835		if (error == 0)
2836			getmicrotime(&ifp->if_lastchange);
2837		break;
2838
2839	case SIOCGIFSTATUS:
2840	case SIOCGIFPSRCADDR:
2841	case SIOCGIFPDSTADDR:
2842	case SIOCGIFMEDIA:
2843	case SIOCGIFXMEDIA:
2844	case SIOCGIFGENERIC:
2845	case SIOCGIFRSSKEY:
2846	case SIOCGIFRSSHASH:
2847	case SIOCGIFDOWNREASON:
2848		if (ifp->if_ioctl == NULL)
2849			return (EOPNOTSUPP);
2850		error = (*ifp->if_ioctl)(ifp, cmd, data);
2851		break;
2852
2853	case SIOCSIFLLADDR:
2854		error = priv_check(td, PRIV_NET_SETLLADDR);
2855		if (error)
2856			return (error);
2857		error = if_setlladdr(ifp,
2858		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2859		break;
2860
2861	case SIOCGHWADDR:
2862		error = if_gethwaddr(ifp, ifr);
2863		break;
2864
2865	CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
2866		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2867		if (error)
2868			return (error);
2869		if ((error = if_addgroup(ifp,
2870		    ifgr_group_get((struct ifgroupreq *)data))))
2871			return (error);
2872		break;
2873
2874	CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
2875		if ((error = if_getgroup((struct ifgroupreq *)data, ifp)))
2876			return (error);
2877		break;
2878
2879	CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
2880		error = priv_check(td, PRIV_NET_DELIFGROUP);
2881		if (error)
2882			return (error);
2883		if ((error = if_delgroup(ifp,
2884		    ifgr_group_get((struct ifgroupreq *)data))))
2885			return (error);
2886		break;
2887
2888	default:
2889		error = ENOIOCTL;
2890		break;
2891	}
2892	return (error);
2893}
2894
/*
 * COMPAT_SVR4: alternative command number for the interface
 * configuration request; handled next to SIOCGIFCONF.
 */
#define	OSIOCGIFCONF	_IOWR('i', 20, struct ifconf)

#ifdef COMPAT_FREEBSD32
/*
 * Layout of the interface configuration request used with
 * SIOCGIFCONF32: the buffer/request pointer is carried as a 32-bit
 * integer.
 */
struct ifconf32 {
	int32_t	ifc_len;
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	} ifc_ifcu;
};
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
#endif
2908
2909#ifdef COMPAT_FREEBSD32
2910static void
2911ifmr_init(struct ifmediareq *ifmr, caddr_t data)
2912{
2913	struct ifmediareq32 *ifmr32;
2914
2915	ifmr32 = (struct ifmediareq32 *)data;
2916	memcpy(ifmr->ifm_name, ifmr32->ifm_name,
2917	    sizeof(ifmr->ifm_name));
2918	ifmr->ifm_current = ifmr32->ifm_current;
2919	ifmr->ifm_mask = ifmr32->ifm_mask;
2920	ifmr->ifm_status = ifmr32->ifm_status;
2921	ifmr->ifm_active = ifmr32->ifm_active;
2922	ifmr->ifm_count = ifmr32->ifm_count;
2923	ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist;
2924}
2925
2926static void
2927ifmr_update(const struct ifmediareq *ifmr, caddr_t data)
2928{
2929	struct ifmediareq32 *ifmr32;
2930
2931	ifmr32 = (struct ifmediareq32 *)data;
2932	ifmr32->ifm_current = ifmr->ifm_current;
2933	ifmr32->ifm_mask = ifmr->ifm_mask;
2934	ifmr32->ifm_status = ifmr->ifm_status;
2935	ifmr32->ifm_active = ifmr->ifm_active;
2936	ifmr32->ifm_count = ifmr->ifm_count;
2937}
2938#endif
2939
2940/*
2941 * Interface ioctls.
2942 */
2943int
2944ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2945{
2946#ifdef COMPAT_FREEBSD32
2947	caddr_t saved_data;
2948	struct ifmediareq ifmr;
2949#endif
2950	struct ifmediareq *ifmrp;
2951	struct ifnet *ifp;
2952	struct ifreq *ifr;
2953	int error;
2954	int oif_flags;
2955#ifdef VIMAGE
2956	int shutdown;
2957#endif
2958
2959	CURVNET_SET(so->so_vnet);
2960#ifdef VIMAGE
2961	/* Make sure the VNET is stable. */
2962	shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
2963		 so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
2964	if (shutdown) {
2965		CURVNET_RESTORE();
2966		return (EBUSY);
2967	}
2968#endif
2969
2970
2971	switch (cmd) {
2972	case SIOCGIFCONF:
2973	case OSIOCGIFCONF:	/* COMPAT_SVR4 */
2974		error = ifconf(cmd, data);
2975		CURVNET_RESTORE();
2976		return (error);
2977
2978#ifdef COMPAT_FREEBSD32
2979	case SIOCGIFCONF32:
2980		{
2981			struct ifconf32 *ifc32;
2982			struct ifconf ifc;
2983
2984			ifc32 = (struct ifconf32 *)data;
2985			ifc.ifc_len = ifc32->ifc_len;
2986			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
2987
2988			error = ifconf(SIOCGIFCONF, (void *)&ifc);
2989			CURVNET_RESTORE();
2990			if (error == 0)
2991				ifc32->ifc_len = ifc.ifc_len;
2992			return (error);
2993		}
2994#endif
2995	}
2996
2997	ifmrp = NULL;
2998#ifdef COMPAT_FREEBSD32
2999	switch (cmd) {
3000	case SIOCGIFMEDIA32:
3001	case SIOCGIFXMEDIA32:
3002		ifmrp = &ifmr;
3003		ifmr_init(ifmrp, data);
3004		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
3005		saved_data = data;
3006		data = (caddr_t)ifmrp;
3007	}
3008#endif
3009
3010	ifr = (struct ifreq *)data;
3011	switch (cmd) {
3012#ifdef VIMAGE
3013	case SIOCSIFRVNET:
3014		error = priv_check(td, PRIV_NET_SETIFVNET);
3015		if (error == 0)
3016			error = if_vmove_reclaim(td, ifr->ifr_name,
3017			    ifr->ifr_jid);
3018		goto out_noref;
3019#endif
3020	case SIOCIFCREATE:
3021	case SIOCIFCREATE2:
3022		error = priv_check(td, PRIV_NET_IFCREATE);
3023		if (error == 0)
3024			error = if_clone_create(ifr->ifr_name,
3025			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
3026			    ifr_data_get_ptr(ifr) : NULL);
3027		goto out_noref;
3028	case SIOCIFDESTROY:
3029		error = priv_check(td, PRIV_NET_IFDESTROY);
3030		if (error == 0)
3031			error = if_clone_destroy(ifr->ifr_name);
3032		goto out_noref;
3033
3034	case SIOCIFGCLONERS:
3035		error = if_clone_list((struct if_clonereq *)data);
3036		goto out_noref;
3037
3038	CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
3039		error = if_getgroupmembers((struct ifgroupreq *)data);
3040		goto out_noref;
3041
3042#if defined(INET) || defined(INET6)
3043	case SIOCSVH:
3044	case SIOCGVH:
3045		if (carp_ioctl_p == NULL)
3046			error = EPROTONOSUPPORT;
3047		else
3048			error = (*carp_ioctl_p)(ifr, cmd, td);
3049		goto out_noref;
3050#endif
3051	}
3052
3053	ifp = ifunit_ref(ifr->ifr_name);
3054	if (ifp == NULL) {
3055		error = ENXIO;
3056		goto out_noref;
3057	}
3058
3059	error = ifhwioctl(cmd, ifp, data, td);
3060	if (error != ENOIOCTL)
3061		goto out_ref;
3062
3063	oif_flags = ifp->if_flags;
3064	if (so->so_proto == NULL) {
3065		error = EOPNOTSUPP;
3066		goto out_ref;
3067	}
3068
3069	/*
3070	 * Pass the request on to the socket control method, and if the
3071	 * latter returns EOPNOTSUPP, directly to the interface.
3072	 *
3073	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
3074	 * trust SIOCSIFADDR et al to come from an already privileged
3075	 * layer, and do not perform any credentials checks or input
3076	 * validation.
3077	 */
3078	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
3079	    ifp, td));
3080	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
3081	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
3082	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
3083		error = (*ifp->if_ioctl)(ifp, cmd, data);
3084
3085	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
3086#ifdef INET6
3087		if (ifp->if_flags & IFF_UP)
3088			in6_if_up(ifp);
3089#endif
3090	}
3091
3092out_ref:
3093	if_rele(ifp);
3094out_noref:
3095#ifdef COMPAT_FREEBSD32
3096	if (ifmrp != NULL) {
3097		KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA),
3098		    ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx",
3099		     cmd));
3100		data = saved_data;
3101		ifmr_update(ifmrp, data);
3102	}
3103#endif
3104	CURVNET_RESTORE();
3105	return (error);
3106}
3107
3108/*
3109 * The code common to handling reference counted flags,
3110 * e.g., in ifpromisc() and if_allmulti().
3111 * The "pflag" argument can specify a permanent mode flag to check,
3112 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
3113 *
3114 * Only to be used on stack-owned flags, not driver-owned flags.
3115 */
3116static int
3117if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
3118{
3119	struct ifreq ifr;
3120	int error;
3121	int oldflags, oldcount;
3122
3123	/* Sanity checks to catch programming errors */
3124	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
3125	    ("%s: setting driver-owned flag %d", __func__, flag));
3126
3127	if (onswitch)
3128		KASSERT(*refcount >= 0,
3129		    ("%s: increment negative refcount %d for flag %d",
3130		    __func__, *refcount, flag));
3131	else
3132		KASSERT(*refcount > 0,
3133		    ("%s: decrement non-positive refcount %d for flag %d",
3134		    __func__, *refcount, flag));
3135
3136	/* In case this mode is permanent, just touch refcount */
3137	if (ifp->if_flags & pflag) {
3138		*refcount += onswitch ? 1 : -1;
3139		return (0);
3140	}
3141
3142	/* Save ifnet parameters for if_ioctl() may fail */
3143	oldcount = *refcount;
3144	oldflags = ifp->if_flags;
3145
3146	/*
3147	 * See if we aren't the only and touching refcount is enough.
3148	 * Actually toggle interface flag if we are the first or last.
3149	 */
3150	if (onswitch) {
3151		if ((*refcount)++)
3152			return (0);
3153		ifp->if_flags |= flag;
3154	} else {
3155		if (--(*refcount))
3156			return (0);
3157		ifp->if_flags &= ~flag;
3158	}
3159
3160	/* Call down the driver since we've changed interface flags */
3161	if (ifp->if_ioctl == NULL) {
3162		error = EOPNOTSUPP;
3163		goto recover;
3164	}
3165	ifr.ifr_flags = ifp->if_flags & 0xffff;
3166	ifr.ifr_flagshigh = ifp->if_flags >> 16;
3167	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3168	if (error)
3169		goto recover;
3170	/* Notify userland that interface flags have changed */
3171	rt_ifmsg(ifp);
3172	return (0);
3173
3174recover:
3175	/* Recover after driver error */
3176	*refcount = oldcount;
3177	ifp->if_flags = oldflags;
3178	return (error);
3179}
3180
3181/*
3182 * Set/clear promiscuous mode on interface ifp based on the truth value
3183 * of pswitch.  The calls are reference counted so that only the first
3184 * "on" request actually has an effect, as does the final "off" request.
3185 * Results are undefined if the "off" and "on" requests are not matched.
3186 */
3187int
3188ifpromisc(struct ifnet *ifp, int pswitch)
3189{
3190	int error;
3191	int oldflags = ifp->if_flags;
3192
3193	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
3194			   &ifp->if_pcount, pswitch);
3195	/* If promiscuous mode status has changed, log a message */
3196	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
3197            log_promisc_mode_change)
3198		log(LOG_INFO, "%s: promiscuous mode %s\n",
3199		    ifp->if_xname,
3200		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
3201	return (error);
3202}
3203
3204/*
3205 * Return interface configuration
3206 * of system.  List may be used
3207 * in later ioctl's (above) to get
3208 * other information.
3209 */
3210/*ARGSUSED*/
3211static int
3212ifconf(u_long cmd, caddr_t data)
3213{
3214	struct ifconf *ifc = (struct ifconf *)data;
3215	struct ifnet *ifp;
3216	struct ifaddr *ifa;
3217	struct ifreq ifr;
3218	struct sbuf *sb;
3219	int error, full = 0, valid_len, max_len;
3220
3221	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
3222	max_len = MAXPHYS - 1;
3223
3224	/* Prevent hostile input from being able to crash the system */
3225	if (ifc->ifc_len <= 0)
3226		return (EINVAL);
3227
3228again:
3229	if (ifc->ifc_len <= max_len) {
3230		max_len = ifc->ifc_len;
3231		full = 1;
3232	}
3233	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
3234	max_len = 0;
3235	valid_len = 0;
3236
3237	IFNET_RLOCK();
3238	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
3239		int addrs;
3240
3241		/*
3242		 * Zero the ifr to make sure we don't disclose the contents
3243		 * of the stack.
3244		 */
3245		memset(&ifr, 0, sizeof(ifr));
3246
3247		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
3248		    >= sizeof(ifr.ifr_name)) {
3249			sbuf_delete(sb);
3250			IFNET_RUNLOCK();
3251			return (ENAMETOOLONG);
3252		}
3253
3254		addrs = 0;
3255		IF_ADDR_RLOCK(ifp);
3256		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3257			struct sockaddr *sa = ifa->ifa_addr;
3258
3259			if (prison_if(curthread->td_ucred, sa) != 0)
3260				continue;
3261			addrs++;
3262			/* COMPAT_SVR4 */
3263			if (cmd == OSIOCGIFCONF) {
3264				struct osockaddr *osa =
3265				    (struct osockaddr *)&ifr.ifr_addr;
3266				ifr.ifr_addr = *sa;
3267				osa->sa_family = sa->sa_family;
3268				sbuf_bcat(sb, &ifr, sizeof(ifr));
3269				max_len += sizeof(ifr);
3270			} else
3271			if (sa->sa_len <= sizeof(*sa)) {
3272				if (sa->sa_len < sizeof(*sa)) {
3273					memset(&ifr.ifr_ifru.ifru_addr, 0,
3274					    sizeof(ifr.ifr_ifru.ifru_addr));
3275					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
3276					    sa->sa_len);
3277				} else
3278					ifr.ifr_ifru.ifru_addr = *sa;
3279				sbuf_bcat(sb, &ifr, sizeof(ifr));
3280				max_len += sizeof(ifr);
3281			} else {
3282				sbuf_bcat(sb, &ifr,
3283				    offsetof(struct ifreq, ifr_addr));
3284				max_len += offsetof(struct ifreq, ifr_addr);
3285				sbuf_bcat(sb, sa, sa->sa_len);
3286				max_len += sa->sa_len;
3287			}
3288
3289			if (sbuf_error(sb) == 0)
3290				valid_len = sbuf_len(sb);
3291		}
3292		IF_ADDR_RUNLOCK(ifp);
3293		if (addrs == 0) {
3294			sbuf_bcat(sb, &ifr, sizeof(ifr));
3295			max_len += sizeof(ifr);
3296
3297			if (sbuf_error(sb) == 0)
3298				valid_len = sbuf_len(sb);
3299		}
3300	}
3301	IFNET_RUNLOCK();
3302
3303	/*
3304	 * If we didn't allocate enough space (uncommon), try again.  If
3305	 * we have already allocated as much space as we are allowed,
3306	 * return what we've got.
3307	 */
3308	if (valid_len != max_len && !full) {
3309		sbuf_delete(sb);
3310		goto again;
3311	}
3312
3313	ifc->ifc_len = valid_len;
3314	sbuf_finish(sb);
3315	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
3316	sbuf_delete(sb);
3317	return (error);
3318}
3319
3320/*
3321 * Just like ifpromisc(), but for all-multicast-reception mode.
3322 */
3323int
3324if_allmulti(struct ifnet *ifp, int onswitch)
3325{
3326
3327	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3328}
3329
3330struct ifmultiaddr *
3331if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3332{
3333	struct ifmultiaddr *ifma;
3334
3335	IF_ADDR_LOCK_ASSERT(ifp);
3336
3337	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3338		if (sa->sa_family == AF_LINK) {
3339			if (sa_dl_equal(ifma->ifma_addr, sa))
3340				break;
3341		} else {
3342			if (sa_equal(ifma->ifma_addr, sa))
3343				break;
3344		}
3345	}
3346
3347	return ifma;
3348}
3349
3350/*
3351 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3352 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3353 * the ifnet multicast address list here, so the caller must do that and
3354 * other setup work (such as notifying the device driver).  The reference
3355 * count is initialized to 1.
3356 */
3357static struct ifmultiaddr *
3358if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3359    int mflags)
3360{
3361	struct ifmultiaddr *ifma;
3362	struct sockaddr *dupsa;
3363
3364	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3365	    M_ZERO);
3366	if (ifma == NULL)
3367		return (NULL);
3368
3369	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3370	if (dupsa == NULL) {
3371		free(ifma, M_IFMADDR);
3372		return (NULL);
3373	}
3374	bcopy(sa, dupsa, sa->sa_len);
3375	ifma->ifma_addr = dupsa;
3376
3377	ifma->ifma_ifp = ifp;
3378	ifma->ifma_refcount = 1;
3379	ifma->ifma_protospec = NULL;
3380
3381	if (llsa == NULL) {
3382		ifma->ifma_lladdr = NULL;
3383		return (ifma);
3384	}
3385
3386	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3387	if (dupsa == NULL) {
3388		free(ifma->ifma_addr, M_IFMADDR);
3389		free(ifma, M_IFMADDR);
3390		return (NULL);
3391	}
3392	bcopy(llsa, dupsa, llsa->sa_len);
3393	ifma->ifma_lladdr = dupsa;
3394
3395	return (ifma);
3396}
3397
3398/*
3399 * if_freemulti: free ifmultiaddr structure and possibly attached related
3400 * addresses.  The caller is responsible for implementing reference
3401 * counting, notifying the driver, handling routing messages, and releasing
3402 * any dependent link layer state.
3403 */
3404static void
3405if_freemulti(struct ifmultiaddr *ifma)
3406{
3407
3408	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3409	    ifma->ifma_refcount));
3410
3411	if (ifma->ifma_lladdr != NULL)
3412		free(ifma->ifma_lladdr, M_IFMADDR);
3413	free(ifma->ifma_addr, M_IFMADDR);
3414	free(ifma, M_IFMADDR);
3415}
3416
3417/*
3418 * Register an additional multicast address with a network interface.
3419 *
3420 * - If the address is already present, bump the reference count on the
3421 *   address and return.
3422 * - If the address is not link-layer, look up a link layer address.
3423 * - Allocate address structures for one or both addresses, and attach to the
3424 *   multicast address list on the interface.  If automatically adding a link
3425 *   layer address, the protocol address will own a reference to the link
3426 *   layer address, to be freed when it is freed.
3427 * - Notify the network device driver of an addition to the multicast address
3428 *   list.
3429 *
3430 * 'sa' points to caller-owned memory with the desired multicast address.
3431 *
3432 * 'retifma' will be used to return a pointer to the resulting multicast
3433 * address reference, if desired.
3434 */
3435int
3436if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
3437    struct ifmultiaddr **retifma)
3438{
3439	struct ifmultiaddr *ifma, *ll_ifma;
3440	struct sockaddr *llsa;
3441	struct sockaddr_dl sdl;
3442	int error;
3443
3444	/*
3445	 * If the address is already present, return a new reference to it;
3446	 * otherwise, allocate storage and set up a new address.
3447	 */
3448	IF_ADDR_WLOCK(ifp);
3449	ifma = if_findmulti(ifp, sa);
3450	if (ifma != NULL) {
3451		ifma->ifma_refcount++;
3452		if (retifma != NULL)
3453			*retifma = ifma;
3454		IF_ADDR_WUNLOCK(ifp);
3455		return (0);
3456	}
3457
3458	/*
3459	 * The address isn't already present; resolve the protocol address
3460	 * into a link layer address, and then look that up, bump its
3461	 * refcount or allocate an ifma for that also.
3462	 * Most link layer resolving functions returns address data which
3463	 * fits inside default sockaddr_dl structure. However callback
3464	 * can allocate another sockaddr structure, in that case we need to
3465	 * free it later.
3466	 */
3467	llsa = NULL;
3468	ll_ifma = NULL;
3469	if (ifp->if_resolvemulti != NULL) {
3470		/* Provide called function with buffer size information */
3471		sdl.sdl_len = sizeof(sdl);
3472		llsa = (struct sockaddr *)&sdl;
3473		error = ifp->if_resolvemulti(ifp, &llsa, sa);
3474		if (error)
3475			goto unlock_out;
3476	}
3477
3478	/*
3479	 * Allocate the new address.  Don't hook it up yet, as we may also
3480	 * need to allocate a link layer multicast address.
3481	 */
3482	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
3483	if (ifma == NULL) {
3484		error = ENOMEM;
3485		goto free_llsa_out;
3486	}
3487
3488	/*
3489	 * If a link layer address is found, we'll need to see if it's
3490	 * already present in the address list, or allocate is as well.
3491	 * When this block finishes, the link layer address will be on the
3492	 * list.
3493	 */
3494	if (llsa != NULL) {
3495		ll_ifma = if_findmulti(ifp, llsa);
3496		if (ll_ifma == NULL) {
3497			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
3498			if (ll_ifma == NULL) {
3499				--ifma->ifma_refcount;
3500				if_freemulti(ifma);
3501				error = ENOMEM;
3502				goto free_llsa_out;
3503			}
3504			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
3505			    ifma_link);
3506		} else
3507			ll_ifma->ifma_refcount++;
3508		ifma->ifma_llifma = ll_ifma;
3509	}
3510
3511	/*
3512	 * We now have a new multicast address, ifma, and possibly a new or
3513	 * referenced link layer address.  Add the primary address to the
3514	 * ifnet address list.
3515	 */
3516	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
3517
3518	if (retifma != NULL)
3519		*retifma = ifma;
3520
3521	/*
3522	 * Must generate the message while holding the lock so that 'ifma'
3523	 * pointer is still valid.
3524	 */
3525	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
3526	IF_ADDR_WUNLOCK(ifp);
3527
3528	/*
3529	 * We are certain we have added something, so call down to the
3530	 * interface to let them know about it.
3531	 */
3532	if (ifp->if_ioctl != NULL) {
3533		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3534	}
3535
3536	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3537		link_free_sdl(llsa);
3538
3539	return (0);
3540
3541free_llsa_out:
3542	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3543		link_free_sdl(llsa);
3544
3545unlock_out:
3546	IF_ADDR_WUNLOCK(ifp);
3547	return (error);
3548}
3549
3550/*
3551 * Delete a multicast group membership by network-layer group address.
3552 *
3553 * Returns ENOENT if the entry could not be found. If ifp no longer
3554 * exists, results are undefined. This entry point should only be used
3555 * from subsystems which do appropriate locking to hold ifp for the
3556 * duration of the call.
3557 * Network-layer protocol domains must use if_delmulti_ifma().
3558 */
3559int
3560if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3561{
3562	struct ifmultiaddr *ifma;
3563	int lastref;
3564#ifdef INVARIANTS
3565	struct ifnet *oifp;
3566
3567	IFNET_RLOCK_NOSLEEP();
3568	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3569		if (ifp == oifp)
3570			break;
3571	if (ifp != oifp)
3572		ifp = NULL;
3573	IFNET_RUNLOCK_NOSLEEP();
3574
3575	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
3576#endif
3577	if (ifp == NULL)
3578		return (ENOENT);
3579
3580	IF_ADDR_WLOCK(ifp);
3581	lastref = 0;
3582	ifma = if_findmulti(ifp, sa);
3583	if (ifma != NULL)
3584		lastref = if_delmulti_locked(ifp, ifma, 0);
3585	IF_ADDR_WUNLOCK(ifp);
3586
3587	if (ifma == NULL)
3588		return (ENOENT);
3589
3590	if (lastref && ifp->if_ioctl != NULL) {
3591		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3592	}
3593
3594	return (0);
3595}
3596
3597/*
3598 * Delete all multicast group membership for an interface.
3599 * Should be used to quickly flush all multicast filters.
3600 */
3601void
3602if_delallmulti(struct ifnet *ifp)
3603{
3604	struct ifmultiaddr *ifma;
3605	struct ifmultiaddr *next;
3606
3607	IF_ADDR_WLOCK(ifp);
3608	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3609		if_delmulti_locked(ifp, ifma, 0);
3610	IF_ADDR_WUNLOCK(ifp);
3611}
3612
3613/*
3614 * Delete a multicast group membership by group membership pointer.
3615 * Network-layer protocol domains must use this routine.
3616 *
3617 * It is safe to call this routine if the ifp disappeared.
3618 */
3619void
3620if_delmulti_ifma(struct ifmultiaddr *ifma)
3621{
3622	struct ifnet *ifp;
3623	int lastref;
3624
3625	ifp = ifma->ifma_ifp;
3626#ifdef DIAGNOSTIC
3627	if (ifp == NULL) {
3628		printf("%s: ifma_ifp seems to be detached\n", __func__);
3629	} else {
3630		struct ifnet *oifp;
3631
3632		IFNET_RLOCK_NOSLEEP();
3633		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3634			if (ifp == oifp)
3635				break;
3636		if (ifp != oifp) {
3637			printf("%s: ifnet %p disappeared\n", __func__, ifp);
3638			ifp = NULL;
3639		}
3640		IFNET_RUNLOCK_NOSLEEP();
3641	}
3642#endif
3643	/*
3644	 * If and only if the ifnet instance exists: Acquire the address lock.
3645	 */
3646	if (ifp != NULL)
3647		IF_ADDR_WLOCK(ifp);
3648
3649	lastref = if_delmulti_locked(ifp, ifma, 0);
3650
3651	if (ifp != NULL) {
3652		/*
3653		 * If and only if the ifnet instance exists:
3654		 *  Release the address lock.
3655		 *  If the group was left: update the hardware hash filter.
3656		 */
3657		IF_ADDR_WUNLOCK(ifp);
3658		if (lastref && ifp->if_ioctl != NULL) {
3659			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3660		}
3661	}
3662}
3663
3664/*
3665 * Perform deletion of network-layer and/or link-layer multicast address.
3666 *
3667 * Return 0 if the reference count was decremented.
3668 * Return 1 if the final reference was released, indicating that the
3669 * hardware hash filter should be reprogrammed.
3670 */
3671static int
3672if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3673{
3674	struct ifmultiaddr *ll_ifma;
3675
3676	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3677		KASSERT(ifma->ifma_ifp == ifp,
3678		    ("%s: inconsistent ifp %p", __func__, ifp));
3679		IF_ADDR_WLOCK_ASSERT(ifp);
3680	}
3681
3682	ifp = ifma->ifma_ifp;
3683
3684	/*
3685	 * If the ifnet is detaching, null out references to ifnet,
3686	 * so that upper protocol layers will notice, and not attempt
3687	 * to obtain locks for an ifnet which no longer exists. The
3688	 * routing socket announcement must happen before the ifnet
3689	 * instance is detached from the system.
3690	 */
3691	if (detaching) {
3692#ifdef DIAGNOSTIC
3693		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3694#endif
3695		/*
3696		 * ifp may already be nulled out if we are being reentered
3697		 * to delete the ll_ifma.
3698		 */
3699		if (ifp != NULL) {
3700			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3701			ifma->ifma_ifp = NULL;
3702		}
3703	}
3704
3705	if (--ifma->ifma_refcount > 0)
3706		return 0;
3707
3708	/*
3709	 * If this ifma is a network-layer ifma, a link-layer ifma may
3710	 * have been associated with it. Release it first if so.
3711	 */
3712	ll_ifma = ifma->ifma_llifma;
3713	if (ll_ifma != NULL) {
3714		KASSERT(ifma->ifma_lladdr != NULL,
3715		    ("%s: llifma w/o lladdr", __func__));
3716		if (detaching)
3717			ll_ifma->ifma_ifp = NULL;	/* XXX */
3718		if (--ll_ifma->ifma_refcount == 0) {
3719			if (ifp != NULL) {
3720				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
3721				    ifma_link);
3722			}
3723			if_freemulti(ll_ifma);
3724		}
3725	}
3726
3727	if (ifp != NULL)
3728		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
3729
3730	if_freemulti(ifma);
3731
3732	/*
3733	 * The last reference to this instance of struct ifmultiaddr
3734	 * was released; the hardware should be notified of this change.
3735	 */
3736	return 1;
3737}
3738
3739/*
3740 * Set the link layer address on an interface.
3741 *
3742 * At this time we only support certain types of interfaces,
3743 * and we don't allow the length of the address to change.
3744 *
3745 * Set noinline to be dtrace-friendly
3746 */
3747__noinline int
3748if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3749{
3750	struct sockaddr_dl *sdl;
3751	struct ifaddr *ifa;
3752	struct ifreq ifr;
3753
3754	IF_ADDR_RLOCK(ifp);
3755	ifa = ifp->if_addr;
3756	if (ifa == NULL) {
3757		IF_ADDR_RUNLOCK(ifp);
3758		return (EINVAL);
3759	}
3760	ifa_ref(ifa);
3761	IF_ADDR_RUNLOCK(ifp);
3762	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3763	if (sdl == NULL) {
3764		ifa_free(ifa);
3765		return (EINVAL);
3766	}
3767	if (len != sdl->sdl_alen) {	/* don't allow length to change */
3768		ifa_free(ifa);
3769		return (EINVAL);
3770	}
3771	switch (ifp->if_type) {
3772	case IFT_ETHER:
3773	case IFT_FDDI:
3774	case IFT_XETHER:
3775	case IFT_ISO88025:
3776	case IFT_L2VLAN:
3777	case IFT_BRIDGE:
3778	case IFT_ARCNET:
3779	case IFT_IEEE8023ADLAG:
3780	case IFT_IEEE80211:
3781		bcopy(lladdr, LLADDR(sdl), len);
3782		ifa_free(ifa);
3783		break;
3784	default:
3785		ifa_free(ifa);
3786		return (ENODEV);
3787	}
3788
3789	/*
3790	 * If the interface is already up, we need
3791	 * to re-init it in order to reprogram its
3792	 * address filter.
3793	 */
3794	if ((ifp->if_flags & IFF_UP) != 0) {
3795		if (ifp->if_ioctl) {
3796			ifp->if_flags &= ~IFF_UP;
3797			ifr.ifr_flags = ifp->if_flags & 0xffff;
3798			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3799			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3800			ifp->if_flags |= IFF_UP;
3801			ifr.ifr_flags = ifp->if_flags & 0xffff;
3802			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3803			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3804		}
3805	}
3806	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3807	return (0);
3808}
3809
3810/*
3811 * Compat function for handling basic encapsulation requests.
3812 * Not converted stacks (FDDI, IB, ..) supports traditional
3813 * output model: ARP (and other similar L2 protocols) are handled
3814 * inside output routine, arpresolve/nd6_resolve() returns MAC
3815 * address instead of full prepend.
3816 *
3817 * This function creates calculated header==MAC for IPv4/IPv6 and
3818 * returns EAFNOSUPPORT (which is then handled in ARP code) for other
3819 * address families.
3820 */
3821static int
3822if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3823{
3824
3825	if (req->rtype != IFENCAP_LL)
3826		return (EOPNOTSUPP);
3827
3828	if (req->bufsize < req->lladdr_len)
3829		return (ENOMEM);
3830
3831	switch (req->family) {
3832	case AF_INET:
3833	case AF_INET6:
3834		break;
3835	default:
3836		return (EAFNOSUPPORT);
3837	}
3838
3839	/* Copy lladdr to storage as is */
3840	memmove(req->buf, req->lladdr, req->lladdr_len);
3841	req->bufsize = req->lladdr_len;
3842	req->lladdr_off = 0;
3843
3844	return (0);
3845}
3846
3847/*
3848 * Get the link layer address that was read from the hardware at attach.
3849 *
3850 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
3851 * their component interfaces as IFT_IEEE8023ADLAG.
3852 */
3853int
3854if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
3855{
3856
3857	if (ifp->if_hw_addr == NULL)
3858		return (ENODEV);
3859
3860	switch (ifp->if_type) {
3861	case IFT_ETHER:
3862	case IFT_IEEE8023ADLAG:
3863		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
3864		return (0);
3865	default:
3866		return (ENODEV);
3867	}
3868}
3869
3870/*
3871 * The name argument must be a pointer to storage which will last as
3872 * long as the interface does.  For physical devices, the result of
3873 * device_get_name(dev) is a good choice and for pseudo-devices a
3874 * static string works well.
3875 */
3876void
3877if_initname(struct ifnet *ifp, const char *name, int unit)
3878{
3879	ifp->if_dname = name;
3880	ifp->if_dunit = unit;
3881	if (unit != IF_DUNIT_NONE)
3882		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3883	else
3884		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3885}
3886
3887int
3888if_printf(struct ifnet *ifp, const char * fmt, ...)
3889{
3890	va_list ap;
3891	int retval;
3892
3893	retval = printf("%s: ", ifp->if_xname);
3894	va_start(ap, fmt);
3895	retval += vprintf(fmt, ap);
3896	va_end(ap);
3897	return (retval);
3898}
3899
3900void
3901if_start(struct ifnet *ifp)
3902{
3903
3904	(*(ifp)->if_start)(ifp);
3905}
3906
/*
 * Backwards-compatibility transmit routine for drivers that have not
 * implemented their own: hands the mbuf off through IFQ_HANDOFF() and
 * returns the error that macro reports.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int rc;

	IFQ_HANDOFF(ifp, m, rc);
	return (rc);
}
3919
3920static void
3921if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
3922{
3923
3924	m_freem(m);
3925}
3926
3927int
3928if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3929{
3930	int active = 0;
3931
3932	IF_LOCK(ifq);
3933	if (_IF_QFULL(ifq)) {
3934		IF_UNLOCK(ifq);
3935		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3936		m_freem(m);
3937		return (0);
3938	}
3939	if (ifp != NULL) {
3940		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
3941		if (m->m_flags & (M_BCAST|M_MCAST))
3942			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
3943		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3944	}
3945	_IF_ENQUEUE(ifq, m);
3946	IF_UNLOCK(ifq);
3947	if (ifp != NULL && !active)
3948		(*(ifp)->if_start)(ifp);
3949	return (1);
3950}
3951
3952void
3953if_register_com_alloc(u_char type,
3954    if_com_alloc_t *a, if_com_free_t *f)
3955{
3956
3957	KASSERT(if_com_alloc[type] == NULL,
3958	    ("if_register_com_alloc: %d already registered", type));
3959	KASSERT(if_com_free[type] == NULL,
3960	    ("if_register_com_alloc: %d free already registered", type));
3961
3962	if_com_alloc[type] = a;
3963	if_com_free[type] = f;
3964}
3965
3966void
3967if_deregister_com_alloc(u_char type)
3968{
3969
3970	KASSERT(if_com_alloc[type] != NULL,
3971	    ("if_deregister_com_alloc: %d not registered", type));
3972	KASSERT(if_com_free[type] != NULL,
3973	    ("if_deregister_com_alloc: %d free not registered", type));
3974	if_com_alloc[type] = NULL;
3975	if_com_free[type] = NULL;
3976}
3977
3978/* API for driver access to network stack owned ifnet.*/
3979uint64_t
3980if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
3981{
3982	uint64_t oldbrate;
3983
3984	oldbrate = ifp->if_baudrate;
3985	ifp->if_baudrate = baudrate;
3986	return (oldbrate);
3987}
3988
3989uint64_t
3990if_getbaudrate(if_t ifp)
3991{
3992
3993	return (((struct ifnet *)ifp)->if_baudrate);
3994}
3995
3996int
3997if_setcapabilities(if_t ifp, int capabilities)
3998{
3999	((struct ifnet *)ifp)->if_capabilities = capabilities;
4000	return (0);
4001}
4002
4003int
4004if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
4005{
4006	((struct ifnet *)ifp)->if_capabilities |= setbit;
4007	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
4008
4009	return (0);
4010}
4011
4012int
4013if_getcapabilities(if_t ifp)
4014{
4015	return ((struct ifnet *)ifp)->if_capabilities;
4016}
4017
4018int
4019if_setcapenable(if_t ifp, int capabilities)
4020{
4021	((struct ifnet *)ifp)->if_capenable = capabilities;
4022	return (0);
4023}
4024
4025int
4026if_setcapenablebit(if_t ifp, int setcap, int clearcap)
4027{
4028	if(setcap)
4029		((struct ifnet *)ifp)->if_capenable |= setcap;
4030	if(clearcap)
4031		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
4032
4033	return (0);
4034}
4035
4036const char *
4037if_getdname(if_t ifp)
4038{
4039	return ((struct ifnet *)ifp)->if_dname;
4040}
4041
4042int
4043if_togglecapenable(if_t ifp, int togglecap)
4044{
4045	((struct ifnet *)ifp)->if_capenable ^= togglecap;
4046	return (0);
4047}
4048
4049int
4050if_getcapenable(if_t ifp)
4051{
4052	return ((struct ifnet *)ifp)->if_capenable;
4053}
4054
4055/*
4056 * This is largely undesirable because it ties ifnet to a device, but does
4057 * provide flexiblity for an embedded product vendor. Should be used with
4058 * the understanding that it violates the interface boundaries, and should be
4059 * a last resort only.
4060 */
4061int
4062if_setdev(if_t ifp, void *dev)
4063{
4064	return (0);
4065}
4066
4067int
4068if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
4069{
4070	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
4071	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
4072
4073	return (0);
4074}
4075
4076int
4077if_getdrvflags(if_t ifp)
4078{
4079	return ((struct ifnet *)ifp)->if_drv_flags;
4080}
4081
4082int
4083if_setdrvflags(if_t ifp, int flags)
4084{
4085	((struct ifnet *)ifp)->if_drv_flags = flags;
4086	return (0);
4087}
4088
4089
4090int
4091if_setflags(if_t ifp, int flags)
4092{
4093	((struct ifnet *)ifp)->if_flags = flags;
4094	return (0);
4095}
4096
4097int
4098if_setflagbits(if_t ifp, int set, int clear)
4099{
4100	((struct ifnet *)ifp)->if_flags |= set;
4101	((struct ifnet *)ifp)->if_flags &= ~clear;
4102
4103	return (0);
4104}
4105
4106int
4107if_getflags(if_t ifp)
4108{
4109	return ((struct ifnet *)ifp)->if_flags;
4110}
4111
4112int
4113if_clearhwassist(if_t ifp)
4114{
4115	((struct ifnet *)ifp)->if_hwassist = 0;
4116	return (0);
4117}
4118
4119int
4120if_sethwassistbits(if_t ifp, int toset, int toclear)
4121{
4122	((struct ifnet *)ifp)->if_hwassist |= toset;
4123	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
4124
4125	return (0);
4126}
4127
4128int
4129if_sethwassist(if_t ifp, int hwassist_bit)
4130{
4131	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
4132	return (0);
4133}
4134
4135int
4136if_gethwassist(if_t ifp)
4137{
4138	return ((struct ifnet *)ifp)->if_hwassist;
4139}
4140
4141int
4142if_setmtu(if_t ifp, int mtu)
4143{
4144	((struct ifnet *)ifp)->if_mtu = mtu;
4145	return (0);
4146}
4147
4148int
4149if_getmtu(if_t ifp)
4150{
4151	return ((struct ifnet *)ifp)->if_mtu;
4152}
4153
4154int
4155if_getmtu_family(if_t ifp, int family)
4156{
4157	struct domain *dp;
4158
4159	for (dp = domains; dp; dp = dp->dom_next) {
4160		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
4161			return (dp->dom_ifmtu((struct ifnet *)ifp));
4162	}
4163
4164	return (((struct ifnet *)ifp)->if_mtu);
4165}
4166
4167int
4168if_setsoftc(if_t ifp, void *softc)
4169{
4170	((struct ifnet *)ifp)->if_softc = softc;
4171	return (0);
4172}
4173
4174void *
4175if_getsoftc(if_t ifp)
4176{
4177	return ((struct ifnet *)ifp)->if_softc;
4178}
4179
4180void
4181if_setrcvif(struct mbuf *m, if_t ifp)
4182{
4183	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
4184}
4185
4186void
4187if_setvtag(struct mbuf *m, uint16_t tag)
4188{
4189	m->m_pkthdr.ether_vtag = tag;
4190}
4191
4192uint16_t
4193if_getvtag(struct mbuf *m)
4194{
4195
4196	return (m->m_pkthdr.ether_vtag);
4197}
4198
4199int
4200if_sendq_empty(if_t ifp)
4201{
4202	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
4203}
4204
4205struct ifaddr *
4206if_getifaddr(if_t ifp)
4207{
4208	return ((struct ifnet *)ifp)->if_addr;
4209}
4210
4211int
4212if_getamcount(if_t ifp)
4213{
4214	return ((struct ifnet *)ifp)->if_amcount;
4215}
4216
4217
4218int
4219if_setsendqready(if_t ifp)
4220{
4221	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
4222	return (0);
4223}
4224
4225int
4226if_setsendqlen(if_t ifp, int tx_desc_count)
4227{
4228	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
4229	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
4230
4231	return (0);
4232}
4233
4234int
4235if_vlantrunkinuse(if_t ifp)
4236{
4237	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
4238}
4239
4240int
4241if_input(if_t ifp, struct mbuf* sendmp)
4242{
4243	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
4244	return (0);
4245
4246}
4247
4248/* XXX */
4249#ifndef ETH_ADDR_LEN
4250#define ETH_ADDR_LEN 6
4251#endif
4252
4253int
4254if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
4255{
4256	struct ifmultiaddr *ifma;
4257	uint8_t *lmta = (uint8_t *)mta;
4258	int mcnt = 0;
4259
4260	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4261		if (ifma->ifma_addr->sa_family != AF_LINK)
4262			continue;
4263
4264		if (mcnt == max)
4265			break;
4266
4267		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
4268		    &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
4269		mcnt++;
4270	}
4271	*cnt = mcnt;
4272
4273	return (0);
4274}
4275
4276int
4277if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
4278{
4279	int error;
4280
4281	if_maddr_rlock(ifp);
4282	error = if_setupmultiaddr(ifp, mta, cnt, max);
4283	if_maddr_runlock(ifp);
4284	return (error);
4285}
4286
4287int
4288if_multiaddr_count(if_t ifp, int max)
4289{
4290	struct ifmultiaddr *ifma;
4291	int count;
4292
4293	count = 0;
4294	if_maddr_rlock(ifp);
4295	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4296		if (ifma->ifma_addr->sa_family != AF_LINK)
4297			continue;
4298		count++;
4299		if (count == max)
4300			break;
4301	}
4302	if_maddr_runlock(ifp);
4303	return (count);
4304}
4305
4306int
4307if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
4308{
4309	struct ifmultiaddr *ifma;
4310	int cnt = 0;
4311
4312	if_maddr_rlock(ifp);
4313	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4314		cnt += filter(arg, ifma, cnt);
4315	if_maddr_runlock(ifp);
4316	return (cnt);
4317}
4318
4319struct mbuf *
4320if_dequeue(if_t ifp)
4321{
4322	struct mbuf *m;
4323	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
4324
4325	return (m);
4326}
4327
4328int
4329if_sendq_prepend(if_t ifp, struct mbuf *m)
4330{
4331	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
4332	return (0);
4333}
4334
4335int
4336if_setifheaderlen(if_t ifp, int len)
4337{
4338	((struct ifnet *)ifp)->if_hdrlen = len;
4339	return (0);
4340}
4341
4342caddr_t
4343if_getlladdr(if_t ifp)
4344{
4345	return (IF_LLADDR((struct ifnet *)ifp));
4346}
4347
4348void *
4349if_gethandle(u_char type)
4350{
4351	return (if_alloc(type));
4352}
4353
4354void
4355if_bpfmtap(if_t ifh, struct mbuf *m)
4356{
4357	struct ifnet *ifp = (struct ifnet *)ifh;
4358
4359	BPF_MTAP(ifp, m);
4360}
4361
4362void
4363if_etherbpfmtap(if_t ifh, struct mbuf *m)
4364{
4365	struct ifnet *ifp = (struct ifnet *)ifh;
4366
4367	ETHER_BPF_MTAP(ifp, m);
4368}
4369
4370void
4371if_vlancap(if_t ifh)
4372{
4373	struct ifnet *ifp = (struct ifnet *)ifh;
4374	VLAN_CAPABILITIES(ifp);
4375}
4376
4377void
4378if_setinitfn(if_t ifp, void (*init_fn)(void *))
4379{
4380	((struct ifnet *)ifp)->if_init = init_fn;
4381}
4382
4383void
4384if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
4385{
4386	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
4387}
4388
4389void
4390if_setstartfn(if_t ifp, void (*start_fn)(if_t))
4391{
4392	((struct ifnet *)ifp)->if_start = (void *)start_fn;
4393}
4394
4395void
4396if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
4397{
4398	((struct ifnet *)ifp)->if_transmit = start_fn;
4399}
4400
4401void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
4402{
4403	((struct ifnet *)ifp)->if_qflush = flush_fn;
4404
4405}
4406
4407void
4408if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
4409{
4410
4411	ifp->if_get_counter = fn;
4412}
4413
4414/* Revisit these - These are inline functions originally. */
4415int
4416drbr_inuse_drv(if_t ifh, struct buf_ring *br)
4417{
4418	return drbr_inuse(ifh, br);
4419}
4420
4421struct mbuf*
4422drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
4423{
4424	return drbr_dequeue(ifh, br);
4425}
4426
4427int
4428drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
4429{
4430	return drbr_needs_enqueue(ifh, br);
4431}
4432
4433int
4434drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
4435{
4436	return drbr_enqueue(ifh, br, m);
4437
4438}
4439