1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: stable/11/sys/net/if.c 359024 2020-03-16 23:15:20Z brooks $
31 */
32
33#include "opt_compat.h"
34#include "opt_bpf.h"
35#include "opt_inet6.h"
36#include "opt_inet.h"
37
38#include <sys/param.h>
39#include <sys/types.h>
40#include <sys/conf.h>
41#include <sys/malloc.h>
42#include <sys/sbuf.h>
43#include <sys/bus.h>
44#include <sys/mbuf.h>
45#include <sys/systm.h>
46#include <sys/priv.h>
47#include <sys/proc.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/protosw.h>
51#include <sys/kernel.h>
52#include <sys/lock.h>
53#include <sys/refcount.h>
54#include <sys/module.h>
55#include <sys/rwlock.h>
56#include <sys/sockio.h>
57#include <sys/syslog.h>
58#include <sys/sysctl.h>
59#include <sys/sysent.h>
60#include <sys/taskqueue.h>
61#include <sys/domain.h>
62#include <sys/jail.h>
63#include <sys/priv.h>
64
65#include <machine/stdarg.h>
66#include <vm/uma.h>
67
68#include <net/bpf.h>
69#include <net/ethernet.h>
70#include <net/if.h>
71#include <net/if_arp.h>
72#include <net/if_clone.h>
73#include <net/if_dl.h>
74#include <net/if_types.h>
75#include <net/if_var.h>
76#include <net/if_media.h>
77#include <net/if_vlan_var.h>
78#include <net/radix.h>
79#include <net/route.h>
80#include <net/vnet.h>
81
82#if defined(INET) || defined(INET6)
83#include <net/ethernet.h>
84#include <netinet/in.h>
85#include <netinet/in_var.h>
86#include <netinet/ip.h>
87#include <netinet/ip_carp.h>
88#ifdef INET
89#include <netinet/if_ether.h>
90#endif /* INET */
91#ifdef INET6
92#include <netinet6/in6_var.h>
93#include <netinet6/in6_ifattach.h>
94#endif /* INET6 */
95#endif /* INET || INET6 */
96
97#include <security/mac/mac_framework.h>
98
99#ifdef COMPAT_FREEBSD32
100#include <sys/mount.h>
101#include <compat/freebsd32/freebsd32.h>
102
/*
 * 32-bit compat form of an ifreq buffer descriptor: the length and the
 * buffer address are both carried as 32-bit integers (see the member
 * comments for the native types they stand in for).
 */
struct ifreq_buffer32 {
	uint32_t	length;		/* (size_t) */
	uint32_t	buffer;		/* (void *) */
};
107
108/*
109 * Interface request structure used for socket
110 * ioctl's.  All interface ioctl's must have parameter
111 * definitions which begin with ifr_name.  The
112 * remainder may be interface specific.
113 */
struct ifreq32 {
	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
	union {
		struct sockaddr	ifru_addr;
		struct sockaddr	ifru_dstaddr;
		struct sockaddr	ifru_broadaddr;
		struct ifreq_buffer32 ifru_buffer;	/* 32-bit length/pointer pair */
		short		ifru_flags[2];
		short		ifru_index;
		int		ifru_jid;
		int		ifru_metric;
		int		ifru_mtu;
		int		ifru_phys;
		int		ifru_media;
		uint32_t	ifru_data;	/* 32-bit value; presumably a user pointer - confirm */
		int		ifru_cap[2];
		u_int		ifru_fib;
		u_char		ifru_vlan_pcp;
	} ifr_ifru;
};
/*
 * The 32-bit view is overlaid on the native struct ifreq (see union
 * ifreq_union), so it must have the same total size and place its
 * request union at the same offset.
 */
CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
    __offsetof(struct ifreq32, ifr_ifru));
137
/*
 * 32-bit compat form of the interface group request.  The ifgru_groups
 * member is a 32-bit integer here; presumably it stands in for a user
 * pointer in the native structure - confirm against struct ifgroupreq.
 */
struct ifgroupreq32 {
	char	ifgr_name[IFNAMSIZ];
	u_int	ifgr_len;
	union {
		char		ifgru_group[IFNAMSIZ];
		uint32_t	ifgru_groups;
	} ifgr_ifgru;
};
146
/*
 * 32-bit compat form of the interface media request; the ifm_ulist user
 * pointer is carried as a 32-bit integer.
 */
struct ifmediareq32 {
	char		ifm_name[IFNAMSIZ];
	int		ifm_current;
	int		ifm_mask;
	int		ifm_status;
	int		ifm_active;
	int		ifm_count;
	uint32_t	ifm_ulist;	/* (int *) */
};
/* Media ioctl command values re-derived for the ifmediareq32 argument type. */
#define	SIOCGIFMEDIA32	_IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
#define	SIOCGIFXMEDIA32	_IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)

/*
 * Expands to an extra "case" label for the ifgroupreq32 variant of cmd.
 * Without COMPAT_FREEBSD32 it expands to nothing.
 */
#define	_CASE_IOC_IFGROUPREQ_32(cmd)				\
    case _IOC_NEWTYPE((cmd), struct ifgroupreq32):
#else /* !COMPAT_FREEBSD32 */
#define _CASE_IOC_IFGROUPREQ_32(cmd)
#endif /* !COMPAT_FREEBSD32 */

/*
 * Case label(s) for a group ioctl: the compat label (if any) followed by
 * "case (cmd)".  The trailing colon is not part of the expansion and is
 * supplied at the point of use.
 */
#define CASE_IOC_IFGROUPREQ(cmd)	\
    _CASE_IOC_IFGROUPREQ_32(cmd)	\
    case (cmd)
168
/*
 * Overlay of the native ifreq with its 32-bit compat layout, so the same
 * storage can be viewed either way.  The compat member exists only when
 * COMPAT_FREEBSD32 is defined.
 */
union ifreq_union {
	struct ifreq	ifr;
#ifdef COMPAT_FREEBSD32
	struct ifreq32	ifr32;
#endif
};
175
/*
 * Overlay of the native ifgroupreq with its 32-bit compat layout.  The
 * compat member exists only when COMPAT_FREEBSD32 is defined.
 */
union ifgroupreq_union {
	struct ifgroupreq ifgr;
#ifdef COMPAT_FREEBSD32
	struct ifgroupreq32 ifgr32;
#endif
};
182
/*
 * sysctl tree for the link layer: the net.link node, its "generic" child,
 * and the tunables defined in this file.
 */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Exports the global ifqmaxlen, which ifq_init() uses as the default queue limit. */
SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
    &ifqmaxlen, 0, "max send queue size");

/* Log link state change events */
static int log_link_state_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

/* Log promiscuous mode change events */
static int log_promisc_mode_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
	&log_promisc_mode_change, 1,
	"log promiscuous mode change events");

/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
	&ifdescr_maxlen, 0,
	"administrative maximum length for interface description");

/* malloc type for if_description strings (freed in if_free_internal()). */
static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

/* global sx for non-critical path ifdescr */
static struct sx ifdescr_sx;
SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
214
/*
 * Function-pointer hooks.  They are defined here without initializers, so
 * as file-scope objects they start out NULL; presumably the corresponding
 * subsystem fills them in when it is present - confirm against the bridge,
 * netgraph, lagg and CARP code.
 */
void	(*bridge_linkstate_p)(struct ifnet *ifp);
void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void	(*carp_linkstate_p)(struct ifnet *ifp);
void	(*carp_demote_adj_p)(int, char *);
int	(*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *sa);
int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
int	(*carp_attach_p)(struct ifaddr *, int);
void	(*carp_detach_p)(struct ifaddr *, bool);
#endif
#ifdef INET
int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
    const struct in6_addr *taddr);
#endif

/* Dequeue hook taking an ifaltq and an int; explicitly NULL until set. */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
240
241/*
242 * XXX: Style; these should be sorted alphabetically, and unprototyped
243 * static functions should be prototyped. Currently they are sorted by
244 * declaration order.
245 */
246static void	if_attachdomain(void *);
247static void	if_attachdomain1(struct ifnet *);
248static int	ifconf(u_long, caddr_t);
249static void	if_freemulti(struct ifmultiaddr *);
250static void	if_grow(void);
251static void	if_input_default(struct ifnet *, struct mbuf *);
252static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
253static void	if_route(struct ifnet *, int flag, int fam);
254static int	if_setflag(struct ifnet *, int, int, int *, int);
255static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
256static void	if_unroute(struct ifnet *, int flag, int fam);
257static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
258static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
259static void	do_link_state_change(void *, int);
260static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
261static int	if_getgroupmembers(struct ifgroupreq *);
262static void	if_delgroups(struct ifnet *);
263static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
264static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
265#ifdef VIMAGE
266static void	if_vmove(struct ifnet *, struct vnet *);
267#endif
268
269#ifdef INET6
270/*
271 * XXX: declare here to avoid to include many inet6 related files..
272 * should be more generalized?
273 */
274extern void	nd6_setmtu(struct ifnet *);
275#endif
276
277/* ipsec helper hooks */
VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);

/* Highest interface index currently in use (see ifindex_alloc()). */
VNET_DEFINE(int, if_index);
/* Default send queue length, applied by ifq_init() when none is set. */
int	ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);

/* Index table limit; doubled by each successful if_grow(). */
static VNET_DEFINE(int, if_indexlim) = 8;

/* Table of ifnet by index. */
VNET_DEFINE(struct ifnet **, ifindex_table);

#define	V_if_indexlim		VNET(if_indexlim)
#define	V_ifindex_table		VNET(ifindex_table)

/*
 * The global network interface list (V_ifnet) and related state (such as
 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
 * an rwlock.  Either may be acquired shared to stabilize the list, but both
 * must be acquired writable to modify the list.  This model allows us to
 * both stabilize the interface list during interrupt thread processing, but
 * also to stabilize it over long-running ioctls, without introducing priority
 * inversions and deadlocks.
 */
struct rwlock ifnet_rwlock;
RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
struct sx ifnet_sxlock;
SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);

/*
 * The allocation of network interfaces is a rather non-atomic affair; we
 * need to select an index before we are ready to expose the interface for
 * use, so will use this pointer value to indicate reservation.
 */
#define	IFNET_HOLD	(void *)(uintptr_t)(-1)

/*
 * Per-type layer 2 allocate/free callbacks, indexed by the u_char
 * interface type; consulted by if_alloc() and if_free_internal().
 */
static	if_com_alloc_t *if_com_alloc[256];
static	if_com_free_t *if_com_free[256];

static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
321
322struct ifnet *
323ifnet_byindex_locked(u_short idx)
324{
325
326	if (idx > V_if_index)
327		return (NULL);
328	if (V_ifindex_table[idx] == IFNET_HOLD)
329		return (NULL);
330	return (V_ifindex_table[idx]);
331}
332
333struct ifnet *
334ifnet_byindex(u_short idx)
335{
336	struct ifnet *ifp;
337
338	IFNET_RLOCK_NOSLEEP();
339	ifp = ifnet_byindex_locked(idx);
340	IFNET_RUNLOCK_NOSLEEP();
341	return (ifp);
342}
343
344struct ifnet *
345ifnet_byindex_ref(u_short idx)
346{
347	struct ifnet *ifp;
348
349	IFNET_RLOCK_NOSLEEP();
350	ifp = ifnet_byindex_locked(idx);
351	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
352		IFNET_RUNLOCK_NOSLEEP();
353		return (NULL);
354	}
355	if_ref(ifp);
356	IFNET_RUNLOCK_NOSLEEP();
357	return (ifp);
358}
359
360/*
361 * Allocate an ifindex array entry; return 0 on success or an error on
362 * failure.
363 */
364static u_short
365ifindex_alloc(void)
366{
367	u_short idx;
368
369	IFNET_WLOCK_ASSERT();
370retry:
371	/*
372	 * Try to find an empty slot below V_if_index.  If we fail, take the
373	 * next slot.
374	 */
375	for (idx = 1; idx <= V_if_index; idx++) {
376		if (V_ifindex_table[idx] == NULL)
377			break;
378	}
379
380	/* Catch if_index overflow. */
381	if (idx >= V_if_indexlim) {
382		if_grow();
383		goto retry;
384	}
385	if (idx > V_if_index)
386		V_if_index = idx;
387	return (idx);
388}
389
390static void
391ifindex_free_locked(u_short idx)
392{
393
394	IFNET_WLOCK_ASSERT();
395
396	V_ifindex_table[idx] = NULL;
397	while (V_if_index > 0 &&
398	    V_ifindex_table[V_if_index] == NULL)
399		V_if_index--;
400}
401
/*
 * Release index slot idx: takes the ifnet write lock around
 * ifindex_free_locked().
 */
static void
ifindex_free(u_short idx)
{

	IFNET_WLOCK();
	ifindex_free_locked(idx);
	IFNET_WUNLOCK();
}
410
/*
 * Store ifp (an interface pointer, or the IFNET_HOLD marker as if_alloc()
 * does) into index table slot idx.  The ifnet write lock must be held.
 * idx is not range-checked here.
 */
static void
ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
{

	IFNET_WLOCK_ASSERT();

	V_ifindex_table[idx] = ifp;
}
419
/*
 * Store ifp into index table slot idx, taking the ifnet write lock around
 * ifnet_setbyindex_locked().
 */
static void
ifnet_setbyindex(u_short idx, struct ifnet *ifp)
{

	IFNET_WLOCK();
	ifnet_setbyindex_locked(idx, ifp);
	IFNET_WUNLOCK();
}
428
429struct ifaddr *
430ifaddr_byindex(u_short idx)
431{
432	struct ifnet *ifp;
433	struct ifaddr *ifa = NULL;
434
435	IFNET_RLOCK_NOSLEEP();
436	ifp = ifnet_byindex_locked(idx);
437	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
438		ifa_ref(ifa);
439	IFNET_RUNLOCK_NOSLEEP();
440	return (ifa);
441}
442
443/*
444 * Network interface utility routines.
445 *
446 * Routines with ifa_ifwith* names take sockaddr *'s as
447 * parameters.
448 */
449
/*
 * Per-vnet setup, registered through VNET_SYSINIT below: start with empty
 * interface and group lists, allocate the first index table, then
 * initialize the interface cloners.
 */
static void
vnet_if_init(const void *unused __unused)
{

	TAILQ_INIT(&V_ifnet);
	TAILQ_INIT(&V_ifg_head);
	/* if_grow() asserts the write lock, so take it around the call. */
	IFNET_WLOCK();
	if_grow();				/* create initial table */
	IFNET_WUNLOCK();
	vnet_if_clone_init();
}
VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
    NULL);
463
464#ifdef VIMAGE
/*
 * Per-vnet teardown counterpart of vnet_if_init(): asserts that the
 * interface and group lists are already empty, then frees the index table.
 */
static void
vnet_if_uninit(const void *unused __unused)
{

	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
	    "not empty", __func__, __LINE__, &V_ifnet));
	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
	    "not empty", __func__, __LINE__, &V_ifg_head));

	free((caddr_t)V_ifindex_table, M_IFNET);
}
VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
    vnet_if_uninit, NULL);
478
479static void
480vnet_if_return(const void *unused __unused)
481{
482	struct ifnet *ifp, *nifp;
483
484	/* Return all inherited interfaces to their parent vnets. */
485	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
486		if (ifp->if_home_vnet != ifp->if_vnet)
487			if_vmove(ifp, ifp->if_home_vnet);
488	}
489}
490VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
491    vnet_if_return, NULL);
492#endif
493
494static void
495if_grow(void)
496{
497	int oldlim;
498	u_int n;
499	struct ifnet **e;
500
501	IFNET_WLOCK_ASSERT();
502	oldlim = V_if_indexlim;
503	IFNET_WUNLOCK();
504	n = (oldlim << 1) * sizeof(*e);
505	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
506	IFNET_WLOCK();
507	if (V_if_indexlim != oldlim) {
508		free(e, M_IFNET);
509		return;
510	}
511	if (V_ifindex_table != NULL) {
512		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
513		free((caddr_t)V_ifindex_table, M_IFNET);
514	}
515	V_if_indexlim <<= 1;
516	V_ifindex_table = e;
517}
518
519/*
520 * Allocate a struct ifnet and an index for an interface.  A layer 2
521 * common structure will also be allocated if an allocation routine is
522 * registered for the passed type.
523 */
524struct ifnet *
525if_alloc(u_char type)
526{
527	struct ifnet *ifp;
528	u_short idx;
529
530	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
531	IFNET_WLOCK();
532	idx = ifindex_alloc();
533	ifnet_setbyindex_locked(idx, IFNET_HOLD);
534	IFNET_WUNLOCK();
535	ifp->if_index = idx;
536	ifp->if_type = type;
537	ifp->if_alloctype = type;
538#ifdef VIMAGE
539	ifp->if_vnet = curvnet;
540#endif
541	if (if_com_alloc[type] != NULL) {
542		ifp->if_l2com = if_com_alloc[type](type, ifp);
543		if (ifp->if_l2com == NULL) {
544			free(ifp, M_IFNET);
545			ifindex_free(idx);
546			return (NULL);
547		}
548	}
549
550	IF_ADDR_LOCK_INIT(ifp);
551	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
552	ifp->if_afdata_initialized = 0;
553	IF_AFDATA_LOCK_INIT(ifp);
554	TAILQ_INIT(&ifp->if_addrhead);
555	TAILQ_INIT(&ifp->if_multiaddrs);
556	TAILQ_INIT(&ifp->if_groups);
557#ifdef MAC
558	mac_ifnet_init(ifp);
559#endif
560	ifq_init(&ifp->if_snd, ifp);
561
562	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
563	for (int i = 0; i < IFCOUNTERS; i++)
564		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
565	ifp->if_get_counter = if_get_counter_default;
566	ifp->if_pcp = IFNET_PCP_NONE;
567	ifnet_setbyindex(ifp->if_index, ifp);
568	return (ifp);
569}
570
571/*
572 * Do the actual work of freeing a struct ifnet, and layer 2 common
573 * structure.  This call is made when the last reference to an
574 * interface is released.
575 */
576static void
577if_free_internal(struct ifnet *ifp)
578{
579
580	KASSERT((ifp->if_flags & IFF_DYING),
581	    ("if_free_internal: interface not dying"));
582
583	if (if_com_free[ifp->if_alloctype] != NULL)
584		if_com_free[ifp->if_alloctype](ifp->if_l2com,
585		    ifp->if_alloctype);
586
587#ifdef MAC
588	mac_ifnet_destroy(ifp);
589#endif /* MAC */
590	if (ifp->if_description != NULL)
591		free(ifp->if_description, M_IFDESCR);
592	IF_AFDATA_DESTROY(ifp);
593	IF_ADDR_LOCK_DESTROY(ifp);
594	ifq_delete(&ifp->if_snd);
595
596	for (int i = 0; i < IFCOUNTERS; i++)
597		counter_u64_free(ifp->if_counters[i]);
598
599	free(ifp, M_IFNET);
600}
601
602/*
603 * Deregister an interface and free the associated storage.
604 */
void
if_free(struct ifnet *ifp)
{

	/* Mark first so ifnet_byindex_ref() stops handing out references. */
	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */

	CURVNET_SET_QUIET(ifp->if_vnet);
	IFNET_WLOCK();
	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
	    ("%s: freeing unallocated ifnet", ifp->if_xname));

	/* Give the index back; the ifnet can no longer be found by index. */
	ifindex_free_locked(ifp->if_index);
	IFNET_WUNLOCK();

	/*
	 * Drop the reference taken in if_alloc().  The storage is released
	 * now only if that was the last one; otherwise the final if_rele()
	 * does it.
	 */
	if (refcount_release(&ifp->if_refcount))
		if_free_internal(ifp);
	CURVNET_RESTORE();
}
623
624/*
625 * Interfaces to keep an ifnet type-stable despite the possibility of the
626 * driver calling if_free().  If there are additional references, we defer
627 * freeing the underlying data structure.
628 */
/* Take an additional reference on ifp; pair with if_rele(). */
void
if_ref(struct ifnet *ifp)
{

	/* We don't assert the ifnet list lock here, but arguably should. */
	refcount_acquire(&ifp->if_refcount);
}
636
637void
638if_rele(struct ifnet *ifp)
639{
640
641	if (!refcount_release(&ifp->if_refcount))
642		return;
643	if_free_internal(ifp);
644}
645
646void
647ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
648{
649
650	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
651
652	if (ifq->ifq_maxlen == 0)
653		ifq->ifq_maxlen = ifqmaxlen;
654
655	ifq->altq_type = 0;
656	ifq->altq_disc = NULL;
657	ifq->altq_flags &= ALTQF_CANTCHANGE;
658	ifq->altq_tbr  = NULL;
659	ifq->altq_ifp  = ifp;
660}
661
/* Counterpart of ifq_init(): destroys the queue mutex and nothing else. */
void
ifq_delete(struct ifaltq *ifq)
{
	mtx_destroy(&ifq->ifq_mtx);
}
667
/*
 * Perform generic interface initialization tasks and attach the interface
 * to the list of "active" interfaces.  If vmove flag is set on entry
 * to if_attach_internal(), perform only a limited subset of initialization
 * tasks, given that we are moving from one vnet to another an ifnet which
 * has already been fully initialized.
 *
 * if_attach() is the plain entry point: it calls if_attach_internal()
 * with vmove clear and no cloner.
 *
 * Note that if_detach_internal() removes group membership unconditionally
 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
 * Thus, when if_vmove() is applied to a cloned interface, group membership
 * is lost while a cloned one always joins a group whose name is
 * ifc->ifc_name.  To recover this after if_detach_internal() and
 * if_attach_internal(), the cloner should be specified to
 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
 * attempts to join a group whose name is ifc->ifc_name.
 *
 * XXX:
 *  - The decision to return void and thus require this function to
 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
 */
void
if_attach(struct ifnet *ifp)
{

	if_attach_internal(ifp, 0, NULL);
}
696
697/*
698 * Compute the least common TSO limit.
699 */
700void
701if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
702{
703	/*
704	 * 1) If there is no limit currently, take the limit from
705	 * the network adapter.
706	 *
707	 * 2) If the network adapter has a limit below the current
708	 * limit, apply it.
709	 */
710	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
711	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
712		pmax->tsomaxbytes = ifp->if_hw_tsomax;
713	}
714	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
715	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
716		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
717	}
718	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
719	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
720		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
721	}
722}
723
724/*
725 * Update TSO limit of a network adapter.
726 *
727 * Returns zero if no change. Else non-zero.
728 */
729int
730if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
731{
732	int retval = 0;
733	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
734		ifp->if_hw_tsomax = pmax->tsomaxbytes;
735		retval++;
736	}
737	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
738		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
739		retval++;
740	}
741	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
742		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
743		retval++;
744	}
745	return (retval);
746}
747
/*
 * Common attach path behind if_attach().
 *
 * ifp   - interface to attach.  Panics unless ifp->if_index is non-zero
 *         and ifnet_byindex() maps that index back to ifp.
 * vmove - when non-zero, the first-time setup (MAC label creation,
 *         link-level address allocation, TSO defaults) is skipped; under
 *         VIMAGE the sdl_index of the existing AF_LINK addresses is
 *         refreshed instead.
 * ifc   - cloner; only used when vmove is set and ifc is non-NULL, in
 *         which case if_clone_addgroup() is called.
 */
static void
if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
		    ifp->if_xname);

#ifdef VIMAGE
	ifp->if_vnet = curvnet;
	/* The first vnet an interface is attached in becomes its home. */
	if (ifp->if_home_vnet == NULL)
		ifp->if_home_vnet = curvnet;
#endif

	if_addgroup(ifp, IFG_ALL);

	/* Restore group membership for cloned interfaces. */
	if (vmove && ifc != NULL)
		if_clone_addgroup(ifp, ifc);

	getmicrotime(&ifp->if_lastchange);
	ifp->if_epoch = time_uptime;

	/* Fill in default methods for anything the driver left unset. */
	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	    ("transmit and qflush must both either be set or both be NULL"));
	if (ifp->if_transmit == NULL) {
		ifp->if_transmit = if_transmit;
		ifp->if_qflush = if_qflush;
	}
	if (ifp->if_input == NULL)
		ifp->if_input = if_input_default;

	if (ifp->if_requestencap == NULL)
		ifp->if_requestencap = if_requestencap_default;

	if (!vmove) {
#ifdef MAC
		mac_ifnet_create(ifp);
#endif

		/*
		 * Create a Link Level name for this device.
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we
		 * can do a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/*
		 * One allocation holds the ifaddr followed by two
		 * sockaddr_dl areas of socksize bytes each: the address,
		 * then the netmask.
		 */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = ifa_alloc(ifasize, M_WAITOK);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_rtrequest = link_rtrequest;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* Step over the address area to the netmask area. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* The mask is all-ones over the bytes of the name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		/* Reliably crash if used uninitialized. */
		ifp->if_broadcastaddr = NULL;

		/* Ethernet interfaces also get a zeroed if_hw_addr buffer. */
		if (ifp->if_type == IFT_ETHER) {
			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
			    M_WAITOK | M_ZERO);
		}

#if defined(INET) || defined(INET6)
		/* Use defaults for TSO, if nothing is set */
		if (ifp->if_hw_tsomax == 0 &&
		    ifp->if_hw_tsomaxsegcount == 0 &&
		    ifp->if_hw_tsomaxsegsize == 0) {
			/*
			 * The TSO defaults needs to be such that an
			 * NFS mbuf list of 35 mbufs totalling just
			 * below 64K works and that a chain of mbufs
			 * can be defragged into at most 32 segments:
			 */
			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			ifp->if_hw_tsomaxsegcount = 35;
			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */

			/* XXX some drivers set IFCAP_TSO after ethernet attach */
			if (ifp->if_capabilities & IFCAP_TSO) {
				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
				    ifp->if_hw_tsomax,
				    ifp->if_hw_tsomaxsegcount,
				    ifp->if_hw_tsomaxsegsize);
			}
		}
#endif
	}
#ifdef VIMAGE
	else {
		/*
		 * Update the interface index in the link layer address
		 * of the interface.
		 */
		for (ifa = ifp->if_addr; ifa != NULL;
		    ifa = TAILQ_NEXT(ifa, ifa_link)) {
			if (ifa->ifa_addr->sa_family == AF_LINK) {
				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
				sdl->sdl_index = ifp->if_index;
			}
		}
	}
#endif

	/* Put the interface on the per-vnet interface list. */
	IFNET_WLOCK();
	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
#ifdef VIMAGE
	curvnet->vnet_ifcnt++;
#endif
	IFNET_WUNLOCK();

	/*
	 * Attach per-domain data now only if domain_init_status has reached
	 * 2; if_attachdomain() covers the interfaces already on the list
	 * when it runs.
	 */
	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
892
/*
 * SYSINIT hook (registered below): runs if_attachdomain1() on every
 * interface currently on the V_ifnet list.  The dummy argument is unused.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
		if_attachdomain1(ifp);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
    if_attachdomain, NULL);
903
904static void
905if_attachdomain1(struct ifnet *ifp)
906{
907	struct domain *dp;
908
909	/*
910	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
911	 * cannot lock ifp->if_afdata initialization, entirely.
912	 */
913	IF_AFDATA_LOCK(ifp);
914	if (ifp->if_afdata_initialized >= domain_init_status) {
915		IF_AFDATA_UNLOCK(ifp);
916		log(LOG_WARNING, "%s called more than once on %s\n",
917		    __func__, ifp->if_xname);
918		return;
919	}
920	ifp->if_afdata_initialized = domain_init_status;
921	IF_AFDATA_UNLOCK(ifp);
922
923	/* address family dependent data region */
924	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
925	for (dp = domains; dp; dp = dp->dom_next) {
926		if (dp->dom_ifattach)
927			ifp->if_afdata[dp->dom_family] =
928			    (*dp->dom_ifattach)(ifp);
929	}
930}
931
932/*
933 * Remove any unicast or broadcast network addresses from an interface.
934 */
935void
936if_purgeaddrs(struct ifnet *ifp)
937{
938	struct ifaddr *ifa, *next;
939
940	/* XXX cannot hold IF_ADDR_WLOCK over called functions. */
941	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
942		if (ifa->ifa_addr->sa_family == AF_LINK)
943			continue;
944#ifdef INET
945		/* XXX: Ugly!! ad hoc just for INET */
946		if (ifa->ifa_addr->sa_family == AF_INET) {
947			struct ifaliasreq ifr;
948
949			bzero(&ifr, sizeof(ifr));
950			ifr.ifra_addr = *ifa->ifa_addr;
951			if (ifa->ifa_dstaddr)
952				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
953			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
954			    NULL) == 0)
955				continue;
956		}
957#endif /* INET */
958#ifdef INET6
959		if (ifa->ifa_addr->sa_family == AF_INET6) {
960			in6_purgeaddr(ifa);
961			/* ifp_addrhead is already updated */
962			continue;
963		}
964#endif /* INET6 */
965		IF_ADDR_WLOCK(ifp);
966		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
967		IF_ADDR_WUNLOCK(ifp);
968		ifa_free(ifa);
969	}
970}
971
972/*
973 * Remove any multicast network addresses from an interface when an ifnet
974 * is going away.
975 */
976static void
977if_purgemaddrs(struct ifnet *ifp)
978{
979	struct ifmultiaddr *ifma;
980	struct ifmultiaddr *next;
981
982	IF_ADDR_WLOCK(ifp);
983	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
984		if_delmulti_locked(ifp, ifma, 1);
985	IF_ADDR_WUNLOCK(ifp);
986}
987
988/*
989 * Detach an interface, removing it from the list of "active" interfaces.
990 * If vmove flag is set on entry to if_detach_internal(), perform only a
991 * limited subset of cleanup tasks, given that we are moving an ifnet from
992 * one vnet to another, where it must be fully operational.
993 *
994 * XXXRW: There are some significant questions about event ordering, and
995 * how to prevent things from starting to use the interface during detach.
996 */
void
if_detach(struct ifnet *ifp)
{

	/* Run the detach in the context of the interface's own vnet. */
	CURVNET_SET_QUIET(ifp->if_vnet);
	/* Plain detach: vmove clear, no cloner wanted; the result is ignored. */
	if_detach_internal(ifp, 0, NULL);
	CURVNET_RESTORE();
}
1005
1006/*
1007 * The vmove flag, if set, indicates that we are called from a callpath
1008 * that is moving an interface to a different vnet instance.
1009 *
1010 * The shutdown flag, if set, indicates that we are called in the
1011 * process of shutting down a vnet instance.  Currently only the
1012 * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
1013 * on a vnet instance shutdown without this flag being set, e.g., when
 * the cloned interfaces are destroyed as first thing of teardown.
1015 */
/*
 * Arguments and result:
 *   ifp    interface to detach from the current vnet.
 *   vmove  non-zero when called on behalf of if_vmove(); in that case the
 *          interface is not flagged IFF_DYING, if_dead() is not called and
 *          the link-level address is left in place.
 *   ifcp   only written when vmove is set and ifcp is not NULL; receives
 *          the result of if_clone_findifc(ifp).
 * Returns ENOENT when ifp is not found on V_ifnet, and 0 otherwise.
 */
static int
if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
{
	struct ifaddr *ifa;
	int i;
	struct domain *dp;
 	struct ifnet *iter;
 	int found = 0;
#ifdef VIMAGE
	int shutdown;

	/*
	 * shutdown is 1 while the vnet state lies strictly between
	 * SI_SUB_VNET and SI_SUB_VNET_DONE, and 0 otherwise.
	 */
	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
#endif
	/*
	 * Unlink ifp from the interface list under the write lock.  Only a
	 * real detach (not a vmove) marks the interface as dying.
	 */
	IFNET_WLOCK();
	TAILQ_FOREACH(iter, &V_ifnet, if_link)
		if (iter == ifp) {
			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
			if (!vmove)
				ifp->if_flags |= IFF_DYING;
			found = 1;
			break;
		}
	IFNET_WUNLOCK();
	if (!found) {
		/*
		 * While we would want to panic here, we cannot
		 * guarantee that the interface is indeed still on
		 * the list given we don't hold locks all the way.
		 */
		return (ENOENT);
		/* The block below is compiled out and kept for reference. */
#if 0
		if (vmove)
			panic("%s: ifp=%p not on the ifnet tailq %p",
			    __func__, ifp, &V_ifnet);
		else
			return; /* XXX this should panic as well? */
#endif
	}

	/*
	 * At this point we know the interface still was on the ifnet list
	 * and we removed it so we are in a stable state.
	 */
#ifdef VIMAGE
	curvnet->vnet_ifcnt--;
#endif

	/*
	 * In any case (destroy or vmove) detach us from the groups
	 * and remove/wait for pending events on the taskq.
	 * XXX-BZ in theory an interface could still enqueue a taskq change?
	 */
	if_delgroups(ifp);

	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);

	/*
	 * Check if this is a cloned interface or not. Must do even if
	 * shutting down as a if_vmove_reclaim() would move the ifp and
	 * the if_clone_addgroup() will have a corrupted string otherwise
	 * from a gibberish pointer.
	 */
	if (vmove && ifcp != NULL)
		*ifcp = if_clone_findifc(ifp);

	if_down(ifp);

#ifdef VIMAGE
	/*
	 * On VNET shutdown abort here as the stack teardown will do all
	 * the work top-down for us.
	 */
	if (shutdown) {
		/* Give interface users the chance to clean up. */
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		/*
		 * In case of a vmove we are done here without error.
		 * If we would signal an error it would lead to the same
		 * abort as if we did not find the ifnet anymore.
		 * if_detach() calls us in void context and does not care
		 * about an early abort notification, so life is splendid :)
		 */
		goto finish_vnet_shutdown;
	}
#endif

	/*
	 * At this point we are not tearing down a VNET and are either
	 * going to destroy or vmove the interface and have to cleanup
	 * accordingly.
	 */

	/*
	 * Remove routes and flush queues.
	 */
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	if_purgeaddrs(ifp);

#ifdef INET
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif
	if_purgemaddrs(ifp);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	/* The devctl notification is only sent from the default vnet. */
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	if (!vmove) {
		/*
		 * Prevent further calls into the device driver via ifnet.
		 */
		if_dead(ifp);

		/*
		 * Remove link ifaddr pointer and maybe decrement if_index.
		 * Clean up all addresses.
		 */
		free(ifp->if_hw_addr, M_IFADDR);
		ifp->if_hw_addr = NULL;
		ifp->if_addr = NULL;

		/*
		 * We can now free link ifaddr.  Only the first entry of
		 * if_addrhead is unlinked here; the address lock is dropped
		 * before ifa_free() is called on it.
		 */
		IF_ADDR_WLOCK(ifp);
		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
			ifa = TAILQ_FIRST(&ifp->if_addrhead);
			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
			IF_ADDR_WUNLOCK(ifp);
			ifa_free(ifa);
		} else
			IF_ADDR_WUNLOCK(ifp);
	}

	rt_flushifroutes(ifp);

#ifdef VIMAGE
finish_vnet_shutdown:
#endif
	/*
	 * We cannot hold the lock over dom_ifdetach calls as they might
	 * sleep, for example trying to drain a callout, thus open up the
	 * theoretical race with re-attaching.
	 */
	IF_AFDATA_LOCK(ifp);
	i = ifp->if_afdata_initialized;
	ifp->if_afdata_initialized = 0;
	IF_AFDATA_UNLOCK(ifp);
	/*
	 * i holds the previous if_afdata_initialized value; the per-domain
	 * detach hooks only run when it was positive.
	 */
	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
			ifp->if_afdata[dp->dom_family] = NULL;
		}
	}

	return (0);
}
1192
1193#ifdef VIMAGE
1194/*
1195 * if_vmove() performs a limited version of if_detach() in current
1196 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
1197 * An attempt is made to shrink if_index in current vnet, find an
1198 * unused if_index in target vnet and calls if_grow() if necessary,
1199 * and finally find an unused if_xname for the target vnet.
1200 */
1201static void
1202if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
1203{
1204	struct if_clone *ifc;
1205#ifdef DEV_BPF
1206	u_int bif_dlt, bif_hdrlen;
1207#endif
1208	int rc;
1209
1210#ifdef DEV_BPF
1211 	/*
1212	 * if_detach_internal() will call the eventhandler to notify
1213	 * interface departure.  That will detach if_bpf.  We need to
1214	 * safe the dlt and hdrlen so we can re-attach it later.
1215	 */
1216	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
1217#endif
1218
1219	/*
1220	 * Detach from current vnet, but preserve LLADDR info, do not
1221	 * mark as dead etc. so that the ifnet can be reattached later.
1222	 * If we cannot find it, we lost the race to someone else.
1223	 */
1224	rc = if_detach_internal(ifp, 1, &ifc);
1225	if (rc != 0)
1226		return;
1227
1228	/*
1229	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
1230	 * the if_index for that vnet if possible.
1231	 *
1232	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
1233	 * or we'd lock on one vnet and unlock on another.
1234	 */
1235	IFNET_WLOCK();
1236	ifindex_free_locked(ifp->if_index);
1237	IFNET_WUNLOCK();
1238
1239	/*
1240	 * Perform interface-specific reassignment tasks, if provided by
1241	 * the driver.
1242	 */
1243	if (ifp->if_reassign != NULL)
1244		ifp->if_reassign(ifp, new_vnet, NULL);
1245
1246	/*
1247	 * Switch to the context of the target vnet.
1248	 */
1249	CURVNET_SET_QUIET(new_vnet);
1250
1251	IFNET_WLOCK();
1252	ifp->if_index = ifindex_alloc();
1253	ifnet_setbyindex_locked(ifp->if_index, ifp);
1254	IFNET_WUNLOCK();
1255
1256	if_attach_internal(ifp, 1, ifc);
1257
1258#ifdef DEV_BPF
1259	if (ifp->if_bpf == NULL)
1260		bpfattach(ifp, bif_dlt, bif_hdrlen);
1261#endif
1262
1263	CURVNET_RESTORE();
1264}
1265
1266/*
1267 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1268 */
1269static int
1270if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1271{
1272	struct prison *pr;
1273	struct ifnet *difp;
1274	int shutdown;
1275
1276	/* Try to find the prison within our visibility. */
1277	sx_slock(&allprison_lock);
1278	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1279	sx_sunlock(&allprison_lock);
1280	if (pr == NULL)
1281		return (ENXIO);
1282	prison_hold_locked(pr);
1283	mtx_unlock(&pr->pr_mtx);
1284
1285	/* Do not try to move the iface from and to the same prison. */
1286	if (pr->pr_vnet == ifp->if_vnet) {
1287		prison_free(pr);
1288		return (EEXIST);
1289	}
1290
1291	/* Make sure the named iface does not exists in the dst. prison/vnet. */
1292	/* XXX Lock interfaces to avoid races. */
1293	CURVNET_SET_QUIET(pr->pr_vnet);
1294	difp = ifunit(ifname);
1295	if (difp != NULL) {
1296		CURVNET_RESTORE();
1297		prison_free(pr);
1298		return (EEXIST);
1299	}
1300
1301	/* Make sure the VNET is stable. */
1302	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1303		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1304	if (shutdown) {
1305		CURVNET_RESTORE();
1306		prison_free(pr);
1307		return (EBUSY);
1308	}
1309	CURVNET_RESTORE();
1310
1311	/* Move the interface into the child jail/vnet. */
1312	if_vmove(ifp, pr->pr_vnet);
1313
1314	/* Report the new if_xname back to the userland. */
1315	sprintf(ifname, "%s", ifp->if_xname);
1316
1317	prison_free(pr);
1318	return (0);
1319}
1320
1321static int
1322if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1323{
1324	struct prison *pr;
1325	struct vnet *vnet_dst;
1326	struct ifnet *ifp;
1327 	int shutdown;
1328
1329	/* Try to find the prison within our visibility. */
1330	sx_slock(&allprison_lock);
1331	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1332	sx_sunlock(&allprison_lock);
1333	if (pr == NULL)
1334		return (ENXIO);
1335	prison_hold_locked(pr);
1336	mtx_unlock(&pr->pr_mtx);
1337
1338	/* Make sure the named iface exists in the source prison/vnet. */
1339	CURVNET_SET(pr->pr_vnet);
1340	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1341	if (ifp == NULL) {
1342		CURVNET_RESTORE();
1343		prison_free(pr);
1344		return (ENXIO);
1345	}
1346
1347	/* Do not try to move the iface from and to the same prison. */
1348	vnet_dst = TD_TO_VNET(td);
1349	if (vnet_dst == ifp->if_vnet) {
1350		CURVNET_RESTORE();
1351		prison_free(pr);
1352		return (EEXIST);
1353	}
1354
1355	/* Make sure the VNET is stable. */
1356	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1357		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1358	if (shutdown) {
1359		CURVNET_RESTORE();
1360		prison_free(pr);
1361		return (EBUSY);
1362	}
1363
1364	/* Get interface back from child jail/vnet. */
1365	if_vmove(ifp, vnet_dst);
1366	CURVNET_RESTORE();
1367
1368	/* Report the new if_xname back to the userland. */
1369	sprintf(ifname, "%s", ifp->if_xname);
1370
1371	prison_free(pr);
1372	return (0);
1373}
1374#endif /* VIMAGE */
1375
1376/*
1377 * Add a group to an interface
1378 */
1379int
1380if_addgroup(struct ifnet *ifp, const char *groupname)
1381{
1382	struct ifg_list		*ifgl;
1383	struct ifg_group	*ifg = NULL;
1384	struct ifg_member	*ifgm;
1385	int 			 new = 0;
1386
1387	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1388	    groupname[strlen(groupname) - 1] <= '9')
1389		return (EINVAL);
1390
1391	IFNET_WLOCK();
1392	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1393		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1394			IFNET_WUNLOCK();
1395			return (EEXIST);
1396		}
1397
1398	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1399	    M_NOWAIT)) == NULL) {
1400	    	IFNET_WUNLOCK();
1401		return (ENOMEM);
1402	}
1403
1404	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1405	    M_TEMP, M_NOWAIT)) == NULL) {
1406		free(ifgl, M_TEMP);
1407		IFNET_WUNLOCK();
1408		return (ENOMEM);
1409	}
1410
1411	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1412		if (!strcmp(ifg->ifg_group, groupname))
1413			break;
1414
1415	if (ifg == NULL) {
1416		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1417		    M_TEMP, M_NOWAIT)) == NULL) {
1418			free(ifgl, M_TEMP);
1419			free(ifgm, M_TEMP);
1420			IFNET_WUNLOCK();
1421			return (ENOMEM);
1422		}
1423		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1424		ifg->ifg_refcnt = 0;
1425		TAILQ_INIT(&ifg->ifg_members);
1426		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1427		new = 1;
1428	}
1429
1430	ifg->ifg_refcnt++;
1431	ifgl->ifgl_group = ifg;
1432	ifgm->ifgm_ifp = ifp;
1433
1434	IF_ADDR_WLOCK(ifp);
1435	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1436	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1437	IF_ADDR_WUNLOCK(ifp);
1438
1439	IFNET_WUNLOCK();
1440
1441	if (new)
1442		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1443	EVENTHANDLER_INVOKE(group_change_event, groupname);
1444
1445	return (0);
1446}
1447
1448/*
1449 * Remove a group from an interface
1450 */
1451int
1452if_delgroup(struct ifnet *ifp, const char *groupname)
1453{
1454	struct ifg_list		*ifgl;
1455	struct ifg_member	*ifgm;
1456
1457	IFNET_WLOCK();
1458	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1459		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1460			break;
1461	if (ifgl == NULL) {
1462		IFNET_WUNLOCK();
1463		return (ENOENT);
1464	}
1465
1466	IF_ADDR_WLOCK(ifp);
1467	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1468	IF_ADDR_WUNLOCK(ifp);
1469
1470	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1471		if (ifgm->ifgm_ifp == ifp)
1472			break;
1473
1474	if (ifgm != NULL) {
1475		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1476		free(ifgm, M_TEMP);
1477	}
1478
1479	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1480		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1481		IFNET_WUNLOCK();
1482		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1483		free(ifgl->ifgl_group, M_TEMP);
1484	} else
1485		IFNET_WUNLOCK();
1486
1487	free(ifgl, M_TEMP);
1488
1489	EVENTHANDLER_INVOKE(group_change_event, groupname);
1490
1491	return (0);
1492}
1493
1494/*
1495 * Remove an interface from all groups
1496 */
1497static void
1498if_delgroups(struct ifnet *ifp)
1499{
1500	struct ifg_list		*ifgl;
1501	struct ifg_member	*ifgm;
1502	char groupname[IFNAMSIZ];
1503
1504	IFNET_WLOCK();
1505	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1506		ifgl = TAILQ_FIRST(&ifp->if_groups);
1507
1508		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1509
1510		IF_ADDR_WLOCK(ifp);
1511		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1512		IF_ADDR_WUNLOCK(ifp);
1513
1514		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1515			if (ifgm->ifgm_ifp == ifp)
1516				break;
1517
1518		if (ifgm != NULL) {
1519			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1520			    ifgm_next);
1521			free(ifgm, M_TEMP);
1522		}
1523
1524		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1525			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1526			IFNET_WUNLOCK();
1527			EVENTHANDLER_INVOKE(group_detach_event,
1528			    ifgl->ifgl_group);
1529			free(ifgl->ifgl_group, M_TEMP);
1530		} else
1531			IFNET_WUNLOCK();
1532
1533		free(ifgl, M_TEMP);
1534
1535		EVENTHANDLER_INVOKE(group_change_event, groupname);
1536
1537		IFNET_WLOCK();
1538	}
1539	IFNET_WUNLOCK();
1540}
1541
1542static char *
1543ifgr_group_get(void *ifgrp)
1544{
1545	union ifgroupreq_union *ifgrup;
1546
1547	ifgrup = ifgrp;
1548#ifdef COMPAT_FREEBSD32
1549	if (SV_CURPROC_FLAG(SV_ILP32))
1550		return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]);
1551#endif
1552	return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]);
1553}
1554
1555static struct ifg_req *
1556ifgr_groups_get(void *ifgrp)
1557{
1558	union ifgroupreq_union *ifgrup;
1559
1560	ifgrup = ifgrp;
1561#ifdef COMPAT_FREEBSD32
1562	if (SV_CURPROC_FLAG(SV_ILP32))
1563		return ((struct ifg_req *)(uintptr_t)
1564		    ifgrup->ifgr32.ifgr_ifgru.ifgru_groups);
1565#endif
1566	return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups);
1567}
1568
1569/*
1570 * Stores all groups from an interface in memory pointed to by ifgr.
1571 */
1572static int
1573if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
1574{
1575	int			 len, error;
1576	struct ifg_list		*ifgl;
1577	struct ifg_req		 ifgrq, *ifgp;
1578
1579	if (ifgr->ifgr_len == 0) {
1580		IF_ADDR_RLOCK(ifp);
1581		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1582			ifgr->ifgr_len += sizeof(struct ifg_req);
1583		IF_ADDR_RUNLOCK(ifp);
1584		return (0);
1585	}
1586
1587	len = ifgr->ifgr_len;
1588	ifgp = ifgr_groups_get(ifgr);
1589	/* XXX: wire */
1590	IF_ADDR_RLOCK(ifp);
1591	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1592		if (len < sizeof(ifgrq)) {
1593			IF_ADDR_RUNLOCK(ifp);
1594			return (EINVAL);
1595		}
1596		bzero(&ifgrq, sizeof ifgrq);
1597		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1598		    sizeof(ifgrq.ifgrq_group));
1599		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1600		    	IF_ADDR_RUNLOCK(ifp);
1601			return (error);
1602		}
1603		len -= sizeof(ifgrq);
1604		ifgp++;
1605	}
1606	IF_ADDR_RUNLOCK(ifp);
1607
1608	return (0);
1609}
1610
1611/*
1612 * Stores all members of a group in memory pointed to by igfr
1613 */
1614static int
1615if_getgroupmembers(struct ifgroupreq *ifgr)
1616{
1617	struct ifg_group	*ifg;
1618	struct ifg_member	*ifgm;
1619	struct ifg_req		 ifgrq, *ifgp;
1620	int			 len, error;
1621
1622	IFNET_RLOCK();
1623	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1624		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1625			break;
1626	if (ifg == NULL) {
1627		IFNET_RUNLOCK();
1628		return (ENOENT);
1629	}
1630
1631	if (ifgr->ifgr_len == 0) {
1632		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1633			ifgr->ifgr_len += sizeof(ifgrq);
1634		IFNET_RUNLOCK();
1635		return (0);
1636	}
1637
1638	len = ifgr->ifgr_len;
1639	ifgp = ifgr_groups_get(ifgr);
1640	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1641		if (len < sizeof(ifgrq)) {
1642			IFNET_RUNLOCK();
1643			return (EINVAL);
1644		}
1645		bzero(&ifgrq, sizeof ifgrq);
1646		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1647		    sizeof(ifgrq.ifgrq_member));
1648		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1649			IFNET_RUNLOCK();
1650			return (error);
1651		}
1652		len -= sizeof(ifgrq);
1653		ifgp++;
1654	}
1655	IFNET_RUNLOCK();
1656
1657	return (0);
1658}
1659
1660/*
1661 * Return counter values from counter(9)s stored in ifnet.
1662 */
/*
 * Fetch the current value of counter `cnt' from ifp->if_counters[].
 * cnt is asserted (KASSERT) to be below IFCOUNTERS.
 */
uint64_t
if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
{

	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));

	return (counter_u64_fetch(ifp->if_counters[cnt]));
}
1671
1672/*
1673 * Increase an ifnet counter. Usually used for counters shared
1674 * between the stack and a driver, but function supports them all.
1675 */
/*
 * Add `inc' to counter `cnt' in ifp->if_counters[].
 * cnt is asserted (KASSERT) to be below IFCOUNTERS.
 */
void
if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
{

	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));

	counter_u64_add(ifp->if_counters[cnt], inc);
}
1684
1685/*
1686 * Copy data from ifnet to userland API structure if_data.
1687 */
1688void
1689if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1690{
1691
1692	ifd->ifi_type = ifp->if_type;
1693	ifd->ifi_physical = 0;
1694	ifd->ifi_addrlen = ifp->if_addrlen;
1695	ifd->ifi_hdrlen = ifp->if_hdrlen;
1696	ifd->ifi_link_state = ifp->if_link_state;
1697	ifd->ifi_vhid = 0;
1698	ifd->ifi_datalen = sizeof(struct if_data);
1699	ifd->ifi_mtu = ifp->if_mtu;
1700	ifd->ifi_metric = ifp->if_metric;
1701	ifd->ifi_baudrate = ifp->if_baudrate;
1702	ifd->ifi_hwassist = ifp->if_hwassist;
1703	ifd->ifi_epoch = ifp->if_epoch;
1704	ifd->ifi_lastchange = ifp->if_lastchange;
1705
1706	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1707	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1708	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1709	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1710	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1711	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1712	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1713	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1714	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1715	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1716	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1717	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1718}
1719
1720/*
1721 * Wrapper functions for struct ifnet address list locking macros.  These are
1722 * used by kernel modules to avoid encoding programming interface or binary
1723 * interface assumptions that may be violated when kernel-internal locking
1724 * approaches change.
1725 */
/* Function form of IF_ADDR_RLOCK(): read-lock ifp's address list. */
void
if_addr_rlock(struct ifnet *ifp)
{

	IF_ADDR_RLOCK(ifp);
}
1732
/* Function form of IF_ADDR_RUNLOCK(): drop the address list read lock. */
void
if_addr_runlock(struct ifnet *ifp)
{

	IF_ADDR_RUNLOCK(ifp);
}
1739
/*
 * Same lock operation as if_addr_rlock(), but taking the if_t handle,
 * which is cast to struct ifnet * here.
 */
void
if_maddr_rlock(if_t ifp)
{

	IF_ADDR_RLOCK((struct ifnet *)ifp);
}
1746
/*
 * Same unlock operation as if_addr_runlock(), but taking the if_t handle,
 * which is cast to struct ifnet * here.
 */
void
if_maddr_runlock(if_t ifp)
{

	IF_ADDR_RUNLOCK((struct ifnet *)ifp);
}
1753
1754/*
1755 * Initialization, destruction and refcounting functions for ifaddrs.
1756 */
1757struct ifaddr *
1758ifa_alloc(size_t size, int flags)
1759{
1760	struct ifaddr *ifa;
1761
1762	KASSERT(size >= sizeof(struct ifaddr),
1763	    ("%s: invalid size %zu", __func__, size));
1764
1765	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1766	if (ifa == NULL)
1767		return (NULL);
1768
1769	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1770		goto fail;
1771	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1772		goto fail;
1773	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1774		goto fail;
1775	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1776		goto fail;
1777
1778	refcount_init(&ifa->ifa_refcnt, 1);
1779
1780	return (ifa);
1781
1782fail:
1783	/* free(NULL) is okay */
1784	counter_u64_free(ifa->ifa_opackets);
1785	counter_u64_free(ifa->ifa_ipackets);
1786	counter_u64_free(ifa->ifa_obytes);
1787	counter_u64_free(ifa->ifa_ibytes);
1788	free(ifa, M_IFADDR);
1789
1790	return (NULL);
1791}
1792
/* Take an additional reference on ifa (pairs with ifa_free()). */
void
ifa_ref(struct ifaddr *ifa)
{

	refcount_acquire(&ifa->ifa_refcnt);
}
1799
1800void
1801ifa_free(struct ifaddr *ifa)
1802{
1803
1804	if (refcount_release(&ifa->ifa_refcnt)) {
1805		counter_u64_free(ifa->ifa_opackets);
1806		counter_u64_free(ifa->ifa_ipackets);
1807		counter_u64_free(ifa->ifa_obytes);
1808		counter_u64_free(ifa->ifa_ibytes);
1809		free(ifa, M_IFADDR);
1810	}
1811}
1812
1813static int
1814ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
1815    struct sockaddr *ia)
1816{
1817	int error;
1818	struct rt_addrinfo info;
1819	struct sockaddr_dl null_sdl;
1820	struct ifnet *ifp;
1821
1822	ifp = ifa->ifa_ifp;
1823
1824	bzero(&info, sizeof(info));
1825	if (cmd != RTM_DELETE)
1826		info.rti_ifp = V_loif;
1827	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
1828	info.rti_info[RTAX_DST] = ia;
1829	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
1830	link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
1831
1832	error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
1833
1834	if (error == 0 ||
1835	    (cmd == RTM_ADD && error == EEXIST) ||
1836	    (cmd == RTM_DELETE && (error == ENOENT || error == ESRCH)))
1837		return (error);
1838
1839	log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
1840		__func__, otype, if_name(ifp), error);
1841
1842	return (error);
1843}
1844
/*
 * Request insertion (RTM_ADD) of the loopback route for `ia'; returns the
 * result of ifa_maintain_loopback_route().
 */
int
ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{

	return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
}
1851
/*
 * Request deletion (RTM_DELETE) of the loopback route for `ia'; returns the
 * result of ifa_maintain_loopback_route().
 */
int
ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{

	return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
}
1858
/*
 * Request a change (RTM_CHANGE) of the loopback route for `ia'; returns the
 * result of ifa_maintain_loopback_route().
 */
int
ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{

	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
}
1865
1866/*
1867 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
1868 * structs used to represent other address families, it is necessary
1869 * to perform a different comparison.
1870 */
1871
/*
 * True when both sockaddr_dl have the same sdl_len and their link-level
 * address bytes (CLLADDR) compare equal over a1's sdl_alen bytes.
 * Both arguments are evaluated more than once.
 */
#define	sa_dl_equal(a1, a2)	\
	((((const struct sockaddr_dl *)(a1))->sdl_len ==		\
	 ((const struct sockaddr_dl *)(a2))->sdl_len) &&		\
	 (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	       CLLADDR((const struct sockaddr_dl *)(a2)),		\
	       ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
1878
1879/*
1880 * Locate an interface based on a complete address.
1881 */
1882/*ARGSUSED*/
1883static struct ifaddr *
1884ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
1885{
1886	struct ifnet *ifp;
1887	struct ifaddr *ifa;
1888
1889	IFNET_RLOCK_NOSLEEP();
1890	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1891		IF_ADDR_RLOCK(ifp);
1892		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1893			if (ifa->ifa_addr->sa_family != addr->sa_family)
1894				continue;
1895			if (sa_equal(addr, ifa->ifa_addr)) {
1896				if (getref)
1897					ifa_ref(ifa);
1898				IF_ADDR_RUNLOCK(ifp);
1899				goto done;
1900			}
1901			/* IP6 doesn't have broadcast */
1902			if ((ifp->if_flags & IFF_BROADCAST) &&
1903			    ifa->ifa_broadaddr &&
1904			    ifa->ifa_broadaddr->sa_len != 0 &&
1905			    sa_equal(ifa->ifa_broadaddr, addr)) {
1906				if (getref)
1907					ifa_ref(ifa);
1908				IF_ADDR_RUNLOCK(ifp);
1909				goto done;
1910			}
1911		}
1912		IF_ADDR_RUNLOCK(ifp);
1913	}
1914	ifa = NULL;
1915done:
1916	IFNET_RUNLOCK_NOSLEEP();
1917	return (ifa);
1918}
1919
/*
 * Look up the ifaddr matching addr.  The lookup is done with getref set,
 * so a non-NULL result carries a reference taken with ifa_ref().
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr)
{

	return (ifa_ifwithaddr_internal(addr, 1));
}
1926
/*
 * Return non-zero when some ifaddr matches addr.  No reference is taken
 * (getref is 0); only the existence of a match is reported.
 */
int
ifa_ifwithaddr_check(const struct sockaddr *addr)
{

	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
}
1933
1934/*
1935 * Locate an interface based on the broadcast address.
1936 */
1937/* ARGSUSED */
1938struct ifaddr *
1939ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1940{
1941	struct ifnet *ifp;
1942	struct ifaddr *ifa;
1943
1944	IFNET_RLOCK_NOSLEEP();
1945	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1946		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1947			continue;
1948		IF_ADDR_RLOCK(ifp);
1949		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1950			if (ifa->ifa_addr->sa_family != addr->sa_family)
1951				continue;
1952			if ((ifp->if_flags & IFF_BROADCAST) &&
1953			    ifa->ifa_broadaddr &&
1954			    ifa->ifa_broadaddr->sa_len != 0 &&
1955			    sa_equal(ifa->ifa_broadaddr, addr)) {
1956				ifa_ref(ifa);
1957				IF_ADDR_RUNLOCK(ifp);
1958				goto done;
1959			}
1960		}
1961		IF_ADDR_RUNLOCK(ifp);
1962	}
1963	ifa = NULL;
1964done:
1965	IFNET_RUNLOCK_NOSLEEP();
1966	return (ifa);
1967}
1968
1969/*
1970 * Locate the point to point interface with a given destination address.
1971 */
1972/*ARGSUSED*/
1973struct ifaddr *
1974ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1975{
1976	struct ifnet *ifp;
1977	struct ifaddr *ifa;
1978
1979	IFNET_RLOCK_NOSLEEP();
1980	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1981		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1982			continue;
1983		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1984			continue;
1985		IF_ADDR_RLOCK(ifp);
1986		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1987			if (ifa->ifa_addr->sa_family != addr->sa_family)
1988				continue;
1989			if (ifa->ifa_dstaddr != NULL &&
1990			    sa_equal(addr, ifa->ifa_dstaddr)) {
1991				ifa_ref(ifa);
1992				IF_ADDR_RUNLOCK(ifp);
1993				goto done;
1994			}
1995		}
1996		IF_ADDR_RUNLOCK(ifp);
1997	}
1998	ifa = NULL;
1999done:
2000	IFNET_RUNLOCK_NOSLEEP();
2001	return (ifa);
2002}
2003
2004/*
 * Find an interface on a specific network.  If several match, the
 * most specific one is chosen.
2007 */
/*
 * Arguments and result:
 *   addr        address to find a matching network for.
 *   ignore_ptp  when non-zero, the AF_INET point-to-point destination
 *               match is skipped and such addresses go through the
 *               netmask comparison like any other.
 *   fibnum      only interfaces in this fib are considered, unless it is
 *               RT_ALL_FIBS.
 * Returns the selected ifaddr with a reference taken via ifa_ref(), or
 * NULL when nothing matched.  For AF_LINK with a usable sdl_index the
 * result of ifaddr_byindex() is returned as is (NOTE(review): whether that
 * result is referenced is not visible here - confirm).
 */
struct ifaddr *
ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	const char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
		return (ifaddr_byindex(sdl->sdl_index));
	}

	/*
	 * Scan through each interface, looking for ones that have addresses
	 * in this address family and the requested fib.  Maintain a reference
	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
	 * kept it stable when we move onto the next interface.
	 */
	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
			continue;
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			const char *cp, *cp2, *cp3;

			/*
			 * The `next' label sits on the continue statement so
			 * that the byte comparison loop further down can skip
			 * to the next address with a goto.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET &&
			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr)) {
					/*
					 * Exact destination match: return it
					 * right away.  A candidate still held
					 * in ifa_maybe is released at `done'.
					 */
					ifa_ref(ifa);
					IF_ADDR_RUNLOCK(ifp);
					goto done;
				}
			} else {
				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len
					+ (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one), or if the virtual status
				 * of new prefix is better than of the old one,
				 * then remember the new one before continuing
				 * to search for an even better one.
				 */
				if (ifa_maybe == NULL ||
				    ifa_preferred(ifa_maybe, ifa) ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask)) {
					/* Swap the reference to the new one. */
					if (ifa_maybe != NULL)
						ifa_free(ifa_maybe);
					ifa_maybe = ifa;
					ifa_ref(ifa_maybe);
				}
			}
		}
		IF_ADDR_RUNLOCK(ifp);
	}
	/*
	 * No exact match: the best candidate (if any) becomes the result and
	 * its reference is handed over to the caller, so ifa_maybe is cleared
	 * to keep the cleanup below from releasing it.
	 */
	ifa = ifa_maybe;
	ifa_maybe = NULL;
done:
	IFNET_RUNLOCK_NOSLEEP();
	if (ifa_maybe != NULL)
		ifa_free(ifa_maybe);
	return (ifa);
}
2106
2107/*
2108 * Find an interface address specific to an interface best matching
2109 * a given address.
2110 */
2111struct ifaddr *
2112ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2113{
2114	struct ifaddr *ifa;
2115	const char *cp, *cp2, *cp3;
2116	char *cplim;
2117	struct ifaddr *ifa_maybe = NULL;
2118	u_int af = addr->sa_family;
2119
2120	if (af >= AF_MAX)
2121		return (NULL);
2122	IF_ADDR_RLOCK(ifp);
2123	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2124		if (ifa->ifa_addr->sa_family != af)
2125			continue;
2126		if (ifa_maybe == NULL)
2127			ifa_maybe = ifa;
2128		if (ifa->ifa_netmask == 0) {
2129			if (sa_equal(addr, ifa->ifa_addr) ||
2130			    (ifa->ifa_dstaddr &&
2131			    sa_equal(addr, ifa->ifa_dstaddr)))
2132				goto done;
2133			continue;
2134		}
2135		if (ifp->if_flags & IFF_POINTOPOINT) {
2136			if (sa_equal(addr, ifa->ifa_dstaddr))
2137				goto done;
2138		} else {
2139			cp = addr->sa_data;
2140			cp2 = ifa->ifa_addr->sa_data;
2141			cp3 = ifa->ifa_netmask->sa_data;
2142			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2143			for (; cp3 < cplim; cp3++)
2144				if ((*cp++ ^ *cp2++) & *cp3)
2145					break;
2146			if (cp3 == cplim)
2147				goto done;
2148		}
2149	}
2150	ifa = ifa_maybe;
2151done:
2152	if (ifa != NULL)
2153		ifa_ref(ifa);
2154	IF_ADDR_RUNLOCK(ifp);
2155	return (ifa);
2156}
2157
2158/*
2159 * See whether new ifa is better than current one:
2160 * 1) A non-virtual one is preferred over virtual.
2161 * 2) A virtual in master state preferred over any other state.
2162 *
2163 * Used in several address selecting functions.
2164 */
2165int
2166ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2167{
2168
2169	return (cur->ifa_carp && (!next->ifa_carp ||
2170	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2171}
2172
2173#include <net/if_llatbl.h>
2174
2175/*
2176 * Default action when installing a route with a Link Level gateway.
2177 * Lookup an appropriate real ifa to point to.
2178 * This should be moved to /sys/net/link.c eventually.
2179 */
2180static void
2181link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
2182{
2183	struct ifaddr *ifa, *oifa;
2184	struct sockaddr *dst;
2185	struct ifnet *ifp;
2186
2187	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
2188	    ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
2189		return;
2190	ifa = ifaof_ifpforaddr(dst, ifp);
2191	if (ifa) {
2192		oifa = rt->rt_ifa;
2193		rt->rt_ifa = ifa;
2194		ifa_free(oifa);
2195		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2196			ifa->ifa_rtrequest(cmd, rt, info);
2197	}
2198}
2199
/*
 * Allocate `size' bytes of M_TEMP memory for a sockaddr_dl.  `flags' is
 * handed to malloc() unchanged and the malloc() result is returned as is.
 */
struct sockaddr_dl *
link_alloc_sdl(size_t size, int flags)
{

	return (malloc(size, M_TEMP, flags));
}
2206
/* Release a sockaddr with free(9) using the M_TEMP malloc type. */
void
link_free_sdl(struct sockaddr *sa)
{
	free(sa, M_TEMP);
}
2212
2213/*
2214 * Fills in given sdl with interface basic info.
2215 * Returns pointer to filled sdl.
2216 */
2217struct sockaddr_dl *
2218link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2219{
2220	struct sockaddr_dl *sdl;
2221
2222	sdl = (struct sockaddr_dl *)paddr;
2223	memset(sdl, 0, sizeof(struct sockaddr_dl));
2224	sdl->sdl_len = sizeof(struct sockaddr_dl);
2225	sdl->sdl_family = AF_LINK;
2226	sdl->sdl_index = ifp->if_index;
2227	sdl->sdl_type = iftype;
2228
2229	return (sdl);
2230}
2231
2232/*
2233 * Mark an interface down and notify protocols of
2234 * the transition.
2235 */
2236static void
2237if_unroute(struct ifnet *ifp, int flag, int fam)
2238{
2239	struct ifaddr *ifa;
2240
2241	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2242
2243	ifp->if_flags &= ~flag;
2244	getmicrotime(&ifp->if_lastchange);
2245	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2246		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2247			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2248	ifp->if_qflush(ifp);
2249
2250	if (ifp->if_carp)
2251		(*carp_linkstate_p)(ifp);
2252	rt_ifmsg(ifp);
2253}
2254
2255/*
2256 * Mark an interface up and notify protocols of
2257 * the transition.
2258 */
2259static void
2260if_route(struct ifnet *ifp, int flag, int fam)
2261{
2262	struct ifaddr *ifa;
2263
2264	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
2265
2266	ifp->if_flags |= flag;
2267	getmicrotime(&ifp->if_lastchange);
2268	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2269		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2270			pfctlinput(PRC_IFUP, ifa->ifa_addr);
2271	if (ifp->if_carp)
2272		(*carp_linkstate_p)(ifp);
2273	rt_ifmsg(ifp);
2274#ifdef INET6
2275	in6_if_up(ifp);
2276#endif
2277}
2278
/*
 * VLAN hooks, kept here as function pointers.  Nothing in this part of the
 * file assigns them; presumably if_vlan fills them in when it is present
 * (TODO confirm).  vlan_link_state_p is only called from
 * do_link_state_change() when the interface has a non-NULL if_vlantrunk.
 */
void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
int	(*vlan_setcookie_p)(struct ifnet *, void *);
void	*(*vlan_cookie_p)(struct ifnet *);
2286
2287/*
2288 * Handle a change in the interface link state. To avoid LORs
2289 * between driver lock and upper layer locks, as well as possible
2290 * recursions, we post event to taskqueue, and all job
2291 * is done in static do_link_state_change().
2292 */
2293void
2294if_link_state_change(struct ifnet *ifp, int link_state)
2295{
2296	/* Return if state hasn't changed. */
2297	if (ifp->if_link_state == link_state)
2298		return;
2299
2300	ifp->if_link_state = link_state;
2301
2302	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2303}
2304
2305static void
2306do_link_state_change(void *arg, int pending)
2307{
2308	struct ifnet *ifp = (struct ifnet *)arg;
2309	int link_state = ifp->if_link_state;
2310	CURVNET_SET(ifp->if_vnet);
2311
2312	/* Notify that the link state has changed. */
2313	rt_ifmsg(ifp);
2314	if (ifp->if_vlantrunk != NULL)
2315		(*vlan_link_state_p)(ifp);
2316
2317	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2318	    ifp->if_l2com != NULL)
2319		(*ng_ether_link_state_p)(ifp, link_state);
2320	if (ifp->if_carp)
2321		(*carp_linkstate_p)(ifp);
2322	if (ifp->if_bridge)
2323		(*bridge_linkstate_p)(ifp);
2324	if (ifp->if_lagg)
2325		(*lagg_linkstate_p)(ifp, link_state);
2326
2327	if (IS_DEFAULT_VNET(curvnet))
2328		devctl_notify("IFNET", ifp->if_xname,
2329		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2330		    NULL);
2331	if (pending > 1)
2332		if_printf(ifp, "%d link states coalesced\n", pending);
2333	if (log_link_state_change)
2334		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
2335		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2336	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2337	CURVNET_RESTORE();
2338}
2339
2340/*
2341 * Mark an interface down and notify protocols of
2342 * the transition.
2343 */
2344void
2345if_down(struct ifnet *ifp)
2346{
2347
2348	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
2349	if_unroute(ifp, IFF_UP, AF_UNSPEC);
2350}
2351
2352/*
2353 * Mark an interface up and notify protocols of
2354 * the transition.
2355 */
2356void
2357if_up(struct ifnet *ifp)
2358{
2359
2360	if_route(ifp, IFF_UP, AF_UNSPEC);
2361	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
2362}
2363
2364/*
2365 * Flush an interface queue.
2366 */
2367void
2368if_qflush(struct ifnet *ifp)
2369{
2370	struct mbuf *m, *n;
2371	struct ifaltq *ifq;
2372
2373	ifq = &ifp->if_snd;
2374	IFQ_LOCK(ifq);
2375#ifdef ALTQ
2376	if (ALTQ_IS_ENABLED(ifq))
2377		ALTQ_PURGE(ifq);
2378#endif
2379	n = ifq->ifq_head;
2380	while ((m = n) != NULL) {
2381		n = m->m_nextpkt;
2382		m_freem(m);
2383	}
2384	ifq->ifq_head = 0;
2385	ifq->ifq_tail = 0;
2386	ifq->ifq_len = 0;
2387	IFQ_UNLOCK(ifq);
2388}
2389
2390/*
2391 * Map interface name to interface structure pointer, with or without
2392 * returning a reference.
2393 */
2394struct ifnet *
2395ifunit_ref(const char *name)
2396{
2397	struct ifnet *ifp;
2398
2399	IFNET_RLOCK_NOSLEEP();
2400	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2401		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2402		    !(ifp->if_flags & IFF_DYING))
2403			break;
2404	}
2405	if (ifp != NULL)
2406		if_ref(ifp);
2407	IFNET_RUNLOCK_NOSLEEP();
2408	return (ifp);
2409}
2410
2411struct ifnet *
2412ifunit(const char *name)
2413{
2414	struct ifnet *ifp;
2415
2416	IFNET_RLOCK_NOSLEEP();
2417	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2418		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2419			break;
2420	}
2421	IFNET_RUNLOCK_NOSLEEP();
2422	return (ifp);
2423}
2424
2425void *
2426ifr_buffer_get_buffer(void *data)
2427{
2428	union ifreq_union *ifrup;
2429
2430	ifrup = data;
2431#ifdef COMPAT_FREEBSD32
2432	if (SV_CURPROC_FLAG(SV_ILP32))
2433		return ((void *)(uintptr_t)
2434		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
2435#endif
2436	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
2437}
2438
2439static void
2440ifr_buffer_set_buffer_null(void *data)
2441{
2442	union ifreq_union *ifrup;
2443
2444	ifrup = data;
2445#ifdef COMPAT_FREEBSD32
2446	if (SV_CURPROC_FLAG(SV_ILP32))
2447		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
2448	else
2449#endif
2450		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
2451}
2452
2453size_t
2454ifr_buffer_get_length(void *data)
2455{
2456	union ifreq_union *ifrup;
2457
2458	ifrup = data;
2459#ifdef COMPAT_FREEBSD32
2460	if (SV_CURPROC_FLAG(SV_ILP32))
2461		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
2462#endif
2463	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
2464}
2465
2466static void
2467ifr_buffer_set_length(void *data, size_t len)
2468{
2469	union ifreq_union *ifrup;
2470
2471	ifrup = data;
2472#ifdef COMPAT_FREEBSD32
2473	if (SV_CURPROC_FLAG(SV_ILP32))
2474		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
2475	else
2476#endif
2477		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
2478}
2479
2480void *
2481ifr_data_get_ptr(void *ifrp)
2482{
2483	union ifreq_union *ifrup;
2484
2485	ifrup = ifrp;
2486#ifdef COMPAT_FREEBSD32
2487	if (SV_CURPROC_FLAG(SV_ILP32))
2488		return ((void *)(uintptr_t)
2489		    ifrup->ifr32.ifr_ifru.ifru_data);
2490#endif
2491		return (ifrup->ifr.ifr_ifru.ifru_data);
2492}
2493
2494/*
2495 * Hardware specific interface ioctls.
2496 */
2497int
2498ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
2499{
2500	struct ifreq *ifr;
2501	int error = 0, do_ifup = 0;
2502	int new_flags, temp_flags;
2503	size_t namelen, onamelen;
2504	size_t descrlen;
2505	char *descrbuf, *odescrbuf;
2506	char new_name[IFNAMSIZ];
2507	struct ifaddr *ifa;
2508	struct sockaddr_dl *sdl;
2509
2510	ifr = (struct ifreq *)data;
2511	switch (cmd) {
2512	case SIOCGIFINDEX:
2513		ifr->ifr_index = ifp->if_index;
2514		break;
2515
2516	case SIOCGIFFLAGS:
2517		temp_flags = ifp->if_flags | ifp->if_drv_flags;
2518		ifr->ifr_flags = temp_flags & 0xffff;
2519		ifr->ifr_flagshigh = temp_flags >> 16;
2520		break;
2521
2522	case SIOCGIFCAP:
2523		ifr->ifr_reqcap = ifp->if_capabilities;
2524		ifr->ifr_curcap = ifp->if_capenable;
2525		break;
2526
2527#ifdef MAC
2528	case SIOCGIFMAC:
2529		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2530		break;
2531#endif
2532
2533	case SIOCGIFMETRIC:
2534		ifr->ifr_metric = ifp->if_metric;
2535		break;
2536
2537	case SIOCGIFMTU:
2538		ifr->ifr_mtu = ifp->if_mtu;
2539		break;
2540
2541	case SIOCGIFPHYS:
2542		/* XXXGL: did this ever worked? */
2543		ifr->ifr_phys = 0;
2544		break;
2545
2546	case SIOCGIFDESCR:
2547		error = 0;
2548		sx_slock(&ifdescr_sx);
2549		if (ifp->if_description == NULL)
2550			error = ENOMSG;
2551		else {
2552			/* space for terminating nul */
2553			descrlen = strlen(ifp->if_description) + 1;
2554			if (ifr_buffer_get_length(ifr) < descrlen)
2555				ifr_buffer_set_buffer_null(ifr);
2556			else
2557				error = copyout(ifp->if_description,
2558				    ifr_buffer_get_buffer(ifr), descrlen);
2559			ifr_buffer_set_length(ifr, descrlen);
2560		}
2561		sx_sunlock(&ifdescr_sx);
2562		break;
2563
2564	case SIOCSIFDESCR:
2565		error = priv_check(td, PRIV_NET_SETIFDESCR);
2566		if (error)
2567			return (error);
2568
2569		/*
2570		 * Copy only (length-1) bytes to make sure that
2571		 * if_description is always nul terminated.  The
2572		 * length parameter is supposed to count the
2573		 * terminating nul in.
2574		 */
2575		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
2576			return (ENAMETOOLONG);
2577		else if (ifr_buffer_get_length(ifr) == 0)
2578			descrbuf = NULL;
2579		else {
2580			descrbuf = malloc(ifr_buffer_get_length(ifr),
2581			    M_IFDESCR, M_WAITOK | M_ZERO);
2582			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
2583			    ifr_buffer_get_length(ifr) - 1);
2584			if (error) {
2585				free(descrbuf, M_IFDESCR);
2586				break;
2587			}
2588		}
2589
2590		sx_xlock(&ifdescr_sx);
2591		odescrbuf = ifp->if_description;
2592		ifp->if_description = descrbuf;
2593		sx_xunlock(&ifdescr_sx);
2594
2595		getmicrotime(&ifp->if_lastchange);
2596		free(odescrbuf, M_IFDESCR);
2597		break;
2598
2599	case SIOCGIFFIB:
2600		ifr->ifr_fib = ifp->if_fib;
2601		break;
2602
2603	case SIOCSIFFIB:
2604		error = priv_check(td, PRIV_NET_SETIFFIB);
2605		if (error)
2606			return (error);
2607		if (ifr->ifr_fib >= rt_numfibs)
2608			return (EINVAL);
2609
2610		ifp->if_fib = ifr->ifr_fib;
2611		break;
2612
2613	case SIOCSIFFLAGS:
2614		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2615		if (error)
2616			return (error);
2617		/*
2618		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2619		 * check, so we don't need special handling here yet.
2620		 */
2621		new_flags = (ifr->ifr_flags & 0xffff) |
2622		    (ifr->ifr_flagshigh << 16);
2623		if (ifp->if_flags & IFF_UP &&
2624		    (new_flags & IFF_UP) == 0) {
2625			if_down(ifp);
2626		} else if (new_flags & IFF_UP &&
2627		    (ifp->if_flags & IFF_UP) == 0) {
2628			do_ifup = 1;
2629		}
2630		/* See if permanently promiscuous mode bit is about to flip */
2631		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2632			if (new_flags & IFF_PPROMISC)
2633				ifp->if_flags |= IFF_PROMISC;
2634			else if (ifp->if_pcount == 0)
2635				ifp->if_flags &= ~IFF_PROMISC;
2636			if (log_promisc_mode_change)
2637                                log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
2638                                    ifp->if_xname,
2639                                    ((new_flags & IFF_PPROMISC) ?
2640                                     "enabled" : "disabled"));
2641		}
2642		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2643			(new_flags &~ IFF_CANTCHANGE);
2644		if (ifp->if_ioctl) {
2645			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2646		}
2647		if (do_ifup)
2648			if_up(ifp);
2649		getmicrotime(&ifp->if_lastchange);
2650		break;
2651
2652	case SIOCSIFCAP:
2653		error = priv_check(td, PRIV_NET_SETIFCAP);
2654		if (error)
2655			return (error);
2656		if (ifp->if_ioctl == NULL)
2657			return (EOPNOTSUPP);
2658		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2659			return (EINVAL);
2660		error = (*ifp->if_ioctl)(ifp, cmd, data);
2661		if (error == 0)
2662			getmicrotime(&ifp->if_lastchange);
2663		break;
2664
2665#ifdef MAC
2666	case SIOCSIFMAC:
2667		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2668		break;
2669#endif
2670
2671	case SIOCSIFNAME:
2672		error = priv_check(td, PRIV_NET_SETIFNAME);
2673		if (error)
2674			return (error);
2675		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
2676		    NULL);
2677		if (error != 0)
2678			return (error);
2679		if (new_name[0] == '\0')
2680			return (EINVAL);
2681		if (new_name[IFNAMSIZ-1] != '\0') {
2682			new_name[IFNAMSIZ-1] = '\0';
2683			if (strlen(new_name) == IFNAMSIZ-1)
2684				return (EINVAL);
2685		}
2686		if (strcmp(new_name, ifp->if_xname) == 0)
2687			break;
2688		if (ifunit(new_name) != NULL)
2689			return (EEXIST);
2690
2691		/*
2692		 * XXX: Locking.  Nothing else seems to lock if_flags,
2693		 * and there are numerous other races with the
2694		 * ifunit() checks not being atomic with namespace
2695		 * changes (renames, vmoves, if_attach, etc).
2696		 */
2697		ifp->if_flags |= IFF_RENAMING;
2698
2699		/* Announce the departure of the interface. */
2700		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2701		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2702
2703		log(LOG_INFO, "%s: changing name to '%s'\n",
2704		    ifp->if_xname, new_name);
2705
2706		IF_ADDR_WLOCK(ifp);
2707		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
2708		ifa = ifp->if_addr;
2709		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2710		namelen = strlen(new_name);
2711		onamelen = sdl->sdl_nlen;
2712		/*
2713		 * Move the address if needed.  This is safe because we
2714		 * allocate space for a name of length IFNAMSIZ when we
2715		 * create this in if_attach().
2716		 */
2717		if (namelen != onamelen) {
2718			bcopy(sdl->sdl_data + onamelen,
2719			    sdl->sdl_data + namelen, sdl->sdl_alen);
2720		}
2721		bcopy(new_name, sdl->sdl_data, namelen);
2722		sdl->sdl_nlen = namelen;
2723		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2724		bzero(sdl->sdl_data, onamelen);
2725		while (namelen != 0)
2726			sdl->sdl_data[--namelen] = 0xff;
2727		IF_ADDR_WUNLOCK(ifp);
2728
2729		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2730		/* Announce the return of the interface. */
2731		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2732
2733		ifp->if_flags &= ~IFF_RENAMING;
2734		break;
2735
2736#ifdef VIMAGE
2737	case SIOCSIFVNET:
2738		error = priv_check(td, PRIV_NET_SETIFVNET);
2739		if (error)
2740			return (error);
2741		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2742		break;
2743#endif
2744
2745	case SIOCSIFMETRIC:
2746		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2747		if (error)
2748			return (error);
2749		ifp->if_metric = ifr->ifr_metric;
2750		getmicrotime(&ifp->if_lastchange);
2751		break;
2752
2753	case SIOCSIFPHYS:
2754		error = priv_check(td, PRIV_NET_SETIFPHYS);
2755		if (error)
2756			return (error);
2757		if (ifp->if_ioctl == NULL)
2758			return (EOPNOTSUPP);
2759		error = (*ifp->if_ioctl)(ifp, cmd, data);
2760		if (error == 0)
2761			getmicrotime(&ifp->if_lastchange);
2762		break;
2763
2764	case SIOCSIFMTU:
2765	{
2766		u_long oldmtu = ifp->if_mtu;
2767
2768		error = priv_check(td, PRIV_NET_SETIFMTU);
2769		if (error)
2770			return (error);
2771		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2772			return (EINVAL);
2773		if (ifp->if_ioctl == NULL)
2774			return (EOPNOTSUPP);
2775		error = (*ifp->if_ioctl)(ifp, cmd, data);
2776		if (error == 0) {
2777			getmicrotime(&ifp->if_lastchange);
2778			rt_ifmsg(ifp);
2779		}
2780		/*
2781		 * If the link MTU changed, do network layer specific procedure.
2782		 */
2783		if (ifp->if_mtu != oldmtu) {
2784#ifdef INET6
2785			nd6_setmtu(ifp);
2786#endif
2787			rt_updatemtu(ifp);
2788		}
2789		break;
2790	}
2791
2792	case SIOCADDMULTI:
2793	case SIOCDELMULTI:
2794		if (cmd == SIOCADDMULTI)
2795			error = priv_check(td, PRIV_NET_ADDMULTI);
2796		else
2797			error = priv_check(td, PRIV_NET_DELMULTI);
2798		if (error)
2799			return (error);
2800
2801		/* Don't allow group membership on non-multicast interfaces. */
2802		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2803			return (EOPNOTSUPP);
2804
2805		/* Don't let users screw up protocols' entries. */
2806		if (ifr->ifr_addr.sa_family != AF_LINK)
2807			return (EINVAL);
2808
2809		if (cmd == SIOCADDMULTI) {
2810			struct ifmultiaddr *ifma;
2811
2812			/*
2813			 * Userland is only permitted to join groups once
2814			 * via the if_addmulti() KPI, because it cannot hold
2815			 * struct ifmultiaddr * between calls. It may also
2816			 * lose a race while we check if the membership
2817			 * already exists.
2818			 */
2819			IF_ADDR_RLOCK(ifp);
2820			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2821			IF_ADDR_RUNLOCK(ifp);
2822			if (ifma != NULL)
2823				error = EADDRINUSE;
2824			else
2825				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2826		} else {
2827			error = if_delmulti(ifp, &ifr->ifr_addr);
2828		}
2829		if (error == 0)
2830			getmicrotime(&ifp->if_lastchange);
2831		break;
2832
2833	case SIOCSIFPHYADDR:
2834	case SIOCDIFPHYADDR:
2835#ifdef INET6
2836	case SIOCSIFPHYADDR_IN6:
2837#endif
2838	case SIOCSIFMEDIA:
2839	case SIOCSIFGENERIC:
2840		error = priv_check(td, PRIV_NET_HWIOCTL);
2841		if (error)
2842			return (error);
2843		if (ifp->if_ioctl == NULL)
2844			return (EOPNOTSUPP);
2845		error = (*ifp->if_ioctl)(ifp, cmd, data);
2846		if (error == 0)
2847			getmicrotime(&ifp->if_lastchange);
2848		break;
2849
2850	case SIOCGIFSTATUS:
2851	case SIOCGIFPSRCADDR:
2852	case SIOCGIFPDSTADDR:
2853	case SIOCGIFMEDIA:
2854	case SIOCGIFXMEDIA:
2855	case SIOCGIFGENERIC:
2856	case SIOCGIFRSSKEY:
2857	case SIOCGIFRSSHASH:
2858	case SIOCGIFDOWNREASON:
2859		if (ifp->if_ioctl == NULL)
2860			return (EOPNOTSUPP);
2861		error = (*ifp->if_ioctl)(ifp, cmd, data);
2862		break;
2863
2864	case SIOCSIFLLADDR:
2865		error = priv_check(td, PRIV_NET_SETLLADDR);
2866		if (error)
2867			return (error);
2868		error = if_setlladdr(ifp,
2869		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2870		break;
2871
2872	case SIOCGHWADDR:
2873		error = if_gethwaddr(ifp, ifr);
2874		break;
2875
2876	CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
2877		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2878		if (error)
2879			return (error);
2880		if ((error = if_addgroup(ifp,
2881		    ifgr_group_get((struct ifgroupreq *)data))))
2882			return (error);
2883		break;
2884
2885	CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
2886		if ((error = if_getgroup((struct ifgroupreq *)data, ifp)))
2887			return (error);
2888		break;
2889
2890	CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
2891		error = priv_check(td, PRIV_NET_DELIFGROUP);
2892		if (error)
2893			return (error);
2894		if ((error = if_delgroup(ifp,
2895		    ifgr_group_get((struct ifgroupreq *)data))))
2896			return (error);
2897		break;
2898
2899	default:
2900		error = ENOIOCTL;
2901		break;
2902	}
2903	return (error);
2904}
2905
/*
 * COMPAT_SVR4: older request code for fetching the interface
 * configuration.  ifconf() copies sa_family through a struct osockaddr
 * view of the address when it sees this command.
 */
#define	OSIOCGIFCONF	_IOWR('i', 20, struct ifconf)

#ifdef COMPAT_FREEBSD32
/*
 * Layout of struct ifconf as passed by a 32-bit process: the buffer
 * pointer is carried as a 32-bit integer.  ifioctl() converts it to a
 * native struct ifconf before calling ifconf().
 */
struct ifconf32 {
	int32_t	ifc_len;		/* buffer size in, bytes returned out */
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	} ifc_ifcu;
};
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
#endif
2919
2920#ifdef COMPAT_FREEBSD32
2921static void
2922ifmr_init(struct ifmediareq *ifmr, caddr_t data)
2923{
2924	struct ifmediareq32 *ifmr32;
2925
2926	ifmr32 = (struct ifmediareq32 *)data;
2927	memcpy(ifmr->ifm_name, ifmr32->ifm_name,
2928	    sizeof(ifmr->ifm_name));
2929	ifmr->ifm_current = ifmr32->ifm_current;
2930	ifmr->ifm_mask = ifmr32->ifm_mask;
2931	ifmr->ifm_status = ifmr32->ifm_status;
2932	ifmr->ifm_active = ifmr32->ifm_active;
2933	ifmr->ifm_count = ifmr32->ifm_count;
2934	ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist;
2935}
2936
2937static void
2938ifmr_update(const struct ifmediareq *ifmr, caddr_t data)
2939{
2940	struct ifmediareq32 *ifmr32;
2941
2942	ifmr32 = (struct ifmediareq32 *)data;
2943	ifmr32->ifm_current = ifmr->ifm_current;
2944	ifmr32->ifm_mask = ifmr->ifm_mask;
2945	ifmr32->ifm_status = ifmr->ifm_status;
2946	ifmr32->ifm_active = ifmr->ifm_active;
2947	ifmr32->ifm_count = ifmr->ifm_count;
2948}
2949#endif
2950
/*
 * Interface ioctls.
 *
 * Entry point for interface ioctls issued on socket 'so'.  'data' is the
 * kernel copy of the request and 'td' the calling thread.  The request
 * is processed in the socket's vnet.  Requests are tried in this order:
 * the interface-list requests (SIOCGIFCONF and variants), requests that
 * do not operate on an already looked-up interface (clone create/destroy,
 * group members, carp, ...), ifhwioctl(), the socket protocol's
 * pru_control method and finally the driver's if_ioctl method.
 *
 * Returns 0 or an errno value.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
{
#ifdef COMPAT_FREEBSD32
	caddr_t saved_data;
	struct ifmediareq ifmr;
#endif
	struct ifmediareq *ifmrp;
	struct ifnet *ifp;
	struct ifreq *ifr;
	int error;
	int oif_flags;
#ifdef VIMAGE
	int shutdown;
#endif

	CURVNET_SET(so->so_vnet);
#ifdef VIMAGE
	/* Make sure the VNET is stable. */
	shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
		 so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
	if (shutdown) {
		CURVNET_RESTORE();
		return (EBUSY);
	}
#endif


	/* Interface-list requests: these do not name an interface. */
	switch (cmd) {
	case SIOCGIFCONF:
	case OSIOCGIFCONF:	/* COMPAT_SVR4 */
		error = ifconf(cmd, data);
		CURVNET_RESTORE();
		return (error);

#ifdef COMPAT_FREEBSD32
	case SIOCGIFCONF32:
		{
			struct ifconf32 *ifc32;
			struct ifconf ifc;

			/* Convert to the native layout and back again. */
			ifc32 = (struct ifconf32 *)data;
			ifc.ifc_len = ifc32->ifc_len;
			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);

			error = ifconf(SIOCGIFCONF, (void *)&ifc);
			CURVNET_RESTORE();
			if (error == 0)
				ifc32->ifc_len = ifc.ifc_len;
			return (error);
		}
#endif
	}

	ifmrp = NULL;
#ifdef COMPAT_FREEBSD32
	/*
	 * 32-bit media requests: build a native struct ifmediareq on the
	 * stack, rewrite 'cmd' to the native request code and point 'data'
	 * at the native copy.  The original buffer is remembered in
	 * saved_data and refreshed at out_noref.
	 */
	switch (cmd) {
	case SIOCGIFMEDIA32:
	case SIOCGIFXMEDIA32:
		ifmrp = &ifmr;
		ifmr_init(ifmrp, data);
		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
		saved_data = data;
		data = (caddr_t)ifmrp;
	}
#endif

	ifr = (struct ifreq *)data;
	/*
	 * Requests handled before any interface lookup; they all leave
	 * through out_noref because no interface reference is held yet.
	 */
	switch (cmd) {
#ifdef VIMAGE
	case SIOCSIFRVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error == 0)
			error = if_vmove_reclaim(td, ifr->ifr_name,
			    ifr->ifr_jid);
		goto out_noref;
#endif
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		error = priv_check(td, PRIV_NET_IFCREATE);
		if (error == 0)
			error = if_clone_create(ifr->ifr_name,
			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
			    ifr_data_get_ptr(ifr) : NULL);
		goto out_noref;
	case SIOCIFDESTROY:
		error = priv_check(td, PRIV_NET_IFDESTROY);
		if (error == 0)
			error = if_clone_destroy(ifr->ifr_name);
		goto out_noref;

	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		goto out_noref;

	CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
		error = if_getgroupmembers((struct ifgroupreq *)data);
		goto out_noref;

#if defined(INET) || defined(INET6)
	case SIOCSVH:
	case SIOCGVH:
		if (carp_ioctl_p == NULL)
			error = EPROTONOSUPPORT;
		else
			error = (*carp_ioctl_p)(ifr, cmd, td);
		goto out_noref;
#endif
	}

	/* Everything else names an interface; hold a reference on it. */
	ifp = ifunit_ref(ifr->ifr_name);
	if (ifp == NULL) {
		error = ENXIO;
		goto out_noref;
	}

	/* ENOIOCTL means ifhwioctl() did not recognize the request. */
	error = ifhwioctl(cmd, ifp, data, td);
	if (error != ENOIOCTL)
		goto out_ref;

	/* Remember the flags so an IFF_UP change can be detected below. */
	oif_flags = ifp->if_flags;
	if (so->so_proto == NULL) {
		error = EOPNOTSUPP;
		goto out_ref;
	}

	/*
	 * Pass the request on to the socket control method, and if the
	 * latter returns EOPNOTSUPP, directly to the interface.
	 *
	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
	 * trust SIOCSIFADDR et al to come from an already privileged
	 * layer, and do not perform any credentials checks or input
	 * validation.
	 */
	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
	    ifp, td));
	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
		error = (*ifp->if_ioctl)(ifp, cmd, data);

	/* If the request toggled IFF_UP to on, let IPv6 know. */
	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
		if (ifp->if_flags & IFF_UP)
			in6_if_up(ifp);
#endif
	}

out_ref:
	/* Drop the reference taken by ifunit_ref(). */
	if_rele(ifp);
out_noref:
#ifdef COMPAT_FREEBSD32
	/*
	 * For a translated 32-bit media request, copy the native result
	 * back into the caller's 32-bit structure.  This runs on every
	 * path through here, including error paths.
	 */
	if (ifmrp != NULL) {
		KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA),
		    ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx",
		     cmd));
		data = saved_data;
		ifmr_update(ifmrp, data);
	}
#endif
	CURVNET_RESTORE();
	return (error);
}
3118
3119/*
3120 * The code common to handling reference counted flags,
3121 * e.g., in ifpromisc() and if_allmulti().
3122 * The "pflag" argument can specify a permanent mode flag to check,
3123 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
3124 *
3125 * Only to be used on stack-owned flags, not driver-owned flags.
3126 */
3127static int
3128if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
3129{
3130	struct ifreq ifr;
3131	int error;
3132	int oldflags, oldcount;
3133
3134	/* Sanity checks to catch programming errors */
3135	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
3136	    ("%s: setting driver-owned flag %d", __func__, flag));
3137
3138	if (onswitch)
3139		KASSERT(*refcount >= 0,
3140		    ("%s: increment negative refcount %d for flag %d",
3141		    __func__, *refcount, flag));
3142	else
3143		KASSERT(*refcount > 0,
3144		    ("%s: decrement non-positive refcount %d for flag %d",
3145		    __func__, *refcount, flag));
3146
3147	/* In case this mode is permanent, just touch refcount */
3148	if (ifp->if_flags & pflag) {
3149		*refcount += onswitch ? 1 : -1;
3150		return (0);
3151	}
3152
3153	/* Save ifnet parameters for if_ioctl() may fail */
3154	oldcount = *refcount;
3155	oldflags = ifp->if_flags;
3156
3157	/*
3158	 * See if we aren't the only and touching refcount is enough.
3159	 * Actually toggle interface flag if we are the first or last.
3160	 */
3161	if (onswitch) {
3162		if ((*refcount)++)
3163			return (0);
3164		ifp->if_flags |= flag;
3165	} else {
3166		if (--(*refcount))
3167			return (0);
3168		ifp->if_flags &= ~flag;
3169	}
3170
3171	/* Call down the driver since we've changed interface flags */
3172	if (ifp->if_ioctl == NULL) {
3173		error = EOPNOTSUPP;
3174		goto recover;
3175	}
3176	ifr.ifr_flags = ifp->if_flags & 0xffff;
3177	ifr.ifr_flagshigh = ifp->if_flags >> 16;
3178	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3179	if (error)
3180		goto recover;
3181	/* Notify userland that interface flags have changed */
3182	rt_ifmsg(ifp);
3183	return (0);
3184
3185recover:
3186	/* Recover after driver error */
3187	*refcount = oldcount;
3188	ifp->if_flags = oldflags;
3189	return (error);
3190}
3191
3192/*
3193 * Set/clear promiscuous mode on interface ifp based on the truth value
3194 * of pswitch.  The calls are reference counted so that only the first
3195 * "on" request actually has an effect, as does the final "off" request.
3196 * Results are undefined if the "off" and "on" requests are not matched.
3197 */
3198int
3199ifpromisc(struct ifnet *ifp, int pswitch)
3200{
3201	int error;
3202	int oldflags = ifp->if_flags;
3203
3204	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
3205			   &ifp->if_pcount, pswitch);
3206	/* If promiscuous mode status has changed, log a message */
3207	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
3208            log_promisc_mode_change)
3209		log(LOG_INFO, "%s: promiscuous mode %s\n",
3210		    ifp->if_xname,
3211		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
3212	return (error);
3213}
3214
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * 'data' is a struct ifconf.  On entry ifc_len is the size of the user
 * buffer at ifc_req; on success ifc_len is set to the number of bytes
 * produced and that many bytes are copied out.  One struct ifreq is
 * emitted per visible address (longer for addresses bigger than a
 * struct sockaddr), or a single address-less ifreq for an interface with
 * no visible addresses.  'cmd' selects the OSIOCGIFCONF (COMPAT_SVR4)
 * record format when it has that value.
 *
 * Returns 0, EINVAL for a non-positive ifc_len, ENAMETOOLONG if an
 * interface name does not fit in ifr_name, or the copyout() error.
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
	max_len = MAXPHYS - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	/*
	 * Once the staging buffer is as large as the user's buffer there
	 * is no point in retrying; 'full' records that.
	 */
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	/*
	 * From here on max_len counts the bytes we tried to append, while
	 * valid_len tracks how much went in before the sbuf reported an
	 * error.
	 */
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		int addrs;

		/*
		 * Zero the ifr to make sure we don't disclose the contents
		 * of the stack.
		 */
		memset(&ifr, 0, sizeof(ifr));

		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		addrs = 0;
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			/* Skip addresses prison_if() rejects for the caller. */
			if (prison_if(curthread->td_ucred, sa) != 0)
				continue;
			addrs++;
			/* COMPAT_SVR4 */
			if (cmd == OSIOCGIFCONF) {
				struct osockaddr *osa =
				    (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else
			if (sa->sa_len <= sizeof(*sa)) {
				/* Address fits in the ifreq's own sockaddr. */
				if (sa->sa_len < sizeof(*sa)) {
					memset(&ifr.ifr_ifru.ifru_addr, 0,
					    sizeof(ifr.ifr_ifru.ifru_addr));
					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
					    sa->sa_len);
				} else
					ifr.ifr_ifru.ifru_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/*
				 * Oversized address: emit the ifreq up to
				 * ifr_addr, then the full sockaddr.
				 */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
		IF_ADDR_RUNLOCK(ifp);
		/* No visible address: report the interface name alone. */
		if (addrs == 0) {
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	ifc->ifc_len = valid_len;
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}
3330
3331/*
3332 * Just like ifpromisc(), but for all-multicast-reception mode.
3333 */
3334int
3335if_allmulti(struct ifnet *ifp, int onswitch)
3336{
3337
3338	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3339}
3340
3341struct ifmultiaddr *
3342if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3343{
3344	struct ifmultiaddr *ifma;
3345
3346	IF_ADDR_LOCK_ASSERT(ifp);
3347
3348	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3349		if (sa->sa_family == AF_LINK) {
3350			if (sa_dl_equal(ifma->ifma_addr, sa))
3351				break;
3352		} else {
3353			if (sa_equal(ifma->ifma_addr, sa))
3354				break;
3355		}
3356	}
3357
3358	return ifma;
3359}
3360
3361/*
3362 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3363 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3364 * the ifnet multicast address list here, so the caller must do that and
3365 * other setup work (such as notifying the device driver).  The reference
3366 * count is initialized to 1.
3367 */
3368static struct ifmultiaddr *
3369if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3370    int mflags)
3371{
3372	struct ifmultiaddr *ifma;
3373	struct sockaddr *dupsa;
3374
3375	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3376	    M_ZERO);
3377	if (ifma == NULL)
3378		return (NULL);
3379
3380	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3381	if (dupsa == NULL) {
3382		free(ifma, M_IFMADDR);
3383		return (NULL);
3384	}
3385	bcopy(sa, dupsa, sa->sa_len);
3386	ifma->ifma_addr = dupsa;
3387
3388	ifma->ifma_ifp = ifp;
3389	ifma->ifma_refcount = 1;
3390	ifma->ifma_protospec = NULL;
3391
3392	if (llsa == NULL) {
3393		ifma->ifma_lladdr = NULL;
3394		return (ifma);
3395	}
3396
3397	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3398	if (dupsa == NULL) {
3399		free(ifma->ifma_addr, M_IFMADDR);
3400		free(ifma, M_IFMADDR);
3401		return (NULL);
3402	}
3403	bcopy(llsa, dupsa, llsa->sa_len);
3404	ifma->ifma_lladdr = dupsa;
3405
3406	return (ifma);
3407}
3408
3409/*
3410 * if_freemulti: free ifmultiaddr structure and possibly attached related
3411 * addresses.  The caller is responsible for implementing reference
3412 * counting, notifying the driver, handling routing messages, and releasing
3413 * any dependent link layer state.
3414 */
3415static void
3416if_freemulti(struct ifmultiaddr *ifma)
3417{
3418
3419	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3420	    ifma->ifma_refcount));
3421
3422	if (ifma->ifma_lladdr != NULL)
3423		free(ifma->ifma_lladdr, M_IFMADDR);
3424	free(ifma->ifma_addr, M_IFMADDR);
3425	free(ifma, M_IFMADDR);
3426}
3427
3428/*
3429 * Register an additional multicast address with a network interface.
3430 *
3431 * - If the address is already present, bump the reference count on the
3432 *   address and return.
3433 * - If the address is not link-layer, look up a link layer address.
3434 * - Allocate address structures for one or both addresses, and attach to the
3435 *   multicast address list on the interface.  If automatically adding a link
3436 *   layer address, the protocol address will own a reference to the link
3437 *   layer address, to be freed when it is freed.
3438 * - Notify the network device driver of an addition to the multicast address
3439 *   list.
3440 *
3441 * 'sa' points to caller-owned memory with the desired multicast address.
3442 *
3443 * 'retifma' will be used to return a pointer to the resulting multicast
3444 * address reference, if desired.
3445 */
3446int
3447if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
3448    struct ifmultiaddr **retifma)
3449{
3450	struct ifmultiaddr *ifma, *ll_ifma;
3451	struct sockaddr *llsa;
3452	struct sockaddr_dl sdl;
3453	int error;
3454
3455	/*
3456	 * If the address is already present, return a new reference to it;
3457	 * otherwise, allocate storage and set up a new address.
3458	 */
3459	IF_ADDR_WLOCK(ifp);
3460	ifma = if_findmulti(ifp, sa);
3461	if (ifma != NULL) {
3462		ifma->ifma_refcount++;
3463		if (retifma != NULL)
3464			*retifma = ifma;
3465		IF_ADDR_WUNLOCK(ifp);
3466		return (0);
3467	}
3468
3469	/*
3470	 * The address isn't already present; resolve the protocol address
3471	 * into a link layer address, and then look that up, bump its
3472	 * refcount or allocate an ifma for that also.
3473	 * Most link layer resolving functions returns address data which
3474	 * fits inside default sockaddr_dl structure. However callback
3475	 * can allocate another sockaddr structure, in that case we need to
3476	 * free it later.
3477	 */
3478	llsa = NULL;
3479	ll_ifma = NULL;
3480	if (ifp->if_resolvemulti != NULL) {
3481		/* Provide called function with buffer size information */
3482		sdl.sdl_len = sizeof(sdl);
3483		llsa = (struct sockaddr *)&sdl;
3484		error = ifp->if_resolvemulti(ifp, &llsa, sa);
3485		if (error)
3486			goto unlock_out;
3487	}
3488
3489	/*
3490	 * Allocate the new address.  Don't hook it up yet, as we may also
3491	 * need to allocate a link layer multicast address.
3492	 */
3493	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
3494	if (ifma == NULL) {
3495		error = ENOMEM;
3496		goto free_llsa_out;
3497	}
3498
3499	/*
3500	 * If a link layer address is found, we'll need to see if it's
3501	 * already present in the address list, or allocate is as well.
3502	 * When this block finishes, the link layer address will be on the
3503	 * list.
3504	 */
3505	if (llsa != NULL) {
3506		ll_ifma = if_findmulti(ifp, llsa);
3507		if (ll_ifma == NULL) {
3508			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
3509			if (ll_ifma == NULL) {
3510				--ifma->ifma_refcount;
3511				if_freemulti(ifma);
3512				error = ENOMEM;
3513				goto free_llsa_out;
3514			}
3515			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
3516			    ifma_link);
3517		} else
3518			ll_ifma->ifma_refcount++;
3519		ifma->ifma_llifma = ll_ifma;
3520	}
3521
3522	/*
3523	 * We now have a new multicast address, ifma, and possibly a new or
3524	 * referenced link layer address.  Add the primary address to the
3525	 * ifnet address list.
3526	 */
3527	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
3528
3529	if (retifma != NULL)
3530		*retifma = ifma;
3531
3532	/*
3533	 * Must generate the message while holding the lock so that 'ifma'
3534	 * pointer is still valid.
3535	 */
3536	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
3537	IF_ADDR_WUNLOCK(ifp);
3538
3539	/*
3540	 * We are certain we have added something, so call down to the
3541	 * interface to let them know about it.
3542	 */
3543	if (ifp->if_ioctl != NULL) {
3544		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3545	}
3546
3547	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3548		link_free_sdl(llsa);
3549
3550	return (0);
3551
3552free_llsa_out:
3553	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3554		link_free_sdl(llsa);
3555
3556unlock_out:
3557	IF_ADDR_WUNLOCK(ifp);
3558	return (error);
3559}
3560
3561/*
3562 * Delete a multicast group membership by network-layer group address.
3563 *
3564 * Returns ENOENT if the entry could not be found. If ifp no longer
3565 * exists, results are undefined. This entry point should only be used
3566 * from subsystems which do appropriate locking to hold ifp for the
3567 * duration of the call.
3568 * Network-layer protocol domains must use if_delmulti_ifma().
3569 */
3570int
3571if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3572{
3573	struct ifmultiaddr *ifma;
3574	int lastref;
3575#ifdef INVARIANTS
3576	struct ifnet *oifp;
3577
3578	IFNET_RLOCK_NOSLEEP();
3579	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3580		if (ifp == oifp)
3581			break;
3582	if (ifp != oifp)
3583		ifp = NULL;
3584	IFNET_RUNLOCK_NOSLEEP();
3585
3586	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
3587#endif
3588	if (ifp == NULL)
3589		return (ENOENT);
3590
3591	IF_ADDR_WLOCK(ifp);
3592	lastref = 0;
3593	ifma = if_findmulti(ifp, sa);
3594	if (ifma != NULL)
3595		lastref = if_delmulti_locked(ifp, ifma, 0);
3596	IF_ADDR_WUNLOCK(ifp);
3597
3598	if (ifma == NULL)
3599		return (ENOENT);
3600
3601	if (lastref && ifp->if_ioctl != NULL) {
3602		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3603	}
3604
3605	return (0);
3606}
3607
3608/*
3609 * Delete all multicast group membership for an interface.
3610 * Should be used to quickly flush all multicast filters.
3611 */
3612void
3613if_delallmulti(struct ifnet *ifp)
3614{
3615	struct ifmultiaddr *ifma;
3616	struct ifmultiaddr *next;
3617
3618	IF_ADDR_WLOCK(ifp);
3619	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3620		if_delmulti_locked(ifp, ifma, 0);
3621	IF_ADDR_WUNLOCK(ifp);
3622}
3623
3624/*
3625 * Delete a multicast group membership by group membership pointer.
3626 * Network-layer protocol domains must use this routine.
3627 *
3628 * It is safe to call this routine if the ifp disappeared.
3629 */
3630void
3631if_delmulti_ifma(struct ifmultiaddr *ifma)
3632{
3633	struct ifnet *ifp;
3634	int lastref;
3635
3636	ifp = ifma->ifma_ifp;
3637#ifdef DIAGNOSTIC
3638	if (ifp == NULL) {
3639		printf("%s: ifma_ifp seems to be detached\n", __func__);
3640	} else {
3641		struct ifnet *oifp;
3642
3643		IFNET_RLOCK_NOSLEEP();
3644		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3645			if (ifp == oifp)
3646				break;
3647		if (ifp != oifp) {
3648			printf("%s: ifnet %p disappeared\n", __func__, ifp);
3649			ifp = NULL;
3650		}
3651		IFNET_RUNLOCK_NOSLEEP();
3652	}
3653#endif
3654	/*
3655	 * If and only if the ifnet instance exists: Acquire the address lock.
3656	 */
3657	if (ifp != NULL)
3658		IF_ADDR_WLOCK(ifp);
3659
3660	lastref = if_delmulti_locked(ifp, ifma, 0);
3661
3662	if (ifp != NULL) {
3663		/*
3664		 * If and only if the ifnet instance exists:
3665		 *  Release the address lock.
3666		 *  If the group was left: update the hardware hash filter.
3667		 */
3668		IF_ADDR_WUNLOCK(ifp);
3669		if (lastref && ifp->if_ioctl != NULL) {
3670			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3671		}
3672	}
3673}
3674
3675/*
3676 * Perform deletion of network-layer and/or link-layer multicast address.
3677 *
3678 * Return 0 if the reference count was decremented.
3679 * Return 1 if the final reference was released, indicating that the
3680 * hardware hash filter should be reprogrammed.
3681 */
3682static int
3683if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3684{
3685	struct ifmultiaddr *ll_ifma;
3686
3687	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3688		KASSERT(ifma->ifma_ifp == ifp,
3689		    ("%s: inconsistent ifp %p", __func__, ifp));
3690		IF_ADDR_WLOCK_ASSERT(ifp);
3691	}
3692
3693	ifp = ifma->ifma_ifp;
3694
3695	/*
3696	 * If the ifnet is detaching, null out references to ifnet,
3697	 * so that upper protocol layers will notice, and not attempt
3698	 * to obtain locks for an ifnet which no longer exists. The
3699	 * routing socket announcement must happen before the ifnet
3700	 * instance is detached from the system.
3701	 */
3702	if (detaching) {
3703#ifdef DIAGNOSTIC
3704		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3705#endif
3706		/*
3707		 * ifp may already be nulled out if we are being reentered
3708		 * to delete the ll_ifma.
3709		 */
3710		if (ifp != NULL) {
3711			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3712			ifma->ifma_ifp = NULL;
3713		}
3714	}
3715
3716	if (--ifma->ifma_refcount > 0)
3717		return 0;
3718
3719	/*
3720	 * If this ifma is a network-layer ifma, a link-layer ifma may
3721	 * have been associated with it. Release it first if so.
3722	 */
3723	ll_ifma = ifma->ifma_llifma;
3724	if (ll_ifma != NULL) {
3725		KASSERT(ifma->ifma_lladdr != NULL,
3726		    ("%s: llifma w/o lladdr", __func__));
3727		if (detaching)
3728			ll_ifma->ifma_ifp = NULL;	/* XXX */
3729		if (--ll_ifma->ifma_refcount == 0) {
3730			if (ifp != NULL) {
3731				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
3732				    ifma_link);
3733			}
3734			if_freemulti(ll_ifma);
3735		}
3736	}
3737
3738	if (ifp != NULL)
3739		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
3740
3741	if_freemulti(ifma);
3742
3743	/*
3744	 * The last reference to this instance of struct ifmultiaddr
3745	 * was released; the hardware should be notified of this change.
3746	 */
3747	return 1;
3748}
3749
3750/*
3751 * Set the link layer address on an interface.
3752 *
3753 * At this time we only support certain types of interfaces,
3754 * and we don't allow the length of the address to change.
3755 *
3756 * Set noinline to be dtrace-friendly
3757 */
3758__noinline int
3759if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3760{
3761	struct sockaddr_dl *sdl;
3762	struct ifaddr *ifa;
3763	struct ifreq ifr;
3764
3765	IF_ADDR_RLOCK(ifp);
3766	ifa = ifp->if_addr;
3767	if (ifa == NULL) {
3768		IF_ADDR_RUNLOCK(ifp);
3769		return (EINVAL);
3770	}
3771	ifa_ref(ifa);
3772	IF_ADDR_RUNLOCK(ifp);
3773	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3774	if (sdl == NULL) {
3775		ifa_free(ifa);
3776		return (EINVAL);
3777	}
3778	if (len != sdl->sdl_alen) {	/* don't allow length to change */
3779		ifa_free(ifa);
3780		return (EINVAL);
3781	}
3782	switch (ifp->if_type) {
3783	case IFT_ETHER:
3784	case IFT_FDDI:
3785	case IFT_XETHER:
3786	case IFT_ISO88025:
3787	case IFT_L2VLAN:
3788	case IFT_BRIDGE:
3789	case IFT_ARCNET:
3790	case IFT_IEEE8023ADLAG:
3791	case IFT_IEEE80211:
3792		bcopy(lladdr, LLADDR(sdl), len);
3793		ifa_free(ifa);
3794		break;
3795	default:
3796		ifa_free(ifa);
3797		return (ENODEV);
3798	}
3799
3800	/*
3801	 * If the interface is already up, we need
3802	 * to re-init it in order to reprogram its
3803	 * address filter.
3804	 */
3805	if ((ifp->if_flags & IFF_UP) != 0) {
3806		if (ifp->if_ioctl) {
3807			ifp->if_flags &= ~IFF_UP;
3808			ifr.ifr_flags = ifp->if_flags & 0xffff;
3809			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3810			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3811			ifp->if_flags |= IFF_UP;
3812			ifr.ifr_flags = ifp->if_flags & 0xffff;
3813			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3814			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3815		}
3816	}
3817	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3818	return (0);
3819}
3820
3821/*
3822 * Compat function for handling basic encapsulation requests.
3823 * Not converted stacks (FDDI, IB, ..) supports traditional
3824 * output model: ARP (and other similar L2 protocols) are handled
3825 * inside output routine, arpresolve/nd6_resolve() returns MAC
3826 * address instead of full prepend.
3827 *
3828 * This function creates calculated header==MAC for IPv4/IPv6 and
3829 * returns EAFNOSUPPORT (which is then handled in ARP code) for other
3830 * address families.
3831 */
3832static int
3833if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3834{
3835
3836	if (req->rtype != IFENCAP_LL)
3837		return (EOPNOTSUPP);
3838
3839	if (req->bufsize < req->lladdr_len)
3840		return (ENOMEM);
3841
3842	switch (req->family) {
3843	case AF_INET:
3844	case AF_INET6:
3845		break;
3846	default:
3847		return (EAFNOSUPPORT);
3848	}
3849
3850	/* Copy lladdr to storage as is */
3851	memmove(req->buf, req->lladdr, req->lladdr_len);
3852	req->bufsize = req->lladdr_len;
3853	req->lladdr_off = 0;
3854
3855	return (0);
3856}
3857
3858/*
3859 * Get the link layer address that was read from the hardware at attach.
3860 *
3861 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
3862 * their component interfaces as IFT_IEEE8023ADLAG.
3863 */
3864int
3865if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
3866{
3867
3868	if (ifp->if_hw_addr == NULL)
3869		return (ENODEV);
3870
3871	switch (ifp->if_type) {
3872	case IFT_ETHER:
3873	case IFT_IEEE8023ADLAG:
3874		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
3875		return (0);
3876	default:
3877		return (ENODEV);
3878	}
3879}
3880
3881/*
3882 * The name argument must be a pointer to storage which will last as
3883 * long as the interface does.  For physical devices, the result of
3884 * device_get_name(dev) is a good choice and for pseudo-devices a
3885 * static string works well.
3886 */
3887void
3888if_initname(struct ifnet *ifp, const char *name, int unit)
3889{
3890	ifp->if_dname = name;
3891	ifp->if_dunit = unit;
3892	if (unit != IF_DUNIT_NONE)
3893		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3894	else
3895		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3896}
3897
3898int
3899if_printf(struct ifnet *ifp, const char * fmt, ...)
3900{
3901	va_list ap;
3902	int retval;
3903
3904	retval = printf("%s: ", ifp->if_xname);
3905	va_start(ap, fmt);
3906	retval += vprintf(fmt, ap);
3907	va_end(ap);
3908	return (retval);
3909}
3910
3911void
3912if_start(struct ifnet *ifp)
3913{
3914
3915	(*(ifp)->if_start)(ifp);
3916}
3917
/*
 * Backwards-compatibility transmit routine for drivers that do not
 * supply their own: the mbuf is passed to IFQ_HANDOFF(), and the status
 * that macro stores is returned.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int status;

	IFQ_HANDOFF(ifp, m, status);
	return (status);
}
3930
3931static void
3932if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
3933{
3934
3935	m_freem(m);
3936}
3937
3938int
3939if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3940{
3941	int active = 0;
3942
3943	IF_LOCK(ifq);
3944	if (_IF_QFULL(ifq)) {
3945		IF_UNLOCK(ifq);
3946		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3947		m_freem(m);
3948		return (0);
3949	}
3950	if (ifp != NULL) {
3951		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
3952		if (m->m_flags & (M_BCAST|M_MCAST))
3953			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
3954		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3955	}
3956	_IF_ENQUEUE(ifq, m);
3957	IF_UNLOCK(ifq);
3958	if (ifp != NULL && !active)
3959		(*(ifp)->if_start)(ifp);
3960	return (1);
3961}
3962
3963void
3964if_register_com_alloc(u_char type,
3965    if_com_alloc_t *a, if_com_free_t *f)
3966{
3967
3968	KASSERT(if_com_alloc[type] == NULL,
3969	    ("if_register_com_alloc: %d already registered", type));
3970	KASSERT(if_com_free[type] == NULL,
3971	    ("if_register_com_alloc: %d free already registered", type));
3972
3973	if_com_alloc[type] = a;
3974	if_com_free[type] = f;
3975}
3976
3977void
3978if_deregister_com_alloc(u_char type)
3979{
3980
3981	KASSERT(if_com_alloc[type] != NULL,
3982	    ("if_deregister_com_alloc: %d not registered", type));
3983	KASSERT(if_com_free[type] != NULL,
3984	    ("if_deregister_com_alloc: %d free not registered", type));
3985	if_com_alloc[type] = NULL;
3986	if_com_free[type] = NULL;
3987}
3988
3989/* API for driver access to network stack owned ifnet.*/
3990uint64_t
3991if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
3992{
3993	uint64_t oldbrate;
3994
3995	oldbrate = ifp->if_baudrate;
3996	ifp->if_baudrate = baudrate;
3997	return (oldbrate);
3998}
3999
4000uint64_t
4001if_getbaudrate(if_t ifp)
4002{
4003
4004	return (((struct ifnet *)ifp)->if_baudrate);
4005}
4006
4007int
4008if_setcapabilities(if_t ifp, int capabilities)
4009{
4010	((struct ifnet *)ifp)->if_capabilities = capabilities;
4011	return (0);
4012}
4013
4014int
4015if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
4016{
4017	((struct ifnet *)ifp)->if_capabilities |= setbit;
4018	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
4019
4020	return (0);
4021}
4022
4023int
4024if_getcapabilities(if_t ifp)
4025{
4026	return ((struct ifnet *)ifp)->if_capabilities;
4027}
4028
4029int
4030if_setcapenable(if_t ifp, int capabilities)
4031{
4032	((struct ifnet *)ifp)->if_capenable = capabilities;
4033	return (0);
4034}
4035
4036int
4037if_setcapenablebit(if_t ifp, int setcap, int clearcap)
4038{
4039	if(setcap)
4040		((struct ifnet *)ifp)->if_capenable |= setcap;
4041	if(clearcap)
4042		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
4043
4044	return (0);
4045}
4046
4047const char *
4048if_getdname(if_t ifp)
4049{
4050	return ((struct ifnet *)ifp)->if_dname;
4051}
4052
4053int
4054if_togglecapenable(if_t ifp, int togglecap)
4055{
4056	((struct ifnet *)ifp)->if_capenable ^= togglecap;
4057	return (0);
4058}
4059
4060int
4061if_getcapenable(if_t ifp)
4062{
4063	return ((struct ifnet *)ifp)->if_capenable;
4064}
4065
4066/*
4067 * This is largely undesirable because it ties ifnet to a device, but does
4068 * provide flexiblity for an embedded product vendor. Should be used with
4069 * the understanding that it violates the interface boundaries, and should be
4070 * a last resort only.
4071 */
4072int
4073if_setdev(if_t ifp, void *dev)
4074{
4075	return (0);
4076}
4077
4078int
4079if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
4080{
4081	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
4082	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
4083
4084	return (0);
4085}
4086
4087int
4088if_getdrvflags(if_t ifp)
4089{
4090	return ((struct ifnet *)ifp)->if_drv_flags;
4091}
4092
4093int
4094if_setdrvflags(if_t ifp, int flags)
4095{
4096	((struct ifnet *)ifp)->if_drv_flags = flags;
4097	return (0);
4098}
4099
4100
4101int
4102if_setflags(if_t ifp, int flags)
4103{
4104	((struct ifnet *)ifp)->if_flags = flags;
4105	return (0);
4106}
4107
4108int
4109if_setflagbits(if_t ifp, int set, int clear)
4110{
4111	((struct ifnet *)ifp)->if_flags |= set;
4112	((struct ifnet *)ifp)->if_flags &= ~clear;
4113
4114	return (0);
4115}
4116
4117int
4118if_getflags(if_t ifp)
4119{
4120	return ((struct ifnet *)ifp)->if_flags;
4121}
4122
4123int
4124if_clearhwassist(if_t ifp)
4125{
4126	((struct ifnet *)ifp)->if_hwassist = 0;
4127	return (0);
4128}
4129
4130int
4131if_sethwassistbits(if_t ifp, int toset, int toclear)
4132{
4133	((struct ifnet *)ifp)->if_hwassist |= toset;
4134	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
4135
4136	return (0);
4137}
4138
4139int
4140if_sethwassist(if_t ifp, int hwassist_bit)
4141{
4142	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
4143	return (0);
4144}
4145
4146int
4147if_gethwassist(if_t ifp)
4148{
4149	return ((struct ifnet *)ifp)->if_hwassist;
4150}
4151
4152int
4153if_setmtu(if_t ifp, int mtu)
4154{
4155	((struct ifnet *)ifp)->if_mtu = mtu;
4156	return (0);
4157}
4158
4159int
4160if_getmtu(if_t ifp)
4161{
4162	return ((struct ifnet *)ifp)->if_mtu;
4163}
4164
4165int
4166if_getmtu_family(if_t ifp, int family)
4167{
4168	struct domain *dp;
4169
4170	for (dp = domains; dp; dp = dp->dom_next) {
4171		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
4172			return (dp->dom_ifmtu((struct ifnet *)ifp));
4173	}
4174
4175	return (((struct ifnet *)ifp)->if_mtu);
4176}
4177
4178int
4179if_setsoftc(if_t ifp, void *softc)
4180{
4181	((struct ifnet *)ifp)->if_softc = softc;
4182	return (0);
4183}
4184
4185void *
4186if_getsoftc(if_t ifp)
4187{
4188	return ((struct ifnet *)ifp)->if_softc;
4189}
4190
4191void
4192if_setrcvif(struct mbuf *m, if_t ifp)
4193{
4194	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
4195}
4196
4197void
4198if_setvtag(struct mbuf *m, uint16_t tag)
4199{
4200	m->m_pkthdr.ether_vtag = tag;
4201}
4202
4203uint16_t
4204if_getvtag(struct mbuf *m)
4205{
4206
4207	return (m->m_pkthdr.ether_vtag);
4208}
4209
4210int
4211if_sendq_empty(if_t ifp)
4212{
4213	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
4214}
4215
4216struct ifaddr *
4217if_getifaddr(if_t ifp)
4218{
4219	return ((struct ifnet *)ifp)->if_addr;
4220}
4221
4222int
4223if_getamcount(if_t ifp)
4224{
4225	return ((struct ifnet *)ifp)->if_amcount;
4226}
4227
4228
4229int
4230if_setsendqready(if_t ifp)
4231{
4232	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
4233	return (0);
4234}
4235
4236int
4237if_setsendqlen(if_t ifp, int tx_desc_count)
4238{
4239	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
4240	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
4241
4242	return (0);
4243}
4244
4245int
4246if_vlantrunkinuse(if_t ifp)
4247{
4248	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
4249}
4250
4251int
4252if_input(if_t ifp, struct mbuf* sendmp)
4253{
4254	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
4255	return (0);
4256
4257}
4258
4259/* XXX */
4260#ifndef ETH_ADDR_LEN
4261#define ETH_ADDR_LEN 6
4262#endif
4263
4264int
4265if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
4266{
4267	struct ifmultiaddr *ifma;
4268	uint8_t *lmta = (uint8_t *)mta;
4269	int mcnt = 0;
4270
4271	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4272		if (ifma->ifma_addr->sa_family != AF_LINK)
4273			continue;
4274
4275		if (mcnt == max)
4276			break;
4277
4278		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
4279		    &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
4280		mcnt++;
4281	}
4282	*cnt = mcnt;
4283
4284	return (0);
4285}
4286
4287int
4288if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
4289{
4290	int error;
4291
4292	if_maddr_rlock(ifp);
4293	error = if_setupmultiaddr(ifp, mta, cnt, max);
4294	if_maddr_runlock(ifp);
4295	return (error);
4296}
4297
4298int
4299if_multiaddr_count(if_t ifp, int max)
4300{
4301	struct ifmultiaddr *ifma;
4302	int count;
4303
4304	count = 0;
4305	if_maddr_rlock(ifp);
4306	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4307		if (ifma->ifma_addr->sa_family != AF_LINK)
4308			continue;
4309		count++;
4310		if (count == max)
4311			break;
4312	}
4313	if_maddr_runlock(ifp);
4314	return (count);
4315}
4316
4317int
4318if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
4319{
4320	struct ifmultiaddr *ifma;
4321	int cnt = 0;
4322
4323	if_maddr_rlock(ifp);
4324	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4325		cnt += filter(arg, ifma, cnt);
4326	if_maddr_runlock(ifp);
4327	return (cnt);
4328}
4329
4330struct mbuf *
4331if_dequeue(if_t ifp)
4332{
4333	struct mbuf *m;
4334	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
4335
4336	return (m);
4337}
4338
4339int
4340if_sendq_prepend(if_t ifp, struct mbuf *m)
4341{
4342	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
4343	return (0);
4344}
4345
4346int
4347if_setifheaderlen(if_t ifp, int len)
4348{
4349	((struct ifnet *)ifp)->if_hdrlen = len;
4350	return (0);
4351}
4352
4353caddr_t
4354if_getlladdr(if_t ifp)
4355{
4356	return (IF_LLADDR((struct ifnet *)ifp));
4357}
4358
4359void *
4360if_gethandle(u_char type)
4361{
4362	return (if_alloc(type));
4363}
4364
4365void
4366if_bpfmtap(if_t ifh, struct mbuf *m)
4367{
4368	struct ifnet *ifp = (struct ifnet *)ifh;
4369
4370	BPF_MTAP(ifp, m);
4371}
4372
4373void
4374if_etherbpfmtap(if_t ifh, struct mbuf *m)
4375{
4376	struct ifnet *ifp = (struct ifnet *)ifh;
4377
4378	ETHER_BPF_MTAP(ifp, m);
4379}
4380
4381void
4382if_vlancap(if_t ifh)
4383{
4384	struct ifnet *ifp = (struct ifnet *)ifh;
4385	VLAN_CAPABILITIES(ifp);
4386}
4387
4388void
4389if_setinitfn(if_t ifp, void (*init_fn)(void *))
4390{
4391	((struct ifnet *)ifp)->if_init = init_fn;
4392}
4393
4394void
4395if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
4396{
4397	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
4398}
4399
4400void
4401if_setstartfn(if_t ifp, void (*start_fn)(if_t))
4402{
4403	((struct ifnet *)ifp)->if_start = (void *)start_fn;
4404}
4405
4406void
4407if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
4408{
4409	((struct ifnet *)ifp)->if_transmit = start_fn;
4410}
4411
4412void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
4413{
4414	((struct ifnet *)ifp)->if_qflush = flush_fn;
4415
4416}
4417
4418void
4419if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
4420{
4421
4422	ifp->if_get_counter = fn;
4423}
4424
4425/* Revisit these - These are inline functions originally. */
4426int
4427drbr_inuse_drv(if_t ifh, struct buf_ring *br)
4428{
4429	return drbr_inuse(ifh, br);
4430}
4431
4432struct mbuf*
4433drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
4434{
4435	return drbr_dequeue(ifh, br);
4436}
4437
4438int
4439drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
4440{
4441	return drbr_needs_enqueue(ifh, br);
4442}
4443
4444int
4445drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
4446{
4447	return drbr_enqueue(ifh, br, m);
4448
4449}
4450