if.c revision 340569
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: stable/11/sys/net/if.c 340569 2018-11-18 12:04:25Z kp $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36
37#include <sys/param.h>
38#include <sys/types.h>
39#include <sys/conf.h>
40#include <sys/malloc.h>
41#include <sys/sbuf.h>
42#include <sys/bus.h>
43#include <sys/mbuf.h>
44#include <sys/systm.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/protosw.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/refcount.h>
53#include <sys/module.h>
54#include <sys/rwlock.h>
55#include <sys/sockio.h>
56#include <sys/syslog.h>
57#include <sys/sysctl.h>
58#include <sys/sysent.h>
59#include <sys/taskqueue.h>
60#include <sys/domain.h>
61#include <sys/jail.h>
62#include <sys/priv.h>
63
64#include <machine/stdarg.h>
65#include <vm/uma.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_clone.h>
72#include <net/if_dl.h>
73#include <net/if_types.h>
74#include <net/if_var.h>
75#include <net/if_media.h>
76#include <net/if_vlan_var.h>
77#include <net/radix.h>
78#include <net/route.h>
79#include <net/vnet.h>
80
81#if defined(INET) || defined(INET6)
82#include <net/ethernet.h>
83#include <netinet/in.h>
84#include <netinet/in_var.h>
85#include <netinet/ip.h>
86#include <netinet/ip_carp.h>
87#ifdef INET
88#include <netinet/if_ether.h>
89#endif /* INET */
90#ifdef INET6
91#include <netinet6/in6_var.h>
92#include <netinet6/in6_ifattach.h>
93#endif /* INET6 */
94#endif /* INET || INET6 */
95
96#include <security/mac/mac_framework.h>
97
98#ifdef COMPAT_FREEBSD32
99#include <sys/mount.h>
100#include <compat/freebsd32/freebsd32.h>
101
/*
 * COMPAT_FREEBSD32 layout of a length/pointer pair.  Both members are fixed
 * at 32 bits; the trailing comments name the native types they stand in for.
 */
struct ifreq_buffer32 {
	uint32_t	length;		/* (size_t) */
	uint32_t	buffer;		/* (void *) */
};
106
/*
 * Interface request structure used for socket
 * ioctl's.  All interface ioctl's must have parameter
 * definitions which begin with ifr_name.  The
 * remainder may be interface specific.
 *
 * This is the COMPAT_FREEBSD32 variant: the members that carry a buffer
 * descriptor or a data value (ifru_buffer, ifru_data) are declared with
 * 32-bit types.
 */
struct ifreq32 {
	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
	union {
		struct sockaddr	ifru_addr;
		struct sockaddr	ifru_dstaddr;
		struct sockaddr	ifru_broadaddr;
		struct ifreq_buffer32 ifru_buffer;
		short		ifru_flags[2];
		short		ifru_index;
		int		ifru_jid;
		int		ifru_metric;
		int		ifru_mtu;
		int		ifru_phys;
		int		ifru_media;
		uint32_t	ifru_data;
		int		ifru_cap[2];
		u_int		ifru_fib;
		u_char		ifru_vlan_pcp;
	} ifr_ifru;
};
/*
 * Compile-time checks: struct ifreq32 must have the same total size as
 * struct ifreq, and the ifr_ifru union must start at the same offset in both.
 */
CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
    __offsetof(struct ifreq32, ifr_ifru));
136
/*
 * COMPAT_FREEBSD32 form of an interface group request.
 * NOTE(review): ifgru_groups is a 32-bit value here; presumably it stands in
 * for a user-space pointer in struct ifgroupreq -- confirm against that
 * definition.
 */
struct ifgroupreq32 {
	char	ifgr_name[IFNAMSIZ];
	u_int	ifgr_len;
	union {
		char		ifgru_group[IFNAMSIZ];
		uint32_t	ifgru_groups;
	} ifgr_ifgru;
};
145
/*
 * COMPAT_FREEBSD32 form of a media request; the list pointer is carried as a
 * 32-bit integer.
 */
struct ifmediareq32 {
	char		ifm_name[IFNAMSIZ];
	int		ifm_current;
	int		ifm_mask;
	int		ifm_status;
	int		ifm_active;
	int		ifm_count;
	uint32_t	ifm_ulist;	/* (int *) */
};
/*
 * The SIOCGIFMEDIA / SIOCGIFXMEDIA commands re-encoded with _IOC_NEWTYPE for
 * struct ifmediareq32.
 */
#define	SIOCGIFMEDIA32	_IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
#define	SIOCGIFXMEDIA32	_IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
157
/*
 * With COMPAT_FREEBSD32 this expands to an extra case label for cmd
 * re-encoded for struct ifgroupreq32; without it, it expands to nothing.
 */
#define	_CASE_IOC_IFGROUPREQ_32(cmd)				\
    case _IOC_NEWTYPE((cmd), struct ifgroupreq32):
#else /* !COMPAT_FREEBSD32 */
#define _CASE_IOC_IFGROUPREQ_32(cmd)
#endif /* !COMPAT_FREEBSD32 */

/*
 * Case label(s) for a group-request ioctl: the optional 32-bit compat label
 * followed by "case (cmd)".  The user of the macro supplies the final colon.
 */
#define CASE_IOC_IFGROUPREQ(cmd)	\
    _CASE_IOC_IFGROUPREQ_32(cmd)	\
    case (cmd)
167
/*
 * Overlay of the native struct ifreq with its 32-bit compat form (the latter
 * only when COMPAT_FREEBSD32 is defined).
 */
union ifreq_union {
	struct ifreq	ifr;
#ifdef COMPAT_FREEBSD32
	struct ifreq32	ifr32;
#endif
};
174
/*
 * Overlay of the native struct ifgroupreq with its 32-bit compat form (the
 * latter only when COMPAT_FREEBSD32 is defined).
 */
union ifgroupreq_union {
	struct ifgroupreq ifgr;
#ifdef COMPAT_FREEBSD32
	struct ifgroupreq32 ifgr32;
#endif
};
181
/* sysctl nodes under _net (link) and _net_link (generic). */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Exposes ifqmaxlen (defined further down in this file) as a RDTUN sysctl. */
SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
    &ifqmaxlen, 0, "max send queue size");

/* Log link state change events */
static int log_link_state_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

/* Log promiscuous mode change events */
static int log_promisc_mode_change = 1;

/*
 * NOTE(review): the value argument here is 1 while the sibling SYSCTL_INT
 * declarations pass 0; confirm whether that difference is intentional.
 */
SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
	&log_promisc_mode_change, 1,
	"log promiscuous mode change events");

/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
	&ifdescr_maxlen, 0,
	"administrative maximum length for interface description");

/* malloc type used for if_description buffers (see if_free_internal()). */
static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

/* global sx for non-critical path ifdescr */
static struct sx ifdescr_sx;
SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
213
/*
 * Function-pointer hooks.  Apart from tbr_dequeue_ptr none has an explicit
 * initializer, so as file-scope objects they start out NULL.
 * NOTE(review): presumably each is filled in by the corresponding module
 * (bridge, netgraph, lagg, CARP, ALTQ) when it loads -- confirm.
 */
void	(*bridge_linkstate_p)(struct ifnet *ifp);
void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void	(*carp_linkstate_p)(struct ifnet *ifp);
void	(*carp_demote_adj_p)(int, char *);
int	(*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *sa);
int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
int	(*carp_attach_p)(struct ifaddr *, int);
void	(*carp_detach_p)(struct ifaddr *, bool);
#endif
#ifdef INET
int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
    const struct in6_addr *taddr);
#endif

struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
239
240/*
241 * XXX: Style; these should be sorted alphabetically, and unprototyped
242 * static functions should be prototyped. Currently they are sorted by
243 * declaration order.
244 */
245static void	if_attachdomain(void *);
246static void	if_attachdomain1(struct ifnet *);
247static int	ifconf(u_long, caddr_t);
248static void	if_freemulti(struct ifmultiaddr *);
249static void	if_grow(void);
250static void	if_input_default(struct ifnet *, struct mbuf *);
251static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
252static void	if_route(struct ifnet *, int flag, int fam);
253static int	if_setflag(struct ifnet *, int, int, int *, int);
254static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
255static void	if_unroute(struct ifnet *, int flag, int fam);
256static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
257static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
258static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
259static void	do_link_state_change(void *, int);
260static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
261static int	if_getgroupmembers(struct ifgroupreq *);
262static void	if_delgroups(struct ifnet *);
263static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
264static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
265#ifdef VIMAGE
266static void	if_vmove(struct ifnet *, struct vnet *);
267#endif
268
269#ifdef INET6
270/*
271 * XXX: declare here to avoid to include many inet6 related files..
272 * should be more generalized?
273 */
274extern void	nd6_setmtu(struct ifnet *);
275#endif
276
277/* ipsec helper hooks */
/* Per-vnet ipsec helper hook heads, one per HHOOK_IPSEC_COUNT slot. */
VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);

/*
 * Highest interface index in use: raised by ifindex_alloc() and lowered by
 * ifindex_free_locked() when the top slots become empty.
 */
VNET_DEFINE(int, if_index);
int	ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);

/* Index limit for ifindex_table; starts at 8 and is doubled by if_grow(). */
static VNET_DEFINE(int, if_indexlim) = 8;

/* Table of ifnet by index. */
VNET_DEFINE(struct ifnet **, ifindex_table);

#define	V_if_indexlim		VNET(if_indexlim)
#define	V_ifindex_table		VNET(ifindex_table)
293
/*
 * The global network interface list (V_ifnet) and related state (such as
 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
 * an rwlock.  Either may be acquired shared to stabilize the list, but both
 * must be acquired writable to modify the list.  This model allows us to
 * both stabilize the interface list during interrupt thread processing, but
 * also to stabilize it over long-running ioctls, without introducing priority
 * inversions and deadlocks.
 */
/* Both locks are initialized with their respective *_RECURSE flag. */
struct rwlock ifnet_rwlock;
RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
struct sx ifnet_sxlock;
SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);

/*
 * The allocation of network interfaces is a rather non-atomic affair; we
 * need to select an index before we are ready to expose the interface for
 * use, so will use this pointer value to indicate reservation.
 */
#define	IFNET_HOLD	(void *)(uintptr_t)(-1)

/*
 * Per-interface-type routines that allocate and free the layer 2 common
 * structure.  They are indexed by the u_char interface type (hence 256
 * entries); a NULL entry means no routine is registered for that type.
 */
static	if_com_alloc_t *if_com_alloc[256];
static	if_com_free_t *if_com_free[256];

/* malloc types used by this file. */
static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
321
322struct ifnet *
323ifnet_byindex_locked(u_short idx)
324{
325
326	if (idx > V_if_index)
327		return (NULL);
328	if (V_ifindex_table[idx] == IFNET_HOLD)
329		return (NULL);
330	return (V_ifindex_table[idx]);
331}
332
333struct ifnet *
334ifnet_byindex(u_short idx)
335{
336	struct ifnet *ifp;
337
338	IFNET_RLOCK_NOSLEEP();
339	ifp = ifnet_byindex_locked(idx);
340	IFNET_RUNLOCK_NOSLEEP();
341	return (ifp);
342}
343
344struct ifnet *
345ifnet_byindex_ref(u_short idx)
346{
347	struct ifnet *ifp;
348
349	IFNET_RLOCK_NOSLEEP();
350	ifp = ifnet_byindex_locked(idx);
351	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
352		IFNET_RUNLOCK_NOSLEEP();
353		return (NULL);
354	}
355	if_ref(ifp);
356	IFNET_RUNLOCK_NOSLEEP();
357	return (ifp);
358}
359
360/*
361 * Allocate an ifindex array entry; return 0 on success or an error on
362 * failure.
363 */
364static u_short
365ifindex_alloc(void)
366{
367	u_short idx;
368
369	IFNET_WLOCK_ASSERT();
370retry:
371	/*
372	 * Try to find an empty slot below V_if_index.  If we fail, take the
373	 * next slot.
374	 */
375	for (idx = 1; idx <= V_if_index; idx++) {
376		if (V_ifindex_table[idx] == NULL)
377			break;
378	}
379
380	/* Catch if_index overflow. */
381	if (idx >= V_if_indexlim) {
382		if_grow();
383		goto retry;
384	}
385	if (idx > V_if_index)
386		V_if_index = idx;
387	return (idx);
388}
389
390static void
391ifindex_free_locked(u_short idx)
392{
393
394	IFNET_WLOCK_ASSERT();
395
396	V_ifindex_table[idx] = NULL;
397	while (V_if_index > 0 &&
398	    V_ifindex_table[V_if_index] == NULL)
399		V_if_index--;
400}
401
402static void
403ifindex_free(u_short idx)
404{
405
406	IFNET_WLOCK();
407	ifindex_free_locked(idx);
408	IFNET_WUNLOCK();
409}
410
411static void
412ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
413{
414
415	IFNET_WLOCK_ASSERT();
416
417	V_ifindex_table[idx] = ifp;
418}
419
420static void
421ifnet_setbyindex(u_short idx, struct ifnet *ifp)
422{
423
424	IFNET_WLOCK();
425	ifnet_setbyindex_locked(idx, ifp);
426	IFNET_WUNLOCK();
427}
428
429struct ifaddr *
430ifaddr_byindex(u_short idx)
431{
432	struct ifnet *ifp;
433	struct ifaddr *ifa = NULL;
434
435	IFNET_RLOCK_NOSLEEP();
436	ifp = ifnet_byindex_locked(idx);
437	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
438		ifa_ref(ifa);
439	IFNET_RUNLOCK_NOSLEEP();
440	return (ifa);
441}
442
443/*
444 * Network interface utility routines.
445 *
446 * Routines with ifa_ifwith* names take sockaddr *'s as
447 * parameters.
448 */
449
450static void
451vnet_if_init(const void *unused __unused)
452{
453
454	TAILQ_INIT(&V_ifnet);
455	TAILQ_INIT(&V_ifg_head);
456	IFNET_WLOCK();
457	if_grow();				/* create initial table */
458	IFNET_WUNLOCK();
459	vnet_if_clone_init();
460}
461VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
462    NULL);
463
464#ifdef VIMAGE
465static void
466vnet_if_uninit(const void *unused __unused)
467{
468
469	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
470	    "not empty", __func__, __LINE__, &V_ifnet));
471	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
472	    "not empty", __func__, __LINE__, &V_ifg_head));
473
474	free((caddr_t)V_ifindex_table, M_IFNET);
475}
476VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
477    vnet_if_uninit, NULL);
478
479static void
480vnet_if_return(const void *unused __unused)
481{
482	struct ifnet *ifp, *nifp;
483
484	/* Return all inherited interfaces to their parent vnets. */
485	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
486		if (ifp->if_home_vnet != ifp->if_vnet)
487			if_vmove(ifp, ifp->if_home_vnet);
488	}
489}
490VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
491    vnet_if_return, NULL);
492#endif
493
494static void
495if_grow(void)
496{
497	int oldlim;
498	u_int n;
499	struct ifnet **e;
500
501	IFNET_WLOCK_ASSERT();
502	oldlim = V_if_indexlim;
503	IFNET_WUNLOCK();
504	n = (oldlim << 1) * sizeof(*e);
505	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
506	IFNET_WLOCK();
507	if (V_if_indexlim != oldlim) {
508		free(e, M_IFNET);
509		return;
510	}
511	if (V_ifindex_table != NULL) {
512		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
513		free((caddr_t)V_ifindex_table, M_IFNET);
514	}
515	V_if_indexlim <<= 1;
516	V_ifindex_table = e;
517}
518
519/*
520 * Allocate a struct ifnet and an index for an interface.  A layer 2
521 * common structure will also be allocated if an allocation routine is
522 * registered for the passed type.
523 */
524struct ifnet *
525if_alloc(u_char type)
526{
527	struct ifnet *ifp;
528	u_short idx;
529
530	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
531	IFNET_WLOCK();
532	idx = ifindex_alloc();
533	ifnet_setbyindex_locked(idx, IFNET_HOLD);
534	IFNET_WUNLOCK();
535	ifp->if_index = idx;
536	ifp->if_type = type;
537	ifp->if_alloctype = type;
538#ifdef VIMAGE
539	ifp->if_vnet = curvnet;
540#endif
541	if (if_com_alloc[type] != NULL) {
542		ifp->if_l2com = if_com_alloc[type](type, ifp);
543		if (ifp->if_l2com == NULL) {
544			free(ifp, M_IFNET);
545			ifindex_free(idx);
546			return (NULL);
547		}
548	}
549
550	IF_ADDR_LOCK_INIT(ifp);
551	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
552	ifp->if_afdata_initialized = 0;
553	IF_AFDATA_LOCK_INIT(ifp);
554	TAILQ_INIT(&ifp->if_addrhead);
555	TAILQ_INIT(&ifp->if_multiaddrs);
556	TAILQ_INIT(&ifp->if_groups);
557#ifdef MAC
558	mac_ifnet_init(ifp);
559#endif
560	ifq_init(&ifp->if_snd, ifp);
561
562	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
563	for (int i = 0; i < IFCOUNTERS; i++)
564		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
565	ifp->if_get_counter = if_get_counter_default;
566	ifp->if_pcp = IFNET_PCP_NONE;
567	ifnet_setbyindex(ifp->if_index, ifp);
568	return (ifp);
569}
570
571/*
572 * Do the actual work of freeing a struct ifnet, and layer 2 common
573 * structure.  This call is made when the last reference to an
574 * interface is released.
575 */
576static void
577if_free_internal(struct ifnet *ifp)
578{
579
580	KASSERT((ifp->if_flags & IFF_DYING),
581	    ("if_free_internal: interface not dying"));
582
583	if (if_com_free[ifp->if_alloctype] != NULL)
584		if_com_free[ifp->if_alloctype](ifp->if_l2com,
585		    ifp->if_alloctype);
586
587#ifdef MAC
588	mac_ifnet_destroy(ifp);
589#endif /* MAC */
590	if (ifp->if_description != NULL)
591		free(ifp->if_description, M_IFDESCR);
592	IF_AFDATA_DESTROY(ifp);
593	IF_ADDR_LOCK_DESTROY(ifp);
594	ifq_delete(&ifp->if_snd);
595
596	for (int i = 0; i < IFCOUNTERS; i++)
597		counter_u64_free(ifp->if_counters[i]);
598
599	free(ifp, M_IFNET);
600}
601
602/*
603 * Deregister an interface and free the associated storage.
604 */
605void
606if_free(struct ifnet *ifp)
607{
608
609	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
610
611	CURVNET_SET_QUIET(ifp->if_vnet);
612	IFNET_WLOCK();
613	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
614	    ("%s: freeing unallocated ifnet", ifp->if_xname));
615
616	ifindex_free_locked(ifp->if_index);
617	IFNET_WUNLOCK();
618
619	if (refcount_release(&ifp->if_refcount))
620		if_free_internal(ifp);
621	CURVNET_RESTORE();
622}
623
624/*
625 * Interfaces to keep an ifnet type-stable despite the possibility of the
626 * driver calling if_free().  If there are additional references, we defer
627 * freeing the underlying data structure.
628 */
629void
630if_ref(struct ifnet *ifp)
631{
632
633	/* We don't assert the ifnet list lock here, but arguably should. */
634	refcount_acquire(&ifp->if_refcount);
635}
636
637void
638if_rele(struct ifnet *ifp)
639{
640
641	if (!refcount_release(&ifp->if_refcount))
642		return;
643	if_free_internal(ifp);
644}
645
646void
647ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
648{
649
650	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
651
652	if (ifq->ifq_maxlen == 0)
653		ifq->ifq_maxlen = ifqmaxlen;
654
655	ifq->altq_type = 0;
656	ifq->altq_disc = NULL;
657	ifq->altq_flags &= ALTQF_CANTCHANGE;
658	ifq->altq_tbr  = NULL;
659	ifq->altq_ifp  = ifp;
660}
661
662void
663ifq_delete(struct ifaltq *ifq)
664{
665	mtx_destroy(&ifq->ifq_mtx);
666}
667
668/*
669 * Perform generic interface initialization tasks and attach the interface
670 * to the list of "active" interfaces.  If vmove flag is set on entry
671 * to if_attach_internal(), perform only a limited subset of initialization
672 * tasks, given that we are moving from one vnet to another an ifnet which
673 * has already been fully initialized.
674 *
675 * Note that if_detach_internal() removes group membership unconditionally
676 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
677 * Thus, when if_vmove() is applied to a cloned interface, group membership
678 * is lost while a cloned one always joins a group whose name is
679 * ifc->ifc_name.  To recover this after if_detach_internal() and
680 * if_attach_internal(), the cloner should be specified to
681 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
682 * attempts to join a group whose name is ifc->ifc_name.
683 *
684 * XXX:
685 *  - The decision to return void and thus require this function to
686 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
689 */
690void
691if_attach(struct ifnet *ifp)
692{
693
694	if_attach_internal(ifp, 0, NULL);
695}
696
697/*
698 * Compute the least common TSO limit.
699 */
700void
701if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
702{
703	/*
704	 * 1) If there is no limit currently, take the limit from
705	 * the network adapter.
706	 *
707	 * 2) If the network adapter has a limit below the current
708	 * limit, apply it.
709	 */
710	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
711	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
712		pmax->tsomaxbytes = ifp->if_hw_tsomax;
713	}
714	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
715	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
716		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
717	}
718	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
719	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
720		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
721	}
722}
723
724/*
725 * Update TSO limit of a network adapter.
726 *
727 * Returns zero if no change. Else non-zero.
728 */
729int
730if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
731{
732	int retval = 0;
733	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
734		ifp->if_hw_tsomax = pmax->tsomaxbytes;
735		retval++;
736	}
737	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
738		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
739		retval++;
740	}
741	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
742		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
743		retval++;
744	}
745	return (retval);
746}
747
/*
 * Worker behind if_attach().
 *
 * ifp   - interface obtained from if_alloc(); the function panics if its
 *         index is 0 or the index table does not point back at it.
 * vmove - zero for a first-time attach (if_attach() passes 0).  When
 *         non-zero, the link-level address creation, the MAC hook and the
 *         TSO defaults are skipped; under VIMAGE the existing AF_LINK
 *         addresses just get their sdl_index refreshed.
 * ifc   - if non-NULL and vmove is set, the cloner whose group the
 *         interface re-joins through if_clone_addgroup().
 *
 * In both modes the interface joins IFG_ALL, gets default transmit/qflush,
 * input and requestencap handlers where none are set, is appended to
 * V_ifnet, and its arrival is announced.
 */
static void
if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	/* The ifnet must be the one registered under its own index. */
	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
		    ifp->if_xname);

#ifdef VIMAGE
	ifp->if_vnet = curvnet;
	/* The home vnet is recorded once, the first time it is unset. */
	if (ifp->if_home_vnet == NULL)
		ifp->if_home_vnet = curvnet;
#endif

	if_addgroup(ifp, IFG_ALL);

	/* Restore group membership for cloned interfaces. */
	if (vmove && ifc != NULL)
		if_clone_addgroup(ifp, ifc);

	getmicrotime(&ifp->if_lastchange);
	ifp->if_epoch = time_uptime;

	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	    ("transmit and qflush must both either be set or both be NULL"));
	/* Install the default handlers for anything the driver left unset. */
	if (ifp->if_transmit == NULL) {
		ifp->if_transmit = if_transmit;
		ifp->if_qflush = if_qflush;
	}
	if (ifp->if_input == NULL)
		ifp->if_input = if_input_default;

	if (ifp->if_requestencap == NULL)
		ifp->if_requestencap = if_requestencap_default;

	if (!vmove) {
#ifdef MAC
		mac_ifnet_create(ifp);
#endif

		/*
		 * Create a Link Level name for this device.
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we
		 * can do a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/*
		 * One allocation holds the ifaddr followed by two
		 * sockaddr_dl areas of socksize bytes each: the address
		 * first, then the netmask.
		 */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = ifa_alloc(ifasize, M_WAITOK);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_rtrequest = link_rtrequest;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* Step to the second sockaddr_dl: the netmask. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* The mask is 0xff for each byte of the interface name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		/* Reliably crash if used uninitialized. */
		ifp->if_broadcastaddr = NULL;

		/* Ethernet interfaces get a zeroed if_hw_addr buffer. */
		if (ifp->if_type == IFT_ETHER) {
			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
			    M_WAITOK | M_ZERO);
		}

#if defined(INET) || defined(INET6)
		/* Use defaults for TSO, if nothing is set */
		if (ifp->if_hw_tsomax == 0 &&
		    ifp->if_hw_tsomaxsegcount == 0 &&
		    ifp->if_hw_tsomaxsegsize == 0) {
			/*
			 * The TSO defaults needs to be such that an
			 * NFS mbuf list of 35 mbufs totalling just
			 * below 64K works and that a chain of mbufs
			 * can be defragged into at most 32 segments:
			 */
			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			ifp->if_hw_tsomaxsegcount = 35;
			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */

			/* XXX some drivers set IFCAP_TSO after ethernet attach */
			if (ifp->if_capabilities & IFCAP_TSO) {
				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
				    ifp->if_hw_tsomax,
				    ifp->if_hw_tsomaxsegcount,
				    ifp->if_hw_tsomaxsegsize);
			}
		}
#endif
	}
#ifdef VIMAGE
	else {
		/*
		 * Update the interface index in the link layer address
		 * of the interface.
		 */
		for (ifa = ifp->if_addr; ifa != NULL;
		    ifa = TAILQ_NEXT(ifa, ifa_link)) {
			if (ifa->ifa_addr->sa_family == AF_LINK) {
				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
				sdl->sdl_index = ifp->if_index;
			}
		}
	}
#endif

	/* Publish the interface on the vnet's interface list. */
	IFNET_WLOCK();
	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
#ifdef VIMAGE
	curvnet->vnet_ifcnt++;
#endif
	IFNET_WUNLOCK();

	/*
	 * NOTE(review): domain_init_status >= 2 presumably means domain
	 * initialization has progressed far enough to attach per-domain
	 * data -- confirm against the definition of domain_init_status.
	 */
	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	/* devctl is only notified for interfaces in the default vnet. */
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
892
893static void
894if_attachdomain(void *dummy)
895{
896	struct ifnet *ifp;
897
898	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
899		if_attachdomain1(ifp);
900}
901SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
902    if_attachdomain, NULL);
903
904static void
905if_attachdomain1(struct ifnet *ifp)
906{
907	struct domain *dp;
908
909	/*
910	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
911	 * cannot lock ifp->if_afdata initialization, entirely.
912	 */
913	IF_AFDATA_LOCK(ifp);
914	if (ifp->if_afdata_initialized >= domain_init_status) {
915		IF_AFDATA_UNLOCK(ifp);
916		log(LOG_WARNING, "%s called more than once on %s\n",
917		    __func__, ifp->if_xname);
918		return;
919	}
920	ifp->if_afdata_initialized = domain_init_status;
921	IF_AFDATA_UNLOCK(ifp);
922
923	/* address family dependent data region */
924	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
925	for (dp = domains; dp; dp = dp->dom_next) {
926		if (dp->dom_ifattach)
927			ifp->if_afdata[dp->dom_family] =
928			    (*dp->dom_ifattach)(ifp);
929	}
930}
931
932/*
933 * Remove any unicast or broadcast network addresses from an interface.
934 */
935void
936if_purgeaddrs(struct ifnet *ifp)
937{
938	struct ifaddr *ifa, *next;
939
940	/* XXX cannot hold IF_ADDR_WLOCK over called functions. */
941	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
942		if (ifa->ifa_addr->sa_family == AF_LINK)
943			continue;
944#ifdef INET
945		/* XXX: Ugly!! ad hoc just for INET */
946		if (ifa->ifa_addr->sa_family == AF_INET) {
947			struct ifaliasreq ifr;
948
949			bzero(&ifr, sizeof(ifr));
950			ifr.ifra_addr = *ifa->ifa_addr;
951			if (ifa->ifa_dstaddr)
952				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
953			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
954			    NULL) == 0)
955				continue;
956		}
957#endif /* INET */
958#ifdef INET6
959		if (ifa->ifa_addr->sa_family == AF_INET6) {
960			in6_purgeaddr(ifa);
961			/* ifp_addrhead is already updated */
962			continue;
963		}
964#endif /* INET6 */
965		IF_ADDR_WLOCK(ifp);
966		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
967		IF_ADDR_WUNLOCK(ifp);
968		ifa_free(ifa);
969	}
970}
971
972/*
973 * Remove any multicast network addresses from an interface when an ifnet
974 * is going away.
975 */
976static void
977if_purgemaddrs(struct ifnet *ifp)
978{
979	struct ifmultiaddr *ifma;
980	struct ifmultiaddr *next;
981
982	IF_ADDR_WLOCK(ifp);
983	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
984		if_delmulti_locked(ifp, ifma, 1);
985	IF_ADDR_WUNLOCK(ifp);
986}
987
988/*
989 * Detach an interface, removing it from the list of "active" interfaces.
990 * If vmove flag is set on entry to if_detach_internal(), perform only a
991 * limited subset of cleanup tasks, given that we are moving an ifnet from
992 * one vnet to another, where it must be fully operational.
993 *
994 * XXXRW: There are some significant questions about event ordering, and
995 * how to prevent things from starting to use the interface during detach.
996 */
997void
998if_detach(struct ifnet *ifp)
999{
1000
1001	CURVNET_SET_QUIET(ifp->if_vnet);
1002	if_detach_internal(ifp, 0, NULL);
1003	CURVNET_RESTORE();
1004}
1005
1006/*
1007 * The vmove flag, if set, indicates that we are called from a callpath
1008 * that is moving an interface to a different vnet instance.
1009 *
1010 * The shutdown flag, if set, indicates that we are called in the
1011 * process of shutting down a vnet instance.  Currently only the
 * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
 * on a vnet instance shutdown without this flag being set, e.g., when
 * the cloned interfaces are destroyed as the first step of teardown.
1015 */
1016static int
1017if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
1018{
1019	struct ifaddr *ifa;
1020	int i;
1021	struct domain *dp;
1022 	struct ifnet *iter;
1023 	int found = 0;
1024#ifdef VIMAGE
1025	int shutdown;
1026
1027	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1028		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1029#endif
1030	IFNET_WLOCK();
1031	TAILQ_FOREACH(iter, &V_ifnet, if_link)
1032		if (iter == ifp) {
1033			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
1034			found = 1;
1035			break;
1036		}
1037	IFNET_WUNLOCK();
1038	if (!found) {
1039		/*
1040		 * While we would want to panic here, we cannot
1041		 * guarantee that the interface is indeed still on
1042		 * the list given we don't hold locks all the way.
1043		 */
1044		return (ENOENT);
1045#if 0
1046		if (vmove)
1047			panic("%s: ifp=%p not on the ifnet tailq %p",
1048			    __func__, ifp, &V_ifnet);
1049		else
1050			return; /* XXX this should panic as well? */
1051#endif
1052	}
1053
1054	/*
1055	 * At this point we know the interface still was on the ifnet list
1056	 * and we removed it so we are in a stable state.
1057	 */
1058#ifdef VIMAGE
1059	curvnet->vnet_ifcnt--;
1060#endif
1061
1062	/*
1063	 * In any case (destroy or vmove) detach us from the groups
1064	 * and remove/wait for pending events on the taskq.
1065	 * XXX-BZ in theory an interface could still enqueue a taskq change?
1066	 */
1067	if_delgroups(ifp);
1068
1069	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
1070
1071	/*
1072	 * Check if this is a cloned interface or not. Must do even if
1073	 * shutting down as a if_vmove_reclaim() would move the ifp and
1074	 * the if_clone_addgroup() will have a corrupted string overwise
1075	 * from a gibberish pointer.
1076	 */
1077	if (vmove && ifcp != NULL)
1078		*ifcp = if_clone_findifc(ifp);
1079
1080	if_down(ifp);
1081
1082#ifdef VIMAGE
1083	/*
1084	 * On VNET shutdown abort here as the stack teardown will do all
1085	 * the work top-down for us.
1086	 */
1087	if (shutdown) {
1088		/* Give interface users the chance to clean up. */
1089		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1090
1091		/*
1092		 * In case of a vmove we are done here without error.
1093		 * If we would signal an error it would lead to the same
1094		 * abort as if we did not find the ifnet anymore.
1095		 * if_detach() calls us in void context and does not care
1096		 * about an early abort notification, so life is splendid :)
1097		 */
1098		goto finish_vnet_shutdown;
1099	}
1100#endif
1101
1102	/*
1103	 * At this point we are not tearing down a VNET and are either
1104	 * going to destroy or vmove the interface and have to cleanup
1105	 * accordingly.
1106	 */
1107
1108	/*
1109	 * Remove routes and flush queues.
1110	 */
1111#ifdef ALTQ
1112	if (ALTQ_IS_ENABLED(&ifp->if_snd))
1113		altq_disable(&ifp->if_snd);
1114	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1115		altq_detach(&ifp->if_snd);
1116#endif
1117
1118	if_purgeaddrs(ifp);
1119
1120#ifdef INET
1121	in_ifdetach(ifp);
1122#endif
1123
1124#ifdef INET6
1125	/*
1126	 * Remove all IPv6 kernel structs related to ifp.  This should be done
1127	 * before removing routing entries below, since IPv6 interface direct
1128	 * routes are expected to be removed by the IPv6-specific kernel API.
1129	 * Otherwise, the kernel will detect some inconsistency and bark it.
1130	 */
1131	in6_ifdetach(ifp);
1132#endif
1133	if_purgemaddrs(ifp);
1134
1135	/* Announce that the interface is gone. */
1136	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1137	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1138	if (IS_DEFAULT_VNET(curvnet))
1139		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
1140
1141	if (!vmove) {
1142		/*
1143		 * Prevent further calls into the device driver via ifnet.
1144		 */
1145		if_dead(ifp);
1146
1147		/*
1148		 * Remove link ifaddr pointer and maybe decrement if_index.
1149		 * Clean up all addresses.
1150		 */
1151		free(ifp->if_hw_addr, M_IFADDR);
1152		ifp->if_hw_addr = NULL;
1153		ifp->if_addr = NULL;
1154
1155		/* We can now free link ifaddr. */
1156		IF_ADDR_WLOCK(ifp);
1157		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
1158			ifa = TAILQ_FIRST(&ifp->if_addrhead);
1159			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
1160			IF_ADDR_WUNLOCK(ifp);
1161			ifa_free(ifa);
1162		} else
1163			IF_ADDR_WUNLOCK(ifp);
1164	}
1165
1166	rt_flushifroutes(ifp);
1167
1168#ifdef VIMAGE
1169finish_vnet_shutdown:
1170#endif
1171	/*
1172	 * We cannot hold the lock over dom_ifdetach calls as they might
1173	 * sleep, for example trying to drain a callout, thus open up the
1174	 * theoretical race with re-attaching.
1175	 */
1176	IF_AFDATA_LOCK(ifp);
1177	i = ifp->if_afdata_initialized;
1178	ifp->if_afdata_initialized = 0;
1179	IF_AFDATA_UNLOCK(ifp);
1180	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
1181		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
1182			(*dp->dom_ifdetach)(ifp,
1183			    ifp->if_afdata[dp->dom_family]);
1184			ifp->if_afdata[dp->dom_family] = NULL;
1185		}
1186	}
1187
1188	return (0);
1189}
1190
1191#ifdef VIMAGE
1192/*
1193 * if_vmove() performs a limited version of if_detach() in current
1194 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
1195 * An attempt is made to shrink if_index in current vnet, find an
1196 * unused if_index in target vnet and calls if_grow() if necessary,
1197 * and finally find an unused if_xname for the target vnet.
1198 */
1199static void
1200if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
1201{
1202	struct if_clone *ifc;
1203	u_int bif_dlt, bif_hdrlen;
1204	int rc;
1205
1206 	/*
1207	 * if_detach_internal() will call the eventhandler to notify
1208	 * interface departure.  That will detach if_bpf.  We need to
1209	 * safe the dlt and hdrlen so we can re-attach it later.
1210	 */
1211	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
1212
1213	/*
1214	 * Detach from current vnet, but preserve LLADDR info, do not
1215	 * mark as dead etc. so that the ifnet can be reattached later.
1216	 * If we cannot find it, we lost the race to someone else.
1217	 */
1218	rc = if_detach_internal(ifp, 1, &ifc);
1219	if (rc != 0)
1220		return;
1221
1222	/*
1223	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
1224	 * the if_index for that vnet if possible.
1225	 *
1226	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
1227	 * or we'd lock on one vnet and unlock on another.
1228	 */
1229	IFNET_WLOCK();
1230	ifindex_free_locked(ifp->if_index);
1231	IFNET_WUNLOCK();
1232
1233	/*
1234	 * Perform interface-specific reassignment tasks, if provided by
1235	 * the driver.
1236	 */
1237	if (ifp->if_reassign != NULL)
1238		ifp->if_reassign(ifp, new_vnet, NULL);
1239
1240	/*
1241	 * Switch to the context of the target vnet.
1242	 */
1243	CURVNET_SET_QUIET(new_vnet);
1244
1245	IFNET_WLOCK();
1246	ifp->if_index = ifindex_alloc();
1247	ifnet_setbyindex_locked(ifp->if_index, ifp);
1248	IFNET_WUNLOCK();
1249
1250	if_attach_internal(ifp, 1, ifc);
1251
1252	if (ifp->if_bpf == NULL)
1253		bpfattach(ifp, bif_dlt, bif_hdrlen);
1254
1255	CURVNET_RESTORE();
1256}
1257
1258/*
1259 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1260 */
1261static int
1262if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1263{
1264	struct prison *pr;
1265	struct ifnet *difp;
1266	int shutdown;
1267
1268	/* Try to find the prison within our visibility. */
1269	sx_slock(&allprison_lock);
1270	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1271	sx_sunlock(&allprison_lock);
1272	if (pr == NULL)
1273		return (ENXIO);
1274	prison_hold_locked(pr);
1275	mtx_unlock(&pr->pr_mtx);
1276
1277	/* Do not try to move the iface from and to the same prison. */
1278	if (pr->pr_vnet == ifp->if_vnet) {
1279		prison_free(pr);
1280		return (EEXIST);
1281	}
1282
1283	/* Make sure the named iface does not exists in the dst. prison/vnet. */
1284	/* XXX Lock interfaces to avoid races. */
1285	CURVNET_SET_QUIET(pr->pr_vnet);
1286	difp = ifunit(ifname);
1287	if (difp != NULL) {
1288		CURVNET_RESTORE();
1289		prison_free(pr);
1290		return (EEXIST);
1291	}
1292
1293	/* Make sure the VNET is stable. */
1294	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1295		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1296	if (shutdown) {
1297		CURVNET_RESTORE();
1298		prison_free(pr);
1299		return (EBUSY);
1300	}
1301	CURVNET_RESTORE();
1302
1303	/* Move the interface into the child jail/vnet. */
1304	if_vmove(ifp, pr->pr_vnet);
1305
1306	/* Report the new if_xname back to the userland. */
1307	sprintf(ifname, "%s", ifp->if_xname);
1308
1309	prison_free(pr);
1310	return (0);
1311}
1312
1313static int
1314if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1315{
1316	struct prison *pr;
1317	struct vnet *vnet_dst;
1318	struct ifnet *ifp;
1319 	int shutdown;
1320
1321	/* Try to find the prison within our visibility. */
1322	sx_slock(&allprison_lock);
1323	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1324	sx_sunlock(&allprison_lock);
1325	if (pr == NULL)
1326		return (ENXIO);
1327	prison_hold_locked(pr);
1328	mtx_unlock(&pr->pr_mtx);
1329
1330	/* Make sure the named iface exists in the source prison/vnet. */
1331	CURVNET_SET(pr->pr_vnet);
1332	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1333	if (ifp == NULL) {
1334		CURVNET_RESTORE();
1335		prison_free(pr);
1336		return (ENXIO);
1337	}
1338
1339	/* Do not try to move the iface from and to the same prison. */
1340	vnet_dst = TD_TO_VNET(td);
1341	if (vnet_dst == ifp->if_vnet) {
1342		CURVNET_RESTORE();
1343		prison_free(pr);
1344		return (EEXIST);
1345	}
1346
1347	/* Make sure the VNET is stable. */
1348	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1349		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1350	if (shutdown) {
1351		CURVNET_RESTORE();
1352		prison_free(pr);
1353		return (EBUSY);
1354	}
1355
1356	/* Get interface back from child jail/vnet. */
1357	if_vmove(ifp, vnet_dst);
1358	CURVNET_RESTORE();
1359
1360	/* Report the new if_xname back to the userland. */
1361	sprintf(ifname, "%s", ifp->if_xname);
1362
1363	prison_free(pr);
1364	return (0);
1365}
1366#endif /* VIMAGE */
1367
1368/*
1369 * Add a group to an interface
1370 */
1371int
1372if_addgroup(struct ifnet *ifp, const char *groupname)
1373{
1374	struct ifg_list		*ifgl;
1375	struct ifg_group	*ifg = NULL;
1376	struct ifg_member	*ifgm;
1377	int 			 new = 0;
1378
1379	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1380	    groupname[strlen(groupname) - 1] <= '9')
1381		return (EINVAL);
1382
1383	IFNET_WLOCK();
1384	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1385		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1386			IFNET_WUNLOCK();
1387			return (EEXIST);
1388		}
1389
1390	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1391	    M_NOWAIT)) == NULL) {
1392	    	IFNET_WUNLOCK();
1393		return (ENOMEM);
1394	}
1395
1396	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1397	    M_TEMP, M_NOWAIT)) == NULL) {
1398		free(ifgl, M_TEMP);
1399		IFNET_WUNLOCK();
1400		return (ENOMEM);
1401	}
1402
1403	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1404		if (!strcmp(ifg->ifg_group, groupname))
1405			break;
1406
1407	if (ifg == NULL) {
1408		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1409		    M_TEMP, M_NOWAIT)) == NULL) {
1410			free(ifgl, M_TEMP);
1411			free(ifgm, M_TEMP);
1412			IFNET_WUNLOCK();
1413			return (ENOMEM);
1414		}
1415		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1416		ifg->ifg_refcnt = 0;
1417		TAILQ_INIT(&ifg->ifg_members);
1418		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1419		new = 1;
1420	}
1421
1422	ifg->ifg_refcnt++;
1423	ifgl->ifgl_group = ifg;
1424	ifgm->ifgm_ifp = ifp;
1425
1426	IF_ADDR_WLOCK(ifp);
1427	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1428	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1429	IF_ADDR_WUNLOCK(ifp);
1430
1431	IFNET_WUNLOCK();
1432
1433	if (new)
1434		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1435	EVENTHANDLER_INVOKE(group_change_event, groupname);
1436
1437	return (0);
1438}
1439
1440/*
1441 * Remove a group from an interface
1442 */
1443int
1444if_delgroup(struct ifnet *ifp, const char *groupname)
1445{
1446	struct ifg_list		*ifgl;
1447	struct ifg_member	*ifgm;
1448
1449	IFNET_WLOCK();
1450	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1451		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1452			break;
1453	if (ifgl == NULL) {
1454		IFNET_WUNLOCK();
1455		return (ENOENT);
1456	}
1457
1458	IF_ADDR_WLOCK(ifp);
1459	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1460	IF_ADDR_WUNLOCK(ifp);
1461
1462	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1463		if (ifgm->ifgm_ifp == ifp)
1464			break;
1465
1466	if (ifgm != NULL) {
1467		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1468		free(ifgm, M_TEMP);
1469	}
1470
1471	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1472		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1473		IFNET_WUNLOCK();
1474		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1475		free(ifgl->ifgl_group, M_TEMP);
1476	} else
1477		IFNET_WUNLOCK();
1478
1479	free(ifgl, M_TEMP);
1480
1481	EVENTHANDLER_INVOKE(group_change_event, groupname);
1482
1483	return (0);
1484}
1485
1486/*
1487 * Remove an interface from all groups
1488 */
1489static void
1490if_delgroups(struct ifnet *ifp)
1491{
1492	struct ifg_list		*ifgl;
1493	struct ifg_member	*ifgm;
1494	char groupname[IFNAMSIZ];
1495
1496	IFNET_WLOCK();
1497	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1498		ifgl = TAILQ_FIRST(&ifp->if_groups);
1499
1500		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1501
1502		IF_ADDR_WLOCK(ifp);
1503		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1504		IF_ADDR_WUNLOCK(ifp);
1505
1506		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1507			if (ifgm->ifgm_ifp == ifp)
1508				break;
1509
1510		if (ifgm != NULL) {
1511			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1512			    ifgm_next);
1513			free(ifgm, M_TEMP);
1514		}
1515
1516		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1517			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1518			IFNET_WUNLOCK();
1519			EVENTHANDLER_INVOKE(group_detach_event,
1520			    ifgl->ifgl_group);
1521			free(ifgl->ifgl_group, M_TEMP);
1522		} else
1523			IFNET_WUNLOCK();
1524
1525		free(ifgl, M_TEMP);
1526
1527		EVENTHANDLER_INVOKE(group_change_event, groupname);
1528
1529		IFNET_WLOCK();
1530	}
1531	IFNET_WUNLOCK();
1532}
1533
1534static char *
1535ifgr_group_get(void *ifgrp)
1536{
1537	union ifgroupreq_union *ifgrup;
1538
1539	ifgrup = ifgrp;
1540#ifdef COMPAT_FREEBSD32
1541	if (SV_CURPROC_FLAG(SV_ILP32))
1542		return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]);
1543#endif
1544	return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]);
1545}
1546
1547static struct ifg_req *
1548ifgr_groups_get(void *ifgrp)
1549{
1550	union ifgroupreq_union *ifgrup;
1551
1552	ifgrup = ifgrp;
1553#ifdef COMPAT_FREEBSD32
1554	if (SV_CURPROC_FLAG(SV_ILP32))
1555		return ((struct ifg_req *)(uintptr_t)
1556		    ifgrup->ifgr32.ifgr_ifgru.ifgru_groups);
1557#endif
1558	return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups);
1559}
1560
1561/*
1562 * Stores all groups from an interface in memory pointed to by ifgr.
1563 */
1564static int
1565if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
1566{
1567	int			 len, error;
1568	struct ifg_list		*ifgl;
1569	struct ifg_req		 ifgrq, *ifgp;
1570
1571	if (ifgr->ifgr_len == 0) {
1572		IF_ADDR_RLOCK(ifp);
1573		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1574			ifgr->ifgr_len += sizeof(struct ifg_req);
1575		IF_ADDR_RUNLOCK(ifp);
1576		return (0);
1577	}
1578
1579	len = ifgr->ifgr_len;
1580	ifgp = ifgr_groups_get(ifgr);
1581	/* XXX: wire */
1582	IF_ADDR_RLOCK(ifp);
1583	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1584		if (len < sizeof(ifgrq)) {
1585			IF_ADDR_RUNLOCK(ifp);
1586			return (EINVAL);
1587		}
1588		bzero(&ifgrq, sizeof ifgrq);
1589		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1590		    sizeof(ifgrq.ifgrq_group));
1591		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1592		    	IF_ADDR_RUNLOCK(ifp);
1593			return (error);
1594		}
1595		len -= sizeof(ifgrq);
1596		ifgp++;
1597	}
1598	IF_ADDR_RUNLOCK(ifp);
1599
1600	return (0);
1601}
1602
1603/*
1604 * Stores all members of a group in memory pointed to by igfr
1605 */
1606static int
1607if_getgroupmembers(struct ifgroupreq *ifgr)
1608{
1609	struct ifg_group	*ifg;
1610	struct ifg_member	*ifgm;
1611	struct ifg_req		 ifgrq, *ifgp;
1612	int			 len, error;
1613
1614	IFNET_RLOCK();
1615	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1616		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1617			break;
1618	if (ifg == NULL) {
1619		IFNET_RUNLOCK();
1620		return (ENOENT);
1621	}
1622
1623	if (ifgr->ifgr_len == 0) {
1624		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1625			ifgr->ifgr_len += sizeof(ifgrq);
1626		IFNET_RUNLOCK();
1627		return (0);
1628	}
1629
1630	len = ifgr->ifgr_len;
1631	ifgp = ifgr_groups_get(ifgr);
1632	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1633		if (len < sizeof(ifgrq)) {
1634			IFNET_RUNLOCK();
1635			return (EINVAL);
1636		}
1637		bzero(&ifgrq, sizeof ifgrq);
1638		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1639		    sizeof(ifgrq.ifgrq_member));
1640		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1641			IFNET_RUNLOCK();
1642			return (error);
1643		}
1644		len -= sizeof(ifgrq);
1645		ifgp++;
1646	}
1647	IFNET_RUNLOCK();
1648
1649	return (0);
1650}
1651
1652/*
1653 * Return counter values from counter(9)s stored in ifnet.
1654 */
1655uint64_t
1656if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
1657{
1658
1659	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1660
1661	return (counter_u64_fetch(ifp->if_counters[cnt]));
1662}
1663
1664/*
1665 * Increase an ifnet counter. Usually used for counters shared
1666 * between the stack and a driver, but function supports them all.
1667 */
1668void
1669if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
1670{
1671
1672	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1673
1674	counter_u64_add(ifp->if_counters[cnt], inc);
1675}
1676
1677/*
1678 * Copy data from ifnet to userland API structure if_data.
1679 */
1680void
1681if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1682{
1683
1684	ifd->ifi_type = ifp->if_type;
1685	ifd->ifi_physical = 0;
1686	ifd->ifi_addrlen = ifp->if_addrlen;
1687	ifd->ifi_hdrlen = ifp->if_hdrlen;
1688	ifd->ifi_link_state = ifp->if_link_state;
1689	ifd->ifi_vhid = 0;
1690	ifd->ifi_datalen = sizeof(struct if_data);
1691	ifd->ifi_mtu = ifp->if_mtu;
1692	ifd->ifi_metric = ifp->if_metric;
1693	ifd->ifi_baudrate = ifp->if_baudrate;
1694	ifd->ifi_hwassist = ifp->if_hwassist;
1695	ifd->ifi_epoch = ifp->if_epoch;
1696	ifd->ifi_lastchange = ifp->if_lastchange;
1697
1698	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1699	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1700	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1701	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1702	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1703	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1704	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1705	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1706	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1707	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1708	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1709	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1710}
1711
/*
 * Function wrappers around the struct ifnet address list locking macros.
 * Kernel modules call these instead of the macros so that they do not encode
 * programming interface or binary interface assumptions that may break when
 * kernel-internal locking approaches change.
 */

/* Take the interface address list lock for reading. */
void
if_addr_rlock(struct ifnet *ifp)
{

	IF_ADDR_RLOCK(ifp);
}
1724
/* Drop the read hold on the interface address list lock. */
void
if_addr_runlock(struct ifnet *ifp)
{

	IF_ADDR_RUNLOCK(ifp);
}
1731
1732void
1733if_maddr_rlock(if_t ifp)
1734{
1735
1736	IF_ADDR_RLOCK((struct ifnet *)ifp);
1737}
1738
1739void
1740if_maddr_runlock(if_t ifp)
1741{
1742
1743	IF_ADDR_RUNLOCK((struct ifnet *)ifp);
1744}
1745
1746/*
1747 * Initialization, destruction and refcounting functions for ifaddrs.
1748 */
1749struct ifaddr *
1750ifa_alloc(size_t size, int flags)
1751{
1752	struct ifaddr *ifa;
1753
1754	KASSERT(size >= sizeof(struct ifaddr),
1755	    ("%s: invalid size %zu", __func__, size));
1756
1757	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1758	if (ifa == NULL)
1759		return (NULL);
1760
1761	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1762		goto fail;
1763	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1764		goto fail;
1765	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1766		goto fail;
1767	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1768		goto fail;
1769
1770	refcount_init(&ifa->ifa_refcnt, 1);
1771
1772	return (ifa);
1773
1774fail:
1775	/* free(NULL) is okay */
1776	counter_u64_free(ifa->ifa_opackets);
1777	counter_u64_free(ifa->ifa_ipackets);
1778	counter_u64_free(ifa->ifa_obytes);
1779	counter_u64_free(ifa->ifa_ibytes);
1780	free(ifa, M_IFADDR);
1781
1782	return (NULL);
1783}
1784
1785void
1786ifa_ref(struct ifaddr *ifa)
1787{
1788
1789	refcount_acquire(&ifa->ifa_refcnt);
1790}
1791
1792void
1793ifa_free(struct ifaddr *ifa)
1794{
1795
1796	if (refcount_release(&ifa->ifa_refcnt)) {
1797		counter_u64_free(ifa->ifa_opackets);
1798		counter_u64_free(ifa->ifa_ipackets);
1799		counter_u64_free(ifa->ifa_obytes);
1800		counter_u64_free(ifa->ifa_ibytes);
1801		free(ifa, M_IFADDR);
1802	}
1803}
1804
1805static int
1806ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
1807    struct sockaddr *ia)
1808{
1809	int error;
1810	struct rt_addrinfo info;
1811	struct sockaddr_dl null_sdl;
1812	struct ifnet *ifp;
1813
1814	ifp = ifa->ifa_ifp;
1815
1816	bzero(&info, sizeof(info));
1817	if (cmd != RTM_DELETE)
1818		info.rti_ifp = V_loif;
1819	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
1820	info.rti_info[RTAX_DST] = ia;
1821	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
1822	link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
1823
1824	error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
1825
1826	if (error != 0)
1827		log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
1828		    __func__, otype, if_name(ifp), error);
1829
1830	return (error);
1831}
1832
1833int
1834ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1835{
1836
1837	return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
1838}
1839
1840int
1841ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1842{
1843
1844	return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
1845}
1846
1847int
1848ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1849{
1850
1851	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
1852}
1853
/*
 * XXX: sockaddr_dl has a deeper structure than the sockaddr structs used
 * for other address families, so it needs its own comparison: two link
 * addresses match when their sdl_len fields agree and the first sdl_alen
 * bytes (taken from a1) of their link-level addresses are identical.
 * Both arguments are evaluated more than once.
 */
#define	sa_dl_equal(a1, a2)						\
	(((const struct sockaddr_dl *)(a1))->sdl_len ==			\
	    ((const struct sockaddr_dl *)(a2))->sdl_len &&		\
	    bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	    CLLADDR((const struct sockaddr_dl *)(a2)),			\
	    ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)
1866
1867/*
1868 * Locate an interface based on a complete address.
1869 */
1870/*ARGSUSED*/
1871static struct ifaddr *
1872ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
1873{
1874	struct ifnet *ifp;
1875	struct ifaddr *ifa;
1876
1877	IFNET_RLOCK_NOSLEEP();
1878	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1879		IF_ADDR_RLOCK(ifp);
1880		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1881			if (ifa->ifa_addr->sa_family != addr->sa_family)
1882				continue;
1883			if (sa_equal(addr, ifa->ifa_addr)) {
1884				if (getref)
1885					ifa_ref(ifa);
1886				IF_ADDR_RUNLOCK(ifp);
1887				goto done;
1888			}
1889			/* IP6 doesn't have broadcast */
1890			if ((ifp->if_flags & IFF_BROADCAST) &&
1891			    ifa->ifa_broadaddr &&
1892			    ifa->ifa_broadaddr->sa_len != 0 &&
1893			    sa_equal(ifa->ifa_broadaddr, addr)) {
1894				if (getref)
1895					ifa_ref(ifa);
1896				IF_ADDR_RUNLOCK(ifp);
1897				goto done;
1898			}
1899		}
1900		IF_ADDR_RUNLOCK(ifp);
1901	}
1902	ifa = NULL;
1903done:
1904	IFNET_RUNLOCK_NOSLEEP();
1905	return (ifa);
1906}
1907
/*
 * Look up the ifaddr matching addr and return it with a reference held,
 * or NULL when there is none.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr)
{
	struct ifaddr *ifa;

	ifa = ifa_ifwithaddr_internal(addr, 1);
	return (ifa);
}
1914
1915int
1916ifa_ifwithaddr_check(const struct sockaddr *addr)
1917{
1918
1919	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
1920}
1921
1922/*
1923 * Locate an interface based on the broadcast address.
1924 */
1925/* ARGSUSED */
1926struct ifaddr *
1927ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1928{
1929	struct ifnet *ifp;
1930	struct ifaddr *ifa;
1931
1932	IFNET_RLOCK_NOSLEEP();
1933	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1934		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1935			continue;
1936		IF_ADDR_RLOCK(ifp);
1937		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1938			if (ifa->ifa_addr->sa_family != addr->sa_family)
1939				continue;
1940			if ((ifp->if_flags & IFF_BROADCAST) &&
1941			    ifa->ifa_broadaddr &&
1942			    ifa->ifa_broadaddr->sa_len != 0 &&
1943			    sa_equal(ifa->ifa_broadaddr, addr)) {
1944				ifa_ref(ifa);
1945				IF_ADDR_RUNLOCK(ifp);
1946				goto done;
1947			}
1948		}
1949		IF_ADDR_RUNLOCK(ifp);
1950	}
1951	ifa = NULL;
1952done:
1953	IFNET_RUNLOCK_NOSLEEP();
1954	return (ifa);
1955}
1956
1957/*
1958 * Locate the point to point interface with a given destination address.
1959 */
1960/*ARGSUSED*/
1961struct ifaddr *
1962ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1963{
1964	struct ifnet *ifp;
1965	struct ifaddr *ifa;
1966
1967	IFNET_RLOCK_NOSLEEP();
1968	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1969		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1970			continue;
1971		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1972			continue;
1973		IF_ADDR_RLOCK(ifp);
1974		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1975			if (ifa->ifa_addr->sa_family != addr->sa_family)
1976				continue;
1977			if (ifa->ifa_dstaddr != NULL &&
1978			    sa_equal(addr, ifa->ifa_dstaddr)) {
1979				ifa_ref(ifa);
1980				IF_ADDR_RUNLOCK(ifp);
1981				goto done;
1982			}
1983		}
1984		IF_ADDR_RUNLOCK(ifp);
1985	}
1986	ifa = NULL;
1987done:
1988	IFNET_RUNLOCK_NOSLEEP();
1989	return (ifa);
1990}
1991
1992/*
1993 * Find an interface on a specific network.  If many, choice
1994 * is most specific found.
1995 */
1996struct ifaddr *
1997ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
1998{
1999	struct ifnet *ifp;
2000	struct ifaddr *ifa;
2001	struct ifaddr *ifa_maybe = NULL;
2002	u_int af = addr->sa_family;
2003	const char *addr_data = addr->sa_data, *cplim;
2004
2005	/*
2006	 * AF_LINK addresses can be looked up directly by their index number,
2007	 * so do that if we can.
2008	 */
2009	if (af == AF_LINK) {
2010	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
2011	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
2012		return (ifaddr_byindex(sdl->sdl_index));
2013	}
2014
2015	/*
2016	 * Scan though each interface, looking for ones that have addresses
2017	 * in this address family and the requested fib.  Maintain a reference
2018	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
2019	 * kept it stable when we move onto the next interface.
2020	 */
2021	IFNET_RLOCK_NOSLEEP();
2022	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2023		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
2024			continue;
2025		IF_ADDR_RLOCK(ifp);
2026		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2027			const char *cp, *cp2, *cp3;
2028
2029			if (ifa->ifa_addr->sa_family != af)
2030next:				continue;
2031			if (af == AF_INET &&
2032			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
2033				/*
2034				 * This is a bit broken as it doesn't
2035				 * take into account that the remote end may
2036				 * be a single node in the network we are
2037				 * looking for.
2038				 * The trouble is that we don't know the
2039				 * netmask for the remote end.
2040				 */
2041				if (ifa->ifa_dstaddr != NULL &&
2042				    sa_equal(addr, ifa->ifa_dstaddr)) {
2043					ifa_ref(ifa);
2044					IF_ADDR_RUNLOCK(ifp);
2045					goto done;
2046				}
2047			} else {
2048				/*
2049				 * Scan all the bits in the ifa's address.
2050				 * If a bit dissagrees with what we are
2051				 * looking for, mask it with the netmask
2052				 * to see if it really matters.
2053				 * (A byte at a time)
2054				 */
2055				if (ifa->ifa_netmask == 0)
2056					continue;
2057				cp = addr_data;
2058				cp2 = ifa->ifa_addr->sa_data;
2059				cp3 = ifa->ifa_netmask->sa_data;
2060				cplim = ifa->ifa_netmask->sa_len
2061					+ (char *)ifa->ifa_netmask;
2062				while (cp3 < cplim)
2063					if ((*cp++ ^ *cp2++) & *cp3++)
2064						goto next; /* next address! */
2065				/*
2066				 * If the netmask of what we just found
2067				 * is more specific than what we had before
2068				 * (if we had one), or if the virtual status
2069				 * of new prefix is better than of the old one,
2070				 * then remember the new one before continuing
2071				 * to search for an even better one.
2072				 */
2073				if (ifa_maybe == NULL ||
2074				    ifa_preferred(ifa_maybe, ifa) ||
2075				    rn_refines((caddr_t)ifa->ifa_netmask,
2076				    (caddr_t)ifa_maybe->ifa_netmask)) {
2077					if (ifa_maybe != NULL)
2078						ifa_free(ifa_maybe);
2079					ifa_maybe = ifa;
2080					ifa_ref(ifa_maybe);
2081				}
2082			}
2083		}
2084		IF_ADDR_RUNLOCK(ifp);
2085	}
2086	ifa = ifa_maybe;
2087	ifa_maybe = NULL;
2088done:
2089	IFNET_RUNLOCK_NOSLEEP();
2090	if (ifa_maybe != NULL)
2091		ifa_free(ifa_maybe);
2092	return (ifa);
2093}
2094
2095/*
2096 * Find an interface address specific to an interface best matching
2097 * a given address.
2098 */
2099struct ifaddr *
2100ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2101{
2102	struct ifaddr *ifa;
2103	const char *cp, *cp2, *cp3;
2104	char *cplim;
2105	struct ifaddr *ifa_maybe = NULL;
2106	u_int af = addr->sa_family;
2107
2108	if (af >= AF_MAX)
2109		return (NULL);
2110	IF_ADDR_RLOCK(ifp);
2111	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2112		if (ifa->ifa_addr->sa_family != af)
2113			continue;
2114		if (ifa_maybe == NULL)
2115			ifa_maybe = ifa;
2116		if (ifa->ifa_netmask == 0) {
2117			if (sa_equal(addr, ifa->ifa_addr) ||
2118			    (ifa->ifa_dstaddr &&
2119			    sa_equal(addr, ifa->ifa_dstaddr)))
2120				goto done;
2121			continue;
2122		}
2123		if (ifp->if_flags & IFF_POINTOPOINT) {
2124			if (sa_equal(addr, ifa->ifa_dstaddr))
2125				goto done;
2126		} else {
2127			cp = addr->sa_data;
2128			cp2 = ifa->ifa_addr->sa_data;
2129			cp3 = ifa->ifa_netmask->sa_data;
2130			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2131			for (; cp3 < cplim; cp3++)
2132				if ((*cp++ ^ *cp2++) & *cp3)
2133					break;
2134			if (cp3 == cplim)
2135				goto done;
2136		}
2137	}
2138	ifa = ifa_maybe;
2139done:
2140	if (ifa != NULL)
2141		ifa_ref(ifa);
2142	IF_ADDR_RUNLOCK(ifp);
2143	return (ifa);
2144}
2145
2146/*
2147 * See whether new ifa is better than current one:
2148 * 1) A non-virtual one is preferred over virtual.
2149 * 2) A virtual in master state preferred over any other state.
2150 *
2151 * Used in several address selecting functions.
2152 */
2153int
2154ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2155{
2156
2157	return (cur->ifa_carp && (!next->ifa_carp ||
2158	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2159}
2160
2161#include <net/if_llatbl.h>
2162
2163/*
2164 * Default action when installing a route with a Link Level gateway.
2165 * Lookup an appropriate real ifa to point to.
2166 * This should be moved to /sys/net/link.c eventually.
2167 */
2168static void
2169link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
2170{
2171	struct ifaddr *ifa, *oifa;
2172	struct sockaddr *dst;
2173	struct ifnet *ifp;
2174
2175	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
2176	    ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
2177		return;
2178	ifa = ifaof_ifpforaddr(dst, ifp);
2179	if (ifa) {
2180		oifa = rt->rt_ifa;
2181		rt->rt_ifa = ifa;
2182		ifa_free(oifa);
2183		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2184			ifa->ifa_rtrequest(cmd, rt, info);
2185	}
2186}
2187
2188struct sockaddr_dl *
2189link_alloc_sdl(size_t size, int flags)
2190{
2191
2192	return (malloc(size, M_TEMP, flags));
2193}
2194
2195void
2196link_free_sdl(struct sockaddr *sa)
2197{
2198	free(sa, M_TEMP);
2199}
2200
2201/*
2202 * Fills in given sdl with interface basic info.
2203 * Returns pointer to filled sdl.
2204 */
2205struct sockaddr_dl *
2206link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2207{
2208	struct sockaddr_dl *sdl;
2209
2210	sdl = (struct sockaddr_dl *)paddr;
2211	memset(sdl, 0, sizeof(struct sockaddr_dl));
2212	sdl->sdl_len = sizeof(struct sockaddr_dl);
2213	sdl->sdl_family = AF_LINK;
2214	sdl->sdl_index = ifp->if_index;
2215	sdl->sdl_type = iftype;
2216
2217	return (sdl);
2218}
2219
2220/*
2221 * Mark an interface down and notify protocols of
2222 * the transition.
2223 */
2224static void
2225if_unroute(struct ifnet *ifp, int flag, int fam)
2226{
2227	struct ifaddr *ifa;
2228
2229	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2230
2231	ifp->if_flags &= ~flag;
2232	getmicrotime(&ifp->if_lastchange);
2233	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2234		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2235			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2236	ifp->if_qflush(ifp);
2237
2238	if (ifp->if_carp)
2239		(*carp_linkstate_p)(ifp);
2240	rt_ifmsg(ifp);
2241}
2242
2243/*
2244 * Mark an interface up and notify protocols of
2245 * the transition.
2246 */
2247static void
2248if_route(struct ifnet *ifp, int flag, int fam)
2249{
2250	struct ifaddr *ifa;
2251
2252	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
2253
2254	ifp->if_flags |= flag;
2255	getmicrotime(&ifp->if_lastchange);
2256	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2257		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2258			pfctlinput(PRC_IFUP, ifa->ifa_addr);
2259	if (ifp->if_carp)
2260		(*carp_linkstate_p)(ifp);
2261	rt_ifmsg(ifp);
2262#ifdef INET6
2263	in6_if_up(ifp);
2264#endif
2265}
2266
/*
 * Function pointer hooks into the vlan code.  They are defined here
 * without initializers; presumably if_vlan fills them in when it is
 * present (TODO confirm against if_vlan).  Within this file,
 * vlan_link_state_p is called from do_link_state_change() for
 * interfaces whose if_vlantrunk is non-NULL.
 */
void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
int	(*vlan_setcookie_p)(struct ifnet *, void *);
void	*(*vlan_cookie_p)(struct ifnet *);
2274
2275/*
2276 * Handle a change in the interface link state. To avoid LORs
2277 * between driver lock and upper layer locks, as well as possible
2278 * recursions, we post event to taskqueue, and all job
2279 * is done in static do_link_state_change().
2280 */
2281void
2282if_link_state_change(struct ifnet *ifp, int link_state)
2283{
2284	/* Return if state hasn't changed. */
2285	if (ifp->if_link_state == link_state)
2286		return;
2287
2288	ifp->if_link_state = link_state;
2289
2290	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2291}
2292
2293static void
2294do_link_state_change(void *arg, int pending)
2295{
2296	struct ifnet *ifp = (struct ifnet *)arg;
2297	int link_state = ifp->if_link_state;
2298	CURVNET_SET(ifp->if_vnet);
2299
2300	/* Notify that the link state has changed. */
2301	rt_ifmsg(ifp);
2302	if (ifp->if_vlantrunk != NULL)
2303		(*vlan_link_state_p)(ifp);
2304
2305	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2306	    ifp->if_l2com != NULL)
2307		(*ng_ether_link_state_p)(ifp, link_state);
2308	if (ifp->if_carp)
2309		(*carp_linkstate_p)(ifp);
2310	if (ifp->if_bridge)
2311		(*bridge_linkstate_p)(ifp);
2312	if (ifp->if_lagg)
2313		(*lagg_linkstate_p)(ifp, link_state);
2314
2315	if (IS_DEFAULT_VNET(curvnet))
2316		devctl_notify("IFNET", ifp->if_xname,
2317		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2318		    NULL);
2319	if (pending > 1)
2320		if_printf(ifp, "%d link states coalesced\n", pending);
2321	if (log_link_state_change)
2322		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
2323		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2324	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2325	CURVNET_RESTORE();
2326}
2327
/*
 * Mark an interface down and notify protocols of
 * the transition.
 *
 * The ifnet_event handlers are invoked with IFNET_EVENT_DOWN first,
 * while IFF_UP is still set; if_unroute() then clears IFF_UP and
 * notifies the addresses of every family (AF_UNSPEC).
 */
void
if_down(struct ifnet *ifp)
{

	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
	if_unroute(ifp, IFF_UP, AF_UNSPEC);
}
2339
/*
 * Mark an interface up and notify protocols of
 * the transition.
 *
 * if_route() sets IFF_UP and notifies the addresses of every family
 * (AF_UNSPEC) first; the ifnet_event handlers are invoked with
 * IFNET_EVENT_UP afterwards, once the flag is already set.
 */
void
if_up(struct ifnet *ifp)
{

	if_route(ifp, IFF_UP, AF_UNSPEC);
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}
2351
2352/*
2353 * Flush an interface queue.
2354 */
2355void
2356if_qflush(struct ifnet *ifp)
2357{
2358	struct mbuf *m, *n;
2359	struct ifaltq *ifq;
2360
2361	ifq = &ifp->if_snd;
2362	IFQ_LOCK(ifq);
2363#ifdef ALTQ
2364	if (ALTQ_IS_ENABLED(ifq))
2365		ALTQ_PURGE(ifq);
2366#endif
2367	n = ifq->ifq_head;
2368	while ((m = n) != NULL) {
2369		n = m->m_nextpkt;
2370		m_freem(m);
2371	}
2372	ifq->ifq_head = 0;
2373	ifq->ifq_tail = 0;
2374	ifq->ifq_len = 0;
2375	IFQ_UNLOCK(ifq);
2376}
2377
2378/*
2379 * Map interface name to interface structure pointer, with or without
2380 * returning a reference.
2381 */
2382struct ifnet *
2383ifunit_ref(const char *name)
2384{
2385	struct ifnet *ifp;
2386
2387	IFNET_RLOCK_NOSLEEP();
2388	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2389		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2390		    !(ifp->if_flags & IFF_DYING))
2391			break;
2392	}
2393	if (ifp != NULL)
2394		if_ref(ifp);
2395	IFNET_RUNLOCK_NOSLEEP();
2396	return (ifp);
2397}
2398
2399struct ifnet *
2400ifunit(const char *name)
2401{
2402	struct ifnet *ifp;
2403
2404	IFNET_RLOCK_NOSLEEP();
2405	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2406		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2407			break;
2408	}
2409	IFNET_RUNLOCK_NOSLEEP();
2410	return (ifp);
2411}
2412
2413static void *
2414ifr_buffer_get_buffer(void *data)
2415{
2416	union ifreq_union *ifrup;
2417
2418	ifrup = data;
2419#ifdef COMPAT_FREEBSD32
2420	if (SV_CURPROC_FLAG(SV_ILP32))
2421		return ((void *)(uintptr_t)
2422		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
2423#endif
2424	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
2425}
2426
2427static void
2428ifr_buffer_set_buffer_null(void *data)
2429{
2430	union ifreq_union *ifrup;
2431
2432	ifrup = data;
2433#ifdef COMPAT_FREEBSD32
2434	if (SV_CURPROC_FLAG(SV_ILP32))
2435		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
2436	else
2437#endif
2438		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
2439}
2440
2441static size_t
2442ifr_buffer_get_length(void *data)
2443{
2444	union ifreq_union *ifrup;
2445
2446	ifrup = data;
2447#ifdef COMPAT_FREEBSD32
2448	if (SV_CURPROC_FLAG(SV_ILP32))
2449		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
2450#endif
2451	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
2452}
2453
2454static void
2455ifr_buffer_set_length(void *data, size_t len)
2456{
2457	union ifreq_union *ifrup;
2458
2459	ifrup = data;
2460#ifdef COMPAT_FREEBSD32
2461	if (SV_CURPROC_FLAG(SV_ILP32))
2462		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
2463	else
2464#endif
2465		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
2466}
2467
2468void *
2469ifr_data_get_ptr(void *ifrp)
2470{
2471	union ifreq_union *ifrup;
2472
2473	ifrup = ifrp;
2474#ifdef COMPAT_FREEBSD32
2475	if (SV_CURPROC_FLAG(SV_ILP32))
2476		return ((void *)(uintptr_t)
2477		    ifrup->ifr32.ifr_ifru.ifru_data);
2478#endif
2479		return (ifrup->ifr.ifr_ifru.ifru_data);
2480}
2481
/*
 * Hardware specific interface ioctls.
 *
 * Handles the ioctl requests that operate on a single interface that
 * the caller has already looked up: reading and setting flags,
 * capabilities, metric, MTU, description, FIB, name, link-level
 * address, multicast memberships and interface groups.  Requests that
 * need driver involvement are forwarded to ifp->if_ioctl.
 *
 * 'cmd' is the ioctl request, 'ifp' the target interface, 'data' the
 * request structure (viewed as a struct ifreq for most requests and as
 * a struct ifgroupreq for the group requests) and 'td' the calling
 * thread, which is used for the privilege checks.
 *
 * Returns 0 on success, an errno value on failure, or ENOIOCTL when
 * 'cmd' is not one of the requests handled here.
 */
static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
	struct ifreq *ifr;
	int error = 0, do_ifup = 0;
	int new_flags, temp_flags;
	size_t namelen, onamelen;
	size_t descrlen;
	char *descrbuf, *odescrbuf;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/*
		 * Stack-owned and driver-owned flags are merged and then
		 * split across the low and high 16-bit halves of the request.
		 */
		temp_flags = ifp->if_flags | ifp->if_drv_flags;
		ifr->ifr_flags = temp_flags & 0xffff;
		ifr->ifr_flagshigh = temp_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

#ifdef MAC
	case SIOCGIFMAC:
		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFPHYS:
		/* XXXGL: did this ever worked? */
		ifr->ifr_phys = 0;
		break;

	case SIOCGIFDESCR:
		error = 0;
		sx_slock(&ifdescr_sx);
		if (ifp->if_description == NULL)
			error = ENOMSG;
		else {
			/* space for terminating nul */
			descrlen = strlen(ifp->if_description) + 1;
			/*
			 * If the caller's buffer is too small nothing is
			 * copied and the buffer pointer is cleared; either
			 * way the required length is reported back.
			 */
			if (ifr_buffer_get_length(ifr) < descrlen)
				ifr_buffer_set_buffer_null(ifr);
			else
				error = copyout(ifp->if_description,
				    ifr_buffer_get_buffer(ifr), descrlen);
			ifr_buffer_set_length(ifr, descrlen);
		}
		sx_sunlock(&ifdescr_sx);
		break;

	case SIOCSIFDESCR:
		error = priv_check(td, PRIV_NET_SETIFDESCR);
		if (error)
			return (error);

		/*
		 * Copy only (length-1) bytes to make sure that
		 * if_description is always nul terminated.  The
		 * length parameter is supposed to count the
		 * terminating nul in.
		 */
		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
			return (ENAMETOOLONG);
		else if (ifr_buffer_get_length(ifr) == 0)
			/* A zero length removes the description. */
			descrbuf = NULL;
		else {
			/* M_ZERO supplies the terminating nul. */
			descrbuf = malloc(ifr_buffer_get_length(ifr),
			    M_IFDESCR, M_WAITOK | M_ZERO);
			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
			    ifr_buffer_get_length(ifr) - 1);
			if (error) {
				free(descrbuf, M_IFDESCR);
				break;
			}
		}

		/* Swap in the new buffer under the lock, free the old after. */
		sx_xlock(&ifdescr_sx);
		odescrbuf = ifp->if_description;
		ifp->if_description = descrbuf;
		sx_xunlock(&ifdescr_sx);

		getmicrotime(&ifp->if_lastchange);
		free(odescrbuf, M_IFDESCR);
		break;

	case SIOCGIFFIB:
		ifr->ifr_fib = ifp->if_fib;
		break;

	case SIOCSIFFIB:
		error = priv_check(td, PRIV_NET_SETIFFIB);
		if (error)
			return (error);
		if (ifr->ifr_fib >= rt_numfibs)
			return (EINVAL);

		ifp->if_fib = ifr->ifr_fib;
		break;

	case SIOCSIFFLAGS:
		error = priv_check(td, PRIV_NET_SETIFFLAGS);
		if (error)
			return (error);
		/*
		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
		 * check, so we don't need special handling here yet.
		 */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		/*
		 * An up->down transition is processed right away; a
		 * down->up transition is only noted here and performed
		 * after the new flags were stored and the driver was told.
		 */
		if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			if_down(ifp);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			do_ifup = 1;
		}
		/* See if permanently promiscuous mode bit is about to flip */
		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
			if (new_flags & IFF_PPROMISC)
				ifp->if_flags |= IFF_PROMISC;
			else if (ifp->if_pcount == 0)
				ifp->if_flags &= ~IFF_PROMISC;
			if (log_promisc_mode_change)
                                log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
                                    ifp->if_xname,
                                    ((new_flags & IFF_PPROMISC) ?
                                     "enabled" : "disabled"));
		}
		/* Keep the IFF_CANTCHANGE bits, take the rest from the request. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		/* The driver's result is deliberately ignored here. */
		if (ifp->if_ioctl) {
			(void) (*ifp->if_ioctl)(ifp, cmd, data);
		}
		if (do_ifup)
			if_up(ifp);
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = priv_check(td, PRIV_NET_SETIFCAP);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Only capabilities the interface advertises may be requested. */
		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
			return (EINVAL);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

#ifdef MAC
	case SIOCSIFMAC:
		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCSIFNAME:
		error = priv_check(td, PRIV_NET_SETIFNAME);
		if (error)
			return (error);
		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
		    NULL);
		if (error != 0)
			return (error);
		if (new_name[0] == '\0')
			return (EINVAL);
		if (new_name[IFNAMSIZ-1] != '\0') {
			new_name[IFNAMSIZ-1] = '\0';
			if (strlen(new_name) == IFNAMSIZ-1)
				return (EINVAL);
		}
		if (ifunit(new_name) != NULL)
			return (EEXIST);

		/*
		 * XXX: Locking.  Nothing else seems to lock if_flags,
		 * and there are numerous other races with the
		 * ifunit() checks not being atomic with namespace
		 * changes (renames, vmoves, if_attach, etc).
		 */
		ifp->if_flags |= IFF_RENAMING;

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		log(LOG_INFO, "%s: changing name to '%s'\n",
		    ifp->if_xname, new_name);

		IF_ADDR_WLOCK(ifp);
		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		/*
		 * The interface's link-level sockaddr_dl stores the name
		 * in front of the address bytes, so it has to be rewritten
		 * as well.
		 */
		ifa = ifp->if_addr;
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			    sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		/* Rebuild the netmask: 0xff over exactly the new name bytes. */
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		IF_ADDR_WUNLOCK(ifp);

		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

		ifp->if_flags &= ~IFF_RENAMING;
		break;

#ifdef VIMAGE
	case SIOCSIFVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error)
			return (error);
		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
		break;
#endif

	case SIOCSIFMETRIC:
		error = priv_check(td, PRIV_NET_SETIFMETRIC);
		if (error)
			return (error);
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check(td, PRIV_NET_SETIFPHYS);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		/* Remembered so an actual change can be detected below. */
		u_long oldmtu = ifp->if_mtu;

		error = priv_check(td, PRIV_NET_SETIFMTU);
		if (error)
			return (error);
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
			return (EINVAL);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
			rt_updatemtu(ifp);
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (cmd == SIOCADDMULTI)
			error = priv_check(td, PRIV_NET_ADDMULTI);
		else
			error = priv_check(td, PRIV_NET_DELMULTI);
		if (error)
			return (error);

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK)
			return (EINVAL);

		if (cmd == SIOCADDMULTI) {
			struct ifmultiaddr *ifma;

			/*
			 * Userland is only permitted to join groups once
			 * via the if_addmulti() KPI, because it cannot hold
			 * struct ifmultiaddr * between calls. It may also
			 * lose a race while we check if the membership
			 * already exists.
			 */
			IF_ADDR_RLOCK(ifp);
			ifma = if_findmulti(ifp, &ifr->ifr_addr);
			IF_ADDR_RUNLOCK(ifp);
			if (ifma != NULL)
				error = EADDRINUSE;
			else
				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* Privileged requests that are implemented entirely by the driver. */
	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = priv_check(td, PRIV_NET_HWIOCTL);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* Read-only requests passed to the driver without a privilege check. */
	case SIOCGIFSTATUS:
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
	case SIOCGIFGENERIC:
	case SIOCGIFRSSKEY:
	case SIOCGIFRSSHASH:
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		break;

	case SIOCSIFLLADDR:
		error = priv_check(td, PRIV_NET_SETLLADDR);
		if (error)
			return (error);
		error = if_setlladdr(ifp,
		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
		break;

	case SIOCGHWADDR:
		error = if_gethwaddr(ifp, ifr);
		break;

	CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
		error = priv_check(td, PRIV_NET_ADDIFGROUP);
		if (error)
			return (error);
		if ((error = if_addgroup(ifp,
		    ifgr_group_get((struct ifgroupreq *)data))))
			return (error);
		break;

	CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
		if ((error = if_getgroup((struct ifgroupreq *)data, ifp)))
			return (error);
		break;

	CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
		error = priv_check(td, PRIV_NET_DELIFGROUP);
		if (error)
			return (error);
		if ((error = if_delgroup(ifp,
		    ifgr_group_get((struct ifgroupreq *)data))))
			return (error);
		break;

	default:
		/* Not handled here; ifioctl() tries other layers on ENOIOCTL. */
		error = ENOIOCTL;
		break;
	}
	return (error);
}
2890
/*
 * COMPAT_SVR4: alternate request number for the interface configuration
 * listing.  ifioctl() routes it to ifconf() together with SIOCGIFCONF,
 * and ifconf() emits its records through a struct osockaddr view.
 */
#define	OSIOCGIFCONF	_IOWR('i', 20, struct ifconf)

#ifdef COMPAT_FREEBSD32
/*
 * Layout of the ifconf request as passed with SIOCGIFCONF32: a 32-bit
 * length and a 32-bit value holding the user buffer address.  ifioctl()
 * converts it into a native struct ifconf before calling ifconf().
 * NOTE(review): ifioctl() accesses the buffer as ifc32->ifc_buf, which
 * presumably relies on an ifc_buf -> ifc_ifcu.ifcu_buf accessor macro
 * defined elsewhere -- confirm.
 */
struct ifconf32 {
	int32_t	ifc_len;
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	} ifc_ifcu;
};
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
#endif
2904
2905#ifdef COMPAT_FREEBSD32
2906static void
2907ifmr_init(struct ifmediareq *ifmr, caddr_t data)
2908{
2909	struct ifmediareq32 *ifmr32;
2910
2911	ifmr32 = (struct ifmediareq32 *)data;
2912	memcpy(ifmr->ifm_name, ifmr32->ifm_name,
2913	    sizeof(ifmr->ifm_name));
2914	ifmr->ifm_current = ifmr32->ifm_current;
2915	ifmr->ifm_mask = ifmr32->ifm_mask;
2916	ifmr->ifm_status = ifmr32->ifm_status;
2917	ifmr->ifm_active = ifmr32->ifm_active;
2918	ifmr->ifm_count = ifmr32->ifm_count;
2919	ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist;
2920}
2921
2922static void
2923ifmr_update(const struct ifmediareq *ifmr, caddr_t data)
2924{
2925	struct ifmediareq32 *ifmr32;
2926
2927	ifmr32 = (struct ifmediareq32 *)data;
2928	ifmr32->ifm_current = ifmr->ifm_current;
2929	ifmr32->ifm_mask = ifmr->ifm_mask;
2930	ifmr32->ifm_status = ifmr->ifm_status;
2931	ifmr32->ifm_active = ifmr->ifm_active;
2932	ifmr32->ifm_count = ifmr->ifm_count;
2933}
2934#endif
2935
/*
 * Interface ioctls.
 *
 * Entry point for interface ioctl requests issued on socket 'so'.  The
 * request is processed in stages:
 *  - SIOCGIFCONF and its compat variants are answered by ifconf();
 *  - with COMPAT_FREEBSD32, 32-bit media requests are converted into a
 *    native struct ifmediareq and converted back on the way out;
 *  - requests handled without an ifnet lookup here (vnet reclaim,
 *    cloner create/destroy/list, group member listing, CARP) are
 *    dispatched directly;
 *  - otherwise the interface named in the request is looked up with a
 *    reference held and the request is offered to ifhwioctl(), then to
 *    the socket protocol's control method, then to the driver.
 *
 * Runs with the socket's vnet as the current vnet.  Returns 0 or an
 * errno value.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
{
#ifdef COMPAT_FREEBSD32
	caddr_t saved_data;
	struct ifmediareq ifmr;
#endif
	struct ifmediareq *ifmrp;
	struct ifnet *ifp;
	struct ifreq *ifr;
	int error;
	int oif_flags;
#ifdef VIMAGE
	int shutdown;
#endif

	CURVNET_SET(so->so_vnet);
#ifdef VIMAGE
	/* Make sure the VNET is stable. */
	shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
		 so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
	if (shutdown) {
		CURVNET_RESTORE();
		return (EBUSY);
	}
#endif


	/* Stage 1: configuration listing, which needs no interface name. */
	switch (cmd) {
	case SIOCGIFCONF:
	case OSIOCGIFCONF:	/* COMPAT_SVR4 */
		error = ifconf(cmd, data);
		CURVNET_RESTORE();
		return (error);

#ifdef COMPAT_FREEBSD32
	case SIOCGIFCONF32:
		{
			struct ifconf32 *ifc32;
			struct ifconf ifc;

			/* Build a native request from the 32-bit one. */
			ifc32 = (struct ifconf32 *)data;
			ifc.ifc_len = ifc32->ifc_len;
			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);

			error = ifconf(SIOCGIFCONF, (void *)&ifc);
			CURVNET_RESTORE();
			/* Only the resulting length is reported back. */
			if (error == 0)
				ifc32->ifc_len = ifc.ifc_len;
			return (error);
		}
#endif
	}

	/*
	 * Stage 2: for 32-bit media requests, switch 'data' and 'cmd' to
	 * a native struct ifmediareq on the stack; 'saved_data' keeps the
	 * caller's request so the results can be written back at out_noref.
	 */
	ifmrp = NULL;
#ifdef COMPAT_FREEBSD32
	switch (cmd) {
	case SIOCGIFMEDIA32:
	case SIOCGIFXMEDIA32:
		ifmrp = &ifmr;
		ifmr_init(ifmrp, data);
		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
		saved_data = data;
		data = (caddr_t)ifmrp;
	}
#endif

	/* Stage 3: requests that are dispatched without an ifnet lookup. */
	ifr = (struct ifreq *)data;
	switch (cmd) {
#ifdef VIMAGE
	case SIOCSIFRVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error == 0)
			error = if_vmove_reclaim(td, ifr->ifr_name,
			    ifr->ifr_jid);
		goto out_noref;
#endif
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		error = priv_check(td, PRIV_NET_IFCREATE);
		/* Only SIOCIFCREATE2 passes the request's data pointer on. */
		if (error == 0)
			error = if_clone_create(ifr->ifr_name,
			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
			    ifr_data_get_ptr(ifr) : NULL);
		goto out_noref;
	case SIOCIFDESTROY:
		error = priv_check(td, PRIV_NET_IFDESTROY);
		if (error == 0)
			error = if_clone_destroy(ifr->ifr_name);
		goto out_noref;

	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		goto out_noref;

	CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
		error = if_getgroupmembers((struct ifgroupreq *)data);
		goto out_noref;

#if defined(INET) || defined(INET6)
	case SIOCSVH:
	case SIOCGVH:
		if (carp_ioctl_p == NULL)
			error = EPROTONOSUPPORT;
		else
			error = (*carp_ioctl_p)(ifr, cmd, td);
		goto out_noref;
#endif
	}

	/* Stage 4: everything else operates on the named interface. */
	ifp = ifunit_ref(ifr->ifr_name);
	if (ifp == NULL) {
		error = ENXIO;
		goto out_noref;
	}

	/* ENOIOCTL means ifhwioctl() did not recognize the request. */
	error = ifhwioctl(cmd, ifp, data, td);
	if (error != ENOIOCTL)
		goto out_ref;

	/* Remember the flags so an IFF_UP change can be detected below. */
	oif_flags = ifp->if_flags;
	if (so->so_proto == NULL) {
		error = EOPNOTSUPP;
		goto out_ref;
	}

	/*
	 * Pass the request on to the socket control method, and if the
	 * latter returns EOPNOTSUPP, directly to the interface.
	 *
	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
	 * trust SIOCSIFADDR et al to come from an already privileged
	 * layer, and do not perform any credentials checks or input
	 * validation.
	 */
	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
	    ifp, td));
	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
		error = (*ifp->if_ioctl)(ifp, cmd, data);

	/* If the request brought the interface up, let IPv6 know. */
	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
		if (ifp->if_flags & IFF_UP)
			in6_if_up(ifp);
#endif
	}

out_ref:
	/* Drop the reference taken by ifunit_ref(). */
	if_rele(ifp);
out_noref:
#ifdef COMPAT_FREEBSD32
	/*
	 * Undo the stage 2 substitution and copy the results back into
	 * the caller's 32-bit request.  This runs regardless of 'error'.
	 */
	if (ifmrp != NULL) {
		KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA),
		    ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx",
		     cmd));
		data = saved_data;
		ifmr_update(ifmrp, data);
	}
#endif
	CURVNET_RESTORE();
	return (error);
}
3103
3104/*
3105 * The code common to handling reference counted flags,
3106 * e.g., in ifpromisc() and if_allmulti().
3107 * The "pflag" argument can specify a permanent mode flag to check,
3108 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
3109 *
3110 * Only to be used on stack-owned flags, not driver-owned flags.
3111 */
3112static int
3113if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
3114{
3115	struct ifreq ifr;
3116	int error;
3117	int oldflags, oldcount;
3118
3119	/* Sanity checks to catch programming errors */
3120	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
3121	    ("%s: setting driver-owned flag %d", __func__, flag));
3122
3123	if (onswitch)
3124		KASSERT(*refcount >= 0,
3125		    ("%s: increment negative refcount %d for flag %d",
3126		    __func__, *refcount, flag));
3127	else
3128		KASSERT(*refcount > 0,
3129		    ("%s: decrement non-positive refcount %d for flag %d",
3130		    __func__, *refcount, flag));
3131
3132	/* In case this mode is permanent, just touch refcount */
3133	if (ifp->if_flags & pflag) {
3134		*refcount += onswitch ? 1 : -1;
3135		return (0);
3136	}
3137
3138	/* Save ifnet parameters for if_ioctl() may fail */
3139	oldcount = *refcount;
3140	oldflags = ifp->if_flags;
3141
3142	/*
3143	 * See if we aren't the only and touching refcount is enough.
3144	 * Actually toggle interface flag if we are the first or last.
3145	 */
3146	if (onswitch) {
3147		if ((*refcount)++)
3148			return (0);
3149		ifp->if_flags |= flag;
3150	} else {
3151		if (--(*refcount))
3152			return (0);
3153		ifp->if_flags &= ~flag;
3154	}
3155
3156	/* Call down the driver since we've changed interface flags */
3157	if (ifp->if_ioctl == NULL) {
3158		error = EOPNOTSUPP;
3159		goto recover;
3160	}
3161	ifr.ifr_flags = ifp->if_flags & 0xffff;
3162	ifr.ifr_flagshigh = ifp->if_flags >> 16;
3163	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3164	if (error)
3165		goto recover;
3166	/* Notify userland that interface flags have changed */
3167	rt_ifmsg(ifp);
3168	return (0);
3169
3170recover:
3171	/* Recover after driver error */
3172	*refcount = oldcount;
3173	ifp->if_flags = oldflags;
3174	return (error);
3175}
3176
3177/*
3178 * Set/clear promiscuous mode on interface ifp based on the truth value
3179 * of pswitch.  The calls are reference counted so that only the first
3180 * "on" request actually has an effect, as does the final "off" request.
3181 * Results are undefined if the "off" and "on" requests are not matched.
3182 */
3183int
3184ifpromisc(struct ifnet *ifp, int pswitch)
3185{
3186	int error;
3187	int oldflags = ifp->if_flags;
3188
3189	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
3190			   &ifp->if_pcount, pswitch);
3191	/* If promiscuous mode status has changed, log a message */
3192	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
3193            log_promisc_mode_change)
3194		log(LOG_INFO, "%s: promiscuous mode %s\n",
3195		    ifp->if_xname,
3196		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
3197	return (error);
3198}
3199
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * 'cmd' is SIOCGIFCONF or OSIOCGIFCONF and 'data' a struct ifconf whose
 * ifc_len gives the size of the user buffer ifc_req.  For every
 * interface one record is produced per address (addresses for which
 * prison_if() returns non-zero are skipped), or a single name-only
 * record if no address was emitted.  Records are collected in a
 * fixed-length sbuf, then copied out; ifc_len is updated to the number
 * of bytes returned, which always ends on a record boundary.
 *
 * Returns EINVAL for a non-positive ifc_len, ENAMETOOLONG if an
 * interface name does not fit into ifr_name, or the copyout() result.
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
	max_len = MAXPHYS - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	/*
	 * Never allocate more than the caller can take.  Once the buffer
	 * is sized to the caller's length ('full'), no retry will happen.
	 */
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	/*
	 * From here on max_len counts the bytes all records would need,
	 * while valid_len tracks how much actually fit into the sbuf.
	 */
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		int addrs;

		/*
		 * Zero the ifr to make sure we don't disclose the contents
		 * of the stack.
		 */
		memset(&ifr, 0, sizeof(ifr));

		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		addrs = 0;
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			if (prison_if(curthread->td_ucred, sa) != 0)
				continue;
			addrs++;
			/* COMPAT_SVR4 */
			if (cmd == OSIOCGIFCONF) {
				/*
				 * NOTE(review): this copies a full struct
				 * sockaddr from 'sa' without looking at
				 * sa_len -- confirm short addresses cannot
				 * reach this path.
				 */
				struct osockaddr *osa =
				    (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else
			if (sa->sa_len <= sizeof(*sa)) {
				/* Address fits: emit one fixed-size ifreq. */
				if (sa->sa_len < sizeof(*sa)) {
					memset(&ifr.ifr_ifru.ifru_addr, 0,
					    sizeof(ifr.ifr_ifru.ifru_addr));
					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
					    sa->sa_len);
				} else
					ifr.ifr_ifru.ifru_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/*
				 * Oversized address: emit the part of the
				 * ifreq in front of ifr_addr, followed by
				 * the complete address.
				 */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			/* Advance valid_len only past complete records. */
			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
		IF_ADDR_RUNLOCK(ifp);
		/* No address was emitted: still report the interface name. */
		if (addrs == 0) {
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	ifc->ifc_len = valid_len;
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}
3315
3316/*
3317 * Just like ifpromisc(), but for all-multicast-reception mode.
3318 */
3319int
3320if_allmulti(struct ifnet *ifp, int onswitch)
3321{
3322
3323	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3324}
3325
3326struct ifmultiaddr *
3327if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3328{
3329	struct ifmultiaddr *ifma;
3330
3331	IF_ADDR_LOCK_ASSERT(ifp);
3332
3333	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3334		if (sa->sa_family == AF_LINK) {
3335			if (sa_dl_equal(ifma->ifma_addr, sa))
3336				break;
3337		} else {
3338			if (sa_equal(ifma->ifma_addr, sa))
3339				break;
3340		}
3341	}
3342
3343	return ifma;
3344}
3345
3346/*
3347 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3348 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3349 * the ifnet multicast address list here, so the caller must do that and
3350 * other setup work (such as notifying the device driver).  The reference
3351 * count is initialized to 1.
3352 */
3353static struct ifmultiaddr *
3354if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3355    int mflags)
3356{
3357	struct ifmultiaddr *ifma;
3358	struct sockaddr *dupsa;
3359
3360	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3361	    M_ZERO);
3362	if (ifma == NULL)
3363		return (NULL);
3364
3365	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3366	if (dupsa == NULL) {
3367		free(ifma, M_IFMADDR);
3368		return (NULL);
3369	}
3370	bcopy(sa, dupsa, sa->sa_len);
3371	ifma->ifma_addr = dupsa;
3372
3373	ifma->ifma_ifp = ifp;
3374	ifma->ifma_refcount = 1;
3375	ifma->ifma_protospec = NULL;
3376
3377	if (llsa == NULL) {
3378		ifma->ifma_lladdr = NULL;
3379		return (ifma);
3380	}
3381
3382	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3383	if (dupsa == NULL) {
3384		free(ifma->ifma_addr, M_IFMADDR);
3385		free(ifma, M_IFMADDR);
3386		return (NULL);
3387	}
3388	bcopy(llsa, dupsa, llsa->sa_len);
3389	ifma->ifma_lladdr = dupsa;
3390
3391	return (ifma);
3392}
3393
3394/*
3395 * if_freemulti: free ifmultiaddr structure and possibly attached related
3396 * addresses.  The caller is responsible for implementing reference
3397 * counting, notifying the driver, handling routing messages, and releasing
3398 * any dependent link layer state.
3399 */
3400static void
3401if_freemulti(struct ifmultiaddr *ifma)
3402{
3403
3404	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3405	    ifma->ifma_refcount));
3406
3407	if (ifma->ifma_lladdr != NULL)
3408		free(ifma->ifma_lladdr, M_IFMADDR);
3409	free(ifma->ifma_addr, M_IFMADDR);
3410	free(ifma, M_IFMADDR);
3411}
3412
3413/*
3414 * Register an additional multicast address with a network interface.
3415 *
3416 * - If the address is already present, bump the reference count on the
3417 *   address and return.
3418 * - If the address is not link-layer, look up a link layer address.
3419 * - Allocate address structures for one or both addresses, and attach to the
3420 *   multicast address list on the interface.  If automatically adding a link
3421 *   layer address, the protocol address will own a reference to the link
3422 *   layer address, to be freed when it is freed.
3423 * - Notify the network device driver of an addition to the multicast address
3424 *   list.
3425 *
3426 * 'sa' points to caller-owned memory with the desired multicast address.
3427 *
3428 * 'retifma' will be used to return a pointer to the resulting multicast
3429 * address reference, if desired.
3430 */
3431int
3432if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
3433    struct ifmultiaddr **retifma)
3434{
3435	struct ifmultiaddr *ifma, *ll_ifma;
3436	struct sockaddr *llsa;
3437	struct sockaddr_dl sdl;
3438	int error;
3439
3440	/*
3441	 * If the address is already present, return a new reference to it;
3442	 * otherwise, allocate storage and set up a new address.
3443	 */
3444	IF_ADDR_WLOCK(ifp);
3445	ifma = if_findmulti(ifp, sa);
3446	if (ifma != NULL) {
3447		ifma->ifma_refcount++;
3448		if (retifma != NULL)
3449			*retifma = ifma;
3450		IF_ADDR_WUNLOCK(ifp);
3451		return (0);
3452	}
3453
3454	/*
3455	 * The address isn't already present; resolve the protocol address
3456	 * into a link layer address, and then look that up, bump its
3457	 * refcount or allocate an ifma for that also.
3458	 * Most link layer resolving functions returns address data which
3459	 * fits inside default sockaddr_dl structure. However callback
3460	 * can allocate another sockaddr structure, in that case we need to
3461	 * free it later.
3462	 */
3463	llsa = NULL;
3464	ll_ifma = NULL;
3465	if (ifp->if_resolvemulti != NULL) {
3466		/* Provide called function with buffer size information */
3467		sdl.sdl_len = sizeof(sdl);
3468		llsa = (struct sockaddr *)&sdl;
3469		error = ifp->if_resolvemulti(ifp, &llsa, sa);
3470		if (error)
3471			goto unlock_out;
3472	}
3473
3474	/*
3475	 * Allocate the new address.  Don't hook it up yet, as we may also
3476	 * need to allocate a link layer multicast address.
3477	 */
3478	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
3479	if (ifma == NULL) {
3480		error = ENOMEM;
3481		goto free_llsa_out;
3482	}
3483
3484	/*
3485	 * If a link layer address is found, we'll need to see if it's
3486	 * already present in the address list, or allocate is as well.
3487	 * When this block finishes, the link layer address will be on the
3488	 * list.
3489	 */
3490	if (llsa != NULL) {
3491		ll_ifma = if_findmulti(ifp, llsa);
3492		if (ll_ifma == NULL) {
3493			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
3494			if (ll_ifma == NULL) {
3495				--ifma->ifma_refcount;
3496				if_freemulti(ifma);
3497				error = ENOMEM;
3498				goto free_llsa_out;
3499			}
3500			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
3501			    ifma_link);
3502		} else
3503			ll_ifma->ifma_refcount++;
3504		ifma->ifma_llifma = ll_ifma;
3505	}
3506
3507	/*
3508	 * We now have a new multicast address, ifma, and possibly a new or
3509	 * referenced link layer address.  Add the primary address to the
3510	 * ifnet address list.
3511	 */
3512	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
3513
3514	if (retifma != NULL)
3515		*retifma = ifma;
3516
3517	/*
3518	 * Must generate the message while holding the lock so that 'ifma'
3519	 * pointer is still valid.
3520	 */
3521	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
3522	IF_ADDR_WUNLOCK(ifp);
3523
3524	/*
3525	 * We are certain we have added something, so call down to the
3526	 * interface to let them know about it.
3527	 */
3528	if (ifp->if_ioctl != NULL) {
3529		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3530	}
3531
3532	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3533		link_free_sdl(llsa);
3534
3535	return (0);
3536
3537free_llsa_out:
3538	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3539		link_free_sdl(llsa);
3540
3541unlock_out:
3542	IF_ADDR_WUNLOCK(ifp);
3543	return (error);
3544}
3545
3546/*
3547 * Delete a multicast group membership by network-layer group address.
3548 *
3549 * Returns ENOENT if the entry could not be found. If ifp no longer
3550 * exists, results are undefined. This entry point should only be used
3551 * from subsystems which do appropriate locking to hold ifp for the
3552 * duration of the call.
3553 * Network-layer protocol domains must use if_delmulti_ifma().
3554 */
3555int
3556if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3557{
3558	struct ifmultiaddr *ifma;
3559	int lastref;
3560#ifdef INVARIANTS
3561	struct ifnet *oifp;
3562
3563	IFNET_RLOCK_NOSLEEP();
3564	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3565		if (ifp == oifp)
3566			break;
3567	if (ifp != oifp)
3568		ifp = NULL;
3569	IFNET_RUNLOCK_NOSLEEP();
3570
3571	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
3572#endif
3573	if (ifp == NULL)
3574		return (ENOENT);
3575
3576	IF_ADDR_WLOCK(ifp);
3577	lastref = 0;
3578	ifma = if_findmulti(ifp, sa);
3579	if (ifma != NULL)
3580		lastref = if_delmulti_locked(ifp, ifma, 0);
3581	IF_ADDR_WUNLOCK(ifp);
3582
3583	if (ifma == NULL)
3584		return (ENOENT);
3585
3586	if (lastref && ifp->if_ioctl != NULL) {
3587		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3588	}
3589
3590	return (0);
3591}
3592
3593/*
3594 * Delete all multicast group membership for an interface.
3595 * Should be used to quickly flush all multicast filters.
3596 */
3597void
3598if_delallmulti(struct ifnet *ifp)
3599{
3600	struct ifmultiaddr *ifma;
3601	struct ifmultiaddr *next;
3602
3603	IF_ADDR_WLOCK(ifp);
3604	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3605		if_delmulti_locked(ifp, ifma, 0);
3606	IF_ADDR_WUNLOCK(ifp);
3607}
3608
3609/*
3610 * Delete a multicast group membership by group membership pointer.
3611 * Network-layer protocol domains must use this routine.
3612 *
3613 * It is safe to call this routine if the ifp disappeared.
3614 */
3615void
3616if_delmulti_ifma(struct ifmultiaddr *ifma)
3617{
3618	struct ifnet *ifp;
3619	int lastref;
3620
3621	ifp = ifma->ifma_ifp;
3622#ifdef DIAGNOSTIC
3623	if (ifp == NULL) {
3624		printf("%s: ifma_ifp seems to be detached\n", __func__);
3625	} else {
3626		struct ifnet *oifp;
3627
3628		IFNET_RLOCK_NOSLEEP();
3629		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3630			if (ifp == oifp)
3631				break;
3632		if (ifp != oifp) {
3633			printf("%s: ifnet %p disappeared\n", __func__, ifp);
3634			ifp = NULL;
3635		}
3636		IFNET_RUNLOCK_NOSLEEP();
3637	}
3638#endif
3639	/*
3640	 * If and only if the ifnet instance exists: Acquire the address lock.
3641	 */
3642	if (ifp != NULL)
3643		IF_ADDR_WLOCK(ifp);
3644
3645	lastref = if_delmulti_locked(ifp, ifma, 0);
3646
3647	if (ifp != NULL) {
3648		/*
3649		 * If and only if the ifnet instance exists:
3650		 *  Release the address lock.
3651		 *  If the group was left: update the hardware hash filter.
3652		 */
3653		IF_ADDR_WUNLOCK(ifp);
3654		if (lastref && ifp->if_ioctl != NULL) {
3655			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3656		}
3657	}
3658}
3659
3660/*
3661 * Perform deletion of network-layer and/or link-layer multicast address.
3662 *
3663 * Return 0 if the reference count was decremented.
3664 * Return 1 if the final reference was released, indicating that the
3665 * hardware hash filter should be reprogrammed.
3666 */
3667static int
3668if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3669{
3670	struct ifmultiaddr *ll_ifma;
3671
3672	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3673		KASSERT(ifma->ifma_ifp == ifp,
3674		    ("%s: inconsistent ifp %p", __func__, ifp));
3675		IF_ADDR_WLOCK_ASSERT(ifp);
3676	}
3677
3678	ifp = ifma->ifma_ifp;
3679
3680	/*
3681	 * If the ifnet is detaching, null out references to ifnet,
3682	 * so that upper protocol layers will notice, and not attempt
3683	 * to obtain locks for an ifnet which no longer exists. The
3684	 * routing socket announcement must happen before the ifnet
3685	 * instance is detached from the system.
3686	 */
3687	if (detaching) {
3688#ifdef DIAGNOSTIC
3689		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3690#endif
3691		/*
3692		 * ifp may already be nulled out if we are being reentered
3693		 * to delete the ll_ifma.
3694		 */
3695		if (ifp != NULL) {
3696			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3697			ifma->ifma_ifp = NULL;
3698		}
3699	}
3700
3701	if (--ifma->ifma_refcount > 0)
3702		return 0;
3703
3704	/*
3705	 * If this ifma is a network-layer ifma, a link-layer ifma may
3706	 * have been associated with it. Release it first if so.
3707	 */
3708	ll_ifma = ifma->ifma_llifma;
3709	if (ll_ifma != NULL) {
3710		KASSERT(ifma->ifma_lladdr != NULL,
3711		    ("%s: llifma w/o lladdr", __func__));
3712		if (detaching)
3713			ll_ifma->ifma_ifp = NULL;	/* XXX */
3714		if (--ll_ifma->ifma_refcount == 0) {
3715			if (ifp != NULL) {
3716				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
3717				    ifma_link);
3718			}
3719			if_freemulti(ll_ifma);
3720		}
3721	}
3722
3723	if (ifp != NULL)
3724		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
3725
3726	if_freemulti(ifma);
3727
3728	/*
3729	 * The last reference to this instance of struct ifmultiaddr
3730	 * was released; the hardware should be notified of this change.
3731	 */
3732	return 1;
3733}
3734
3735/*
3736 * Set the link layer address on an interface.
3737 *
3738 * At this time we only support certain types of interfaces,
3739 * and we don't allow the length of the address to change.
3740 *
3741 * Set noinline to be dtrace-friendly
3742 */
3743__noinline int
3744if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3745{
3746	struct sockaddr_dl *sdl;
3747	struct ifaddr *ifa;
3748	struct ifreq ifr;
3749
3750	IF_ADDR_RLOCK(ifp);
3751	ifa = ifp->if_addr;
3752	if (ifa == NULL) {
3753		IF_ADDR_RUNLOCK(ifp);
3754		return (EINVAL);
3755	}
3756	ifa_ref(ifa);
3757	IF_ADDR_RUNLOCK(ifp);
3758	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3759	if (sdl == NULL) {
3760		ifa_free(ifa);
3761		return (EINVAL);
3762	}
3763	if (len != sdl->sdl_alen) {	/* don't allow length to change */
3764		ifa_free(ifa);
3765		return (EINVAL);
3766	}
3767	switch (ifp->if_type) {
3768	case IFT_ETHER:
3769	case IFT_FDDI:
3770	case IFT_XETHER:
3771	case IFT_ISO88025:
3772	case IFT_L2VLAN:
3773	case IFT_BRIDGE:
3774	case IFT_ARCNET:
3775	case IFT_IEEE8023ADLAG:
3776	case IFT_IEEE80211:
3777		bcopy(lladdr, LLADDR(sdl), len);
3778		ifa_free(ifa);
3779		break;
3780	default:
3781		ifa_free(ifa);
3782		return (ENODEV);
3783	}
3784
3785	/*
3786	 * If the interface is already up, we need
3787	 * to re-init it in order to reprogram its
3788	 * address filter.
3789	 */
3790	if ((ifp->if_flags & IFF_UP) != 0) {
3791		if (ifp->if_ioctl) {
3792			ifp->if_flags &= ~IFF_UP;
3793			ifr.ifr_flags = ifp->if_flags & 0xffff;
3794			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3795			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3796			ifp->if_flags |= IFF_UP;
3797			ifr.ifr_flags = ifp->if_flags & 0xffff;
3798			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3799			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3800		}
3801	}
3802	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3803	return (0);
3804}
3805
3806/*
3807 * Compat function for handling basic encapsulation requests.
3808 * Not converted stacks (FDDI, IB, ..) supports traditional
3809 * output model: ARP (and other similar L2 protocols) are handled
3810 * inside output routine, arpresolve/nd6_resolve() returns MAC
3811 * address instead of full prepend.
3812 *
3813 * This function creates calculated header==MAC for IPv4/IPv6 and
3814 * returns EAFNOSUPPORT (which is then handled in ARP code) for other
3815 * address families.
3816 */
3817static int
3818if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3819{
3820
3821	if (req->rtype != IFENCAP_LL)
3822		return (EOPNOTSUPP);
3823
3824	if (req->bufsize < req->lladdr_len)
3825		return (ENOMEM);
3826
3827	switch (req->family) {
3828	case AF_INET:
3829	case AF_INET6:
3830		break;
3831	default:
3832		return (EAFNOSUPPORT);
3833	}
3834
3835	/* Copy lladdr to storage as is */
3836	memmove(req->buf, req->lladdr, req->lladdr_len);
3837	req->bufsize = req->lladdr_len;
3838	req->lladdr_off = 0;
3839
3840	return (0);
3841}
3842
3843/*
3844 * Get the link layer address that was read from the hardware at attach.
3845 *
3846 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
3847 * their component interfaces as IFT_IEEE8023ADLAG.
3848 */
3849int
3850if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
3851{
3852
3853	if (ifp->if_hw_addr == NULL)
3854		return (ENODEV);
3855
3856	switch (ifp->if_type) {
3857	case IFT_ETHER:
3858	case IFT_IEEE8023ADLAG:
3859		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
3860		return (0);
3861	default:
3862		return (ENODEV);
3863	}
3864}
3865
3866/*
3867 * The name argument must be a pointer to storage which will last as
3868 * long as the interface does.  For physical devices, the result of
3869 * device_get_name(dev) is a good choice and for pseudo-devices a
3870 * static string works well.
3871 */
3872void
3873if_initname(struct ifnet *ifp, const char *name, int unit)
3874{
3875	ifp->if_dname = name;
3876	ifp->if_dunit = unit;
3877	if (unit != IF_DUNIT_NONE)
3878		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3879	else
3880		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3881}
3882
3883int
3884if_printf(struct ifnet *ifp, const char * fmt, ...)
3885{
3886	va_list ap;
3887	int retval;
3888
3889	retval = printf("%s: ", ifp->if_xname);
3890	va_start(ap, fmt);
3891	retval += vprintf(fmt, ap);
3892	va_end(ap);
3893	return (retval);
3894}
3895
3896void
3897if_start(struct ifnet *ifp)
3898{
3899
3900	(*(ifp)->if_start)(ifp);
3901}
3902
/*
 * Backwards compatibility transmit routine for drivers that have not
 * implemented their own: the mbuf is handed to the interface send queue
 * with IFQ_HANDOFF() and the resulting error code is returned.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int rc;

	IFQ_HANDOFF(ifp, m, rc);
	return (rc);
}
3915
3916static void
3917if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
3918{
3919
3920	m_freem(m);
3921}
3922
3923int
3924if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3925{
3926	int active = 0;
3927
3928	IF_LOCK(ifq);
3929	if (_IF_QFULL(ifq)) {
3930		IF_UNLOCK(ifq);
3931		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3932		m_freem(m);
3933		return (0);
3934	}
3935	if (ifp != NULL) {
3936		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
3937		if (m->m_flags & (M_BCAST|M_MCAST))
3938			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
3939		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3940	}
3941	_IF_ENQUEUE(ifq, m);
3942	IF_UNLOCK(ifq);
3943	if (ifp != NULL && !active)
3944		(*(ifp)->if_start)(ifp);
3945	return (1);
3946}
3947
3948void
3949if_register_com_alloc(u_char type,
3950    if_com_alloc_t *a, if_com_free_t *f)
3951{
3952
3953	KASSERT(if_com_alloc[type] == NULL,
3954	    ("if_register_com_alloc: %d already registered", type));
3955	KASSERT(if_com_free[type] == NULL,
3956	    ("if_register_com_alloc: %d free already registered", type));
3957
3958	if_com_alloc[type] = a;
3959	if_com_free[type] = f;
3960}
3961
3962void
3963if_deregister_com_alloc(u_char type)
3964{
3965
3966	KASSERT(if_com_alloc[type] != NULL,
3967	    ("if_deregister_com_alloc: %d not registered", type));
3968	KASSERT(if_com_free[type] != NULL,
3969	    ("if_deregister_com_alloc: %d free not registered", type));
3970	if_com_alloc[type] = NULL;
3971	if_com_free[type] = NULL;
3972}
3973
/* API for driver access to the ifnet owned by the network stack. */
3975uint64_t
3976if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
3977{
3978	uint64_t oldbrate;
3979
3980	oldbrate = ifp->if_baudrate;
3981	ifp->if_baudrate = baudrate;
3982	return (oldbrate);
3983}
3984
3985uint64_t
3986if_getbaudrate(if_t ifp)
3987{
3988
3989	return (((struct ifnet *)ifp)->if_baudrate);
3990}
3991
3992int
3993if_setcapabilities(if_t ifp, int capabilities)
3994{
3995	((struct ifnet *)ifp)->if_capabilities = capabilities;
3996	return (0);
3997}
3998
3999int
4000if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
4001{
4002	((struct ifnet *)ifp)->if_capabilities |= setbit;
4003	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
4004
4005	return (0);
4006}
4007
4008int
4009if_getcapabilities(if_t ifp)
4010{
4011	return ((struct ifnet *)ifp)->if_capabilities;
4012}
4013
4014int
4015if_setcapenable(if_t ifp, int capabilities)
4016{
4017	((struct ifnet *)ifp)->if_capenable = capabilities;
4018	return (0);
4019}
4020
4021int
4022if_setcapenablebit(if_t ifp, int setcap, int clearcap)
4023{
4024	if(setcap)
4025		((struct ifnet *)ifp)->if_capenable |= setcap;
4026	if(clearcap)
4027		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
4028
4029	return (0);
4030}
4031
4032const char *
4033if_getdname(if_t ifp)
4034{
4035	return ((struct ifnet *)ifp)->if_dname;
4036}
4037
4038int
4039if_togglecapenable(if_t ifp, int togglecap)
4040{
4041	((struct ifnet *)ifp)->if_capenable ^= togglecap;
4042	return (0);
4043}
4044
4045int
4046if_getcapenable(if_t ifp)
4047{
4048	return ((struct ifnet *)ifp)->if_capenable;
4049}
4050
4051/*
4052 * This is largely undesirable because it ties ifnet to a device, but does
4053 * provide flexiblity for an embedded product vendor. Should be used with
4054 * the understanding that it violates the interface boundaries, and should be
4055 * a last resort only.
4056 */
4057int
4058if_setdev(if_t ifp, void *dev)
4059{
4060	return (0);
4061}
4062
4063int
4064if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
4065{
4066	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
4067	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
4068
4069	return (0);
4070}
4071
4072int
4073if_getdrvflags(if_t ifp)
4074{
4075	return ((struct ifnet *)ifp)->if_drv_flags;
4076}
4077
4078int
4079if_setdrvflags(if_t ifp, int flags)
4080{
4081	((struct ifnet *)ifp)->if_drv_flags = flags;
4082	return (0);
4083}
4084
4085
4086int
4087if_setflags(if_t ifp, int flags)
4088{
4089	((struct ifnet *)ifp)->if_flags = flags;
4090	return (0);
4091}
4092
4093int
4094if_setflagbits(if_t ifp, int set, int clear)
4095{
4096	((struct ifnet *)ifp)->if_flags |= set;
4097	((struct ifnet *)ifp)->if_flags &= ~clear;
4098
4099	return (0);
4100}
4101
4102int
4103if_getflags(if_t ifp)
4104{
4105	return ((struct ifnet *)ifp)->if_flags;
4106}
4107
4108int
4109if_clearhwassist(if_t ifp)
4110{
4111	((struct ifnet *)ifp)->if_hwassist = 0;
4112	return (0);
4113}
4114
4115int
4116if_sethwassistbits(if_t ifp, int toset, int toclear)
4117{
4118	((struct ifnet *)ifp)->if_hwassist |= toset;
4119	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
4120
4121	return (0);
4122}
4123
4124int
4125if_sethwassist(if_t ifp, int hwassist_bit)
4126{
4127	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
4128	return (0);
4129}
4130
4131int
4132if_gethwassist(if_t ifp)
4133{
4134	return ((struct ifnet *)ifp)->if_hwassist;
4135}
4136
4137int
4138if_setmtu(if_t ifp, int mtu)
4139{
4140	((struct ifnet *)ifp)->if_mtu = mtu;
4141	return (0);
4142}
4143
4144int
4145if_getmtu(if_t ifp)
4146{
4147	return ((struct ifnet *)ifp)->if_mtu;
4148}
4149
4150int
4151if_getmtu_family(if_t ifp, int family)
4152{
4153	struct domain *dp;
4154
4155	for (dp = domains; dp; dp = dp->dom_next) {
4156		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
4157			return (dp->dom_ifmtu((struct ifnet *)ifp));
4158	}
4159
4160	return (((struct ifnet *)ifp)->if_mtu);
4161}
4162
4163int
4164if_setsoftc(if_t ifp, void *softc)
4165{
4166	((struct ifnet *)ifp)->if_softc = softc;
4167	return (0);
4168}
4169
4170void *
4171if_getsoftc(if_t ifp)
4172{
4173	return ((struct ifnet *)ifp)->if_softc;
4174}
4175
4176void
4177if_setrcvif(struct mbuf *m, if_t ifp)
4178{
4179	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
4180}
4181
4182void
4183if_setvtag(struct mbuf *m, uint16_t tag)
4184{
4185	m->m_pkthdr.ether_vtag = tag;
4186}
4187
4188uint16_t
4189if_getvtag(struct mbuf *m)
4190{
4191
4192	return (m->m_pkthdr.ether_vtag);
4193}
4194
4195int
4196if_sendq_empty(if_t ifp)
4197{
4198	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
4199}
4200
4201struct ifaddr *
4202if_getifaddr(if_t ifp)
4203{
4204	return ((struct ifnet *)ifp)->if_addr;
4205}
4206
4207int
4208if_getamcount(if_t ifp)
4209{
4210	return ((struct ifnet *)ifp)->if_amcount;
4211}
4212
4213
4214int
4215if_setsendqready(if_t ifp)
4216{
4217	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
4218	return (0);
4219}
4220
4221int
4222if_setsendqlen(if_t ifp, int tx_desc_count)
4223{
4224	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
4225	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
4226
4227	return (0);
4228}
4229
4230int
4231if_vlantrunkinuse(if_t ifp)
4232{
4233	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
4234}
4235
4236int
4237if_input(if_t ifp, struct mbuf* sendmp)
4238{
4239	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
4240	return (0);
4241
4242}
4243
4244/* XXX */
4245#ifndef ETH_ADDR_LEN
4246#define ETH_ADDR_LEN 6
4247#endif
4248
4249int
4250if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
4251{
4252	struct ifmultiaddr *ifma;
4253	uint8_t *lmta = (uint8_t *)mta;
4254	int mcnt = 0;
4255
4256	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4257		if (ifma->ifma_addr->sa_family != AF_LINK)
4258			continue;
4259
4260		if (mcnt == max)
4261			break;
4262
4263		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
4264		    &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
4265		mcnt++;
4266	}
4267	*cnt = mcnt;
4268
4269	return (0);
4270}
4271
4272int
4273if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
4274{
4275	int error;
4276
4277	if_maddr_rlock(ifp);
4278	error = if_setupmultiaddr(ifp, mta, cnt, max);
4279	if_maddr_runlock(ifp);
4280	return (error);
4281}
4282
4283int
4284if_multiaddr_count(if_t ifp, int max)
4285{
4286	struct ifmultiaddr *ifma;
4287	int count;
4288
4289	count = 0;
4290	if_maddr_rlock(ifp);
4291	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4292		if (ifma->ifma_addr->sa_family != AF_LINK)
4293			continue;
4294		count++;
4295		if (count == max)
4296			break;
4297	}
4298	if_maddr_runlock(ifp);
4299	return (count);
4300}
4301
4302int
4303if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
4304{
4305	struct ifmultiaddr *ifma;
4306	int cnt = 0;
4307
4308	if_maddr_rlock(ifp);
4309	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4310		cnt += filter(arg, ifma, cnt);
4311	if_maddr_runlock(ifp);
4312	return (cnt);
4313}
4314
4315struct mbuf *
4316if_dequeue(if_t ifp)
4317{
4318	struct mbuf *m;
4319	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
4320
4321	return (m);
4322}
4323
4324int
4325if_sendq_prepend(if_t ifp, struct mbuf *m)
4326{
4327	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
4328	return (0);
4329}
4330
4331int
4332if_setifheaderlen(if_t ifp, int len)
4333{
4334	((struct ifnet *)ifp)->if_hdrlen = len;
4335	return (0);
4336}
4337
4338caddr_t
4339if_getlladdr(if_t ifp)
4340{
4341	return (IF_LLADDR((struct ifnet *)ifp));
4342}
4343
4344void *
4345if_gethandle(u_char type)
4346{
4347	return (if_alloc(type));
4348}
4349
4350void
4351if_bpfmtap(if_t ifh, struct mbuf *m)
4352{
4353	struct ifnet *ifp = (struct ifnet *)ifh;
4354
4355	BPF_MTAP(ifp, m);
4356}
4357
4358void
4359if_etherbpfmtap(if_t ifh, struct mbuf *m)
4360{
4361	struct ifnet *ifp = (struct ifnet *)ifh;
4362
4363	ETHER_BPF_MTAP(ifp, m);
4364}
4365
4366void
4367if_vlancap(if_t ifh)
4368{
4369	struct ifnet *ifp = (struct ifnet *)ifh;
4370	VLAN_CAPABILITIES(ifp);
4371}
4372
4373void
4374if_setinitfn(if_t ifp, void (*init_fn)(void *))
4375{
4376	((struct ifnet *)ifp)->if_init = init_fn;
4377}
4378
4379void
4380if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
4381{
4382	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
4383}
4384
4385void
4386if_setstartfn(if_t ifp, void (*start_fn)(if_t))
4387{
4388	((struct ifnet *)ifp)->if_start = (void *)start_fn;
4389}
4390
4391void
4392if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
4393{
4394	((struct ifnet *)ifp)->if_transmit = start_fn;
4395}
4396
4397void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
4398{
4399	((struct ifnet *)ifp)->if_qflush = flush_fn;
4400
4401}
4402
4403void
4404if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
4405{
4406
4407	ifp->if_get_counter = fn;
4408}
4409
4410/* Revisit these - These are inline functions originally. */
4411int
4412drbr_inuse_drv(if_t ifh, struct buf_ring *br)
4413{
4414	return drbr_inuse(ifh, br);
4415}
4416
4417struct mbuf*
4418drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
4419{
4420	return drbr_dequeue(ifh, br);
4421}
4422
4423int
4424drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
4425{
4426	return drbr_needs_enqueue(ifh, br);
4427}
4428
4429int
4430drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
4431{
4432	return drbr_enqueue(ifh, br, m);
4433
4434}
4435