if.c revision 332281
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: stable/11/sys/net/if.c 332281 2018-04-08 15:52:32Z brooks $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36
37#include <sys/param.h>
38#include <sys/types.h>
39#include <sys/conf.h>
40#include <sys/malloc.h>
41#include <sys/sbuf.h>
42#include <sys/bus.h>
43#include <sys/mbuf.h>
44#include <sys/systm.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/protosw.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/refcount.h>
53#include <sys/module.h>
54#include <sys/rwlock.h>
55#include <sys/sockio.h>
56#include <sys/syslog.h>
57#include <sys/sysctl.h>
58#include <sys/sysent.h>
59#include <sys/taskqueue.h>
60#include <sys/domain.h>
61#include <sys/jail.h>
62#include <sys/priv.h>
63
64#include <machine/stdarg.h>
65#include <vm/uma.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_clone.h>
72#include <net/if_dl.h>
73#include <net/if_types.h>
74#include <net/if_var.h>
75#include <net/if_media.h>
76#include <net/if_vlan_var.h>
77#include <net/radix.h>
78#include <net/route.h>
79#include <net/vnet.h>
80
81#if defined(INET) || defined(INET6)
82#include <net/ethernet.h>
83#include <netinet/in.h>
84#include <netinet/in_var.h>
85#include <netinet/ip.h>
86#include <netinet/ip_carp.h>
87#ifdef INET
88#include <netinet/if_ether.h>
89#endif /* INET */
90#ifdef INET6
91#include <netinet6/in6_var.h>
92#include <netinet6/in6_ifattach.h>
93#endif /* INET6 */
94#endif /* INET || INET6 */
95
96#include <security/mac/mac_framework.h>
97
98#ifdef COMPAT_FREEBSD32
99#include <sys/mount.h>
100#include <compat/freebsd32/freebsd32.h>
101
/*
 * Buffer descriptor as used inside struct ifreq32: both members are held
 * in fixed 32-bit integers instead of their native types.
 */
struct ifreq_buffer32 {
	uint32_t	length;		/* takes the place of a size_t */
	uint32_t	buffer;		/* takes the place of a void * */
};
106
107/*
108 * Interface request structure used for socket
109 * ioctl's.  All interface ioctl's must have parameter
110 * definitions which begin with ifr_name.  The
111 * remainder may be interface specific.
112 */
113struct ifreq32 {
114	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
115	union {
116		struct sockaddr	ifru_addr;
117		struct sockaddr	ifru_dstaddr;
118		struct sockaddr	ifru_broadaddr;
119		struct ifreq_buffer32 ifru_buffer;
120		short		ifru_flags[2];
121		short		ifru_index;
122		int		ifru_jid;
123		int		ifru_metric;
124		int		ifru_mtu;
125		int		ifru_phys;
126		int		ifru_media;
127		uint32_t	ifru_data;
128		int		ifru_cap[2];
129		u_int		ifru_fib;
130		u_char		ifru_vlan_pcp;
131	} ifr_ifru;
132};
133CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
134CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
135    __offsetof(struct ifreq32, ifr_ifru));
136#endif
137
138union ifreq_union {
139	struct ifreq	ifr;
140#ifdef COMPAT_FREEBSD32
141	struct ifreq32	ifr32;
142#endif
143};
144
145SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
146SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
147
148SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
149    &ifqmaxlen, 0, "max send queue size");
150
151/* Log link state change events */
152static int log_link_state_change = 1;
153
154SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
155	&log_link_state_change, 0,
156	"log interface link state change events");
157
158/* Log promiscuous mode change events */
159static int log_promisc_mode_change = 1;
160
161SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
162	&log_promisc_mode_change, 1,
163	"log promiscuous mode change events");
164
165/* Interface description */
166static unsigned int ifdescr_maxlen = 1024;
167SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
168	&ifdescr_maxlen, 0,
169	"administrative maximum length for interface description");
170
171static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
172
173/* global sx for non-critical path ifdescr */
174static struct sx ifdescr_sx;
175SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
176
177void	(*bridge_linkstate_p)(struct ifnet *ifp);
178void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
179void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
180/* These are external hooks for CARP. */
181void	(*carp_linkstate_p)(struct ifnet *ifp);
182void	(*carp_demote_adj_p)(int, char *);
183int	(*carp_master_p)(struct ifaddr *);
184#if defined(INET) || defined(INET6)
185int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
186int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
187    const struct sockaddr *sa);
188int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
189int	(*carp_attach_p)(struct ifaddr *, int);
190void	(*carp_detach_p)(struct ifaddr *);
191#endif
192#ifdef INET
193int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
194#endif
195#ifdef INET6
196struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
197caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
198    const struct in6_addr *taddr);
199#endif
200
201struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
202
203/*
204 * XXX: Style; these should be sorted alphabetically, and unprototyped
205 * static functions should be prototyped. Currently they are sorted by
206 * declaration order.
207 */
208static void	if_attachdomain(void *);
209static void	if_attachdomain1(struct ifnet *);
210static int	ifconf(u_long, caddr_t);
211static void	if_freemulti(struct ifmultiaddr *);
212static void	if_grow(void);
213static void	if_input_default(struct ifnet *, struct mbuf *);
214static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
215static void	if_route(struct ifnet *, int flag, int fam);
216static int	if_setflag(struct ifnet *, int, int, int *, int);
217static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
218static void	if_unroute(struct ifnet *, int flag, int fam);
219static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
220static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
221static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
222static void	do_link_state_change(void *, int);
223static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
224static int	if_getgroupmembers(struct ifgroupreq *);
225static void	if_delgroups(struct ifnet *);
226static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
227static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
228#ifdef VIMAGE
229static void	if_vmove(struct ifnet *, struct vnet *);
230#endif
231
232#ifdef INET6
233/*
234 * XXX: declare here to avoid to include many inet6 related files..
235 * should be more generalized?
236 */
237extern void	nd6_setmtu(struct ifnet *);
238#endif
239
240/* ipsec helper hooks */
241VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
242VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
243
244VNET_DEFINE(int, if_index);
245int	ifqmaxlen = IFQ_MAXLEN;
246VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
247VNET_DEFINE(struct ifgrouphead, ifg_head);
248
249static VNET_DEFINE(int, if_indexlim) = 8;
250
251/* Table of ifnet by index. */
252VNET_DEFINE(struct ifnet **, ifindex_table);
253
254#define	V_if_indexlim		VNET(if_indexlim)
255#define	V_ifindex_table		VNET(ifindex_table)
256
/*
 * The global network interface list (V_ifnet) and related state (such as
 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
 * an rwlock.  Either may be acquired shared to stabilize the list, but both
 * must be acquired writable to modify the list.  This model allows us to
 * both stabilize the interface list during interrupt thread processing, but
 * also to stabilize it over long-running ioctls, without introducing priority
 * inversions and deadlocks.
 */
265 */
266struct rwlock ifnet_rwlock;
267RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
268struct sx ifnet_sxlock;
269SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
270
271/*
272 * The allocation of network interfaces is a rather non-atomic affair; we
273 * need to select an index before we are ready to expose the interface for
274 * use, so will use this pointer value to indicate reservation.
275 */
276#define	IFNET_HOLD	(void *)(uintptr_t)(-1)
277
278static	if_com_alloc_t *if_com_alloc[256];
279static	if_com_free_t *if_com_free[256];
280
281static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
282MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
283MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
284
285struct ifnet *
286ifnet_byindex_locked(u_short idx)
287{
288
289	if (idx > V_if_index)
290		return (NULL);
291	if (V_ifindex_table[idx] == IFNET_HOLD)
292		return (NULL);
293	return (V_ifindex_table[idx]);
294}
295
296struct ifnet *
297ifnet_byindex(u_short idx)
298{
299	struct ifnet *ifp;
300
301	IFNET_RLOCK_NOSLEEP();
302	ifp = ifnet_byindex_locked(idx);
303	IFNET_RUNLOCK_NOSLEEP();
304	return (ifp);
305}
306
307struct ifnet *
308ifnet_byindex_ref(u_short idx)
309{
310	struct ifnet *ifp;
311
312	IFNET_RLOCK_NOSLEEP();
313	ifp = ifnet_byindex_locked(idx);
314	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
315		IFNET_RUNLOCK_NOSLEEP();
316		return (NULL);
317	}
318	if_ref(ifp);
319	IFNET_RUNLOCK_NOSLEEP();
320	return (ifp);
321}
322
/*
 * Allocate an ifindex array entry and return its index.  The table is
 * grown as needed, so this does not fail; index 0 is never handed out.
 */
327static u_short
328ifindex_alloc(void)
329{
330	u_short idx;
331
332	IFNET_WLOCK_ASSERT();
333retry:
334	/*
335	 * Try to find an empty slot below V_if_index.  If we fail, take the
336	 * next slot.
337	 */
338	for (idx = 1; idx <= V_if_index; idx++) {
339		if (V_ifindex_table[idx] == NULL)
340			break;
341	}
342
343	/* Catch if_index overflow. */
344	if (idx >= V_if_indexlim) {
345		if_grow();
346		goto retry;
347	}
348	if (idx > V_if_index)
349		V_if_index = idx;
350	return (idx);
351}
352
353static void
354ifindex_free_locked(u_short idx)
355{
356
357	IFNET_WLOCK_ASSERT();
358
359	V_ifindex_table[idx] = NULL;
360	while (V_if_index > 0 &&
361	    V_ifindex_table[V_if_index] == NULL)
362		V_if_index--;
363}
364
365static void
366ifindex_free(u_short idx)
367{
368
369	IFNET_WLOCK();
370	ifindex_free_locked(idx);
371	IFNET_WUNLOCK();
372}
373
374static void
375ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
376{
377
378	IFNET_WLOCK_ASSERT();
379
380	V_ifindex_table[idx] = ifp;
381}
382
383static void
384ifnet_setbyindex(u_short idx, struct ifnet *ifp)
385{
386
387	IFNET_WLOCK();
388	ifnet_setbyindex_locked(idx, ifp);
389	IFNET_WUNLOCK();
390}
391
392struct ifaddr *
393ifaddr_byindex(u_short idx)
394{
395	struct ifnet *ifp;
396	struct ifaddr *ifa = NULL;
397
398	IFNET_RLOCK_NOSLEEP();
399	ifp = ifnet_byindex_locked(idx);
400	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
401		ifa_ref(ifa);
402	IFNET_RUNLOCK_NOSLEEP();
403	return (ifa);
404}
405
406/*
407 * Network interface utility routines.
408 *
409 * Routines with ifa_ifwith* names take sockaddr *'s as
410 * parameters.
411 */
412
413static void
414vnet_if_init(const void *unused __unused)
415{
416
417	TAILQ_INIT(&V_ifnet);
418	TAILQ_INIT(&V_ifg_head);
419	IFNET_WLOCK();
420	if_grow();				/* create initial table */
421	IFNET_WUNLOCK();
422	vnet_if_clone_init();
423}
424VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
425    NULL);
426
427#ifdef VIMAGE
428static void
429vnet_if_uninit(const void *unused __unused)
430{
431
432	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
433	    "not empty", __func__, __LINE__, &V_ifnet));
434	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
435	    "not empty", __func__, __LINE__, &V_ifg_head));
436
437	free((caddr_t)V_ifindex_table, M_IFNET);
438}
439VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
440    vnet_if_uninit, NULL);
441
442static void
443vnet_if_return(const void *unused __unused)
444{
445	struct ifnet *ifp, *nifp;
446
447	/* Return all inherited interfaces to their parent vnets. */
448	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
449		if (ifp->if_home_vnet != ifp->if_vnet)
450			if_vmove(ifp, ifp->if_home_vnet);
451	}
452}
453VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
454    vnet_if_return, NULL);
455#endif
456
457static void
458if_grow(void)
459{
460	int oldlim;
461	u_int n;
462	struct ifnet **e;
463
464	IFNET_WLOCK_ASSERT();
465	oldlim = V_if_indexlim;
466	IFNET_WUNLOCK();
467	n = (oldlim << 1) * sizeof(*e);
468	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
469	IFNET_WLOCK();
470	if (V_if_indexlim != oldlim) {
471		free(e, M_IFNET);
472		return;
473	}
474	if (V_ifindex_table != NULL) {
475		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
476		free((caddr_t)V_ifindex_table, M_IFNET);
477	}
478	V_if_indexlim <<= 1;
479	V_ifindex_table = e;
480}
481
482/*
483 * Allocate a struct ifnet and an index for an interface.  A layer 2
484 * common structure will also be allocated if an allocation routine is
485 * registered for the passed type.
486 */
487struct ifnet *
488if_alloc(u_char type)
489{
490	struct ifnet *ifp;
491	u_short idx;
492
493	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
494	IFNET_WLOCK();
495	idx = ifindex_alloc();
496	ifnet_setbyindex_locked(idx, IFNET_HOLD);
497	IFNET_WUNLOCK();
498	ifp->if_index = idx;
499	ifp->if_type = type;
500	ifp->if_alloctype = type;
501#ifdef VIMAGE
502	ifp->if_vnet = curvnet;
503#endif
504	if (if_com_alloc[type] != NULL) {
505		ifp->if_l2com = if_com_alloc[type](type, ifp);
506		if (ifp->if_l2com == NULL) {
507			free(ifp, M_IFNET);
508			ifindex_free(idx);
509			return (NULL);
510		}
511	}
512
513	IF_ADDR_LOCK_INIT(ifp);
514	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
515	ifp->if_afdata_initialized = 0;
516	IF_AFDATA_LOCK_INIT(ifp);
517	TAILQ_INIT(&ifp->if_addrhead);
518	TAILQ_INIT(&ifp->if_multiaddrs);
519	TAILQ_INIT(&ifp->if_groups);
520#ifdef MAC
521	mac_ifnet_init(ifp);
522#endif
523	ifq_init(&ifp->if_snd, ifp);
524
525	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
526	for (int i = 0; i < IFCOUNTERS; i++)
527		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
528	ifp->if_get_counter = if_get_counter_default;
529	ifnet_setbyindex(ifp->if_index, ifp);
530	return (ifp);
531}
532
533/*
534 * Do the actual work of freeing a struct ifnet, and layer 2 common
535 * structure.  This call is made when the last reference to an
536 * interface is released.
537 */
538static void
539if_free_internal(struct ifnet *ifp)
540{
541
542	KASSERT((ifp->if_flags & IFF_DYING),
543	    ("if_free_internal: interface not dying"));
544
545	if (if_com_free[ifp->if_alloctype] != NULL)
546		if_com_free[ifp->if_alloctype](ifp->if_l2com,
547		    ifp->if_alloctype);
548
549#ifdef MAC
550	mac_ifnet_destroy(ifp);
551#endif /* MAC */
552	if (ifp->if_description != NULL)
553		free(ifp->if_description, M_IFDESCR);
554	IF_AFDATA_DESTROY(ifp);
555	IF_ADDR_LOCK_DESTROY(ifp);
556	ifq_delete(&ifp->if_snd);
557
558	for (int i = 0; i < IFCOUNTERS; i++)
559		counter_u64_free(ifp->if_counters[i]);
560
561	free(ifp, M_IFNET);
562}
563
564/*
565 * Deregister an interface and free the associated storage.
566 */
567void
568if_free(struct ifnet *ifp)
569{
570
571	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
572
573	CURVNET_SET_QUIET(ifp->if_vnet);
574	IFNET_WLOCK();
575	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
576	    ("%s: freeing unallocated ifnet", ifp->if_xname));
577
578	ifindex_free_locked(ifp->if_index);
579	IFNET_WUNLOCK();
580
581	if (refcount_release(&ifp->if_refcount))
582		if_free_internal(ifp);
583	CURVNET_RESTORE();
584}
585
586/*
587 * Interfaces to keep an ifnet type-stable despite the possibility of the
588 * driver calling if_free().  If there are additional references, we defer
589 * freeing the underlying data structure.
590 */
591void
592if_ref(struct ifnet *ifp)
593{
594
595	/* We don't assert the ifnet list lock here, but arguably should. */
596	refcount_acquire(&ifp->if_refcount);
597}
598
599void
600if_rele(struct ifnet *ifp)
601{
602
603	if (!refcount_release(&ifp->if_refcount))
604		return;
605	if_free_internal(ifp);
606}
607
608void
609ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
610{
611
612	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
613
614	if (ifq->ifq_maxlen == 0)
615		ifq->ifq_maxlen = ifqmaxlen;
616
617	ifq->altq_type = 0;
618	ifq->altq_disc = NULL;
619	ifq->altq_flags &= ALTQF_CANTCHANGE;
620	ifq->altq_tbr  = NULL;
621	ifq->altq_ifp  = ifp;
622}
623
624void
625ifq_delete(struct ifaltq *ifq)
626{
627	mtx_destroy(&ifq->ifq_mtx);
628}
629
630/*
631 * Perform generic interface initialization tasks and attach the interface
632 * to the list of "active" interfaces.  If vmove flag is set on entry
633 * to if_attach_internal(), perform only a limited subset of initialization
634 * tasks, given that we are moving from one vnet to another an ifnet which
635 * has already been fully initialized.
636 *
637 * Note that if_detach_internal() removes group membership unconditionally
638 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
639 * Thus, when if_vmove() is applied to a cloned interface, group membership
640 * is lost while a cloned one always joins a group whose name is
641 * ifc->ifc_name.  To recover this after if_detach_internal() and
642 * if_attach_internal(), the cloner should be specified to
643 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
644 * attempts to join a group whose name is ifc->ifc_name.
645 *
646 * XXX:
647 *  - The decision to return void and thus require this function to
648 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
651 */
652void
653if_attach(struct ifnet *ifp)
654{
655
656	if_attach_internal(ifp, 0, NULL);
657}
658
659/*
660 * Compute the least common TSO limit.
661 */
662void
663if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
664{
665	/*
666	 * 1) If there is no limit currently, take the limit from
667	 * the network adapter.
668	 *
669	 * 2) If the network adapter has a limit below the current
670	 * limit, apply it.
671	 */
672	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
673	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
674		pmax->tsomaxbytes = ifp->if_hw_tsomax;
675	}
676	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
677	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
678		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
679	}
680	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
681	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
682		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
683	}
684}
685
686/*
687 * Update TSO limit of a network adapter.
688 *
689 * Returns zero if no change. Else non-zero.
690 */
691int
692if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
693{
694	int retval = 0;
695	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
696		ifp->if_hw_tsomax = pmax->tsomaxbytes;
697		retval++;
698	}
699	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
700		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
701		retval++;
702	}
703	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
704		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
705		retval++;
706	}
707	return (retval);
708}
709
710static void
711if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
712{
713	unsigned socksize, ifasize;
714	int namelen, masklen;
715	struct sockaddr_dl *sdl;
716	struct ifaddr *ifa;
717
718	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
719		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
720		    ifp->if_xname);
721
722#ifdef VIMAGE
723	ifp->if_vnet = curvnet;
724	if (ifp->if_home_vnet == NULL)
725		ifp->if_home_vnet = curvnet;
726#endif
727
728	if_addgroup(ifp, IFG_ALL);
729
730	/* Restore group membership for cloned interfaces. */
731	if (vmove && ifc != NULL)
732		if_clone_addgroup(ifp, ifc);
733
734	getmicrotime(&ifp->if_lastchange);
735	ifp->if_epoch = time_uptime;
736
737	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
738	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
739	    ("transmit and qflush must both either be set or both be NULL"));
740	if (ifp->if_transmit == NULL) {
741		ifp->if_transmit = if_transmit;
742		ifp->if_qflush = if_qflush;
743	}
744	if (ifp->if_input == NULL)
745		ifp->if_input = if_input_default;
746
747	if (ifp->if_requestencap == NULL)
748		ifp->if_requestencap = if_requestencap_default;
749
750	if (!vmove) {
751#ifdef MAC
752		mac_ifnet_create(ifp);
753#endif
754
755		/*
756		 * Create a Link Level name for this device.
757		 */
758		namelen = strlen(ifp->if_xname);
759		/*
760		 * Always save enough space for any possiable name so we
761		 * can do a rename in place later.
762		 */
763		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
764		socksize = masklen + ifp->if_addrlen;
765		if (socksize < sizeof(*sdl))
766			socksize = sizeof(*sdl);
767		socksize = roundup2(socksize, sizeof(long));
768		ifasize = sizeof(*ifa) + 2 * socksize;
769		ifa = ifa_alloc(ifasize, M_WAITOK);
770		sdl = (struct sockaddr_dl *)(ifa + 1);
771		sdl->sdl_len = socksize;
772		sdl->sdl_family = AF_LINK;
773		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
774		sdl->sdl_nlen = namelen;
775		sdl->sdl_index = ifp->if_index;
776		sdl->sdl_type = ifp->if_type;
777		ifp->if_addr = ifa;
778		ifa->ifa_ifp = ifp;
779		ifa->ifa_rtrequest = link_rtrequest;
780		ifa->ifa_addr = (struct sockaddr *)sdl;
781		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
782		ifa->ifa_netmask = (struct sockaddr *)sdl;
783		sdl->sdl_len = masklen;
784		while (namelen != 0)
785			sdl->sdl_data[--namelen] = 0xff;
786		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
787		/* Reliably crash if used uninitialized. */
788		ifp->if_broadcastaddr = NULL;
789
790		if (ifp->if_type == IFT_ETHER) {
791			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
792			    M_WAITOK | M_ZERO);
793		}
794
795#if defined(INET) || defined(INET6)
796		/* Use defaults for TSO, if nothing is set */
797		if (ifp->if_hw_tsomax == 0 &&
798		    ifp->if_hw_tsomaxsegcount == 0 &&
799		    ifp->if_hw_tsomaxsegsize == 0) {
800			/*
801			 * The TSO defaults needs to be such that an
802			 * NFS mbuf list of 35 mbufs totalling just
803			 * below 64K works and that a chain of mbufs
804			 * can be defragged into at most 32 segments:
805			 */
806			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
807			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
808			ifp->if_hw_tsomaxsegcount = 35;
809			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */
810
811			/* XXX some drivers set IFCAP_TSO after ethernet attach */
812			if (ifp->if_capabilities & IFCAP_TSO) {
813				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
814				    ifp->if_hw_tsomax,
815				    ifp->if_hw_tsomaxsegcount,
816				    ifp->if_hw_tsomaxsegsize);
817			}
818		}
819#endif
820	}
821#ifdef VIMAGE
822	else {
823		/*
824		 * Update the interface index in the link layer address
825		 * of the interface.
826		 */
827		for (ifa = ifp->if_addr; ifa != NULL;
828		    ifa = TAILQ_NEXT(ifa, ifa_link)) {
829			if (ifa->ifa_addr->sa_family == AF_LINK) {
830				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
831				sdl->sdl_index = ifp->if_index;
832			}
833		}
834	}
835#endif
836
837	IFNET_WLOCK();
838	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
839#ifdef VIMAGE
840	curvnet->vnet_ifcnt++;
841#endif
842	IFNET_WUNLOCK();
843
844	if (domain_init_status >= 2)
845		if_attachdomain1(ifp);
846
847	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
848	if (IS_DEFAULT_VNET(curvnet))
849		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
850
851	/* Announce the interface. */
852	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
853}
854
855static void
856if_attachdomain(void *dummy)
857{
858	struct ifnet *ifp;
859
860	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
861		if_attachdomain1(ifp);
862}
863SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
864    if_attachdomain, NULL);
865
866static void
867if_attachdomain1(struct ifnet *ifp)
868{
869	struct domain *dp;
870
871	/*
872	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
873	 * cannot lock ifp->if_afdata initialization, entirely.
874	 */
875	IF_AFDATA_LOCK(ifp);
876	if (ifp->if_afdata_initialized >= domain_init_status) {
877		IF_AFDATA_UNLOCK(ifp);
878		log(LOG_WARNING, "%s called more than once on %s\n",
879		    __func__, ifp->if_xname);
880		return;
881	}
882	ifp->if_afdata_initialized = domain_init_status;
883	IF_AFDATA_UNLOCK(ifp);
884
885	/* address family dependent data region */
886	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
887	for (dp = domains; dp; dp = dp->dom_next) {
888		if (dp->dom_ifattach)
889			ifp->if_afdata[dp->dom_family] =
890			    (*dp->dom_ifattach)(ifp);
891	}
892}
893
894/*
895 * Remove any unicast or broadcast network addresses from an interface.
896 */
897void
898if_purgeaddrs(struct ifnet *ifp)
899{
900	struct ifaddr *ifa, *next;
901
902	/* XXX cannot hold IF_ADDR_WLOCK over called functions. */
903	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
904		if (ifa->ifa_addr->sa_family == AF_LINK)
905			continue;
906#ifdef INET
907		/* XXX: Ugly!! ad hoc just for INET */
908		if (ifa->ifa_addr->sa_family == AF_INET) {
909			struct ifaliasreq ifr;
910
911			bzero(&ifr, sizeof(ifr));
912			ifr.ifra_addr = *ifa->ifa_addr;
913			if (ifa->ifa_dstaddr)
914				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
915			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
916			    NULL) == 0)
917				continue;
918		}
919#endif /* INET */
920#ifdef INET6
921		if (ifa->ifa_addr->sa_family == AF_INET6) {
922			in6_purgeaddr(ifa);
923			/* ifp_addrhead is already updated */
924			continue;
925		}
926#endif /* INET6 */
927		IF_ADDR_WLOCK(ifp);
928		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
929		IF_ADDR_WUNLOCK(ifp);
930		ifa_free(ifa);
931	}
932}
933
934/*
935 * Remove any multicast network addresses from an interface when an ifnet
936 * is going away.
937 */
938static void
939if_purgemaddrs(struct ifnet *ifp)
940{
941	struct ifmultiaddr *ifma;
942	struct ifmultiaddr *next;
943
944	IF_ADDR_WLOCK(ifp);
945	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
946		if_delmulti_locked(ifp, ifma, 1);
947	IF_ADDR_WUNLOCK(ifp);
948}
949
950/*
951 * Detach an interface, removing it from the list of "active" interfaces.
952 * If vmove flag is set on entry to if_detach_internal(), perform only a
953 * limited subset of cleanup tasks, given that we are moving an ifnet from
954 * one vnet to another, where it must be fully operational.
955 *
956 * XXXRW: There are some significant questions about event ordering, and
957 * how to prevent things from starting to use the interface during detach.
958 */
959void
960if_detach(struct ifnet *ifp)
961{
962
963	CURVNET_SET_QUIET(ifp->if_vnet);
964	if_detach_internal(ifp, 0, NULL);
965	CURVNET_RESTORE();
966}
967
968/*
969 * The vmove flag, if set, indicates that we are called from a callpath
970 * that is moving an interface to a different vnet instance.
971 *
972 * The shutdown flag, if set, indicates that we are called in the
973 * process of shutting down a vnet instance.  Currently only the
974 * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
975 * on a vnet instance shutdown without this flag being set, e.g., when
 * the cloned interfaces are destroyed as the first thing of teardown.
977 */
978static int
979if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
980{
981	struct ifaddr *ifa;
982	int i;
983	struct domain *dp;
984 	struct ifnet *iter;
985 	int found = 0;
986#ifdef VIMAGE
987	int shutdown;
988
989	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
990		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
991#endif
992	IFNET_WLOCK();
993	TAILQ_FOREACH(iter, &V_ifnet, if_link)
994		if (iter == ifp) {
995			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
996			found = 1;
997			break;
998		}
999	IFNET_WUNLOCK();
1000	if (!found) {
1001		/*
1002		 * While we would want to panic here, we cannot
1003		 * guarantee that the interface is indeed still on
1004		 * the list given we don't hold locks all the way.
1005		 */
1006		return (ENOENT);
1007#if 0
1008		if (vmove)
1009			panic("%s: ifp=%p not on the ifnet tailq %p",
1010			    __func__, ifp, &V_ifnet);
1011		else
1012			return; /* XXX this should panic as well? */
1013#endif
1014	}
1015
1016	/*
1017	 * At this point we know the interface still was on the ifnet list
1018	 * and we removed it so we are in a stable state.
1019	 */
1020#ifdef VIMAGE
1021	curvnet->vnet_ifcnt--;
1022#endif
1023
1024	/*
1025	 * In any case (destroy or vmove) detach us from the groups
1026	 * and remove/wait for pending events on the taskq.
1027	 * XXX-BZ in theory an interface could still enqueue a taskq change?
1028	 */
1029	if_delgroups(ifp);
1030
1031	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
1032
1033	/*
1034	 * Check if this is a cloned interface or not. Must do even if
1035	 * shutting down as a if_vmove_reclaim() would move the ifp and
1036	 * the if_clone_addgroup() will have a corrupted string overwise
1037	 * from a gibberish pointer.
1038	 */
1039	if (vmove && ifcp != NULL)
1040		*ifcp = if_clone_findifc(ifp);
1041
1042	if_down(ifp);
1043
1044#ifdef VIMAGE
1045	/*
1046	 * On VNET shutdown abort here as the stack teardown will do all
1047	 * the work top-down for us.
1048	 */
1049	if (shutdown) {
1050		/*
1051		 * In case of a vmove we are done here without error.
1052		 * If we would signal an error it would lead to the same
1053		 * abort as if we did not find the ifnet anymore.
1054		 * if_detach() calls us in void context and does not care
1055		 * about an early abort notification, so life is splendid :)
1056		 */
1057		goto finish_vnet_shutdown;
1058	}
1059#endif
1060
1061	/*
1062	 * At this point we are not tearing down a VNET and are either
1063	 * going to destroy or vmove the interface and have to cleanup
1064	 * accordingly.
1065	 */
1066
1067	/*
1068	 * Remove routes and flush queues.
1069	 */
1070#ifdef ALTQ
1071	if (ALTQ_IS_ENABLED(&ifp->if_snd))
1072		altq_disable(&ifp->if_snd);
1073	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1074		altq_detach(&ifp->if_snd);
1075#endif
1076
1077	if_purgeaddrs(ifp);
1078
1079#ifdef INET
1080	in_ifdetach(ifp);
1081#endif
1082
1083#ifdef INET6
1084	/*
1085	 * Remove all IPv6 kernel structs related to ifp.  This should be done
1086	 * before removing routing entries below, since IPv6 interface direct
1087	 * routes are expected to be removed by the IPv6-specific kernel API.
1088	 * Otherwise, the kernel will detect some inconsistency and bark it.
1089	 */
1090	in6_ifdetach(ifp);
1091#endif
1092	if_purgemaddrs(ifp);
1093
1094	/* Announce that the interface is gone. */
1095	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1096	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1097	if (IS_DEFAULT_VNET(curvnet))
1098		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
1099
1100	if (!vmove) {
1101		/*
1102		 * Prevent further calls into the device driver via ifnet.
1103		 */
1104		if_dead(ifp);
1105
1106		/*
1107		 * Remove link ifaddr pointer and maybe decrement if_index.
1108		 * Clean up all addresses.
1109		 */
1110		free(ifp->if_hw_addr, M_IFADDR);
1111		ifp->if_hw_addr = NULL;
1112		ifp->if_addr = NULL;
1113
1114		/* We can now free link ifaddr. */
1115		IF_ADDR_WLOCK(ifp);
1116		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
1117			ifa = TAILQ_FIRST(&ifp->if_addrhead);
1118			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
1119			IF_ADDR_WUNLOCK(ifp);
1120			ifa_free(ifa);
1121		} else
1122			IF_ADDR_WUNLOCK(ifp);
1123	}
1124
1125	rt_flushifroutes(ifp);
1126
1127#ifdef VIMAGE
1128finish_vnet_shutdown:
1129#endif
1130	/*
1131	 * We cannot hold the lock over dom_ifdetach calls as they might
1132	 * sleep, for example trying to drain a callout, thus open up the
1133	 * theoretical race with re-attaching.
1134	 */
1135	IF_AFDATA_LOCK(ifp);
1136	i = ifp->if_afdata_initialized;
1137	ifp->if_afdata_initialized = 0;
1138	IF_AFDATA_UNLOCK(ifp);
1139	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
1140		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
1141			(*dp->dom_ifdetach)(ifp,
1142			    ifp->if_afdata[dp->dom_family]);
1143			ifp->if_afdata[dp->dom_family] = NULL;
1144		}
1145	}
1146
1147	return (0);
1148}
1149
1150#ifdef VIMAGE
1151/*
1152 * if_vmove() performs a limited version of if_detach() in current
1153 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
1154 * An attempt is made to shrink if_index in current vnet, find an
1155 * unused if_index in target vnet and calls if_grow() if necessary,
1156 * and finally find an unused if_xname for the target vnet.
1157 */
1158static void
1159if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
1160{
1161	struct if_clone *ifc;
1162	u_int bif_dlt, bif_hdrlen;
1163	int rc;
1164
1165 	/*
1166	 * if_detach_internal() will call the eventhandler to notify
1167	 * interface departure.  That will detach if_bpf.  We need to
1168	 * safe the dlt and hdrlen so we can re-attach it later.
1169	 */
1170	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
1171
1172	/*
1173	 * Detach from current vnet, but preserve LLADDR info, do not
1174	 * mark as dead etc. so that the ifnet can be reattached later.
1175	 * If we cannot find it, we lost the race to someone else.
1176	 */
1177	rc = if_detach_internal(ifp, 1, &ifc);
1178	if (rc != 0)
1179		return;
1180
1181	/*
1182	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
1183	 * the if_index for that vnet if possible.
1184	 *
1185	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
1186	 * or we'd lock on one vnet and unlock on another.
1187	 */
1188	IFNET_WLOCK();
1189	ifindex_free_locked(ifp->if_index);
1190	IFNET_WUNLOCK();
1191
1192	/*
1193	 * Perform interface-specific reassignment tasks, if provided by
1194	 * the driver.
1195	 */
1196	if (ifp->if_reassign != NULL)
1197		ifp->if_reassign(ifp, new_vnet, NULL);
1198
1199	/*
1200	 * Switch to the context of the target vnet.
1201	 */
1202	CURVNET_SET_QUIET(new_vnet);
1203
1204	IFNET_WLOCK();
1205	ifp->if_index = ifindex_alloc();
1206	ifnet_setbyindex_locked(ifp->if_index, ifp);
1207	IFNET_WUNLOCK();
1208
1209	if_attach_internal(ifp, 1, ifc);
1210
1211	if (ifp->if_bpf == NULL)
1212		bpfattach(ifp, bif_dlt, bif_hdrlen);
1213
1214	CURVNET_RESTORE();
1215}
1216
1217/*
1218 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1219 */
1220static int
1221if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1222{
1223	struct prison *pr;
1224	struct ifnet *difp;
1225	int shutdown;
1226
1227	/* Try to find the prison within our visibility. */
1228	sx_slock(&allprison_lock);
1229	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1230	sx_sunlock(&allprison_lock);
1231	if (pr == NULL)
1232		return (ENXIO);
1233	prison_hold_locked(pr);
1234	mtx_unlock(&pr->pr_mtx);
1235
1236	/* Do not try to move the iface from and to the same prison. */
1237	if (pr->pr_vnet == ifp->if_vnet) {
1238		prison_free(pr);
1239		return (EEXIST);
1240	}
1241
1242	/* Make sure the named iface does not exists in the dst. prison/vnet. */
1243	/* XXX Lock interfaces to avoid races. */
1244	CURVNET_SET_QUIET(pr->pr_vnet);
1245	difp = ifunit(ifname);
1246	if (difp != NULL) {
1247		CURVNET_RESTORE();
1248		prison_free(pr);
1249		return (EEXIST);
1250	}
1251
1252	/* Make sure the VNET is stable. */
1253	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1254		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1255	if (shutdown) {
1256		CURVNET_RESTORE();
1257		prison_free(pr);
1258		return (EBUSY);
1259	}
1260	CURVNET_RESTORE();
1261
1262	/* Move the interface into the child jail/vnet. */
1263	if_vmove(ifp, pr->pr_vnet);
1264
1265	/* Report the new if_xname back to the userland. */
1266	sprintf(ifname, "%s", ifp->if_xname);
1267
1268	prison_free(pr);
1269	return (0);
1270}
1271
1272static int
1273if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1274{
1275	struct prison *pr;
1276	struct vnet *vnet_dst;
1277	struct ifnet *ifp;
1278 	int shutdown;
1279
1280	/* Try to find the prison within our visibility. */
1281	sx_slock(&allprison_lock);
1282	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1283	sx_sunlock(&allprison_lock);
1284	if (pr == NULL)
1285		return (ENXIO);
1286	prison_hold_locked(pr);
1287	mtx_unlock(&pr->pr_mtx);
1288
1289	/* Make sure the named iface exists in the source prison/vnet. */
1290	CURVNET_SET(pr->pr_vnet);
1291	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1292	if (ifp == NULL) {
1293		CURVNET_RESTORE();
1294		prison_free(pr);
1295		return (ENXIO);
1296	}
1297
1298	/* Do not try to move the iface from and to the same prison. */
1299	vnet_dst = TD_TO_VNET(td);
1300	if (vnet_dst == ifp->if_vnet) {
1301		CURVNET_RESTORE();
1302		prison_free(pr);
1303		return (EEXIST);
1304	}
1305
1306	/* Make sure the VNET is stable. */
1307	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1308		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1309	if (shutdown) {
1310		CURVNET_RESTORE();
1311		prison_free(pr);
1312		return (EBUSY);
1313	}
1314
1315	/* Get interface back from child jail/vnet. */
1316	if_vmove(ifp, vnet_dst);
1317	CURVNET_RESTORE();
1318
1319	/* Report the new if_xname back to the userland. */
1320	sprintf(ifname, "%s", ifp->if_xname);
1321
1322	prison_free(pr);
1323	return (0);
1324}
1325#endif /* VIMAGE */
1326
1327/*
1328 * Add a group to an interface
1329 */
1330int
1331if_addgroup(struct ifnet *ifp, const char *groupname)
1332{
1333	struct ifg_list		*ifgl;
1334	struct ifg_group	*ifg = NULL;
1335	struct ifg_member	*ifgm;
1336	int 			 new = 0;
1337
1338	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1339	    groupname[strlen(groupname) - 1] <= '9')
1340		return (EINVAL);
1341
1342	IFNET_WLOCK();
1343	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1344		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1345			IFNET_WUNLOCK();
1346			return (EEXIST);
1347		}
1348
1349	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1350	    M_NOWAIT)) == NULL) {
1351	    	IFNET_WUNLOCK();
1352		return (ENOMEM);
1353	}
1354
1355	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1356	    M_TEMP, M_NOWAIT)) == NULL) {
1357		free(ifgl, M_TEMP);
1358		IFNET_WUNLOCK();
1359		return (ENOMEM);
1360	}
1361
1362	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1363		if (!strcmp(ifg->ifg_group, groupname))
1364			break;
1365
1366	if (ifg == NULL) {
1367		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1368		    M_TEMP, M_NOWAIT)) == NULL) {
1369			free(ifgl, M_TEMP);
1370			free(ifgm, M_TEMP);
1371			IFNET_WUNLOCK();
1372			return (ENOMEM);
1373		}
1374		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1375		ifg->ifg_refcnt = 0;
1376		TAILQ_INIT(&ifg->ifg_members);
1377		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1378		new = 1;
1379	}
1380
1381	ifg->ifg_refcnt++;
1382	ifgl->ifgl_group = ifg;
1383	ifgm->ifgm_ifp = ifp;
1384
1385	IF_ADDR_WLOCK(ifp);
1386	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1387	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1388	IF_ADDR_WUNLOCK(ifp);
1389
1390	IFNET_WUNLOCK();
1391
1392	if (new)
1393		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1394	EVENTHANDLER_INVOKE(group_change_event, groupname);
1395
1396	return (0);
1397}
1398
1399/*
1400 * Remove a group from an interface
1401 */
1402int
1403if_delgroup(struct ifnet *ifp, const char *groupname)
1404{
1405	struct ifg_list		*ifgl;
1406	struct ifg_member	*ifgm;
1407
1408	IFNET_WLOCK();
1409	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1410		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1411			break;
1412	if (ifgl == NULL) {
1413		IFNET_WUNLOCK();
1414		return (ENOENT);
1415	}
1416
1417	IF_ADDR_WLOCK(ifp);
1418	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1419	IF_ADDR_WUNLOCK(ifp);
1420
1421	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1422		if (ifgm->ifgm_ifp == ifp)
1423			break;
1424
1425	if (ifgm != NULL) {
1426		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1427		free(ifgm, M_TEMP);
1428	}
1429
1430	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1431		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1432		IFNET_WUNLOCK();
1433		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1434		free(ifgl->ifgl_group, M_TEMP);
1435	} else
1436		IFNET_WUNLOCK();
1437
1438	free(ifgl, M_TEMP);
1439
1440	EVENTHANDLER_INVOKE(group_change_event, groupname);
1441
1442	return (0);
1443}
1444
1445/*
1446 * Remove an interface from all groups
1447 */
1448static void
1449if_delgroups(struct ifnet *ifp)
1450{
1451	struct ifg_list		*ifgl;
1452	struct ifg_member	*ifgm;
1453	char groupname[IFNAMSIZ];
1454
1455	IFNET_WLOCK();
1456	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1457		ifgl = TAILQ_FIRST(&ifp->if_groups);
1458
1459		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1460
1461		IF_ADDR_WLOCK(ifp);
1462		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1463		IF_ADDR_WUNLOCK(ifp);
1464
1465		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1466			if (ifgm->ifgm_ifp == ifp)
1467				break;
1468
1469		if (ifgm != NULL) {
1470			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1471			    ifgm_next);
1472			free(ifgm, M_TEMP);
1473		}
1474
1475		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1476			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1477			IFNET_WUNLOCK();
1478			EVENTHANDLER_INVOKE(group_detach_event,
1479			    ifgl->ifgl_group);
1480			free(ifgl->ifgl_group, M_TEMP);
1481		} else
1482			IFNET_WUNLOCK();
1483
1484		free(ifgl, M_TEMP);
1485
1486		EVENTHANDLER_INVOKE(group_change_event, groupname);
1487
1488		IFNET_WLOCK();
1489	}
1490	IFNET_WUNLOCK();
1491}
1492
1493/*
1494 * Stores all groups from an interface in memory pointed
1495 * to by data
1496 */
1497static int
1498if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
1499{
1500	int			 len, error;
1501	struct ifg_list		*ifgl;
1502	struct ifg_req		 ifgrq, *ifgp;
1503	struct ifgroupreq	*ifgr = data;
1504
1505	if (ifgr->ifgr_len == 0) {
1506		IF_ADDR_RLOCK(ifp);
1507		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1508			ifgr->ifgr_len += sizeof(struct ifg_req);
1509		IF_ADDR_RUNLOCK(ifp);
1510		return (0);
1511	}
1512
1513	len = ifgr->ifgr_len;
1514	ifgp = ifgr->ifgr_groups;
1515	/* XXX: wire */
1516	IF_ADDR_RLOCK(ifp);
1517	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1518		if (len < sizeof(ifgrq)) {
1519			IF_ADDR_RUNLOCK(ifp);
1520			return (EINVAL);
1521		}
1522		bzero(&ifgrq, sizeof ifgrq);
1523		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1524		    sizeof(ifgrq.ifgrq_group));
1525		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1526		    	IF_ADDR_RUNLOCK(ifp);
1527			return (error);
1528		}
1529		len -= sizeof(ifgrq);
1530		ifgp++;
1531	}
1532	IF_ADDR_RUNLOCK(ifp);
1533
1534	return (0);
1535}
1536
1537/*
1538 * Stores all members of a group in memory pointed to by data
1539 */
1540static int
1541if_getgroupmembers(struct ifgroupreq *data)
1542{
1543	struct ifgroupreq	*ifgr = data;
1544	struct ifg_group	*ifg;
1545	struct ifg_member	*ifgm;
1546	struct ifg_req		 ifgrq, *ifgp;
1547	int			 len, error;
1548
1549	IFNET_RLOCK();
1550	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1551		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1552			break;
1553	if (ifg == NULL) {
1554		IFNET_RUNLOCK();
1555		return (ENOENT);
1556	}
1557
1558	if (ifgr->ifgr_len == 0) {
1559		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1560			ifgr->ifgr_len += sizeof(ifgrq);
1561		IFNET_RUNLOCK();
1562		return (0);
1563	}
1564
1565	len = ifgr->ifgr_len;
1566	ifgp = ifgr->ifgr_groups;
1567	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1568		if (len < sizeof(ifgrq)) {
1569			IFNET_RUNLOCK();
1570			return (EINVAL);
1571		}
1572		bzero(&ifgrq, sizeof ifgrq);
1573		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1574		    sizeof(ifgrq.ifgrq_member));
1575		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1576			IFNET_RUNLOCK();
1577			return (error);
1578		}
1579		len -= sizeof(ifgrq);
1580		ifgp++;
1581	}
1582	IFNET_RUNLOCK();
1583
1584	return (0);
1585}
1586
1587/*
1588 * Return counter values from counter(9)s stored in ifnet.
1589 */
1590uint64_t
1591if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
1592{
1593
1594	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1595
1596	return (counter_u64_fetch(ifp->if_counters[cnt]));
1597}
1598
1599/*
1600 * Increase an ifnet counter. Usually used for counters shared
1601 * between the stack and a driver, but function supports them all.
1602 */
1603void
1604if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
1605{
1606
1607	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1608
1609	counter_u64_add(ifp->if_counters[cnt], inc);
1610}
1611
1612/*
1613 * Copy data from ifnet to userland API structure if_data.
1614 */
1615void
1616if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1617{
1618
1619	ifd->ifi_type = ifp->if_type;
1620	ifd->ifi_physical = 0;
1621	ifd->ifi_addrlen = ifp->if_addrlen;
1622	ifd->ifi_hdrlen = ifp->if_hdrlen;
1623	ifd->ifi_link_state = ifp->if_link_state;
1624	ifd->ifi_vhid = 0;
1625	ifd->ifi_datalen = sizeof(struct if_data);
1626	ifd->ifi_mtu = ifp->if_mtu;
1627	ifd->ifi_metric = ifp->if_metric;
1628	ifd->ifi_baudrate = ifp->if_baudrate;
1629	ifd->ifi_hwassist = ifp->if_hwassist;
1630	ifd->ifi_epoch = ifp->if_epoch;
1631	ifd->ifi_lastchange = ifp->if_lastchange;
1632
1633	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1634	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1635	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1636	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1637	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1638	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1639	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1640	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1641	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1642	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1643	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1644	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1645}
1646
1647/*
1648 * Wrapper functions for struct ifnet address list locking macros.  These are
1649 * used by kernel modules to avoid encoding programming interface or binary
1650 * interface assumptions that may be violated when kernel-internal locking
1651 * approaches change.
1652 */
/* Function wrapper around IF_ADDR_RLOCK() for the given interface. */
void
if_addr_rlock(struct ifnet *ifp)
{
	IF_ADDR_RLOCK(ifp);
}
1659
/* Function wrapper around IF_ADDR_RUNLOCK() for the given interface. */
void
if_addr_runlock(struct ifnet *ifp)
{
	IF_ADDR_RUNLOCK(ifp);
}
1666
1667void
1668if_maddr_rlock(if_t ifp)
1669{
1670
1671	IF_ADDR_RLOCK((struct ifnet *)ifp);
1672}
1673
1674void
1675if_maddr_runlock(if_t ifp)
1676{
1677
1678	IF_ADDR_RUNLOCK((struct ifnet *)ifp);
1679}
1680
1681/*
1682 * Initialization, destruction and refcounting functions for ifaddrs.
1683 */
1684struct ifaddr *
1685ifa_alloc(size_t size, int flags)
1686{
1687	struct ifaddr *ifa;
1688
1689	KASSERT(size >= sizeof(struct ifaddr),
1690	    ("%s: invalid size %zu", __func__, size));
1691
1692	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1693	if (ifa == NULL)
1694		return (NULL);
1695
1696	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1697		goto fail;
1698	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1699		goto fail;
1700	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1701		goto fail;
1702	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1703		goto fail;
1704
1705	refcount_init(&ifa->ifa_refcnt, 1);
1706
1707	return (ifa);
1708
1709fail:
1710	/* free(NULL) is okay */
1711	counter_u64_free(ifa->ifa_opackets);
1712	counter_u64_free(ifa->ifa_ipackets);
1713	counter_u64_free(ifa->ifa_obytes);
1714	counter_u64_free(ifa->ifa_ibytes);
1715	free(ifa, M_IFADDR);
1716
1717	return (NULL);
1718}
1719
1720void
1721ifa_ref(struct ifaddr *ifa)
1722{
1723
1724	refcount_acquire(&ifa->ifa_refcnt);
1725}
1726
1727void
1728ifa_free(struct ifaddr *ifa)
1729{
1730
1731	if (refcount_release(&ifa->ifa_refcnt)) {
1732		counter_u64_free(ifa->ifa_opackets);
1733		counter_u64_free(ifa->ifa_ipackets);
1734		counter_u64_free(ifa->ifa_obytes);
1735		counter_u64_free(ifa->ifa_ibytes);
1736		free(ifa, M_IFADDR);
1737	}
1738}
1739
1740static int
1741ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
1742    struct sockaddr *ia)
1743{
1744	int error;
1745	struct rt_addrinfo info;
1746	struct sockaddr_dl null_sdl;
1747	struct ifnet *ifp;
1748
1749	ifp = ifa->ifa_ifp;
1750
1751	bzero(&info, sizeof(info));
1752	if (cmd != RTM_DELETE)
1753		info.rti_ifp = V_loif;
1754	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
1755	info.rti_info[RTAX_DST] = ia;
1756	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
1757	link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
1758
1759	error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
1760
1761	if (error != 0)
1762		log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
1763		    __func__, otype, if_name(ifp), error);
1764
1765	return (error);
1766}
1767
1768int
1769ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1770{
1771
1772	return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
1773}
1774
1775int
1776ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1777{
1778
1779	return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
1780}
1781
1782int
1783ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1784{
1785
1786	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
1787}
1788
/*
 * XXX: sockaddr_dl has deeper structure than the sockaddr structs used
 * for other address families, so it needs its own comparison: two
 * link-level addresses are equal when their sdl_len fields match and the
 * first sdl_alen (taken from a1) bytes of their link-layer addresses, as
 * located by CLLADDR(), are identical.  Both arguments are evaluated
 * more than once.
 */

#define	sa_dl_equal(a1, a2)						\
	(((const struct sockaddr_dl *)(a1))->sdl_len ==			\
	    ((const struct sockaddr_dl *)(a2))->sdl_len &&		\
	    bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	    CLLADDR((const struct sockaddr_dl *)(a2)),			\
	    ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)
1801
1802/*
1803 * Locate an interface based on a complete address.
1804 */
1805/*ARGSUSED*/
1806static struct ifaddr *
1807ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
1808{
1809	struct ifnet *ifp;
1810	struct ifaddr *ifa;
1811
1812	IFNET_RLOCK_NOSLEEP();
1813	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1814		IF_ADDR_RLOCK(ifp);
1815		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1816			if (ifa->ifa_addr->sa_family != addr->sa_family)
1817				continue;
1818			if (sa_equal(addr, ifa->ifa_addr)) {
1819				if (getref)
1820					ifa_ref(ifa);
1821				IF_ADDR_RUNLOCK(ifp);
1822				goto done;
1823			}
1824			/* IP6 doesn't have broadcast */
1825			if ((ifp->if_flags & IFF_BROADCAST) &&
1826			    ifa->ifa_broadaddr &&
1827			    ifa->ifa_broadaddr->sa_len != 0 &&
1828			    sa_equal(ifa->ifa_broadaddr, addr)) {
1829				if (getref)
1830					ifa_ref(ifa);
1831				IF_ADDR_RUNLOCK(ifp);
1832				goto done;
1833			}
1834		}
1835		IF_ADDR_RUNLOCK(ifp);
1836	}
1837	ifa = NULL;
1838done:
1839	IFNET_RUNLOCK_NOSLEEP();
1840	return (ifa);
1841}
1842
/*
 * Look up the ifaddr matching addr and return it with a reference held,
 * or NULL if there is none.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr)
{
	struct ifaddr *ifa;

	ifa = ifa_ifwithaddr_internal(addr, 1);
	return (ifa);
}
1849
1850int
1851ifa_ifwithaddr_check(const struct sockaddr *addr)
1852{
1853
1854	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
1855}
1856
1857/*
1858 * Locate an interface based on the broadcast address.
1859 */
1860/* ARGSUSED */
1861struct ifaddr *
1862ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1863{
1864	struct ifnet *ifp;
1865	struct ifaddr *ifa;
1866
1867	IFNET_RLOCK_NOSLEEP();
1868	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1869		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1870			continue;
1871		IF_ADDR_RLOCK(ifp);
1872		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1873			if (ifa->ifa_addr->sa_family != addr->sa_family)
1874				continue;
1875			if ((ifp->if_flags & IFF_BROADCAST) &&
1876			    ifa->ifa_broadaddr &&
1877			    ifa->ifa_broadaddr->sa_len != 0 &&
1878			    sa_equal(ifa->ifa_broadaddr, addr)) {
1879				ifa_ref(ifa);
1880				IF_ADDR_RUNLOCK(ifp);
1881				goto done;
1882			}
1883		}
1884		IF_ADDR_RUNLOCK(ifp);
1885	}
1886	ifa = NULL;
1887done:
1888	IFNET_RUNLOCK_NOSLEEP();
1889	return (ifa);
1890}
1891
1892/*
1893 * Locate the point to point interface with a given destination address.
1894 */
1895/*ARGSUSED*/
1896struct ifaddr *
1897ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1898{
1899	struct ifnet *ifp;
1900	struct ifaddr *ifa;
1901
1902	IFNET_RLOCK_NOSLEEP();
1903	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1904		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1905			continue;
1906		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1907			continue;
1908		IF_ADDR_RLOCK(ifp);
1909		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1910			if (ifa->ifa_addr->sa_family != addr->sa_family)
1911				continue;
1912			if (ifa->ifa_dstaddr != NULL &&
1913			    sa_equal(addr, ifa->ifa_dstaddr)) {
1914				ifa_ref(ifa);
1915				IF_ADDR_RUNLOCK(ifp);
1916				goto done;
1917			}
1918		}
1919		IF_ADDR_RUNLOCK(ifp);
1920	}
1921	ifa = NULL;
1922done:
1923	IFNET_RUNLOCK_NOSLEEP();
1924	return (ifa);
1925}
1926
1927/*
1928 * Find an interface on a specific network.  If many, choice
1929 * is most specific found.
1930 */
1931struct ifaddr *
1932ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
1933{
1934	struct ifnet *ifp;
1935	struct ifaddr *ifa;
1936	struct ifaddr *ifa_maybe = NULL;
1937	u_int af = addr->sa_family;
1938	const char *addr_data = addr->sa_data, *cplim;
1939
1940	/*
1941	 * AF_LINK addresses can be looked up directly by their index number,
1942	 * so do that if we can.
1943	 */
1944	if (af == AF_LINK) {
1945	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
1946	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
1947		return (ifaddr_byindex(sdl->sdl_index));
1948	}
1949
1950	/*
1951	 * Scan though each interface, looking for ones that have addresses
1952	 * in this address family and the requested fib.  Maintain a reference
1953	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
1954	 * kept it stable when we move onto the next interface.
1955	 */
1956	IFNET_RLOCK_NOSLEEP();
1957	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1958		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1959			continue;
1960		IF_ADDR_RLOCK(ifp);
1961		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1962			const char *cp, *cp2, *cp3;
1963
1964			if (ifa->ifa_addr->sa_family != af)
1965next:				continue;
1966			if (af == AF_INET &&
1967			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
1968				/*
1969				 * This is a bit broken as it doesn't
1970				 * take into account that the remote end may
1971				 * be a single node in the network we are
1972				 * looking for.
1973				 * The trouble is that we don't know the
1974				 * netmask for the remote end.
1975				 */
1976				if (ifa->ifa_dstaddr != NULL &&
1977				    sa_equal(addr, ifa->ifa_dstaddr)) {
1978					ifa_ref(ifa);
1979					IF_ADDR_RUNLOCK(ifp);
1980					goto done;
1981				}
1982			} else {
1983				/*
1984				 * Scan all the bits in the ifa's address.
1985				 * If a bit dissagrees with what we are
1986				 * looking for, mask it with the netmask
1987				 * to see if it really matters.
1988				 * (A byte at a time)
1989				 */
1990				if (ifa->ifa_netmask == 0)
1991					continue;
1992				cp = addr_data;
1993				cp2 = ifa->ifa_addr->sa_data;
1994				cp3 = ifa->ifa_netmask->sa_data;
1995				cplim = ifa->ifa_netmask->sa_len
1996					+ (char *)ifa->ifa_netmask;
1997				while (cp3 < cplim)
1998					if ((*cp++ ^ *cp2++) & *cp3++)
1999						goto next; /* next address! */
2000				/*
2001				 * If the netmask of what we just found
2002				 * is more specific than what we had before
2003				 * (if we had one), or if the virtual status
2004				 * of new prefix is better than of the old one,
2005				 * then remember the new one before continuing
2006				 * to search for an even better one.
2007				 */
2008				if (ifa_maybe == NULL ||
2009				    ifa_preferred(ifa_maybe, ifa) ||
2010				    rn_refines((caddr_t)ifa->ifa_netmask,
2011				    (caddr_t)ifa_maybe->ifa_netmask)) {
2012					if (ifa_maybe != NULL)
2013						ifa_free(ifa_maybe);
2014					ifa_maybe = ifa;
2015					ifa_ref(ifa_maybe);
2016				}
2017			}
2018		}
2019		IF_ADDR_RUNLOCK(ifp);
2020	}
2021	ifa = ifa_maybe;
2022	ifa_maybe = NULL;
2023done:
2024	IFNET_RUNLOCK_NOSLEEP();
2025	if (ifa_maybe != NULL)
2026		ifa_free(ifa_maybe);
2027	return (ifa);
2028}
2029
2030/*
2031 * Find an interface address specific to an interface best matching
2032 * a given address.
2033 */
2034struct ifaddr *
2035ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2036{
2037	struct ifaddr *ifa;
2038	const char *cp, *cp2, *cp3;
2039	char *cplim;
2040	struct ifaddr *ifa_maybe = NULL;
2041	u_int af = addr->sa_family;
2042
2043	if (af >= AF_MAX)
2044		return (NULL);
2045	IF_ADDR_RLOCK(ifp);
2046	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2047		if (ifa->ifa_addr->sa_family != af)
2048			continue;
2049		if (ifa_maybe == NULL)
2050			ifa_maybe = ifa;
2051		if (ifa->ifa_netmask == 0) {
2052			if (sa_equal(addr, ifa->ifa_addr) ||
2053			    (ifa->ifa_dstaddr &&
2054			    sa_equal(addr, ifa->ifa_dstaddr)))
2055				goto done;
2056			continue;
2057		}
2058		if (ifp->if_flags & IFF_POINTOPOINT) {
2059			if (sa_equal(addr, ifa->ifa_dstaddr))
2060				goto done;
2061		} else {
2062			cp = addr->sa_data;
2063			cp2 = ifa->ifa_addr->sa_data;
2064			cp3 = ifa->ifa_netmask->sa_data;
2065			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2066			for (; cp3 < cplim; cp3++)
2067				if ((*cp++ ^ *cp2++) & *cp3)
2068					break;
2069			if (cp3 == cplim)
2070				goto done;
2071		}
2072	}
2073	ifa = ifa_maybe;
2074done:
2075	if (ifa != NULL)
2076		ifa_ref(ifa);
2077	IF_ADDR_RUNLOCK(ifp);
2078	return (ifa);
2079}
2080
2081/*
2082 * See whether new ifa is better than current one:
2083 * 1) A non-virtual one is preferred over virtual.
2084 * 2) A virtual in master state preferred over any other state.
2085 *
2086 * Used in several address selecting functions.
2087 */
2088int
2089ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2090{
2091
2092	return (cur->ifa_carp && (!next->ifa_carp ||
2093	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2094}
2095
2096#include <net/if_llatbl.h>
2097
2098/*
2099 * Default action when installing a route with a Link Level gateway.
2100 * Lookup an appropriate real ifa to point to.
2101 * This should be moved to /sys/net/link.c eventually.
2102 */
2103static void
2104link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
2105{
2106	struct ifaddr *ifa, *oifa;
2107	struct sockaddr *dst;
2108	struct ifnet *ifp;
2109
2110	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
2111	    ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
2112		return;
2113	ifa = ifaof_ifpforaddr(dst, ifp);
2114	if (ifa) {
2115		oifa = rt->rt_ifa;
2116		rt->rt_ifa = ifa;
2117		ifa_free(oifa);
2118		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2119			ifa->ifa_rtrequest(cmd, rt, info);
2120	}
2121}
2122
2123struct sockaddr_dl *
2124link_alloc_sdl(size_t size, int flags)
2125{
2126
2127	return (malloc(size, M_TEMP, flags));
2128}
2129
2130void
2131link_free_sdl(struct sockaddr *sa)
2132{
2133	free(sa, M_TEMP);
2134}
2135
2136/*
2137 * Fills in given sdl with interface basic info.
2138 * Returns pointer to filled sdl.
2139 */
2140struct sockaddr_dl *
2141link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2142{
2143	struct sockaddr_dl *sdl;
2144
2145	sdl = (struct sockaddr_dl *)paddr;
2146	memset(sdl, 0, sizeof(struct sockaddr_dl));
2147	sdl->sdl_len = sizeof(struct sockaddr_dl);
2148	sdl->sdl_family = AF_LINK;
2149	sdl->sdl_index = ifp->if_index;
2150	sdl->sdl_type = iftype;
2151
2152	return (sdl);
2153}
2154
2155/*
2156 * Mark an interface down and notify protocols of
2157 * the transition.
2158 */
2159static void
2160if_unroute(struct ifnet *ifp, int flag, int fam)
2161{
2162	struct ifaddr *ifa;
2163
2164	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2165
2166	ifp->if_flags &= ~flag;
2167	getmicrotime(&ifp->if_lastchange);
2168	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2169		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2170			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2171	ifp->if_qflush(ifp);
2172
2173	if (ifp->if_carp)
2174		(*carp_linkstate_p)(ifp);
2175	rt_ifmsg(ifp);
2176}
2177
2178/*
2179 * Mark an interface up and notify protocols of
2180 * the transition.
2181 */
2182static void
2183if_route(struct ifnet *ifp, int flag, int fam)
2184{
2185	struct ifaddr *ifa;
2186
2187	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
2188
2189	ifp->if_flags |= flag;
2190	getmicrotime(&ifp->if_lastchange);
2191	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2192		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2193			pfctlinput(PRC_IFUP, ifa->ifa_addr);
2194	if (ifp->if_carp)
2195		(*carp_linkstate_p)(ifp);
2196	rt_ifmsg(ifp);
2197#ifdef INET6
2198	in6_if_up(ifp);
2199#endif
2200}
2201
/*
 * Function pointer hooks for VLAN support.  They are defined here without
 * initializers; presumably if_vlan fills them in when it is present
 * (TODO confirm against if_vlan).  In this part of the file only
 * vlan_link_state_p is called, and only for interfaces whose
 * if_vlantrunk is non-NULL.
 */
void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
int	(*vlan_setcookie_p)(struct ifnet *, void *);
void	*(*vlan_cookie_p)(struct ifnet *);
2209
2210/*
2211 * Handle a change in the interface link state. To avoid LORs
2212 * between driver lock and upper layer locks, as well as possible
2213 * recursions, we post event to taskqueue, and all job
2214 * is done in static do_link_state_change().
2215 */
2216void
2217if_link_state_change(struct ifnet *ifp, int link_state)
2218{
2219	/* Return if state hasn't changed. */
2220	if (ifp->if_link_state == link_state)
2221		return;
2222
2223	ifp->if_link_state = link_state;
2224
2225	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2226}
2227
2228static void
2229do_link_state_change(void *arg, int pending)
2230{
2231	struct ifnet *ifp = (struct ifnet *)arg;
2232	int link_state = ifp->if_link_state;
2233	CURVNET_SET(ifp->if_vnet);
2234
2235	/* Notify that the link state has changed. */
2236	rt_ifmsg(ifp);
2237	if (ifp->if_vlantrunk != NULL)
2238		(*vlan_link_state_p)(ifp);
2239
2240	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2241	    ifp->if_l2com != NULL)
2242		(*ng_ether_link_state_p)(ifp, link_state);
2243	if (ifp->if_carp)
2244		(*carp_linkstate_p)(ifp);
2245	if (ifp->if_bridge)
2246		(*bridge_linkstate_p)(ifp);
2247	if (ifp->if_lagg)
2248		(*lagg_linkstate_p)(ifp, link_state);
2249
2250	if (IS_DEFAULT_VNET(curvnet))
2251		devctl_notify("IFNET", ifp->if_xname,
2252		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2253		    NULL);
2254	if (pending > 1)
2255		if_printf(ifp, "%d link states coalesced\n", pending);
2256	if (log_link_state_change)
2257		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
2258		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2259	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2260	CURVNET_RESTORE();
2261}
2262
2263/*
2264 * Mark an interface down and notify protocols of
2265 * the transition.
2266 */
2267void
2268if_down(struct ifnet *ifp)
2269{
2270
2271	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
2272	if_unroute(ifp, IFF_UP, AF_UNSPEC);
2273}
2274
2275/*
2276 * Mark an interface up and notify protocols of
2277 * the transition.
2278 */
2279void
2280if_up(struct ifnet *ifp)
2281{
2282
2283	if_route(ifp, IFF_UP, AF_UNSPEC);
2284	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
2285}
2286
2287/*
2288 * Flush an interface queue.
2289 */
2290void
2291if_qflush(struct ifnet *ifp)
2292{
2293	struct mbuf *m, *n;
2294	struct ifaltq *ifq;
2295
2296	ifq = &ifp->if_snd;
2297	IFQ_LOCK(ifq);
2298#ifdef ALTQ
2299	if (ALTQ_IS_ENABLED(ifq))
2300		ALTQ_PURGE(ifq);
2301#endif
2302	n = ifq->ifq_head;
2303	while ((m = n) != NULL) {
2304		n = m->m_nextpkt;
2305		m_freem(m);
2306	}
2307	ifq->ifq_head = 0;
2308	ifq->ifq_tail = 0;
2309	ifq->ifq_len = 0;
2310	IFQ_UNLOCK(ifq);
2311}
2312
2313/*
2314 * Map interface name to interface structure pointer, with or without
2315 * returning a reference.
2316 */
2317struct ifnet *
2318ifunit_ref(const char *name)
2319{
2320	struct ifnet *ifp;
2321
2322	IFNET_RLOCK_NOSLEEP();
2323	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2324		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2325		    !(ifp->if_flags & IFF_DYING))
2326			break;
2327	}
2328	if (ifp != NULL)
2329		if_ref(ifp);
2330	IFNET_RUNLOCK_NOSLEEP();
2331	return (ifp);
2332}
2333
2334struct ifnet *
2335ifunit(const char *name)
2336{
2337	struct ifnet *ifp;
2338
2339	IFNET_RLOCK_NOSLEEP();
2340	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2341		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2342			break;
2343	}
2344	IFNET_RUNLOCK_NOSLEEP();
2345	return (ifp);
2346}
2347
2348static void *
2349ifr_buffer_get_buffer(void *data)
2350{
2351	union ifreq_union *ifrup;
2352
2353	ifrup = data;
2354#ifdef COMPAT_FREEBSD32
2355	if (SV_CURPROC_FLAG(SV_ILP32))
2356		return ((void *)(uintptr_t)
2357		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
2358#endif
2359	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
2360}
2361
2362static void
2363ifr_buffer_set_buffer_null(void *data)
2364{
2365	union ifreq_union *ifrup;
2366
2367	ifrup = data;
2368#ifdef COMPAT_FREEBSD32
2369	if (SV_CURPROC_FLAG(SV_ILP32))
2370		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
2371	else
2372#endif
2373		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
2374}
2375
2376static size_t
2377ifr_buffer_get_length(void *data)
2378{
2379	union ifreq_union *ifrup;
2380
2381	ifrup = data;
2382#ifdef COMPAT_FREEBSD32
2383	if (SV_CURPROC_FLAG(SV_ILP32))
2384		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
2385#endif
2386	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
2387}
2388
2389static void
2390ifr_buffer_set_length(void *data, size_t len)
2391{
2392	union ifreq_union *ifrup;
2393
2394	ifrup = data;
2395#ifdef COMPAT_FREEBSD32
2396	if (SV_CURPROC_FLAG(SV_ILP32))
2397		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
2398	else
2399#endif
2400		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
2401}
2402
2403/*
2404 * Hardware specific interface ioctls.
2405 */
2406static int
2407ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
2408{
2409	struct ifreq *ifr;
2410	int error = 0, do_ifup = 0;
2411	int new_flags, temp_flags;
2412	size_t namelen, onamelen;
2413	size_t descrlen;
2414	char *descrbuf, *odescrbuf;
2415	char new_name[IFNAMSIZ];
2416	struct ifaddr *ifa;
2417	struct sockaddr_dl *sdl;
2418
2419	ifr = (struct ifreq *)data;
2420	switch (cmd) {
2421	case SIOCGIFINDEX:
2422		ifr->ifr_index = ifp->if_index;
2423		break;
2424
2425	case SIOCGIFFLAGS:
2426		temp_flags = ifp->if_flags | ifp->if_drv_flags;
2427		ifr->ifr_flags = temp_flags & 0xffff;
2428		ifr->ifr_flagshigh = temp_flags >> 16;
2429		break;
2430
2431	case SIOCGIFCAP:
2432		ifr->ifr_reqcap = ifp->if_capabilities;
2433		ifr->ifr_curcap = ifp->if_capenable;
2434		break;
2435
2436#ifdef MAC
2437	case SIOCGIFMAC:
2438		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2439		break;
2440#endif
2441
2442	case SIOCGIFMETRIC:
2443		ifr->ifr_metric = ifp->if_metric;
2444		break;
2445
2446	case SIOCGIFMTU:
2447		ifr->ifr_mtu = ifp->if_mtu;
2448		break;
2449
2450	case SIOCGIFPHYS:
2451		/* XXXGL: did this ever worked? */
2452		ifr->ifr_phys = 0;
2453		break;
2454
2455	case SIOCGIFDESCR:
2456		error = 0;
2457		sx_slock(&ifdescr_sx);
2458		if (ifp->if_description == NULL)
2459			error = ENOMSG;
2460		else {
2461			/* space for terminating nul */
2462			descrlen = strlen(ifp->if_description) + 1;
2463			if (ifr_buffer_get_length(ifr) < descrlen)
2464				ifr_buffer_set_buffer_null(ifr);
2465			else
2466				error = copyout(ifp->if_description,
2467				    ifr_buffer_get_buffer(ifr), descrlen);
2468			ifr_buffer_set_length(ifr, descrlen);
2469		}
2470		sx_sunlock(&ifdescr_sx);
2471		break;
2472
2473	case SIOCSIFDESCR:
2474		error = priv_check(td, PRIV_NET_SETIFDESCR);
2475		if (error)
2476			return (error);
2477
2478		/*
2479		 * Copy only (length-1) bytes to make sure that
2480		 * if_description is always nul terminated.  The
2481		 * length parameter is supposed to count the
2482		 * terminating nul in.
2483		 */
2484		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
2485			return (ENAMETOOLONG);
2486		else if (ifr_buffer_get_length(ifr) == 0)
2487			descrbuf = NULL;
2488		else {
2489			descrbuf = malloc(ifr_buffer_get_length(ifr),
2490			    M_IFDESCR, M_WAITOK | M_ZERO);
2491			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
2492			    ifr_buffer_get_length(ifr) - 1);
2493			if (error) {
2494				free(descrbuf, M_IFDESCR);
2495				break;
2496			}
2497		}
2498
2499		sx_xlock(&ifdescr_sx);
2500		odescrbuf = ifp->if_description;
2501		ifp->if_description = descrbuf;
2502		sx_xunlock(&ifdescr_sx);
2503
2504		getmicrotime(&ifp->if_lastchange);
2505		free(odescrbuf, M_IFDESCR);
2506		break;
2507
2508	case SIOCGIFFIB:
2509		ifr->ifr_fib = ifp->if_fib;
2510		break;
2511
2512	case SIOCSIFFIB:
2513		error = priv_check(td, PRIV_NET_SETIFFIB);
2514		if (error)
2515			return (error);
2516		if (ifr->ifr_fib >= rt_numfibs)
2517			return (EINVAL);
2518
2519		ifp->if_fib = ifr->ifr_fib;
2520		break;
2521
2522	case SIOCSIFFLAGS:
2523		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2524		if (error)
2525			return (error);
2526		/*
2527		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2528		 * check, so we don't need special handling here yet.
2529		 */
2530		new_flags = (ifr->ifr_flags & 0xffff) |
2531		    (ifr->ifr_flagshigh << 16);
2532		if (ifp->if_flags & IFF_UP &&
2533		    (new_flags & IFF_UP) == 0) {
2534			if_down(ifp);
2535		} else if (new_flags & IFF_UP &&
2536		    (ifp->if_flags & IFF_UP) == 0) {
2537			do_ifup = 1;
2538		}
2539		/* See if permanently promiscuous mode bit is about to flip */
2540		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2541			if (new_flags & IFF_PPROMISC)
2542				ifp->if_flags |= IFF_PROMISC;
2543			else if (ifp->if_pcount == 0)
2544				ifp->if_flags &= ~IFF_PROMISC;
2545			if (log_promisc_mode_change)
2546                                log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
2547                                    ifp->if_xname,
2548                                    ((new_flags & IFF_PPROMISC) ?
2549                                     "enabled" : "disabled"));
2550		}
2551		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2552			(new_flags &~ IFF_CANTCHANGE);
2553		if (ifp->if_ioctl) {
2554			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2555		}
2556		if (do_ifup)
2557			if_up(ifp);
2558		getmicrotime(&ifp->if_lastchange);
2559		break;
2560
2561	case SIOCSIFCAP:
2562		error = priv_check(td, PRIV_NET_SETIFCAP);
2563		if (error)
2564			return (error);
2565		if (ifp->if_ioctl == NULL)
2566			return (EOPNOTSUPP);
2567		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2568			return (EINVAL);
2569		error = (*ifp->if_ioctl)(ifp, cmd, data);
2570		if (error == 0)
2571			getmicrotime(&ifp->if_lastchange);
2572		break;
2573
2574#ifdef MAC
2575	case SIOCSIFMAC:
2576		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2577		break;
2578#endif
2579
2580	case SIOCSIFNAME:
2581		error = priv_check(td, PRIV_NET_SETIFNAME);
2582		if (error)
2583			return (error);
2584		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
2585		if (error != 0)
2586			return (error);
2587		if (new_name[0] == '\0')
2588			return (EINVAL);
2589		if (new_name[IFNAMSIZ-1] != '\0') {
2590			new_name[IFNAMSIZ-1] = '\0';
2591			if (strlen(new_name) == IFNAMSIZ-1)
2592				return (EINVAL);
2593		}
2594		if (ifunit(new_name) != NULL)
2595			return (EEXIST);
2596
2597		/*
2598		 * XXX: Locking.  Nothing else seems to lock if_flags,
2599		 * and there are numerous other races with the
2600		 * ifunit() checks not being atomic with namespace
2601		 * changes (renames, vmoves, if_attach, etc).
2602		 */
2603		ifp->if_flags |= IFF_RENAMING;
2604
2605		/* Announce the departure of the interface. */
2606		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2607		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2608
2609		log(LOG_INFO, "%s: changing name to '%s'\n",
2610		    ifp->if_xname, new_name);
2611
2612		IF_ADDR_WLOCK(ifp);
2613		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
2614		ifa = ifp->if_addr;
2615		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2616		namelen = strlen(new_name);
2617		onamelen = sdl->sdl_nlen;
2618		/*
2619		 * Move the address if needed.  This is safe because we
2620		 * allocate space for a name of length IFNAMSIZ when we
2621		 * create this in if_attach().
2622		 */
2623		if (namelen != onamelen) {
2624			bcopy(sdl->sdl_data + onamelen,
2625			    sdl->sdl_data + namelen, sdl->sdl_alen);
2626		}
2627		bcopy(new_name, sdl->sdl_data, namelen);
2628		sdl->sdl_nlen = namelen;
2629		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2630		bzero(sdl->sdl_data, onamelen);
2631		while (namelen != 0)
2632			sdl->sdl_data[--namelen] = 0xff;
2633		IF_ADDR_WUNLOCK(ifp);
2634
2635		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2636		/* Announce the return of the interface. */
2637		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2638
2639		ifp->if_flags &= ~IFF_RENAMING;
2640		break;
2641
2642#ifdef VIMAGE
2643	case SIOCSIFVNET:
2644		error = priv_check(td, PRIV_NET_SETIFVNET);
2645		if (error)
2646			return (error);
2647		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2648		break;
2649#endif
2650
2651	case SIOCSIFMETRIC:
2652		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2653		if (error)
2654			return (error);
2655		ifp->if_metric = ifr->ifr_metric;
2656		getmicrotime(&ifp->if_lastchange);
2657		break;
2658
2659	case SIOCSIFPHYS:
2660		error = priv_check(td, PRIV_NET_SETIFPHYS);
2661		if (error)
2662			return (error);
2663		if (ifp->if_ioctl == NULL)
2664			return (EOPNOTSUPP);
2665		error = (*ifp->if_ioctl)(ifp, cmd, data);
2666		if (error == 0)
2667			getmicrotime(&ifp->if_lastchange);
2668		break;
2669
2670	case SIOCSIFMTU:
2671	{
2672		u_long oldmtu = ifp->if_mtu;
2673
2674		error = priv_check(td, PRIV_NET_SETIFMTU);
2675		if (error)
2676			return (error);
2677		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2678			return (EINVAL);
2679		if (ifp->if_ioctl == NULL)
2680			return (EOPNOTSUPP);
2681		error = (*ifp->if_ioctl)(ifp, cmd, data);
2682		if (error == 0) {
2683			getmicrotime(&ifp->if_lastchange);
2684			rt_ifmsg(ifp);
2685		}
2686		/*
2687		 * If the link MTU changed, do network layer specific procedure.
2688		 */
2689		if (ifp->if_mtu != oldmtu) {
2690#ifdef INET6
2691			nd6_setmtu(ifp);
2692#endif
2693			rt_updatemtu(ifp);
2694		}
2695		break;
2696	}
2697
2698	case SIOCADDMULTI:
2699	case SIOCDELMULTI:
2700		if (cmd == SIOCADDMULTI)
2701			error = priv_check(td, PRIV_NET_ADDMULTI);
2702		else
2703			error = priv_check(td, PRIV_NET_DELMULTI);
2704		if (error)
2705			return (error);
2706
2707		/* Don't allow group membership on non-multicast interfaces. */
2708		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2709			return (EOPNOTSUPP);
2710
2711		/* Don't let users screw up protocols' entries. */
2712		if (ifr->ifr_addr.sa_family != AF_LINK)
2713			return (EINVAL);
2714
2715		if (cmd == SIOCADDMULTI) {
2716			struct ifmultiaddr *ifma;
2717
2718			/*
2719			 * Userland is only permitted to join groups once
2720			 * via the if_addmulti() KPI, because it cannot hold
2721			 * struct ifmultiaddr * between calls. It may also
2722			 * lose a race while we check if the membership
2723			 * already exists.
2724			 */
2725			IF_ADDR_RLOCK(ifp);
2726			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2727			IF_ADDR_RUNLOCK(ifp);
2728			if (ifma != NULL)
2729				error = EADDRINUSE;
2730			else
2731				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2732		} else {
2733			error = if_delmulti(ifp, &ifr->ifr_addr);
2734		}
2735		if (error == 0)
2736			getmicrotime(&ifp->if_lastchange);
2737		break;
2738
2739	case SIOCSIFPHYADDR:
2740	case SIOCDIFPHYADDR:
2741#ifdef INET6
2742	case SIOCSIFPHYADDR_IN6:
2743#endif
2744	case SIOCSIFMEDIA:
2745	case SIOCSIFGENERIC:
2746		error = priv_check(td, PRIV_NET_HWIOCTL);
2747		if (error)
2748			return (error);
2749		if (ifp->if_ioctl == NULL)
2750			return (EOPNOTSUPP);
2751		error = (*ifp->if_ioctl)(ifp, cmd, data);
2752		if (error == 0)
2753			getmicrotime(&ifp->if_lastchange);
2754		break;
2755
2756	case SIOCGIFSTATUS:
2757	case SIOCGIFPSRCADDR:
2758	case SIOCGIFPDSTADDR:
2759	case SIOCGIFMEDIA:
2760	case SIOCGIFXMEDIA:
2761	case SIOCGIFGENERIC:
2762	case SIOCGIFRSSKEY:
2763	case SIOCGIFRSSHASH:
2764		if (ifp->if_ioctl == NULL)
2765			return (EOPNOTSUPP);
2766		error = (*ifp->if_ioctl)(ifp, cmd, data);
2767		break;
2768
2769	case SIOCSIFLLADDR:
2770		error = priv_check(td, PRIV_NET_SETLLADDR);
2771		if (error)
2772			return (error);
2773		error = if_setlladdr(ifp,
2774		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2775		break;
2776
2777	case SIOCGHWADDR:
2778		error = if_gethwaddr(ifp, ifr);
2779		break;
2780
2781	case SIOCAIFGROUP:
2782	{
2783		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2784
2785		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2786		if (error)
2787			return (error);
2788		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2789			return (error);
2790		break;
2791	}
2792
2793	case SIOCGIFGROUP:
2794		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
2795			return (error);
2796		break;
2797
2798	case SIOCDIFGROUP:
2799	{
2800		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2801
2802		error = priv_check(td, PRIV_NET_DELIFGROUP);
2803		if (error)
2804			return (error);
2805		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2806			return (error);
2807		break;
2808	}
2809
2810	default:
2811		error = ENOIOCTL;
2812		break;
2813	}
2814	return (error);
2815}
2816
/*
 * COMPAT_SVR4: old-style interface configuration request.  ifioctl()
 * passes it to ifconf() together with SIOCGIFCONF; ifconf() then writes
 * the address family in the osockaddr layout.
 */
#define	OSIOCGIFCONF	_IOWR('i', 20, struct ifconf)

#ifdef COMPAT_FREEBSD32
/*
 * Layout of struct ifconf as passed by a 32-bit process: the buffer
 * pointer is carried as a 32-bit integer.  ifioctl() converts it with
 * PTRIN() into a native struct ifconf and copies ifc_len back on success.
 */
struct ifconf32 {
	int32_t	ifc_len;		/* buffer size in, bytes used out */
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	} ifc_ifcu;
};
/* SIOCGIFCONF as encoded for the 32-bit structure above. */
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
#endif
2830
2831/*
2832 * Interface ioctls.
2833 */
2834int
2835ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2836{
2837	struct ifnet *ifp;
2838	struct ifreq *ifr;
2839	int error;
2840	int oif_flags;
2841#ifdef VIMAGE
2842	int shutdown;
2843#endif
2844
2845	CURVNET_SET(so->so_vnet);
2846#ifdef VIMAGE
2847	/* Make sure the VNET is stable. */
2848	shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
2849		 so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
2850	if (shutdown) {
2851		CURVNET_RESTORE();
2852		return (EBUSY);
2853	}
2854#endif
2855
2856
2857	switch (cmd) {
2858	case SIOCGIFCONF:
2859	case OSIOCGIFCONF:	/* COMPAT_SVR4 */
2860		error = ifconf(cmd, data);
2861		CURVNET_RESTORE();
2862		return (error);
2863
2864#ifdef COMPAT_FREEBSD32
2865	case SIOCGIFCONF32:
2866		{
2867			struct ifconf32 *ifc32;
2868			struct ifconf ifc;
2869
2870			ifc32 = (struct ifconf32 *)data;
2871			ifc.ifc_len = ifc32->ifc_len;
2872			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
2873
2874			error = ifconf(SIOCGIFCONF, (void *)&ifc);
2875			CURVNET_RESTORE();
2876			if (error == 0)
2877				ifc32->ifc_len = ifc.ifc_len;
2878			return (error);
2879		}
2880#endif
2881	}
2882	ifr = (struct ifreq *)data;
2883
2884	switch (cmd) {
2885#ifdef VIMAGE
2886	case SIOCSIFRVNET:
2887		error = priv_check(td, PRIV_NET_SETIFVNET);
2888		if (error == 0)
2889			error = if_vmove_reclaim(td, ifr->ifr_name,
2890			    ifr->ifr_jid);
2891		CURVNET_RESTORE();
2892		return (error);
2893#endif
2894	case SIOCIFCREATE:
2895	case SIOCIFCREATE2:
2896		error = priv_check(td, PRIV_NET_IFCREATE);
2897		if (error == 0)
2898			error = if_clone_create(ifr->ifr_name,
2899			    sizeof(ifr->ifr_name),
2900			    cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL);
2901		CURVNET_RESTORE();
2902		return (error);
2903	case SIOCIFDESTROY:
2904		error = priv_check(td, PRIV_NET_IFDESTROY);
2905		if (error == 0)
2906			error = if_clone_destroy(ifr->ifr_name);
2907		CURVNET_RESTORE();
2908		return (error);
2909
2910	case SIOCIFGCLONERS:
2911		error = if_clone_list((struct if_clonereq *)data);
2912		CURVNET_RESTORE();
2913		return (error);
2914	case SIOCGIFGMEMB:
2915		error = if_getgroupmembers((struct ifgroupreq *)data);
2916		CURVNET_RESTORE();
2917		return (error);
2918#if defined(INET) || defined(INET6)
2919	case SIOCSVH:
2920	case SIOCGVH:
2921		if (carp_ioctl_p == NULL)
2922			error = EPROTONOSUPPORT;
2923		else
2924			error = (*carp_ioctl_p)(ifr, cmd, td);
2925		CURVNET_RESTORE();
2926		return (error);
2927#endif
2928	}
2929
2930	ifp = ifunit_ref(ifr->ifr_name);
2931	if (ifp == NULL) {
2932		CURVNET_RESTORE();
2933		return (ENXIO);
2934	}
2935
2936	error = ifhwioctl(cmd, ifp, data, td);
2937	if (error != ENOIOCTL) {
2938		if_rele(ifp);
2939		CURVNET_RESTORE();
2940		return (error);
2941	}
2942
2943	oif_flags = ifp->if_flags;
2944	if (so->so_proto == NULL) {
2945		if_rele(ifp);
2946		CURVNET_RESTORE();
2947		return (EOPNOTSUPP);
2948	}
2949
2950	/*
2951	 * Pass the request on to the socket control method, and if the
2952	 * latter returns EOPNOTSUPP, directly to the interface.
2953	 *
2954	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
2955	 * trust SIOCSIFADDR et al to come from an already privileged
2956	 * layer, and do not perform any credentials checks or input
2957	 * validation.
2958	 */
2959	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
2960	    ifp, td));
2961	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
2962	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
2963	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
2964		error = (*ifp->if_ioctl)(ifp, cmd, data);
2965
2966	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
2967#ifdef INET6
2968		if (ifp->if_flags & IFF_UP)
2969			in6_if_up(ifp);
2970#endif
2971	}
2972	if_rele(ifp);
2973	CURVNET_RESTORE();
2974	return (error);
2975}
2976
2977/*
2978 * The code common to handling reference counted flags,
2979 * e.g., in ifpromisc() and if_allmulti().
2980 * The "pflag" argument can specify a permanent mode flag to check,
2981 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
2982 *
2983 * Only to be used on stack-owned flags, not driver-owned flags.
2984 */
2985static int
2986if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
2987{
2988	struct ifreq ifr;
2989	int error;
2990	int oldflags, oldcount;
2991
2992	/* Sanity checks to catch programming errors */
2993	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
2994	    ("%s: setting driver-owned flag %d", __func__, flag));
2995
2996	if (onswitch)
2997		KASSERT(*refcount >= 0,
2998		    ("%s: increment negative refcount %d for flag %d",
2999		    __func__, *refcount, flag));
3000	else
3001		KASSERT(*refcount > 0,
3002		    ("%s: decrement non-positive refcount %d for flag %d",
3003		    __func__, *refcount, flag));
3004
3005	/* In case this mode is permanent, just touch refcount */
3006	if (ifp->if_flags & pflag) {
3007		*refcount += onswitch ? 1 : -1;
3008		return (0);
3009	}
3010
3011	/* Save ifnet parameters for if_ioctl() may fail */
3012	oldcount = *refcount;
3013	oldflags = ifp->if_flags;
3014
3015	/*
3016	 * See if we aren't the only and touching refcount is enough.
3017	 * Actually toggle interface flag if we are the first or last.
3018	 */
3019	if (onswitch) {
3020		if ((*refcount)++)
3021			return (0);
3022		ifp->if_flags |= flag;
3023	} else {
3024		if (--(*refcount))
3025			return (0);
3026		ifp->if_flags &= ~flag;
3027	}
3028
3029	/* Call down the driver since we've changed interface flags */
3030	if (ifp->if_ioctl == NULL) {
3031		error = EOPNOTSUPP;
3032		goto recover;
3033	}
3034	ifr.ifr_flags = ifp->if_flags & 0xffff;
3035	ifr.ifr_flagshigh = ifp->if_flags >> 16;
3036	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3037	if (error)
3038		goto recover;
3039	/* Notify userland that interface flags have changed */
3040	rt_ifmsg(ifp);
3041	return (0);
3042
3043recover:
3044	/* Recover after driver error */
3045	*refcount = oldcount;
3046	ifp->if_flags = oldflags;
3047	return (error);
3048}
3049
3050/*
3051 * Set/clear promiscuous mode on interface ifp based on the truth value
3052 * of pswitch.  The calls are reference counted so that only the first
3053 * "on" request actually has an effect, as does the final "off" request.
3054 * Results are undefined if the "off" and "on" requests are not matched.
3055 */
3056int
3057ifpromisc(struct ifnet *ifp, int pswitch)
3058{
3059	int error;
3060	int oldflags = ifp->if_flags;
3061
3062	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
3063			   &ifp->if_pcount, pswitch);
3064	/* If promiscuous mode status has changed, log a message */
3065	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
3066            log_promisc_mode_change)
3067		log(LOG_INFO, "%s: promiscuous mode %s\n",
3068		    ifp->if_xname,
3069		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
3070	return (error);
3071}
3072
3073/*
3074 * Return interface configuration
3075 * of system.  List may be used
3076 * in later ioctl's (above) to get
3077 * other information.
3078 */
3079/*ARGSUSED*/
3080static int
3081ifconf(u_long cmd, caddr_t data)
3082{
3083	struct ifconf *ifc = (struct ifconf *)data;
3084	struct ifnet *ifp;
3085	struct ifaddr *ifa;
3086	struct ifreq ifr;
3087	struct sbuf *sb;
3088	int error, full = 0, valid_len, max_len;
3089
3090	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
3091	max_len = MAXPHYS - 1;
3092
3093	/* Prevent hostile input from being able to crash the system */
3094	if (ifc->ifc_len <= 0)
3095		return (EINVAL);
3096
3097again:
3098	if (ifc->ifc_len <= max_len) {
3099		max_len = ifc->ifc_len;
3100		full = 1;
3101	}
3102	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
3103	max_len = 0;
3104	valid_len = 0;
3105
3106	IFNET_RLOCK();
3107	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
3108		int addrs;
3109
3110		/*
3111		 * Zero the ifr_name buffer to make sure we don't
3112		 * disclose the contents of the stack.
3113		 */
3114		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
3115
3116		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
3117		    >= sizeof(ifr.ifr_name)) {
3118			sbuf_delete(sb);
3119			IFNET_RUNLOCK();
3120			return (ENAMETOOLONG);
3121		}
3122
3123		addrs = 0;
3124		IF_ADDR_RLOCK(ifp);
3125		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3126			struct sockaddr *sa = ifa->ifa_addr;
3127
3128			if (prison_if(curthread->td_ucred, sa) != 0)
3129				continue;
3130			addrs++;
3131			/* COMPAT_SVR4 */
3132			if (cmd == OSIOCGIFCONF) {
3133				struct osockaddr *osa =
3134				    (struct osockaddr *)&ifr.ifr_addr;
3135				ifr.ifr_addr = *sa;
3136				osa->sa_family = sa->sa_family;
3137				sbuf_bcat(sb, &ifr, sizeof(ifr));
3138				max_len += sizeof(ifr);
3139			} else
3140			if (sa->sa_len <= sizeof(*sa)) {
3141				ifr.ifr_addr = *sa;
3142				sbuf_bcat(sb, &ifr, sizeof(ifr));
3143				max_len += sizeof(ifr);
3144			} else {
3145				sbuf_bcat(sb, &ifr,
3146				    offsetof(struct ifreq, ifr_addr));
3147				max_len += offsetof(struct ifreq, ifr_addr);
3148				sbuf_bcat(sb, sa, sa->sa_len);
3149				max_len += sa->sa_len;
3150			}
3151
3152			if (sbuf_error(sb) == 0)
3153				valid_len = sbuf_len(sb);
3154		}
3155		IF_ADDR_RUNLOCK(ifp);
3156		if (addrs == 0) {
3157			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
3158			sbuf_bcat(sb, &ifr, sizeof(ifr));
3159			max_len += sizeof(ifr);
3160
3161			if (sbuf_error(sb) == 0)
3162				valid_len = sbuf_len(sb);
3163		}
3164	}
3165	IFNET_RUNLOCK();
3166
3167	/*
3168	 * If we didn't allocate enough space (uncommon), try again.  If
3169	 * we have already allocated as much space as we are allowed,
3170	 * return what we've got.
3171	 */
3172	if (valid_len != max_len && !full) {
3173		sbuf_delete(sb);
3174		goto again;
3175	}
3176
3177	ifc->ifc_len = valid_len;
3178	sbuf_finish(sb);
3179	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
3180	sbuf_delete(sb);
3181	return (error);
3182}
3183
3184/*
3185 * Just like ifpromisc(), but for all-multicast-reception mode.
3186 */
3187int
3188if_allmulti(struct ifnet *ifp, int onswitch)
3189{
3190
3191	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3192}
3193
3194struct ifmultiaddr *
3195if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3196{
3197	struct ifmultiaddr *ifma;
3198
3199	IF_ADDR_LOCK_ASSERT(ifp);
3200
3201	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3202		if (sa->sa_family == AF_LINK) {
3203			if (sa_dl_equal(ifma->ifma_addr, sa))
3204				break;
3205		} else {
3206			if (sa_equal(ifma->ifma_addr, sa))
3207				break;
3208		}
3209	}
3210
3211	return ifma;
3212}
3213
3214/*
3215 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3216 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3217 * the ifnet multicast address list here, so the caller must do that and
3218 * other setup work (such as notifying the device driver).  The reference
3219 * count is initialized to 1.
3220 */
3221static struct ifmultiaddr *
3222if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3223    int mflags)
3224{
3225	struct ifmultiaddr *ifma;
3226	struct sockaddr *dupsa;
3227
3228	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3229	    M_ZERO);
3230	if (ifma == NULL)
3231		return (NULL);
3232
3233	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3234	if (dupsa == NULL) {
3235		free(ifma, M_IFMADDR);
3236		return (NULL);
3237	}
3238	bcopy(sa, dupsa, sa->sa_len);
3239	ifma->ifma_addr = dupsa;
3240
3241	ifma->ifma_ifp = ifp;
3242	ifma->ifma_refcount = 1;
3243	ifma->ifma_protospec = NULL;
3244
3245	if (llsa == NULL) {
3246		ifma->ifma_lladdr = NULL;
3247		return (ifma);
3248	}
3249
3250	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3251	if (dupsa == NULL) {
3252		free(ifma->ifma_addr, M_IFMADDR);
3253		free(ifma, M_IFMADDR);
3254		return (NULL);
3255	}
3256	bcopy(llsa, dupsa, llsa->sa_len);
3257	ifma->ifma_lladdr = dupsa;
3258
3259	return (ifma);
3260}
3261
3262/*
3263 * if_freemulti: free ifmultiaddr structure and possibly attached related
3264 * addresses.  The caller is responsible for implementing reference
3265 * counting, notifying the driver, handling routing messages, and releasing
3266 * any dependent link layer state.
3267 */
3268static void
3269if_freemulti(struct ifmultiaddr *ifma)
3270{
3271
3272	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3273	    ifma->ifma_refcount));
3274
3275	if (ifma->ifma_lladdr != NULL)
3276		free(ifma->ifma_lladdr, M_IFMADDR);
3277	free(ifma->ifma_addr, M_IFMADDR);
3278	free(ifma, M_IFMADDR);
3279}
3280
3281/*
3282 * Register an additional multicast address with a network interface.
3283 *
3284 * - If the address is already present, bump the reference count on the
3285 *   address and return.
3286 * - If the address is not link-layer, look up a link layer address.
3287 * - Allocate address structures for one or both addresses, and attach to the
3288 *   multicast address list on the interface.  If automatically adding a link
3289 *   layer address, the protocol address will own a reference to the link
3290 *   layer address, to be freed when it is freed.
3291 * - Notify the network device driver of an addition to the multicast address
3292 *   list.
3293 *
3294 * 'sa' points to caller-owned memory with the desired multicast address.
3295 *
3296 * 'retifma' will be used to return a pointer to the resulting multicast
3297 * address reference, if desired.
3298 */
3299int
3300if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
3301    struct ifmultiaddr **retifma)
3302{
3303	struct ifmultiaddr *ifma, *ll_ifma;
3304	struct sockaddr *llsa;
3305	struct sockaddr_dl sdl;
3306	int error;
3307
3308	/*
3309	 * If the address is already present, return a new reference to it;
3310	 * otherwise, allocate storage and set up a new address.
3311	 */
3312	IF_ADDR_WLOCK(ifp);
3313	ifma = if_findmulti(ifp, sa);
3314	if (ifma != NULL) {
3315		ifma->ifma_refcount++;
3316		if (retifma != NULL)
3317			*retifma = ifma;
3318		IF_ADDR_WUNLOCK(ifp);
3319		return (0);
3320	}
3321
3322	/*
3323	 * The address isn't already present; resolve the protocol address
3324	 * into a link layer address, and then look that up, bump its
3325	 * refcount or allocate an ifma for that also.
3326	 * Most link layer resolving functions returns address data which
3327	 * fits inside default sockaddr_dl structure. However callback
3328	 * can allocate another sockaddr structure, in that case we need to
3329	 * free it later.
3330	 */
3331	llsa = NULL;
3332	ll_ifma = NULL;
3333	if (ifp->if_resolvemulti != NULL) {
3334		/* Provide called function with buffer size information */
3335		sdl.sdl_len = sizeof(sdl);
3336		llsa = (struct sockaddr *)&sdl;
3337		error = ifp->if_resolvemulti(ifp, &llsa, sa);
3338		if (error)
3339			goto unlock_out;
3340	}
3341
3342	/*
3343	 * Allocate the new address.  Don't hook it up yet, as we may also
3344	 * need to allocate a link layer multicast address.
3345	 */
3346	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
3347	if (ifma == NULL) {
3348		error = ENOMEM;
3349		goto free_llsa_out;
3350	}
3351
3352	/*
3353	 * If a link layer address is found, we'll need to see if it's
3354	 * already present in the address list, or allocate is as well.
3355	 * When this block finishes, the link layer address will be on the
3356	 * list.
3357	 */
3358	if (llsa != NULL) {
3359		ll_ifma = if_findmulti(ifp, llsa);
3360		if (ll_ifma == NULL) {
3361			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
3362			if (ll_ifma == NULL) {
3363				--ifma->ifma_refcount;
3364				if_freemulti(ifma);
3365				error = ENOMEM;
3366				goto free_llsa_out;
3367			}
3368			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
3369			    ifma_link);
3370		} else
3371			ll_ifma->ifma_refcount++;
3372		ifma->ifma_llifma = ll_ifma;
3373	}
3374
3375	/*
3376	 * We now have a new multicast address, ifma, and possibly a new or
3377	 * referenced link layer address.  Add the primary address to the
3378	 * ifnet address list.
3379	 */
3380	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
3381
3382	if (retifma != NULL)
3383		*retifma = ifma;
3384
3385	/*
3386	 * Must generate the message while holding the lock so that 'ifma'
3387	 * pointer is still valid.
3388	 */
3389	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
3390	IF_ADDR_WUNLOCK(ifp);
3391
3392	/*
3393	 * We are certain we have added something, so call down to the
3394	 * interface to let them know about it.
3395	 */
3396	if (ifp->if_ioctl != NULL) {
3397		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3398	}
3399
3400	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3401		link_free_sdl(llsa);
3402
3403	return (0);
3404
3405free_llsa_out:
3406	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3407		link_free_sdl(llsa);
3408
3409unlock_out:
3410	IF_ADDR_WUNLOCK(ifp);
3411	return (error);
3412}
3413
3414/*
3415 * Delete a multicast group membership by network-layer group address.
3416 *
3417 * Returns ENOENT if the entry could not be found. If ifp no longer
3418 * exists, results are undefined. This entry point should only be used
3419 * from subsystems which do appropriate locking to hold ifp for the
3420 * duration of the call.
3421 * Network-layer protocol domains must use if_delmulti_ifma().
3422 */
3423int
3424if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3425{
3426	struct ifmultiaddr *ifma;
3427	int lastref;
3428#ifdef INVARIANTS
3429	struct ifnet *oifp;
3430
3431	IFNET_RLOCK_NOSLEEP();
3432	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3433		if (ifp == oifp)
3434			break;
3435	if (ifp != oifp)
3436		ifp = NULL;
3437	IFNET_RUNLOCK_NOSLEEP();
3438
3439	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
3440#endif
3441	if (ifp == NULL)
3442		return (ENOENT);
3443
3444	IF_ADDR_WLOCK(ifp);
3445	lastref = 0;
3446	ifma = if_findmulti(ifp, sa);
3447	if (ifma != NULL)
3448		lastref = if_delmulti_locked(ifp, ifma, 0);
3449	IF_ADDR_WUNLOCK(ifp);
3450
3451	if (ifma == NULL)
3452		return (ENOENT);
3453
3454	if (lastref && ifp->if_ioctl != NULL) {
3455		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3456	}
3457
3458	return (0);
3459}
3460
3461/*
3462 * Delete all multicast group membership for an interface.
3463 * Should be used to quickly flush all multicast filters.
3464 */
3465void
3466if_delallmulti(struct ifnet *ifp)
3467{
3468	struct ifmultiaddr *ifma;
3469	struct ifmultiaddr *next;
3470
3471	IF_ADDR_WLOCK(ifp);
3472	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3473		if_delmulti_locked(ifp, ifma, 0);
3474	IF_ADDR_WUNLOCK(ifp);
3475}
3476
3477/*
3478 * Delete a multicast group membership by group membership pointer.
3479 * Network-layer protocol domains must use this routine.
3480 *
3481 * It is safe to call this routine if the ifp disappeared.
3482 */
3483void
3484if_delmulti_ifma(struct ifmultiaddr *ifma)
3485{
3486	struct ifnet *ifp;
3487	int lastref;
3488
3489	ifp = ifma->ifma_ifp;
3490#ifdef DIAGNOSTIC
3491	if (ifp == NULL) {
3492		printf("%s: ifma_ifp seems to be detached\n", __func__);
3493	} else {
3494		struct ifnet *oifp;
3495
3496		IFNET_RLOCK_NOSLEEP();
3497		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3498			if (ifp == oifp)
3499				break;
3500		if (ifp != oifp) {
3501			printf("%s: ifnet %p disappeared\n", __func__, ifp);
3502			ifp = NULL;
3503		}
3504		IFNET_RUNLOCK_NOSLEEP();
3505	}
3506#endif
3507	/*
3508	 * If and only if the ifnet instance exists: Acquire the address lock.
3509	 */
3510	if (ifp != NULL)
3511		IF_ADDR_WLOCK(ifp);
3512
3513	lastref = if_delmulti_locked(ifp, ifma, 0);
3514
3515	if (ifp != NULL) {
3516		/*
3517		 * If and only if the ifnet instance exists:
3518		 *  Release the address lock.
3519		 *  If the group was left: update the hardware hash filter.
3520		 */
3521		IF_ADDR_WUNLOCK(ifp);
3522		if (lastref && ifp->if_ioctl != NULL) {
3523			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3524		}
3525	}
3526}
3527
3528/*
3529 * Perform deletion of network-layer and/or link-layer multicast address.
3530 *
3531 * Return 0 if the reference count was decremented.
3532 * Return 1 if the final reference was released, indicating that the
3533 * hardware hash filter should be reprogrammed.
3534 */
3535static int
3536if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3537{
3538	struct ifmultiaddr *ll_ifma;
3539
3540	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3541		KASSERT(ifma->ifma_ifp == ifp,
3542		    ("%s: inconsistent ifp %p", __func__, ifp));
3543		IF_ADDR_WLOCK_ASSERT(ifp);
3544	}
3545
3546	ifp = ifma->ifma_ifp;
3547
3548	/*
3549	 * If the ifnet is detaching, null out references to ifnet,
3550	 * so that upper protocol layers will notice, and not attempt
3551	 * to obtain locks for an ifnet which no longer exists. The
3552	 * routing socket announcement must happen before the ifnet
3553	 * instance is detached from the system.
3554	 */
3555	if (detaching) {
3556#ifdef DIAGNOSTIC
3557		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3558#endif
3559		/*
3560		 * ifp may already be nulled out if we are being reentered
3561		 * to delete the ll_ifma.
3562		 */
3563		if (ifp != NULL) {
3564			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3565			ifma->ifma_ifp = NULL;
3566		}
3567	}
3568
3569	if (--ifma->ifma_refcount > 0)
3570		return 0;
3571
3572	/*
3573	 * If this ifma is a network-layer ifma, a link-layer ifma may
3574	 * have been associated with it. Release it first if so.
3575	 */
3576	ll_ifma = ifma->ifma_llifma;
3577	if (ll_ifma != NULL) {
3578		KASSERT(ifma->ifma_lladdr != NULL,
3579		    ("%s: llifma w/o lladdr", __func__));
3580		if (detaching)
3581			ll_ifma->ifma_ifp = NULL;	/* XXX */
3582		if (--ll_ifma->ifma_refcount == 0) {
3583			if (ifp != NULL) {
3584				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
3585				    ifma_link);
3586			}
3587			if_freemulti(ll_ifma);
3588		}
3589	}
3590
3591	if (ifp != NULL)
3592		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
3593
3594	if_freemulti(ifma);
3595
3596	/*
3597	 * The last reference to this instance of struct ifmultiaddr
3598	 * was released; the hardware should be notified of this change.
3599	 */
3600	return 1;
3601}
3602
3603/*
3604 * Set the link layer address on an interface.
3605 *
3606 * At this time we only support certain types of interfaces,
3607 * and we don't allow the length of the address to change.
3608 *
3609 * Set noinline to be dtrace-friendly
3610 */
3611__noinline int
3612if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3613{
3614	struct sockaddr_dl *sdl;
3615	struct ifaddr *ifa;
3616	struct ifreq ifr;
3617
3618	IF_ADDR_RLOCK(ifp);
3619	ifa = ifp->if_addr;
3620	if (ifa == NULL) {
3621		IF_ADDR_RUNLOCK(ifp);
3622		return (EINVAL);
3623	}
3624	ifa_ref(ifa);
3625	IF_ADDR_RUNLOCK(ifp);
3626	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3627	if (sdl == NULL) {
3628		ifa_free(ifa);
3629		return (EINVAL);
3630	}
3631	if (len != sdl->sdl_alen) {	/* don't allow length to change */
3632		ifa_free(ifa);
3633		return (EINVAL);
3634	}
3635	switch (ifp->if_type) {
3636	case IFT_ETHER:
3637	case IFT_FDDI:
3638	case IFT_XETHER:
3639	case IFT_ISO88025:
3640	case IFT_L2VLAN:
3641	case IFT_BRIDGE:
3642	case IFT_ARCNET:
3643	case IFT_IEEE8023ADLAG:
3644	case IFT_IEEE80211:
3645		bcopy(lladdr, LLADDR(sdl), len);
3646		ifa_free(ifa);
3647		break;
3648	default:
3649		ifa_free(ifa);
3650		return (ENODEV);
3651	}
3652
3653	/*
3654	 * If the interface is already up, we need
3655	 * to re-init it in order to reprogram its
3656	 * address filter.
3657	 */
3658	if ((ifp->if_flags & IFF_UP) != 0) {
3659		if (ifp->if_ioctl) {
3660			ifp->if_flags &= ~IFF_UP;
3661			ifr.ifr_flags = ifp->if_flags & 0xffff;
3662			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3663			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3664			ifp->if_flags |= IFF_UP;
3665			ifr.ifr_flags = ifp->if_flags & 0xffff;
3666			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3667			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3668		}
3669	}
3670	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3671	return (0);
3672}
3673
3674/*
3675 * Compat function for handling basic encapsulation requests.
3676 * Not converted stacks (FDDI, IB, ..) supports traditional
3677 * output model: ARP (and other similar L2 protocols) are handled
3678 * inside output routine, arpresolve/nd6_resolve() returns MAC
3679 * address instead of full prepend.
3680 *
3681 * This function creates calculated header==MAC for IPv4/IPv6 and
3682 * returns EAFNOSUPPORT (which is then handled in ARP code) for other
3683 * address families.
3684 */
3685static int
3686if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3687{
3688
3689	if (req->rtype != IFENCAP_LL)
3690		return (EOPNOTSUPP);
3691
3692	if (req->bufsize < req->lladdr_len)
3693		return (ENOMEM);
3694
3695	switch (req->family) {
3696	case AF_INET:
3697	case AF_INET6:
3698		break;
3699	default:
3700		return (EAFNOSUPPORT);
3701	}
3702
3703	/* Copy lladdr to storage as is */
3704	memmove(req->buf, req->lladdr, req->lladdr_len);
3705	req->bufsize = req->lladdr_len;
3706	req->lladdr_off = 0;
3707
3708	return (0);
3709}
3710
3711/*
3712 * Get the link layer address that was read from the hardware at attach.
3713 *
3714 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
3715 * their component interfaces as IFT_IEEE8023ADLAG.
3716 */
3717int
3718if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
3719{
3720
3721	if (ifp->if_hw_addr == NULL)
3722		return (ENODEV);
3723
3724	switch (ifp->if_type) {
3725	case IFT_ETHER:
3726	case IFT_IEEE8023ADLAG:
3727		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
3728		return (0);
3729	default:
3730		return (ENODEV);
3731	}
3732}
3733
3734/*
3735 * The name argument must be a pointer to storage which will last as
3736 * long as the interface does.  For physical devices, the result of
3737 * device_get_name(dev) is a good choice and for pseudo-devices a
3738 * static string works well.
3739 */
3740void
3741if_initname(struct ifnet *ifp, const char *name, int unit)
3742{
3743	ifp->if_dname = name;
3744	ifp->if_dunit = unit;
3745	if (unit != IF_DUNIT_NONE)
3746		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3747	else
3748		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3749}
3750
3751int
3752if_printf(struct ifnet *ifp, const char * fmt, ...)
3753{
3754	va_list ap;
3755	int retval;
3756
3757	retval = printf("%s: ", ifp->if_xname);
3758	va_start(ap, fmt);
3759	retval += vprintf(fmt, ap);
3760	va_end(ap);
3761	return (retval);
3762}
3763
3764void
3765if_start(struct ifnet *ifp)
3766{
3767
3768	(*(ifp)->if_start)(ifp);
3769}
3770
/*
 * Backwards compatibility transmit routine for drivers that have not
 * implemented one of their own: hand the mbuf to IFQ_HANDOFF() and
 * return the error code it reports.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int rc;

	IFQ_HANDOFF(ifp, m, rc);
	return (rc);
}
3783
3784static void
3785if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
3786{
3787
3788	m_freem(m);
3789}
3790
3791int
3792if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3793{
3794	int active = 0;
3795
3796	IF_LOCK(ifq);
3797	if (_IF_QFULL(ifq)) {
3798		IF_UNLOCK(ifq);
3799		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3800		m_freem(m);
3801		return (0);
3802	}
3803	if (ifp != NULL) {
3804		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
3805		if (m->m_flags & (M_BCAST|M_MCAST))
3806			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
3807		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3808	}
3809	_IF_ENQUEUE(ifq, m);
3810	IF_UNLOCK(ifq);
3811	if (ifp != NULL && !active)
3812		(*(ifp)->if_start)(ifp);
3813	return (1);
3814}
3815
3816void
3817if_register_com_alloc(u_char type,
3818    if_com_alloc_t *a, if_com_free_t *f)
3819{
3820
3821	KASSERT(if_com_alloc[type] == NULL,
3822	    ("if_register_com_alloc: %d already registered", type));
3823	KASSERT(if_com_free[type] == NULL,
3824	    ("if_register_com_alloc: %d free already registered", type));
3825
3826	if_com_alloc[type] = a;
3827	if_com_free[type] = f;
3828}
3829
3830void
3831if_deregister_com_alloc(u_char type)
3832{
3833
3834	KASSERT(if_com_alloc[type] != NULL,
3835	    ("if_deregister_com_alloc: %d not registered", type));
3836	KASSERT(if_com_free[type] != NULL,
3837	    ("if_deregister_com_alloc: %d free not registered", type));
3838	if_com_alloc[type] = NULL;
3839	if_com_free[type] = NULL;
3840}
3841
3842/* API for driver access to network stack owned ifnet.*/
3843uint64_t
3844if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
3845{
3846	uint64_t oldbrate;
3847
3848	oldbrate = ifp->if_baudrate;
3849	ifp->if_baudrate = baudrate;
3850	return (oldbrate);
3851}
3852
3853uint64_t
3854if_getbaudrate(if_t ifp)
3855{
3856
3857	return (((struct ifnet *)ifp)->if_baudrate);
3858}
3859
3860int
3861if_setcapabilities(if_t ifp, int capabilities)
3862{
3863	((struct ifnet *)ifp)->if_capabilities = capabilities;
3864	return (0);
3865}
3866
3867int
3868if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
3869{
3870	((struct ifnet *)ifp)->if_capabilities |= setbit;
3871	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
3872
3873	return (0);
3874}
3875
3876int
3877if_getcapabilities(if_t ifp)
3878{
3879	return ((struct ifnet *)ifp)->if_capabilities;
3880}
3881
3882int
3883if_setcapenable(if_t ifp, int capabilities)
3884{
3885	((struct ifnet *)ifp)->if_capenable = capabilities;
3886	return (0);
3887}
3888
3889int
3890if_setcapenablebit(if_t ifp, int setcap, int clearcap)
3891{
3892	if(setcap)
3893		((struct ifnet *)ifp)->if_capenable |= setcap;
3894	if(clearcap)
3895		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
3896
3897	return (0);
3898}
3899
3900const char *
3901if_getdname(if_t ifp)
3902{
3903	return ((struct ifnet *)ifp)->if_dname;
3904}
3905
3906int
3907if_togglecapenable(if_t ifp, int togglecap)
3908{
3909	((struct ifnet *)ifp)->if_capenable ^= togglecap;
3910	return (0);
3911}
3912
3913int
3914if_getcapenable(if_t ifp)
3915{
3916	return ((struct ifnet *)ifp)->if_capenable;
3917}
3918
3919/*
3920 * This is largely undesirable because it ties ifnet to a device, but does
3921 * provide flexiblity for an embedded product vendor. Should be used with
3922 * the understanding that it violates the interface boundaries, and should be
3923 * a last resort only.
3924 */
3925int
3926if_setdev(if_t ifp, void *dev)
3927{
3928	return (0);
3929}
3930
3931int
3932if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
3933{
3934	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
3935	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
3936
3937	return (0);
3938}
3939
3940int
3941if_getdrvflags(if_t ifp)
3942{
3943	return ((struct ifnet *)ifp)->if_drv_flags;
3944}
3945
3946int
3947if_setdrvflags(if_t ifp, int flags)
3948{
3949	((struct ifnet *)ifp)->if_drv_flags = flags;
3950	return (0);
3951}
3952
3953
3954int
3955if_setflags(if_t ifp, int flags)
3956{
3957	((struct ifnet *)ifp)->if_flags = flags;
3958	return (0);
3959}
3960
3961int
3962if_setflagbits(if_t ifp, int set, int clear)
3963{
3964	((struct ifnet *)ifp)->if_flags |= set;
3965	((struct ifnet *)ifp)->if_flags &= ~clear;
3966
3967	return (0);
3968}
3969
3970int
3971if_getflags(if_t ifp)
3972{
3973	return ((struct ifnet *)ifp)->if_flags;
3974}
3975
3976int
3977if_clearhwassist(if_t ifp)
3978{
3979	((struct ifnet *)ifp)->if_hwassist = 0;
3980	return (0);
3981}
3982
3983int
3984if_sethwassistbits(if_t ifp, int toset, int toclear)
3985{
3986	((struct ifnet *)ifp)->if_hwassist |= toset;
3987	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
3988
3989	return (0);
3990}
3991
3992int
3993if_sethwassist(if_t ifp, int hwassist_bit)
3994{
3995	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
3996	return (0);
3997}
3998
3999int
4000if_gethwassist(if_t ifp)
4001{
4002	return ((struct ifnet *)ifp)->if_hwassist;
4003}
4004
4005int
4006if_setmtu(if_t ifp, int mtu)
4007{
4008	((struct ifnet *)ifp)->if_mtu = mtu;
4009	return (0);
4010}
4011
4012int
4013if_getmtu(if_t ifp)
4014{
4015	return ((struct ifnet *)ifp)->if_mtu;
4016}
4017
4018int
4019if_getmtu_family(if_t ifp, int family)
4020{
4021	struct domain *dp;
4022
4023	for (dp = domains; dp; dp = dp->dom_next) {
4024		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
4025			return (dp->dom_ifmtu((struct ifnet *)ifp));
4026	}
4027
4028	return (((struct ifnet *)ifp)->if_mtu);
4029}
4030
4031int
4032if_setsoftc(if_t ifp, void *softc)
4033{
4034	((struct ifnet *)ifp)->if_softc = softc;
4035	return (0);
4036}
4037
4038void *
4039if_getsoftc(if_t ifp)
4040{
4041	return ((struct ifnet *)ifp)->if_softc;
4042}
4043
4044void
4045if_setrcvif(struct mbuf *m, if_t ifp)
4046{
4047	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
4048}
4049
4050void
4051if_setvtag(struct mbuf *m, uint16_t tag)
4052{
4053	m->m_pkthdr.ether_vtag = tag;
4054}
4055
4056uint16_t
4057if_getvtag(struct mbuf *m)
4058{
4059
4060	return (m->m_pkthdr.ether_vtag);
4061}
4062
4063int
4064if_sendq_empty(if_t ifp)
4065{
4066	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
4067}
4068
4069struct ifaddr *
4070if_getifaddr(if_t ifp)
4071{
4072	return ((struct ifnet *)ifp)->if_addr;
4073}
4074
4075int
4076if_getamcount(if_t ifp)
4077{
4078	return ((struct ifnet *)ifp)->if_amcount;
4079}
4080
4081
4082int
4083if_setsendqready(if_t ifp)
4084{
4085	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
4086	return (0);
4087}
4088
4089int
4090if_setsendqlen(if_t ifp, int tx_desc_count)
4091{
4092	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
4093	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
4094
4095	return (0);
4096}
4097
4098int
4099if_vlantrunkinuse(if_t ifp)
4100{
4101	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
4102}
4103
4104int
4105if_input(if_t ifp, struct mbuf* sendmp)
4106{
4107	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
4108	return (0);
4109
4110}
4111
4112/* XXX */
4113#ifndef ETH_ADDR_LEN
4114#define ETH_ADDR_LEN 6
4115#endif
4116
4117int
4118if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
4119{
4120	struct ifmultiaddr *ifma;
4121	uint8_t *lmta = (uint8_t *)mta;
4122	int mcnt = 0;
4123
4124	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4125		if (ifma->ifma_addr->sa_family != AF_LINK)
4126			continue;
4127
4128		if (mcnt == max)
4129			break;
4130
4131		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
4132		    &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
4133		mcnt++;
4134	}
4135	*cnt = mcnt;
4136
4137	return (0);
4138}
4139
4140int
4141if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
4142{
4143	int error;
4144
4145	if_maddr_rlock(ifp);
4146	error = if_setupmultiaddr(ifp, mta, cnt, max);
4147	if_maddr_runlock(ifp);
4148	return (error);
4149}
4150
4151int
4152if_multiaddr_count(if_t ifp, int max)
4153{
4154	struct ifmultiaddr *ifma;
4155	int count;
4156
4157	count = 0;
4158	if_maddr_rlock(ifp);
4159	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4160		if (ifma->ifma_addr->sa_family != AF_LINK)
4161			continue;
4162		count++;
4163		if (count == max)
4164			break;
4165	}
4166	if_maddr_runlock(ifp);
4167	return (count);
4168}
4169
4170int
4171if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
4172{
4173	struct ifmultiaddr *ifma;
4174	int cnt = 0;
4175
4176	if_maddr_rlock(ifp);
4177	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4178		cnt += filter(arg, ifma, cnt);
4179	if_maddr_runlock(ifp);
4180	return (cnt);
4181}
4182
4183struct mbuf *
4184if_dequeue(if_t ifp)
4185{
4186	struct mbuf *m;
4187	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
4188
4189	return (m);
4190}
4191
4192int
4193if_sendq_prepend(if_t ifp, struct mbuf *m)
4194{
4195	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
4196	return (0);
4197}
4198
4199int
4200if_setifheaderlen(if_t ifp, int len)
4201{
4202	((struct ifnet *)ifp)->if_hdrlen = len;
4203	return (0);
4204}
4205
4206caddr_t
4207if_getlladdr(if_t ifp)
4208{
4209	return (IF_LLADDR((struct ifnet *)ifp));
4210}
4211
4212void *
4213if_gethandle(u_char type)
4214{
4215	return (if_alloc(type));
4216}
4217
4218void
4219if_bpfmtap(if_t ifh, struct mbuf *m)
4220{
4221	struct ifnet *ifp = (struct ifnet *)ifh;
4222
4223	BPF_MTAP(ifp, m);
4224}
4225
4226void
4227if_etherbpfmtap(if_t ifh, struct mbuf *m)
4228{
4229	struct ifnet *ifp = (struct ifnet *)ifh;
4230
4231	ETHER_BPF_MTAP(ifp, m);
4232}
4233
4234void
4235if_vlancap(if_t ifh)
4236{
4237	struct ifnet *ifp = (struct ifnet *)ifh;
4238	VLAN_CAPABILITIES(ifp);
4239}
4240
4241void
4242if_setinitfn(if_t ifp, void (*init_fn)(void *))
4243{
4244	((struct ifnet *)ifp)->if_init = init_fn;
4245}
4246
4247void
4248if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
4249{
4250	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
4251}
4252
4253void
4254if_setstartfn(if_t ifp, void (*start_fn)(if_t))
4255{
4256	((struct ifnet *)ifp)->if_start = (void *)start_fn;
4257}
4258
4259void
4260if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
4261{
4262	((struct ifnet *)ifp)->if_transmit = start_fn;
4263}
4264
4265void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
4266{
4267	((struct ifnet *)ifp)->if_qflush = flush_fn;
4268
4269}
4270
4271void
4272if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
4273{
4274
4275	ifp->if_get_counter = fn;
4276}
4277
4278/* Revisit these - These are inline functions originally. */
4279int
4280drbr_inuse_drv(if_t ifh, struct buf_ring *br)
4281{
4282	return drbr_inuse(ifh, br);
4283}
4284
4285struct mbuf*
4286drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
4287{
4288	return drbr_dequeue(ifh, br);
4289}
4290
4291int
4292drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
4293{
4294	return drbr_needs_enqueue(ifh, br);
4295}
4296
4297int
4298drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
4299{
4300	return drbr_enqueue(ifh, br, m);
4301
4302}
4303