if.c revision 332288
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: stable/11/sys/net/if.c 332288 2018-04-08 16:54:07Z brooks $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36
37#include <sys/param.h>
38#include <sys/types.h>
39#include <sys/conf.h>
40#include <sys/malloc.h>
41#include <sys/sbuf.h>
42#include <sys/bus.h>
43#include <sys/mbuf.h>
44#include <sys/systm.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/protosw.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/refcount.h>
53#include <sys/module.h>
54#include <sys/rwlock.h>
55#include <sys/sockio.h>
56#include <sys/syslog.h>
57#include <sys/sysctl.h>
58#include <sys/sysent.h>
59#include <sys/taskqueue.h>
60#include <sys/domain.h>
61#include <sys/jail.h>
62#include <sys/priv.h>
63
64#include <machine/stdarg.h>
65#include <vm/uma.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_clone.h>
72#include <net/if_dl.h>
73#include <net/if_types.h>
74#include <net/if_var.h>
75#include <net/if_media.h>
76#include <net/if_vlan_var.h>
77#include <net/radix.h>
78#include <net/route.h>
79#include <net/vnet.h>
80
81#if defined(INET) || defined(INET6)
82#include <net/ethernet.h>
83#include <netinet/in.h>
84#include <netinet/in_var.h>
85#include <netinet/ip.h>
86#include <netinet/ip_carp.h>
87#ifdef INET
88#include <netinet/if_ether.h>
89#endif /* INET */
90#ifdef INET6
91#include <netinet6/in6_var.h>
92#include <netinet6/in6_ifattach.h>
93#endif /* INET6 */
94#endif /* INET || INET6 */
95
96#include <security/mac/mac_framework.h>
97
98#ifdef COMPAT_FREEBSD32
99#include <sys/mount.h>
100#include <compat/freebsd32/freebsd32.h>
101
102struct ifreq_buffer32 {
103	uint32_t	length;		/* (size_t) */
104	uint32_t	buffer;		/* (void *) */
105};
106
107/*
108 * Interface request structure used for socket
109 * ioctl's.  All interface ioctl's must have parameter
110 * definitions which begin with ifr_name.  The
111 * remainder may be interface specific.
112 */
113struct ifreq32 {
114	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
115	union {
116		struct sockaddr	ifru_addr;
117		struct sockaddr	ifru_dstaddr;
118		struct sockaddr	ifru_broadaddr;
119		struct ifreq_buffer32 ifru_buffer;
120		short		ifru_flags[2];
121		short		ifru_index;
122		int		ifru_jid;
123		int		ifru_metric;
124		int		ifru_mtu;
125		int		ifru_phys;
126		int		ifru_media;
127		uint32_t	ifru_data;
128		int		ifru_cap[2];
129		u_int		ifru_fib;
130		u_char		ifru_vlan_pcp;
131	} ifr_ifru;
132};
133CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
134CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
135    __offsetof(struct ifreq32, ifr_ifru));
136#endif
137
138union ifreq_union {
139	struct ifreq	ifr;
140#ifdef COMPAT_FREEBSD32
141	struct ifreq32	ifr32;
142#endif
143};
144
145SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
146SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
147
148SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
149    &ifqmaxlen, 0, "max send queue size");
150
151/* Log link state change events */
152static int log_link_state_change = 1;
153
154SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
155	&log_link_state_change, 0,
156	"log interface link state change events");
157
158/* Log promiscuous mode change events */
159static int log_promisc_mode_change = 1;
160
161SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
162	&log_promisc_mode_change, 1,
163	"log promiscuous mode change events");
164
165/* Interface description */
166static unsigned int ifdescr_maxlen = 1024;
167SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
168	&ifdescr_maxlen, 0,
169	"administrative maximum length for interface description");
170
171static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
172
173/* global sx for non-critical path ifdescr */
174static struct sx ifdescr_sx;
175SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
176
177void	(*bridge_linkstate_p)(struct ifnet *ifp);
178void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
179void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
180/* These are external hooks for CARP. */
181void	(*carp_linkstate_p)(struct ifnet *ifp);
182void	(*carp_demote_adj_p)(int, char *);
183int	(*carp_master_p)(struct ifaddr *);
184#if defined(INET) || defined(INET6)
185int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
186int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
187    const struct sockaddr *sa);
188int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
189int	(*carp_attach_p)(struct ifaddr *, int);
190void	(*carp_detach_p)(struct ifaddr *);
191#endif
192#ifdef INET
193int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
194#endif
195#ifdef INET6
196struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
197caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
198    const struct in6_addr *taddr);
199#endif
200
201struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
202
203/*
204 * XXX: Style; these should be sorted alphabetically, and unprototyped
205 * static functions should be prototyped. Currently they are sorted by
206 * declaration order.
207 */
208static void	if_attachdomain(void *);
209static void	if_attachdomain1(struct ifnet *);
210static int	ifconf(u_long, caddr_t);
211static void	if_freemulti(struct ifmultiaddr *);
212static void	if_grow(void);
213static void	if_input_default(struct ifnet *, struct mbuf *);
214static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
215static void	if_route(struct ifnet *, int flag, int fam);
216static int	if_setflag(struct ifnet *, int, int, int *, int);
217static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
218static void	if_unroute(struct ifnet *, int flag, int fam);
219static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
220static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
221static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
222static void	do_link_state_change(void *, int);
223static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
224static int	if_getgroupmembers(struct ifgroupreq *);
225static void	if_delgroups(struct ifnet *);
226static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
227static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
228#ifdef VIMAGE
229static void	if_vmove(struct ifnet *, struct vnet *);
230#endif
231
#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related headers.
 * Should this be generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif
239
240/* ipsec helper hooks */
241VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
242VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
243
244VNET_DEFINE(int, if_index);
245int	ifqmaxlen = IFQ_MAXLEN;
246VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
247VNET_DEFINE(struct ifgrouphead, ifg_head);
248
249static VNET_DEFINE(int, if_indexlim) = 8;
250
251/* Table of ifnet by index. */
252VNET_DEFINE(struct ifnet **, ifindex_table);
253
254#define	V_if_indexlim		VNET(if_indexlim)
255#define	V_ifindex_table		VNET(ifindex_table)
256
257/*
258 * The global network interface list (V_ifnet) and related state (such as
259 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
260 * an rwlock.  Either may be acquired shared to stablize the list, but both
261 * must be acquired writable to modify the list.  This model allows us to
262 * both stablize the interface list during interrupt thread processing, but
263 * also to stablize it over long-running ioctls, without introducing priority
264 * inversions and deadlocks.
265 */
266struct rwlock ifnet_rwlock;
267RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
268struct sx ifnet_sxlock;
269SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
270
271/*
272 * The allocation of network interfaces is a rather non-atomic affair; we
273 * need to select an index before we are ready to expose the interface for
274 * use, so will use this pointer value to indicate reservation.
275 */
276#define	IFNET_HOLD	(void *)(uintptr_t)(-1)
277
278static	if_com_alloc_t *if_com_alloc[256];
279static	if_com_free_t *if_com_free[256];
280
281static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
282MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
283MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
284
285struct ifnet *
286ifnet_byindex_locked(u_short idx)
287{
288
289	if (idx > V_if_index)
290		return (NULL);
291	if (V_ifindex_table[idx] == IFNET_HOLD)
292		return (NULL);
293	return (V_ifindex_table[idx]);
294}
295
296struct ifnet *
297ifnet_byindex(u_short idx)
298{
299	struct ifnet *ifp;
300
301	IFNET_RLOCK_NOSLEEP();
302	ifp = ifnet_byindex_locked(idx);
303	IFNET_RUNLOCK_NOSLEEP();
304	return (ifp);
305}
306
307struct ifnet *
308ifnet_byindex_ref(u_short idx)
309{
310	struct ifnet *ifp;
311
312	IFNET_RLOCK_NOSLEEP();
313	ifp = ifnet_byindex_locked(idx);
314	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
315		IFNET_RUNLOCK_NOSLEEP();
316		return (NULL);
317	}
318	if_ref(ifp);
319	IFNET_RUNLOCK_NOSLEEP();
320	return (ifp);
321}
322
323/*
324 * Allocate an ifindex array entry; return 0 on success or an error on
325 * failure.
326 */
327static u_short
328ifindex_alloc(void)
329{
330	u_short idx;
331
332	IFNET_WLOCK_ASSERT();
333retry:
334	/*
335	 * Try to find an empty slot below V_if_index.  If we fail, take the
336	 * next slot.
337	 */
338	for (idx = 1; idx <= V_if_index; idx++) {
339		if (V_ifindex_table[idx] == NULL)
340			break;
341	}
342
343	/* Catch if_index overflow. */
344	if (idx >= V_if_indexlim) {
345		if_grow();
346		goto retry;
347	}
348	if (idx > V_if_index)
349		V_if_index = idx;
350	return (idx);
351}
352
353static void
354ifindex_free_locked(u_short idx)
355{
356
357	IFNET_WLOCK_ASSERT();
358
359	V_ifindex_table[idx] = NULL;
360	while (V_if_index > 0 &&
361	    V_ifindex_table[V_if_index] == NULL)
362		V_if_index--;
363}
364
365static void
366ifindex_free(u_short idx)
367{
368
369	IFNET_WLOCK();
370	ifindex_free_locked(idx);
371	IFNET_WUNLOCK();
372}
373
374static void
375ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
376{
377
378	IFNET_WLOCK_ASSERT();
379
380	V_ifindex_table[idx] = ifp;
381}
382
383static void
384ifnet_setbyindex(u_short idx, struct ifnet *ifp)
385{
386
387	IFNET_WLOCK();
388	ifnet_setbyindex_locked(idx, ifp);
389	IFNET_WUNLOCK();
390}
391
392struct ifaddr *
393ifaddr_byindex(u_short idx)
394{
395	struct ifnet *ifp;
396	struct ifaddr *ifa = NULL;
397
398	IFNET_RLOCK_NOSLEEP();
399	ifp = ifnet_byindex_locked(idx);
400	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
401		ifa_ref(ifa);
402	IFNET_RUNLOCK_NOSLEEP();
403	return (ifa);
404}
405
406/*
407 * Network interface utility routines.
408 *
409 * Routines with ifa_ifwith* names take sockaddr *'s as
410 * parameters.
411 */
412
413static void
414vnet_if_init(const void *unused __unused)
415{
416
417	TAILQ_INIT(&V_ifnet);
418	TAILQ_INIT(&V_ifg_head);
419	IFNET_WLOCK();
420	if_grow();				/* create initial table */
421	IFNET_WUNLOCK();
422	vnet_if_clone_init();
423}
424VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
425    NULL);
426
427#ifdef VIMAGE
428static void
429vnet_if_uninit(const void *unused __unused)
430{
431
432	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
433	    "not empty", __func__, __LINE__, &V_ifnet));
434	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
435	    "not empty", __func__, __LINE__, &V_ifg_head));
436
437	free((caddr_t)V_ifindex_table, M_IFNET);
438}
439VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
440    vnet_if_uninit, NULL);
441
442static void
443vnet_if_return(const void *unused __unused)
444{
445	struct ifnet *ifp, *nifp;
446
447	/* Return all inherited interfaces to their parent vnets. */
448	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
449		if (ifp->if_home_vnet != ifp->if_vnet)
450			if_vmove(ifp, ifp->if_home_vnet);
451	}
452}
453VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
454    vnet_if_return, NULL);
455#endif
456
457static void
458if_grow(void)
459{
460	int oldlim;
461	u_int n;
462	struct ifnet **e;
463
464	IFNET_WLOCK_ASSERT();
465	oldlim = V_if_indexlim;
466	IFNET_WUNLOCK();
467	n = (oldlim << 1) * sizeof(*e);
468	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
469	IFNET_WLOCK();
470	if (V_if_indexlim != oldlim) {
471		free(e, M_IFNET);
472		return;
473	}
474	if (V_ifindex_table != NULL) {
475		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
476		free((caddr_t)V_ifindex_table, M_IFNET);
477	}
478	V_if_indexlim <<= 1;
479	V_ifindex_table = e;
480}
481
482/*
483 * Allocate a struct ifnet and an index for an interface.  A layer 2
484 * common structure will also be allocated if an allocation routine is
485 * registered for the passed type.
486 */
487struct ifnet *
488if_alloc(u_char type)
489{
490	struct ifnet *ifp;
491	u_short idx;
492
493	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
494	IFNET_WLOCK();
495	idx = ifindex_alloc();
496	ifnet_setbyindex_locked(idx, IFNET_HOLD);
497	IFNET_WUNLOCK();
498	ifp->if_index = idx;
499	ifp->if_type = type;
500	ifp->if_alloctype = type;
501#ifdef VIMAGE
502	ifp->if_vnet = curvnet;
503#endif
504	if (if_com_alloc[type] != NULL) {
505		ifp->if_l2com = if_com_alloc[type](type, ifp);
506		if (ifp->if_l2com == NULL) {
507			free(ifp, M_IFNET);
508			ifindex_free(idx);
509			return (NULL);
510		}
511	}
512
513	IF_ADDR_LOCK_INIT(ifp);
514	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
515	ifp->if_afdata_initialized = 0;
516	IF_AFDATA_LOCK_INIT(ifp);
517	TAILQ_INIT(&ifp->if_addrhead);
518	TAILQ_INIT(&ifp->if_multiaddrs);
519	TAILQ_INIT(&ifp->if_groups);
520#ifdef MAC
521	mac_ifnet_init(ifp);
522#endif
523	ifq_init(&ifp->if_snd, ifp);
524
525	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
526	for (int i = 0; i < IFCOUNTERS; i++)
527		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
528	ifp->if_get_counter = if_get_counter_default;
529	ifnet_setbyindex(ifp->if_index, ifp);
530	return (ifp);
531}
532
533/*
534 * Do the actual work of freeing a struct ifnet, and layer 2 common
535 * structure.  This call is made when the last reference to an
536 * interface is released.
537 */
538static void
539if_free_internal(struct ifnet *ifp)
540{
541
542	KASSERT((ifp->if_flags & IFF_DYING),
543	    ("if_free_internal: interface not dying"));
544
545	if (if_com_free[ifp->if_alloctype] != NULL)
546		if_com_free[ifp->if_alloctype](ifp->if_l2com,
547		    ifp->if_alloctype);
548
549#ifdef MAC
550	mac_ifnet_destroy(ifp);
551#endif /* MAC */
552	if (ifp->if_description != NULL)
553		free(ifp->if_description, M_IFDESCR);
554	IF_AFDATA_DESTROY(ifp);
555	IF_ADDR_LOCK_DESTROY(ifp);
556	ifq_delete(&ifp->if_snd);
557
558	for (int i = 0; i < IFCOUNTERS; i++)
559		counter_u64_free(ifp->if_counters[i]);
560
561	free(ifp, M_IFNET);
562}
563
564/*
565 * Deregister an interface and free the associated storage.
566 */
567void
568if_free(struct ifnet *ifp)
569{
570
571	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
572
573	CURVNET_SET_QUIET(ifp->if_vnet);
574	IFNET_WLOCK();
575	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
576	    ("%s: freeing unallocated ifnet", ifp->if_xname));
577
578	ifindex_free_locked(ifp->if_index);
579	IFNET_WUNLOCK();
580
581	if (refcount_release(&ifp->if_refcount))
582		if_free_internal(ifp);
583	CURVNET_RESTORE();
584}
585
586/*
587 * Interfaces to keep an ifnet type-stable despite the possibility of the
588 * driver calling if_free().  If there are additional references, we defer
589 * freeing the underlying data structure.
590 */
591void
592if_ref(struct ifnet *ifp)
593{
594
595	/* We don't assert the ifnet list lock here, but arguably should. */
596	refcount_acquire(&ifp->if_refcount);
597}
598
599void
600if_rele(struct ifnet *ifp)
601{
602
603	if (!refcount_release(&ifp->if_refcount))
604		return;
605	if_free_internal(ifp);
606}
607
608void
609ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
610{
611
612	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
613
614	if (ifq->ifq_maxlen == 0)
615		ifq->ifq_maxlen = ifqmaxlen;
616
617	ifq->altq_type = 0;
618	ifq->altq_disc = NULL;
619	ifq->altq_flags &= ALTQF_CANTCHANGE;
620	ifq->altq_tbr  = NULL;
621	ifq->altq_ifp  = ifp;
622}
623
624void
625ifq_delete(struct ifaltq *ifq)
626{
627	mtx_destroy(&ifq->ifq_mtx);
628}
629
630/*
631 * Perform generic interface initialization tasks and attach the interface
632 * to the list of "active" interfaces.  If vmove flag is set on entry
633 * to if_attach_internal(), perform only a limited subset of initialization
634 * tasks, given that we are moving from one vnet to another an ifnet which
635 * has already been fully initialized.
636 *
637 * Note that if_detach_internal() removes group membership unconditionally
638 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
639 * Thus, when if_vmove() is applied to a cloned interface, group membership
640 * is lost while a cloned one always joins a group whose name is
641 * ifc->ifc_name.  To recover this after if_detach_internal() and
642 * if_attach_internal(), the cloner should be specified to
643 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
644 * attempts to join a group whose name is ifc->ifc_name.
645 *
646 * XXX:
647 *  - The decision to return void and thus require this function to
648 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance, we don't
 *    do anything to ensure if_xname is unique or non-empty.
651 */
652void
653if_attach(struct ifnet *ifp)
654{
655
656	if_attach_internal(ifp, 0, NULL);
657}
658
659/*
660 * Compute the least common TSO limit.
661 */
662void
663if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
664{
665	/*
666	 * 1) If there is no limit currently, take the limit from
667	 * the network adapter.
668	 *
669	 * 2) If the network adapter has a limit below the current
670	 * limit, apply it.
671	 */
672	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
673	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
674		pmax->tsomaxbytes = ifp->if_hw_tsomax;
675	}
676	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
677	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
678		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
679	}
680	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
681	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
682		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
683	}
684}
685
686/*
687 * Update TSO limit of a network adapter.
688 *
689 * Returns zero if no change. Else non-zero.
690 */
691int
692if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
693{
694	int retval = 0;
695	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
696		ifp->if_hw_tsomax = pmax->tsomaxbytes;
697		retval++;
698	}
699	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
700		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
701		retval++;
702	}
703	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
704		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
705		retval++;
706	}
707	return (retval);
708}
709
710static void
711if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
712{
713	unsigned socksize, ifasize;
714	int namelen, masklen;
715	struct sockaddr_dl *sdl;
716	struct ifaddr *ifa;
717
718	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
719		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
720		    ifp->if_xname);
721
722#ifdef VIMAGE
723	ifp->if_vnet = curvnet;
724	if (ifp->if_home_vnet == NULL)
725		ifp->if_home_vnet = curvnet;
726#endif
727
728	if_addgroup(ifp, IFG_ALL);
729
730	/* Restore group membership for cloned interfaces. */
731	if (vmove && ifc != NULL)
732		if_clone_addgroup(ifp, ifc);
733
734	getmicrotime(&ifp->if_lastchange);
735	ifp->if_epoch = time_uptime;
736
737	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
738	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
739	    ("transmit and qflush must both either be set or both be NULL"));
740	if (ifp->if_transmit == NULL) {
741		ifp->if_transmit = if_transmit;
742		ifp->if_qflush = if_qflush;
743	}
744	if (ifp->if_input == NULL)
745		ifp->if_input = if_input_default;
746
747	if (ifp->if_requestencap == NULL)
748		ifp->if_requestencap = if_requestencap_default;
749
750	if (!vmove) {
751#ifdef MAC
752		mac_ifnet_create(ifp);
753#endif
754
755		/*
756		 * Create a Link Level name for this device.
757		 */
758		namelen = strlen(ifp->if_xname);
759		/*
760		 * Always save enough space for any possiable name so we
761		 * can do a rename in place later.
762		 */
763		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
764		socksize = masklen + ifp->if_addrlen;
765		if (socksize < sizeof(*sdl))
766			socksize = sizeof(*sdl);
767		socksize = roundup2(socksize, sizeof(long));
768		ifasize = sizeof(*ifa) + 2 * socksize;
769		ifa = ifa_alloc(ifasize, M_WAITOK);
770		sdl = (struct sockaddr_dl *)(ifa + 1);
771		sdl->sdl_len = socksize;
772		sdl->sdl_family = AF_LINK;
773		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
774		sdl->sdl_nlen = namelen;
775		sdl->sdl_index = ifp->if_index;
776		sdl->sdl_type = ifp->if_type;
777		ifp->if_addr = ifa;
778		ifa->ifa_ifp = ifp;
779		ifa->ifa_rtrequest = link_rtrequest;
780		ifa->ifa_addr = (struct sockaddr *)sdl;
781		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
782		ifa->ifa_netmask = (struct sockaddr *)sdl;
783		sdl->sdl_len = masklen;
784		while (namelen != 0)
785			sdl->sdl_data[--namelen] = 0xff;
786		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
787		/* Reliably crash if used uninitialized. */
788		ifp->if_broadcastaddr = NULL;
789
790		if (ifp->if_type == IFT_ETHER) {
791			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
792			    M_WAITOK | M_ZERO);
793		}
794
795#if defined(INET) || defined(INET6)
796		/* Use defaults for TSO, if nothing is set */
797		if (ifp->if_hw_tsomax == 0 &&
798		    ifp->if_hw_tsomaxsegcount == 0 &&
799		    ifp->if_hw_tsomaxsegsize == 0) {
800			/*
801			 * The TSO defaults needs to be such that an
802			 * NFS mbuf list of 35 mbufs totalling just
803			 * below 64K works and that a chain of mbufs
804			 * can be defragged into at most 32 segments:
805			 */
806			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
807			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
808			ifp->if_hw_tsomaxsegcount = 35;
809			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */
810
811			/* XXX some drivers set IFCAP_TSO after ethernet attach */
812			if (ifp->if_capabilities & IFCAP_TSO) {
813				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
814				    ifp->if_hw_tsomax,
815				    ifp->if_hw_tsomaxsegcount,
816				    ifp->if_hw_tsomaxsegsize);
817			}
818		}
819#endif
820	}
821#ifdef VIMAGE
822	else {
823		/*
824		 * Update the interface index in the link layer address
825		 * of the interface.
826		 */
827		for (ifa = ifp->if_addr; ifa != NULL;
828		    ifa = TAILQ_NEXT(ifa, ifa_link)) {
829			if (ifa->ifa_addr->sa_family == AF_LINK) {
830				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
831				sdl->sdl_index = ifp->if_index;
832			}
833		}
834	}
835#endif
836
837	IFNET_WLOCK();
838	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
839#ifdef VIMAGE
840	curvnet->vnet_ifcnt++;
841#endif
842	IFNET_WUNLOCK();
843
844	if (domain_init_status >= 2)
845		if_attachdomain1(ifp);
846
847	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
848	if (IS_DEFAULT_VNET(curvnet))
849		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
850
851	/* Announce the interface. */
852	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
853}
854
855static void
856if_attachdomain(void *dummy)
857{
858	struct ifnet *ifp;
859
860	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
861		if_attachdomain1(ifp);
862}
863SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
864    if_attachdomain, NULL);
865
866static void
867if_attachdomain1(struct ifnet *ifp)
868{
869	struct domain *dp;
870
871	/*
872	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
873	 * cannot lock ifp->if_afdata initialization, entirely.
874	 */
875	IF_AFDATA_LOCK(ifp);
876	if (ifp->if_afdata_initialized >= domain_init_status) {
877		IF_AFDATA_UNLOCK(ifp);
878		log(LOG_WARNING, "%s called more than once on %s\n",
879		    __func__, ifp->if_xname);
880		return;
881	}
882	ifp->if_afdata_initialized = domain_init_status;
883	IF_AFDATA_UNLOCK(ifp);
884
885	/* address family dependent data region */
886	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
887	for (dp = domains; dp; dp = dp->dom_next) {
888		if (dp->dom_ifattach)
889			ifp->if_afdata[dp->dom_family] =
890			    (*dp->dom_ifattach)(ifp);
891	}
892}
893
894/*
895 * Remove any unicast or broadcast network addresses from an interface.
896 */
897void
898if_purgeaddrs(struct ifnet *ifp)
899{
900	struct ifaddr *ifa, *next;
901
902	/* XXX cannot hold IF_ADDR_WLOCK over called functions. */
903	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
904		if (ifa->ifa_addr->sa_family == AF_LINK)
905			continue;
906#ifdef INET
907		/* XXX: Ugly!! ad hoc just for INET */
908		if (ifa->ifa_addr->sa_family == AF_INET) {
909			struct ifaliasreq ifr;
910
911			bzero(&ifr, sizeof(ifr));
912			ifr.ifra_addr = *ifa->ifa_addr;
913			if (ifa->ifa_dstaddr)
914				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
915			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
916			    NULL) == 0)
917				continue;
918		}
919#endif /* INET */
920#ifdef INET6
921		if (ifa->ifa_addr->sa_family == AF_INET6) {
922			in6_purgeaddr(ifa);
923			/* ifp_addrhead is already updated */
924			continue;
925		}
926#endif /* INET6 */
927		IF_ADDR_WLOCK(ifp);
928		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
929		IF_ADDR_WUNLOCK(ifp);
930		ifa_free(ifa);
931	}
932}
933
934/*
935 * Remove any multicast network addresses from an interface when an ifnet
936 * is going away.
937 */
938static void
939if_purgemaddrs(struct ifnet *ifp)
940{
941	struct ifmultiaddr *ifma;
942	struct ifmultiaddr *next;
943
944	IF_ADDR_WLOCK(ifp);
945	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
946		if_delmulti_locked(ifp, ifma, 1);
947	IF_ADDR_WUNLOCK(ifp);
948}
949
950/*
951 * Detach an interface, removing it from the list of "active" interfaces.
952 * If vmove flag is set on entry to if_detach_internal(), perform only a
953 * limited subset of cleanup tasks, given that we are moving an ifnet from
954 * one vnet to another, where it must be fully operational.
955 *
956 * XXXRW: There are some significant questions about event ordering, and
957 * how to prevent things from starting to use the interface during detach.
958 */
959void
960if_detach(struct ifnet *ifp)
961{
962
963	CURVNET_SET_QUIET(ifp->if_vnet);
964	if_detach_internal(ifp, 0, NULL);
965	CURVNET_RESTORE();
966}
967
968/*
969 * The vmove flag, if set, indicates that we are called from a callpath
970 * that is moving an interface to a different vnet instance.
971 *
972 * The shutdown flag, if set, indicates that we are called in the
973 * process of shutting down a vnet instance.  Currently only the
974 * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
975 * on a vnet instance shutdown without this flag being set, e.g., when
 * the cloned interfaces are destroyed as the first step of teardown.
977 */
978static int
979if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
980{
981	struct ifaddr *ifa;
982	int i;
983	struct domain *dp;
984 	struct ifnet *iter;
985 	int found = 0;
986#ifdef VIMAGE
987	int shutdown;
988
989	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
990		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
991#endif
992	IFNET_WLOCK();
993	TAILQ_FOREACH(iter, &V_ifnet, if_link)
994		if (iter == ifp) {
995			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
996			found = 1;
997			break;
998		}
999	IFNET_WUNLOCK();
1000	if (!found) {
1001		/*
1002		 * While we would want to panic here, we cannot
1003		 * guarantee that the interface is indeed still on
1004		 * the list given we don't hold locks all the way.
1005		 */
1006		return (ENOENT);
1007#if 0
1008		if (vmove)
1009			panic("%s: ifp=%p not on the ifnet tailq %p",
1010			    __func__, ifp, &V_ifnet);
1011		else
1012			return; /* XXX this should panic as well? */
1013#endif
1014	}
1015
1016	/*
1017	 * At this point we know the interface still was on the ifnet list
1018	 * and we removed it so we are in a stable state.
1019	 */
1020#ifdef VIMAGE
1021	curvnet->vnet_ifcnt--;
1022#endif
1023
1024	/*
1025	 * In any case (destroy or vmove) detach us from the groups
1026	 * and remove/wait for pending events on the taskq.
1027	 * XXX-BZ in theory an interface could still enqueue a taskq change?
1028	 */
1029	if_delgroups(ifp);
1030
1031	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
1032
1033	/*
1034	 * Check if this is a cloned interface or not. Must do even if
1035	 * shutting down as a if_vmove_reclaim() would move the ifp and
1036	 * the if_clone_addgroup() will have a corrupted string overwise
1037	 * from a gibberish pointer.
1038	 */
1039	if (vmove && ifcp != NULL)
1040		*ifcp = if_clone_findifc(ifp);
1041
1042	if_down(ifp);
1043
1044#ifdef VIMAGE
1045	/*
1046	 * On VNET shutdown abort here as the stack teardown will do all
1047	 * the work top-down for us.
1048	 */
1049	if (shutdown) {
1050		/*
1051		 * In case of a vmove we are done here without error.
1052		 * If we would signal an error it would lead to the same
1053		 * abort as if we did not find the ifnet anymore.
1054		 * if_detach() calls us in void context and does not care
1055		 * about an early abort notification, so life is splendid :)
1056		 */
1057		goto finish_vnet_shutdown;
1058	}
1059#endif
1060
1061	/*
1062	 * At this point we are not tearing down a VNET and are either
1063	 * going to destroy or vmove the interface and have to cleanup
1064	 * accordingly.
1065	 */
1066
1067	/*
1068	 * Remove routes and flush queues.
1069	 */
1070#ifdef ALTQ
1071	if (ALTQ_IS_ENABLED(&ifp->if_snd))
1072		altq_disable(&ifp->if_snd);
1073	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1074		altq_detach(&ifp->if_snd);
1075#endif
1076
1077	if_purgeaddrs(ifp);
1078
1079#ifdef INET
1080	in_ifdetach(ifp);
1081#endif
1082
1083#ifdef INET6
1084	/*
1085	 * Remove all IPv6 kernel structs related to ifp.  This should be done
1086	 * before removing routing entries below, since IPv6 interface direct
1087	 * routes are expected to be removed by the IPv6-specific kernel API.
1088	 * Otherwise, the kernel will detect some inconsistency and bark it.
1089	 */
1090	in6_ifdetach(ifp);
1091#endif
1092	if_purgemaddrs(ifp);
1093
1094	/* Announce that the interface is gone. */
1095	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1096	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1097	if (IS_DEFAULT_VNET(curvnet))
1098		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
1099
1100	if (!vmove) {
1101		/*
1102		 * Prevent further calls into the device driver via ifnet.
1103		 */
1104		if_dead(ifp);
1105
1106		/*
1107		 * Remove link ifaddr pointer and maybe decrement if_index.
1108		 * Clean up all addresses.
1109		 */
1110		free(ifp->if_hw_addr, M_IFADDR);
1111		ifp->if_hw_addr = NULL;
1112		ifp->if_addr = NULL;
1113
1114		/* We can now free link ifaddr. */
1115		IF_ADDR_WLOCK(ifp);
1116		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
1117			ifa = TAILQ_FIRST(&ifp->if_addrhead);
1118			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
1119			IF_ADDR_WUNLOCK(ifp);
1120			ifa_free(ifa);
1121		} else
1122			IF_ADDR_WUNLOCK(ifp);
1123	}
1124
1125	rt_flushifroutes(ifp);
1126
1127#ifdef VIMAGE
1128finish_vnet_shutdown:
1129#endif
1130	/*
1131	 * We cannot hold the lock over dom_ifdetach calls as they might
1132	 * sleep, for example trying to drain a callout, thus open up the
1133	 * theoretical race with re-attaching.
1134	 */
1135	IF_AFDATA_LOCK(ifp);
1136	i = ifp->if_afdata_initialized;
1137	ifp->if_afdata_initialized = 0;
1138	IF_AFDATA_UNLOCK(ifp);
1139	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
1140		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
1141			(*dp->dom_ifdetach)(ifp,
1142			    ifp->if_afdata[dp->dom_family]);
1143			ifp->if_afdata[dp->dom_family] = NULL;
1144		}
1145	}
1146
1147	return (0);
1148}
1149
1150#ifdef VIMAGE
1151/*
1152 * if_vmove() performs a limited version of if_detach() in current
1153 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
1154 * An attempt is made to shrink if_index in current vnet, find an
1155 * unused if_index in target vnet and calls if_grow() if necessary,
1156 * and finally find an unused if_xname for the target vnet.
1157 */
1158static void
1159if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
1160{
1161	struct if_clone *ifc;
1162	u_int bif_dlt, bif_hdrlen;
1163	int rc;
1164
1165 	/*
1166	 * if_detach_internal() will call the eventhandler to notify
1167	 * interface departure.  That will detach if_bpf.  We need to
1168	 * safe the dlt and hdrlen so we can re-attach it later.
1169	 */
1170	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
1171
1172	/*
1173	 * Detach from current vnet, but preserve LLADDR info, do not
1174	 * mark as dead etc. so that the ifnet can be reattached later.
1175	 * If we cannot find it, we lost the race to someone else.
1176	 */
1177	rc = if_detach_internal(ifp, 1, &ifc);
1178	if (rc != 0)
1179		return;
1180
1181	/*
1182	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
1183	 * the if_index for that vnet if possible.
1184	 *
1185	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
1186	 * or we'd lock on one vnet and unlock on another.
1187	 */
1188	IFNET_WLOCK();
1189	ifindex_free_locked(ifp->if_index);
1190	IFNET_WUNLOCK();
1191
1192	/*
1193	 * Perform interface-specific reassignment tasks, if provided by
1194	 * the driver.
1195	 */
1196	if (ifp->if_reassign != NULL)
1197		ifp->if_reassign(ifp, new_vnet, NULL);
1198
1199	/*
1200	 * Switch to the context of the target vnet.
1201	 */
1202	CURVNET_SET_QUIET(new_vnet);
1203
1204	IFNET_WLOCK();
1205	ifp->if_index = ifindex_alloc();
1206	ifnet_setbyindex_locked(ifp->if_index, ifp);
1207	IFNET_WUNLOCK();
1208
1209	if_attach_internal(ifp, 1, ifc);
1210
1211	if (ifp->if_bpf == NULL)
1212		bpfattach(ifp, bif_dlt, bif_hdrlen);
1213
1214	CURVNET_RESTORE();
1215}
1216
1217/*
1218 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1219 */
1220static int
1221if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1222{
1223	struct prison *pr;
1224	struct ifnet *difp;
1225	int shutdown;
1226
1227	/* Try to find the prison within our visibility. */
1228	sx_slock(&allprison_lock);
1229	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1230	sx_sunlock(&allprison_lock);
1231	if (pr == NULL)
1232		return (ENXIO);
1233	prison_hold_locked(pr);
1234	mtx_unlock(&pr->pr_mtx);
1235
1236	/* Do not try to move the iface from and to the same prison. */
1237	if (pr->pr_vnet == ifp->if_vnet) {
1238		prison_free(pr);
1239		return (EEXIST);
1240	}
1241
1242	/* Make sure the named iface does not exists in the dst. prison/vnet. */
1243	/* XXX Lock interfaces to avoid races. */
1244	CURVNET_SET_QUIET(pr->pr_vnet);
1245	difp = ifunit(ifname);
1246	if (difp != NULL) {
1247		CURVNET_RESTORE();
1248		prison_free(pr);
1249		return (EEXIST);
1250	}
1251
1252	/* Make sure the VNET is stable. */
1253	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1254		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1255	if (shutdown) {
1256		CURVNET_RESTORE();
1257		prison_free(pr);
1258		return (EBUSY);
1259	}
1260	CURVNET_RESTORE();
1261
1262	/* Move the interface into the child jail/vnet. */
1263	if_vmove(ifp, pr->pr_vnet);
1264
1265	/* Report the new if_xname back to the userland. */
1266	sprintf(ifname, "%s", ifp->if_xname);
1267
1268	prison_free(pr);
1269	return (0);
1270}
1271
1272static int
1273if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1274{
1275	struct prison *pr;
1276	struct vnet *vnet_dst;
1277	struct ifnet *ifp;
1278 	int shutdown;
1279
1280	/* Try to find the prison within our visibility. */
1281	sx_slock(&allprison_lock);
1282	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1283	sx_sunlock(&allprison_lock);
1284	if (pr == NULL)
1285		return (ENXIO);
1286	prison_hold_locked(pr);
1287	mtx_unlock(&pr->pr_mtx);
1288
1289	/* Make sure the named iface exists in the source prison/vnet. */
1290	CURVNET_SET(pr->pr_vnet);
1291	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1292	if (ifp == NULL) {
1293		CURVNET_RESTORE();
1294		prison_free(pr);
1295		return (ENXIO);
1296	}
1297
1298	/* Do not try to move the iface from and to the same prison. */
1299	vnet_dst = TD_TO_VNET(td);
1300	if (vnet_dst == ifp->if_vnet) {
1301		CURVNET_RESTORE();
1302		prison_free(pr);
1303		return (EEXIST);
1304	}
1305
1306	/* Make sure the VNET is stable. */
1307	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1308		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1309	if (shutdown) {
1310		CURVNET_RESTORE();
1311		prison_free(pr);
1312		return (EBUSY);
1313	}
1314
1315	/* Get interface back from child jail/vnet. */
1316	if_vmove(ifp, vnet_dst);
1317	CURVNET_RESTORE();
1318
1319	/* Report the new if_xname back to the userland. */
1320	sprintf(ifname, "%s", ifp->if_xname);
1321
1322	prison_free(pr);
1323	return (0);
1324}
1325#endif /* VIMAGE */
1326
1327/*
1328 * Add a group to an interface
1329 */
1330int
1331if_addgroup(struct ifnet *ifp, const char *groupname)
1332{
1333	struct ifg_list		*ifgl;
1334	struct ifg_group	*ifg = NULL;
1335	struct ifg_member	*ifgm;
1336	int 			 new = 0;
1337
1338	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1339	    groupname[strlen(groupname) - 1] <= '9')
1340		return (EINVAL);
1341
1342	IFNET_WLOCK();
1343	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1344		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1345			IFNET_WUNLOCK();
1346			return (EEXIST);
1347		}
1348
1349	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1350	    M_NOWAIT)) == NULL) {
1351	    	IFNET_WUNLOCK();
1352		return (ENOMEM);
1353	}
1354
1355	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1356	    M_TEMP, M_NOWAIT)) == NULL) {
1357		free(ifgl, M_TEMP);
1358		IFNET_WUNLOCK();
1359		return (ENOMEM);
1360	}
1361
1362	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1363		if (!strcmp(ifg->ifg_group, groupname))
1364			break;
1365
1366	if (ifg == NULL) {
1367		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1368		    M_TEMP, M_NOWAIT)) == NULL) {
1369			free(ifgl, M_TEMP);
1370			free(ifgm, M_TEMP);
1371			IFNET_WUNLOCK();
1372			return (ENOMEM);
1373		}
1374		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1375		ifg->ifg_refcnt = 0;
1376		TAILQ_INIT(&ifg->ifg_members);
1377		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1378		new = 1;
1379	}
1380
1381	ifg->ifg_refcnt++;
1382	ifgl->ifgl_group = ifg;
1383	ifgm->ifgm_ifp = ifp;
1384
1385	IF_ADDR_WLOCK(ifp);
1386	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1387	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1388	IF_ADDR_WUNLOCK(ifp);
1389
1390	IFNET_WUNLOCK();
1391
1392	if (new)
1393		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1394	EVENTHANDLER_INVOKE(group_change_event, groupname);
1395
1396	return (0);
1397}
1398
1399/*
1400 * Remove a group from an interface
1401 */
1402int
1403if_delgroup(struct ifnet *ifp, const char *groupname)
1404{
1405	struct ifg_list		*ifgl;
1406	struct ifg_member	*ifgm;
1407
1408	IFNET_WLOCK();
1409	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1410		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1411			break;
1412	if (ifgl == NULL) {
1413		IFNET_WUNLOCK();
1414		return (ENOENT);
1415	}
1416
1417	IF_ADDR_WLOCK(ifp);
1418	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1419	IF_ADDR_WUNLOCK(ifp);
1420
1421	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1422		if (ifgm->ifgm_ifp == ifp)
1423			break;
1424
1425	if (ifgm != NULL) {
1426		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1427		free(ifgm, M_TEMP);
1428	}
1429
1430	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1431		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1432		IFNET_WUNLOCK();
1433		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1434		free(ifgl->ifgl_group, M_TEMP);
1435	} else
1436		IFNET_WUNLOCK();
1437
1438	free(ifgl, M_TEMP);
1439
1440	EVENTHANDLER_INVOKE(group_change_event, groupname);
1441
1442	return (0);
1443}
1444
1445/*
1446 * Remove an interface from all groups
1447 */
1448static void
1449if_delgroups(struct ifnet *ifp)
1450{
1451	struct ifg_list		*ifgl;
1452	struct ifg_member	*ifgm;
1453	char groupname[IFNAMSIZ];
1454
1455	IFNET_WLOCK();
1456	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1457		ifgl = TAILQ_FIRST(&ifp->if_groups);
1458
1459		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1460
1461		IF_ADDR_WLOCK(ifp);
1462		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1463		IF_ADDR_WUNLOCK(ifp);
1464
1465		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1466			if (ifgm->ifgm_ifp == ifp)
1467				break;
1468
1469		if (ifgm != NULL) {
1470			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1471			    ifgm_next);
1472			free(ifgm, M_TEMP);
1473		}
1474
1475		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1476			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1477			IFNET_WUNLOCK();
1478			EVENTHANDLER_INVOKE(group_detach_event,
1479			    ifgl->ifgl_group);
1480			free(ifgl->ifgl_group, M_TEMP);
1481		} else
1482			IFNET_WUNLOCK();
1483
1484		free(ifgl, M_TEMP);
1485
1486		EVENTHANDLER_INVOKE(group_change_event, groupname);
1487
1488		IFNET_WLOCK();
1489	}
1490	IFNET_WUNLOCK();
1491}
1492
1493/*
1494 * Stores all groups from an interface in memory pointed
1495 * to by data
1496 */
1497static int
1498if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
1499{
1500	int			 len, error;
1501	struct ifg_list		*ifgl;
1502	struct ifg_req		 ifgrq, *ifgp;
1503	struct ifgroupreq	*ifgr = data;
1504
1505	if (ifgr->ifgr_len == 0) {
1506		IF_ADDR_RLOCK(ifp);
1507		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1508			ifgr->ifgr_len += sizeof(struct ifg_req);
1509		IF_ADDR_RUNLOCK(ifp);
1510		return (0);
1511	}
1512
1513	len = ifgr->ifgr_len;
1514	ifgp = ifgr->ifgr_groups;
1515	/* XXX: wire */
1516	IF_ADDR_RLOCK(ifp);
1517	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1518		if (len < sizeof(ifgrq)) {
1519			IF_ADDR_RUNLOCK(ifp);
1520			return (EINVAL);
1521		}
1522		bzero(&ifgrq, sizeof ifgrq);
1523		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1524		    sizeof(ifgrq.ifgrq_group));
1525		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1526		    	IF_ADDR_RUNLOCK(ifp);
1527			return (error);
1528		}
1529		len -= sizeof(ifgrq);
1530		ifgp++;
1531	}
1532	IF_ADDR_RUNLOCK(ifp);
1533
1534	return (0);
1535}
1536
1537/*
1538 * Stores all members of a group in memory pointed to by data
1539 */
1540static int
1541if_getgroupmembers(struct ifgroupreq *data)
1542{
1543	struct ifgroupreq	*ifgr = data;
1544	struct ifg_group	*ifg;
1545	struct ifg_member	*ifgm;
1546	struct ifg_req		 ifgrq, *ifgp;
1547	int			 len, error;
1548
1549	IFNET_RLOCK();
1550	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1551		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1552			break;
1553	if (ifg == NULL) {
1554		IFNET_RUNLOCK();
1555		return (ENOENT);
1556	}
1557
1558	if (ifgr->ifgr_len == 0) {
1559		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1560			ifgr->ifgr_len += sizeof(ifgrq);
1561		IFNET_RUNLOCK();
1562		return (0);
1563	}
1564
1565	len = ifgr->ifgr_len;
1566	ifgp = ifgr->ifgr_groups;
1567	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1568		if (len < sizeof(ifgrq)) {
1569			IFNET_RUNLOCK();
1570			return (EINVAL);
1571		}
1572		bzero(&ifgrq, sizeof ifgrq);
1573		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1574		    sizeof(ifgrq.ifgrq_member));
1575		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1576			IFNET_RUNLOCK();
1577			return (error);
1578		}
1579		len -= sizeof(ifgrq);
1580		ifgp++;
1581	}
1582	IFNET_RUNLOCK();
1583
1584	return (0);
1585}
1586
1587/*
1588 * Return counter values from counter(9)s stored in ifnet.
1589 */
1590uint64_t
1591if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
1592{
1593
1594	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1595
1596	return (counter_u64_fetch(ifp->if_counters[cnt]));
1597}
1598
1599/*
1600 * Increase an ifnet counter. Usually used for counters shared
1601 * between the stack and a driver, but function supports them all.
1602 */
1603void
1604if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
1605{
1606
1607	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1608
1609	counter_u64_add(ifp->if_counters[cnt], inc);
1610}
1611
1612/*
1613 * Copy data from ifnet to userland API structure if_data.
1614 */
1615void
1616if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1617{
1618
1619	ifd->ifi_type = ifp->if_type;
1620	ifd->ifi_physical = 0;
1621	ifd->ifi_addrlen = ifp->if_addrlen;
1622	ifd->ifi_hdrlen = ifp->if_hdrlen;
1623	ifd->ifi_link_state = ifp->if_link_state;
1624	ifd->ifi_vhid = 0;
1625	ifd->ifi_datalen = sizeof(struct if_data);
1626	ifd->ifi_mtu = ifp->if_mtu;
1627	ifd->ifi_metric = ifp->if_metric;
1628	ifd->ifi_baudrate = ifp->if_baudrate;
1629	ifd->ifi_hwassist = ifp->if_hwassist;
1630	ifd->ifi_epoch = ifp->if_epoch;
1631	ifd->ifi_lastchange = ifp->if_lastchange;
1632
1633	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1634	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1635	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1636	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1637	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1638	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1639	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1640	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1641	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1642	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1643	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1644	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1645}
1646
/*
 * Function wrappers around the struct ifnet address list locking macros.
 * Kernel modules call these instead of using the macros directly, so that
 * they do not encode programming interface or binary interface
 * assumptions that may be violated when the kernel-internal locking
 * approach changes.
 */

/* Take the read lock on ifp's address list. */
void
if_addr_rlock(struct ifnet *ifp)
{

	IF_ADDR_RLOCK(ifp);
}
1659
/* Release the read lock on ifp's address list. */
void
if_addr_runlock(struct ifnet *ifp)
{

	IF_ADDR_RUNLOCK(ifp);
}
1666
1667void
1668if_maddr_rlock(if_t ifp)
1669{
1670
1671	IF_ADDR_RLOCK((struct ifnet *)ifp);
1672}
1673
1674void
1675if_maddr_runlock(if_t ifp)
1676{
1677
1678	IF_ADDR_RUNLOCK((struct ifnet *)ifp);
1679}
1680
1681/*
1682 * Initialization, destruction and refcounting functions for ifaddrs.
1683 */
1684struct ifaddr *
1685ifa_alloc(size_t size, int flags)
1686{
1687	struct ifaddr *ifa;
1688
1689	KASSERT(size >= sizeof(struct ifaddr),
1690	    ("%s: invalid size %zu", __func__, size));
1691
1692	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1693	if (ifa == NULL)
1694		return (NULL);
1695
1696	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1697		goto fail;
1698	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1699		goto fail;
1700	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1701		goto fail;
1702	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1703		goto fail;
1704
1705	refcount_init(&ifa->ifa_refcnt, 1);
1706
1707	return (ifa);
1708
1709fail:
1710	/* free(NULL) is okay */
1711	counter_u64_free(ifa->ifa_opackets);
1712	counter_u64_free(ifa->ifa_ipackets);
1713	counter_u64_free(ifa->ifa_obytes);
1714	counter_u64_free(ifa->ifa_ibytes);
1715	free(ifa, M_IFADDR);
1716
1717	return (NULL);
1718}
1719
1720void
1721ifa_ref(struct ifaddr *ifa)
1722{
1723
1724	refcount_acquire(&ifa->ifa_refcnt);
1725}
1726
1727void
1728ifa_free(struct ifaddr *ifa)
1729{
1730
1731	if (refcount_release(&ifa->ifa_refcnt)) {
1732		counter_u64_free(ifa->ifa_opackets);
1733		counter_u64_free(ifa->ifa_ipackets);
1734		counter_u64_free(ifa->ifa_obytes);
1735		counter_u64_free(ifa->ifa_ibytes);
1736		free(ifa, M_IFADDR);
1737	}
1738}
1739
1740static int
1741ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
1742    struct sockaddr *ia)
1743{
1744	int error;
1745	struct rt_addrinfo info;
1746	struct sockaddr_dl null_sdl;
1747	struct ifnet *ifp;
1748
1749	ifp = ifa->ifa_ifp;
1750
1751	bzero(&info, sizeof(info));
1752	if (cmd != RTM_DELETE)
1753		info.rti_ifp = V_loif;
1754	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
1755	info.rti_info[RTAX_DST] = ia;
1756	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
1757	link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
1758
1759	error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
1760
1761	if (error != 0)
1762		log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
1763		    __func__, otype, if_name(ifp), error);
1764
1765	return (error);
1766}
1767
1768int
1769ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1770{
1771
1772	return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
1773}
1774
1775int
1776ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1777{
1778
1779	return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
1780}
1781
1782int
1783ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1784{
1785
1786	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
1787}
1788
/*
 * XXX: sockaddr_dl has a deeper structure than the sockaddr structs used
 * for other address families, so it needs a comparison of its own.  Two
 * link-level addresses compare equal when their sdl_len fields match and
 * the first sdl_alen bytes (length taken from a1) found at CLLADDR() are
 * identical.
 */

#define	sa_dl_equal(a1, a2)						\
	(((const struct sockaddr_dl *)(a1))->sdl_len ==			\
	    ((const struct sockaddr_dl *)(a2))->sdl_len &&		\
	    bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	    CLLADDR((const struct sockaddr_dl *)(a2)),			\
	    ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)
1801
1802/*
1803 * Locate an interface based on a complete address.
1804 */
1805/*ARGSUSED*/
1806static struct ifaddr *
1807ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
1808{
1809	struct ifnet *ifp;
1810	struct ifaddr *ifa;
1811
1812	IFNET_RLOCK_NOSLEEP();
1813	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1814		IF_ADDR_RLOCK(ifp);
1815		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1816			if (ifa->ifa_addr->sa_family != addr->sa_family)
1817				continue;
1818			if (sa_equal(addr, ifa->ifa_addr)) {
1819				if (getref)
1820					ifa_ref(ifa);
1821				IF_ADDR_RUNLOCK(ifp);
1822				goto done;
1823			}
1824			/* IP6 doesn't have broadcast */
1825			if ((ifp->if_flags & IFF_BROADCAST) &&
1826			    ifa->ifa_broadaddr &&
1827			    ifa->ifa_broadaddr->sa_len != 0 &&
1828			    sa_equal(ifa->ifa_broadaddr, addr)) {
1829				if (getref)
1830					ifa_ref(ifa);
1831				IF_ADDR_RUNLOCK(ifp);
1832				goto done;
1833			}
1834		}
1835		IF_ADDR_RUNLOCK(ifp);
1836	}
1837	ifa = NULL;
1838done:
1839	IFNET_RUNLOCK_NOSLEEP();
1840	return (ifa);
1841}
1842
/*
 * Find the interface address matching addr; a reference is taken on the
 * returned ifaddr.  Returns NULL if there is no match.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr)
{
	struct ifaddr *ifa;

	ifa = ifa_ifwithaddr_internal(addr, 1);
	return (ifa);
}
1849
1850int
1851ifa_ifwithaddr_check(const struct sockaddr *addr)
1852{
1853
1854	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
1855}
1856
1857/*
1858 * Locate an interface based on the broadcast address.
1859 */
1860/* ARGSUSED */
1861struct ifaddr *
1862ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1863{
1864	struct ifnet *ifp;
1865	struct ifaddr *ifa;
1866
1867	IFNET_RLOCK_NOSLEEP();
1868	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1869		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1870			continue;
1871		IF_ADDR_RLOCK(ifp);
1872		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1873			if (ifa->ifa_addr->sa_family != addr->sa_family)
1874				continue;
1875			if ((ifp->if_flags & IFF_BROADCAST) &&
1876			    ifa->ifa_broadaddr &&
1877			    ifa->ifa_broadaddr->sa_len != 0 &&
1878			    sa_equal(ifa->ifa_broadaddr, addr)) {
1879				ifa_ref(ifa);
1880				IF_ADDR_RUNLOCK(ifp);
1881				goto done;
1882			}
1883		}
1884		IF_ADDR_RUNLOCK(ifp);
1885	}
1886	ifa = NULL;
1887done:
1888	IFNET_RUNLOCK_NOSLEEP();
1889	return (ifa);
1890}
1891
1892/*
1893 * Locate the point to point interface with a given destination address.
1894 */
1895/*ARGSUSED*/
1896struct ifaddr *
1897ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1898{
1899	struct ifnet *ifp;
1900	struct ifaddr *ifa;
1901
1902	IFNET_RLOCK_NOSLEEP();
1903	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1904		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1905			continue;
1906		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1907			continue;
1908		IF_ADDR_RLOCK(ifp);
1909		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1910			if (ifa->ifa_addr->sa_family != addr->sa_family)
1911				continue;
1912			if (ifa->ifa_dstaddr != NULL &&
1913			    sa_equal(addr, ifa->ifa_dstaddr)) {
1914				ifa_ref(ifa);
1915				IF_ADDR_RUNLOCK(ifp);
1916				goto done;
1917			}
1918		}
1919		IF_ADDR_RUNLOCK(ifp);
1920	}
1921	ifa = NULL;
1922done:
1923	IFNET_RUNLOCK_NOSLEEP();
1924	return (ifa);
1925}
1926
1927/*
1928 * Find an interface on a specific network.  If many, choice
1929 * is most specific found.
1930 */
1931struct ifaddr *
1932ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
1933{
1934	struct ifnet *ifp;
1935	struct ifaddr *ifa;
1936	struct ifaddr *ifa_maybe = NULL;
1937	u_int af = addr->sa_family;
1938	const char *addr_data = addr->sa_data, *cplim;
1939
1940	/*
1941	 * AF_LINK addresses can be looked up directly by their index number,
1942	 * so do that if we can.
1943	 */
1944	if (af == AF_LINK) {
1945	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
1946	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
1947		return (ifaddr_byindex(sdl->sdl_index));
1948	}
1949
1950	/*
1951	 * Scan though each interface, looking for ones that have addresses
1952	 * in this address family and the requested fib.  Maintain a reference
1953	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
1954	 * kept it stable when we move onto the next interface.
1955	 */
1956	IFNET_RLOCK_NOSLEEP();
1957	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1958		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1959			continue;
1960		IF_ADDR_RLOCK(ifp);
1961		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1962			const char *cp, *cp2, *cp3;
1963
1964			if (ifa->ifa_addr->sa_family != af)
1965next:				continue;
1966			if (af == AF_INET &&
1967			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
1968				/*
1969				 * This is a bit broken as it doesn't
1970				 * take into account that the remote end may
1971				 * be a single node in the network we are
1972				 * looking for.
1973				 * The trouble is that we don't know the
1974				 * netmask for the remote end.
1975				 */
1976				if (ifa->ifa_dstaddr != NULL &&
1977				    sa_equal(addr, ifa->ifa_dstaddr)) {
1978					ifa_ref(ifa);
1979					IF_ADDR_RUNLOCK(ifp);
1980					goto done;
1981				}
1982			} else {
1983				/*
1984				 * Scan all the bits in the ifa's address.
1985				 * If a bit dissagrees with what we are
1986				 * looking for, mask it with the netmask
1987				 * to see if it really matters.
1988				 * (A byte at a time)
1989				 */
1990				if (ifa->ifa_netmask == 0)
1991					continue;
1992				cp = addr_data;
1993				cp2 = ifa->ifa_addr->sa_data;
1994				cp3 = ifa->ifa_netmask->sa_data;
1995				cplim = ifa->ifa_netmask->sa_len
1996					+ (char *)ifa->ifa_netmask;
1997				while (cp3 < cplim)
1998					if ((*cp++ ^ *cp2++) & *cp3++)
1999						goto next; /* next address! */
2000				/*
2001				 * If the netmask of what we just found
2002				 * is more specific than what we had before
2003				 * (if we had one), or if the virtual status
2004				 * of new prefix is better than of the old one,
2005				 * then remember the new one before continuing
2006				 * to search for an even better one.
2007				 */
2008				if (ifa_maybe == NULL ||
2009				    ifa_preferred(ifa_maybe, ifa) ||
2010				    rn_refines((caddr_t)ifa->ifa_netmask,
2011				    (caddr_t)ifa_maybe->ifa_netmask)) {
2012					if (ifa_maybe != NULL)
2013						ifa_free(ifa_maybe);
2014					ifa_maybe = ifa;
2015					ifa_ref(ifa_maybe);
2016				}
2017			}
2018		}
2019		IF_ADDR_RUNLOCK(ifp);
2020	}
2021	ifa = ifa_maybe;
2022	ifa_maybe = NULL;
2023done:
2024	IFNET_RUNLOCK_NOSLEEP();
2025	if (ifa_maybe != NULL)
2026		ifa_free(ifa_maybe);
2027	return (ifa);
2028}
2029
2030/*
2031 * Find an interface address specific to an interface best matching
2032 * a given address.
2033 */
2034struct ifaddr *
2035ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2036{
2037	struct ifaddr *ifa;
2038	const char *cp, *cp2, *cp3;
2039	char *cplim;
2040	struct ifaddr *ifa_maybe = NULL;
2041	u_int af = addr->sa_family;
2042
2043	if (af >= AF_MAX)
2044		return (NULL);
2045	IF_ADDR_RLOCK(ifp);
2046	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2047		if (ifa->ifa_addr->sa_family != af)
2048			continue;
2049		if (ifa_maybe == NULL)
2050			ifa_maybe = ifa;
2051		if (ifa->ifa_netmask == 0) {
2052			if (sa_equal(addr, ifa->ifa_addr) ||
2053			    (ifa->ifa_dstaddr &&
2054			    sa_equal(addr, ifa->ifa_dstaddr)))
2055				goto done;
2056			continue;
2057		}
2058		if (ifp->if_flags & IFF_POINTOPOINT) {
2059			if (sa_equal(addr, ifa->ifa_dstaddr))
2060				goto done;
2061		} else {
2062			cp = addr->sa_data;
2063			cp2 = ifa->ifa_addr->sa_data;
2064			cp3 = ifa->ifa_netmask->sa_data;
2065			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2066			for (; cp3 < cplim; cp3++)
2067				if ((*cp++ ^ *cp2++) & *cp3)
2068					break;
2069			if (cp3 == cplim)
2070				goto done;
2071		}
2072	}
2073	ifa = ifa_maybe;
2074done:
2075	if (ifa != NULL)
2076		ifa_ref(ifa);
2077	IF_ADDR_RUNLOCK(ifp);
2078	return (ifa);
2079}
2080
2081/*
2082 * See whether new ifa is better than current one:
2083 * 1) A non-virtual one is preferred over virtual.
2084 * 2) A virtual in master state preferred over any other state.
2085 *
2086 * Used in several address selecting functions.
2087 */
2088int
2089ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2090{
2091
2092	return (cur->ifa_carp && (!next->ifa_carp ||
2093	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2094}
2095
2096#include <net/if_llatbl.h>
2097
2098/*
2099 * Default action when installing a route with a Link Level gateway.
2100 * Lookup an appropriate real ifa to point to.
2101 * This should be moved to /sys/net/link.c eventually.
2102 */
2103static void
2104link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
2105{
2106	struct ifaddr *ifa, *oifa;
2107	struct sockaddr *dst;
2108	struct ifnet *ifp;
2109
2110	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
2111	    ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
2112		return;
2113	ifa = ifaof_ifpforaddr(dst, ifp);
2114	if (ifa) {
2115		oifa = rt->rt_ifa;
2116		rt->rt_ifa = ifa;
2117		ifa_free(oifa);
2118		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2119			ifa->ifa_rtrequest(cmd, rt, info);
2120	}
2121}
2122
2123struct sockaddr_dl *
2124link_alloc_sdl(size_t size, int flags)
2125{
2126
2127	return (malloc(size, M_TEMP, flags));
2128}
2129
2130void
2131link_free_sdl(struct sockaddr *sa)
2132{
2133	free(sa, M_TEMP);
2134}
2135
2136/*
2137 * Fills in given sdl with interface basic info.
2138 * Returns pointer to filled sdl.
2139 */
2140struct sockaddr_dl *
2141link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2142{
2143	struct sockaddr_dl *sdl;
2144
2145	sdl = (struct sockaddr_dl *)paddr;
2146	memset(sdl, 0, sizeof(struct sockaddr_dl));
2147	sdl->sdl_len = sizeof(struct sockaddr_dl);
2148	sdl->sdl_family = AF_LINK;
2149	sdl->sdl_index = ifp->if_index;
2150	sdl->sdl_type = iftype;
2151
2152	return (sdl);
2153}
2154
2155/*
2156 * Mark an interface down and notify protocols of
2157 * the transition.
2158 */
2159static void
2160if_unroute(struct ifnet *ifp, int flag, int fam)
2161{
2162	struct ifaddr *ifa;
2163
2164	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2165
2166	ifp->if_flags &= ~flag;
2167	getmicrotime(&ifp->if_lastchange);
2168	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2169		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2170			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2171	ifp->if_qflush(ifp);
2172
2173	if (ifp->if_carp)
2174		(*carp_linkstate_p)(ifp);
2175	rt_ifmsg(ifp);
2176}
2177
2178/*
2179 * Mark an interface up and notify protocols of
2180 * the transition.
2181 */
2182static void
2183if_route(struct ifnet *ifp, int flag, int fam)
2184{
2185	struct ifaddr *ifa;
2186
2187	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
2188
2189	ifp->if_flags |= flag;
2190	getmicrotime(&ifp->if_lastchange);
2191	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2192		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2193			pfctlinput(PRC_IFUP, ifa->ifa_addr);
2194	if (ifp->if_carp)
2195		(*carp_linkstate_p)(ifp);
2196	rt_ifmsg(ifp);
2197#ifdef INET6
2198	in6_if_up(ifp);
2199#endif
2200}
2201
/*
 * Function pointer hooks for the vlan code.  They are defined here
 * without an initializer, so they start out NULL.
 */
/* XXX: private from if_vlan */
void		(*vlan_link_state_p)(struct ifnet *);
/* XXX: private from if_vlan */
void		(*vlan_trunk_cap_p)(struct ifnet *);
struct ifnet	*(*vlan_trunkdev_p)(struct ifnet *);
struct ifnet	*(*vlan_devat_p)(struct ifnet *, uint16_t);
int		(*vlan_tag_p)(struct ifnet *, uint16_t *);
int		(*vlan_setcookie_p)(struct ifnet *, void *);
void		*(*vlan_cookie_p)(struct ifnet *);
2209
2210/*
2211 * Handle a change in the interface link state. To avoid LORs
2212 * between driver lock and upper layer locks, as well as possible
2213 * recursions, we post event to taskqueue, and all job
2214 * is done in static do_link_state_change().
2215 */
2216void
2217if_link_state_change(struct ifnet *ifp, int link_state)
2218{
2219	/* Return if state hasn't changed. */
2220	if (ifp->if_link_state == link_state)
2221		return;
2222
2223	ifp->if_link_state = link_state;
2224
2225	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2226}
2227
2228static void
2229do_link_state_change(void *arg, int pending)
2230{
2231	struct ifnet *ifp = (struct ifnet *)arg;
2232	int link_state = ifp->if_link_state;
2233	CURVNET_SET(ifp->if_vnet);
2234
2235	/* Notify that the link state has changed. */
2236	rt_ifmsg(ifp);
2237	if (ifp->if_vlantrunk != NULL)
2238		(*vlan_link_state_p)(ifp);
2239
2240	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2241	    ifp->if_l2com != NULL)
2242		(*ng_ether_link_state_p)(ifp, link_state);
2243	if (ifp->if_carp)
2244		(*carp_linkstate_p)(ifp);
2245	if (ifp->if_bridge)
2246		(*bridge_linkstate_p)(ifp);
2247	if (ifp->if_lagg)
2248		(*lagg_linkstate_p)(ifp, link_state);
2249
2250	if (IS_DEFAULT_VNET(curvnet))
2251		devctl_notify("IFNET", ifp->if_xname,
2252		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2253		    NULL);
2254	if (pending > 1)
2255		if_printf(ifp, "%d link states coalesced\n", pending);
2256	if (log_link_state_change)
2257		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
2258		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2259	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2260	CURVNET_RESTORE();
2261}
2262
2263/*
2264 * Mark an interface down and notify protocols of
2265 * the transition.
2266 */
2267void
2268if_down(struct ifnet *ifp)
2269{
2270
2271	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
2272	if_unroute(ifp, IFF_UP, AF_UNSPEC);
2273}
2274
2275/*
2276 * Mark an interface up and notify protocols of
2277 * the transition.
2278 */
2279void
2280if_up(struct ifnet *ifp)
2281{
2282
2283	if_route(ifp, IFF_UP, AF_UNSPEC);
2284	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
2285}
2286
2287/*
2288 * Flush an interface queue.
2289 */
2290void
2291if_qflush(struct ifnet *ifp)
2292{
2293	struct mbuf *m, *n;
2294	struct ifaltq *ifq;
2295
2296	ifq = &ifp->if_snd;
2297	IFQ_LOCK(ifq);
2298#ifdef ALTQ
2299	if (ALTQ_IS_ENABLED(ifq))
2300		ALTQ_PURGE(ifq);
2301#endif
2302	n = ifq->ifq_head;
2303	while ((m = n) != NULL) {
2304		n = m->m_nextpkt;
2305		m_freem(m);
2306	}
2307	ifq->ifq_head = 0;
2308	ifq->ifq_tail = 0;
2309	ifq->ifq_len = 0;
2310	IFQ_UNLOCK(ifq);
2311}
2312
2313/*
2314 * Map interface name to interface structure pointer, with or without
2315 * returning a reference.
2316 */
2317struct ifnet *
2318ifunit_ref(const char *name)
2319{
2320	struct ifnet *ifp;
2321
2322	IFNET_RLOCK_NOSLEEP();
2323	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2324		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2325		    !(ifp->if_flags & IFF_DYING))
2326			break;
2327	}
2328	if (ifp != NULL)
2329		if_ref(ifp);
2330	IFNET_RUNLOCK_NOSLEEP();
2331	return (ifp);
2332}
2333
2334struct ifnet *
2335ifunit(const char *name)
2336{
2337	struct ifnet *ifp;
2338
2339	IFNET_RLOCK_NOSLEEP();
2340	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2341		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2342			break;
2343	}
2344	IFNET_RUNLOCK_NOSLEEP();
2345	return (ifp);
2346}
2347
2348static void *
2349ifr_buffer_get_buffer(void *data)
2350{
2351	union ifreq_union *ifrup;
2352
2353	ifrup = data;
2354#ifdef COMPAT_FREEBSD32
2355	if (SV_CURPROC_FLAG(SV_ILP32))
2356		return ((void *)(uintptr_t)
2357		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
2358#endif
2359	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
2360}
2361
2362static void
2363ifr_buffer_set_buffer_null(void *data)
2364{
2365	union ifreq_union *ifrup;
2366
2367	ifrup = data;
2368#ifdef COMPAT_FREEBSD32
2369	if (SV_CURPROC_FLAG(SV_ILP32))
2370		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
2371	else
2372#endif
2373		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
2374}
2375
2376static size_t
2377ifr_buffer_get_length(void *data)
2378{
2379	union ifreq_union *ifrup;
2380
2381	ifrup = data;
2382#ifdef COMPAT_FREEBSD32
2383	if (SV_CURPROC_FLAG(SV_ILP32))
2384		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
2385#endif
2386	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
2387}
2388
2389static void
2390ifr_buffer_set_length(void *data, size_t len)
2391{
2392	union ifreq_union *ifrup;
2393
2394	ifrup = data;
2395#ifdef COMPAT_FREEBSD32
2396	if (SV_CURPROC_FLAG(SV_ILP32))
2397		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
2398	else
2399#endif
2400		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
2401}
2402
2403void *
2404ifr_data_get_ptr(void *ifrp)
2405{
2406	union ifreq_union *ifrup;
2407
2408	ifrup = ifrp;
2409#ifdef COMPAT_FREEBSD32
2410	if (SV_CURPROC_FLAG(SV_ILP32))
2411		return ((void *)(uintptr_t)
2412		    ifrup->ifr32.ifr_ifru.ifru_data);
2413#endif
2414		return (ifrup->ifr.ifr_ifru.ifru_data);
2415}
2416
2417/*
2418 * Hardware specific interface ioctls.
 *
 * Dispatches on 'cmd' for the interface 'ifp'.  'data' is interpreted
 * as a struct ifreq (or, for the group requests, a struct ifgroupreq).
 * Requests that modify the interface are preceded by a priv_check()
 * on 'td'.  Returns 0 or an errno value; a command this function does
 * not recognise yields ENOIOCTL, which ifioctl() treats as "not
 * handled here" and passes on.
2419 */
2420static int
2421ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
2422{
2423	struct ifreq *ifr;
2424	int error = 0, do_ifup = 0;
2425	int new_flags, temp_flags;
2426	size_t namelen, onamelen;
2427	size_t descrlen;
2428	char *descrbuf, *odescrbuf;
2429	char new_name[IFNAMSIZ];
2430	struct ifaddr *ifa;
2431	struct sockaddr_dl *sdl;
2432
2433	ifr = (struct ifreq *)data;
2434	switch (cmd) {
2435	case SIOCGIFINDEX:
2436		ifr->ifr_index = ifp->if_index;
2437		break;
2438
2439	case SIOCGIFFLAGS:
		/*
		 * Stack-owned and driver-owned flags are reported as one
		 * value, split into a low and a high 16-bit half.
		 */
2440		temp_flags = ifp->if_flags | ifp->if_drv_flags;
2441		ifr->ifr_flags = temp_flags & 0xffff;
2442		ifr->ifr_flagshigh = temp_flags >> 16;
2443		break;
2444
2445	case SIOCGIFCAP:
2446		ifr->ifr_reqcap = ifp->if_capabilities;
2447		ifr->ifr_curcap = ifp->if_capenable;
2448		break;
2449
2450#ifdef MAC
2451	case SIOCGIFMAC:
2452		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2453		break;
2454#endif
2455
2456	case SIOCGIFMETRIC:
2457		ifr->ifr_metric = ifp->if_metric;
2458		break;
2459
2460	case SIOCGIFMTU:
2461		ifr->ifr_mtu = ifp->if_mtu;
2462		break;
2463
2464	case SIOCGIFPHYS:
2465		/* XXXGL: did this ever worked? */
2466		ifr->ifr_phys = 0;
2467		break;
2468
2469	case SIOCGIFDESCR:
		/*
		 * Copy the description out if the caller's buffer is
		 * large enough; otherwise null the caller's buffer
		 * pointer.  In both cases the required length, including
		 * the terminating nul, is stored back in the request.
		 * ENOMSG is returned when no description is set.
		 */
2470		error = 0;
2471		sx_slock(&ifdescr_sx);
2472		if (ifp->if_description == NULL)
2473			error = ENOMSG;
2474		else {
2475			/* space for terminating nul */
2476			descrlen = strlen(ifp->if_description) + 1;
2477			if (ifr_buffer_get_length(ifr) < descrlen)
2478				ifr_buffer_set_buffer_null(ifr);
2479			else
2480				error = copyout(ifp->if_description,
2481				    ifr_buffer_get_buffer(ifr), descrlen);
2482			ifr_buffer_set_length(ifr, descrlen);
2483		}
2484		sx_sunlock(&ifdescr_sx);
2485		break;
2486
2487	case SIOCSIFDESCR:
2488		error = priv_check(td, PRIV_NET_SETIFDESCR);
2489		if (error)
2490			return (error);
2491
2492		/*
2493		 * Copy only (length-1) bytes to make sure that
2494		 * if_description is always nul terminated.  The
2495		 * length parameter is supposed to count the
2496		 * terminating nul in.
2497		 */
		/*
		 * A length of zero leaves descrbuf NULL, which removes
		 * the current description below.
		 */
2498		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
2499			return (ENAMETOOLONG);
2500		else if (ifr_buffer_get_length(ifr) == 0)
2501			descrbuf = NULL;
2502		else {
2503			descrbuf = malloc(ifr_buffer_get_length(ifr),
2504			    M_IFDESCR, M_WAITOK | M_ZERO);
2505			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
2506			    ifr_buffer_get_length(ifr) - 1);
2507			if (error) {
2508				free(descrbuf, M_IFDESCR);
2509				break;
2510			}
2511		}
2512
		/*
		 * Swap the pointers under the exclusive lock; the old
		 * buffer is freed after the lock has been dropped.
		 */
2513		sx_xlock(&ifdescr_sx);
2514		odescrbuf = ifp->if_description;
2515		ifp->if_description = descrbuf;
2516		sx_xunlock(&ifdescr_sx);
2517
2518		getmicrotime(&ifp->if_lastchange);
2519		free(odescrbuf, M_IFDESCR);
2520		break;
2521
2522	case SIOCGIFFIB:
2523		ifr->ifr_fib = ifp->if_fib;
2524		break;
2525
2526	case SIOCSIFFIB:
2527		error = priv_check(td, PRIV_NET_SETIFFIB);
2528		if (error)
2529			return (error);
2530		if (ifr->ifr_fib >= rt_numfibs)
2531			return (EINVAL);
2532
2533		ifp->if_fib = ifr->ifr_fib;
2534		break;
2535
2536	case SIOCSIFFLAGS:
2537		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2538		if (error)
2539			return (error);
2540		/*
2541		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2542		 * check, so we don't need special handling here yet.
2543		 */
2544		new_flags = (ifr->ifr_flags & 0xffff) |
2545		    (ifr->ifr_flagshigh << 16);
		/*
		 * An up -> down transition is acted on immediately.  A
		 * down -> up transition is only recorded in do_ifup and
		 * carried out after the driver has been told about the
		 * new flags.
		 */
2546		if (ifp->if_flags & IFF_UP &&
2547		    (new_flags & IFF_UP) == 0) {
2548			if_down(ifp);
2549		} else if (new_flags & IFF_UP &&
2550		    (ifp->if_flags & IFF_UP) == 0) {
2551			do_ifup = 1;
2552		}
2553		/* See if permanently promiscuous mode bit is about to flip */
2554		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2555			if (new_flags & IFF_PPROMISC)
2556				ifp->if_flags |= IFF_PROMISC;
2557			else if (ifp->if_pcount == 0)
2558				ifp->if_flags &= ~IFF_PROMISC;
2559			if (log_promisc_mode_change)
2560                                log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
2561                                    ifp->if_xname,
2562                                    ((new_flags & IFF_PPROMISC) ?
2563                                     "enabled" : "disabled"));
2564		}
		/*
		 * Bits in IFF_CANTCHANGE keep their current value; all
		 * other bits are taken from the request.
		 */
2565		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2566			(new_flags &~ IFF_CANTCHANGE);
		/* The driver's return value is deliberately ignored. */
2567		if (ifp->if_ioctl) {
2568			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2569		}
2570		if (do_ifup)
2571			if_up(ifp);
2572		getmicrotime(&ifp->if_lastchange);
2573		break;
2574
2575	case SIOCSIFCAP:
2576		error = priv_check(td, PRIV_NET_SETIFCAP);
2577		if (error)
2578			return (error);
2579		if (ifp->if_ioctl == NULL)
2580			return (EOPNOTSUPP);
		/* Refuse capabilities outside if_capabilities. */
2581		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2582			return (EINVAL);
2583		error = (*ifp->if_ioctl)(ifp, cmd, data);
2584		if (error == 0)
2585			getmicrotime(&ifp->if_lastchange);
2586		break;
2587
2588#ifdef MAC
2589	case SIOCSIFMAC:
2590		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2591		break;
2592#endif
2593
2594	case SIOCSIFNAME:
2595		error = priv_check(td, PRIV_NET_SETIFNAME);
2596		if (error)
2597			return (error);
2598		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
2599		    NULL);
2600		if (error != 0)
2601			return (error);
2602		if (new_name[0] == '\0')
2603			return (EINVAL);
2604		if (new_name[IFNAMSIZ-1] != '\0') {
2605			new_name[IFNAMSIZ-1] = '\0';
2606			if (strlen(new_name) == IFNAMSIZ-1)
2607				return (EINVAL);
2608		}
		/* The new name must not belong to an existing interface. */
2609		if (ifunit(new_name) != NULL)
2610			return (EEXIST);
2611
2612		/*
2613		 * XXX: Locking.  Nothing else seems to lock if_flags,
2614		 * and there are numerous other races with the
2615		 * ifunit() checks not being atomic with namespace
2616		 * changes (renames, vmoves, if_attach, etc).
2617		 */
2618		ifp->if_flags |= IFF_RENAMING;
2619
2620		/* Announce the departure of the interface. */
2621		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2622		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2623
2624		log(LOG_INFO, "%s: changing name to '%s'\n",
2625		    ifp->if_xname, new_name);
2626
		/*
		 * Besides if_xname, the name is stored at the start of
		 * sdl_data in the interface's link-level sockaddr, with
		 * the address bytes following it, and is covered by 0xff
		 * bytes in the matching netmask.  Both are rewritten
		 * under the address write lock.
		 */
2627		IF_ADDR_WLOCK(ifp);
2628		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
2629		ifa = ifp->if_addr;
2630		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2631		namelen = strlen(new_name);
2632		onamelen = sdl->sdl_nlen;
2633		/*
2634		 * Move the address if needed.  This is safe because we
2635		 * allocate space for a name of length IFNAMSIZ when we
2636		 * create this in if_attach().
2637		 */
2638		if (namelen != onamelen) {
2639			bcopy(sdl->sdl_data + onamelen,
2640			    sdl->sdl_data + namelen, sdl->sdl_alen);
2641		}
2642		bcopy(new_name, sdl->sdl_data, namelen);
2643		sdl->sdl_nlen = namelen;
2644		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2645		bzero(sdl->sdl_data, onamelen);
2646		while (namelen != 0)
2647			sdl->sdl_data[--namelen] = 0xff;
2648		IF_ADDR_WUNLOCK(ifp);
2649
2650		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2651		/* Announce the return of the interface. */
2652		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2653
2654		ifp->if_flags &= ~IFF_RENAMING;
2655		break;
2656
2657#ifdef VIMAGE
2658	case SIOCSIFVNET:
2659		error = priv_check(td, PRIV_NET_SETIFVNET);
2660		if (error)
2661			return (error);
2662		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2663		break;
2664#endif
2665
2666	case SIOCSIFMETRIC:
2667		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2668		if (error)
2669			return (error);
2670		ifp->if_metric = ifr->ifr_metric;
2671		getmicrotime(&ifp->if_lastchange);
2672		break;
2673
2674	case SIOCSIFPHYS:
2675		error = priv_check(td, PRIV_NET_SETIFPHYS);
2676		if (error)
2677			return (error);
2678		if (ifp->if_ioctl == NULL)
2679			return (EOPNOTSUPP);
2680		error = (*ifp->if_ioctl)(ifp, cmd, data);
2681		if (error == 0)
2682			getmicrotime(&ifp->if_lastchange);
2683		break;
2684
2685	case SIOCSIFMTU:
2686	{
		/* Sampled before the driver call, compared afterwards. */
2687		u_long oldmtu = ifp->if_mtu;
2688
2689		error = priv_check(td, PRIV_NET_SETIFMTU);
2690		if (error)
2691			return (error);
2692		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2693			return (EINVAL);
2694		if (ifp->if_ioctl == NULL)
2695			return (EOPNOTSUPP);
2696		error = (*ifp->if_ioctl)(ifp, cmd, data);
2697		if (error == 0) {
2698			getmicrotime(&ifp->if_lastchange);
2699			rt_ifmsg(ifp);
2700		}
2701		/*
2702		 * If the link MTU changed, do network layer specific procedure.
2703		 */
2704		if (ifp->if_mtu != oldmtu) {
2705#ifdef INET6
2706			nd6_setmtu(ifp);
2707#endif
2708			rt_updatemtu(ifp);
2709		}
2710		break;
2711	}
2712
2713	case SIOCADDMULTI:
2714	case SIOCDELMULTI:
2715		if (cmd == SIOCADDMULTI)
2716			error = priv_check(td, PRIV_NET_ADDMULTI);
2717		else
2718			error = priv_check(td, PRIV_NET_DELMULTI);
2719		if (error)
2720			return (error);
2721
2722		/* Don't allow group membership on non-multicast interfaces. */
2723		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2724			return (EOPNOTSUPP);
2725
2726		/* Don't let users screw up protocols' entries. */
2727		if (ifr->ifr_addr.sa_family != AF_LINK)
2728			return (EINVAL);
2729
2730		if (cmd == SIOCADDMULTI) {
2731			struct ifmultiaddr *ifma;
2732
2733			/*
2734			 * Userland is only permitted to join groups once
2735			 * via the if_addmulti() KPI, because it cannot hold
2736			 * struct ifmultiaddr * between calls. It may also
2737			 * lose a race while we check if the membership
2738			 * already exists.
2739			 */
2740			IF_ADDR_RLOCK(ifp);
2741			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2742			IF_ADDR_RUNLOCK(ifp);
2743			if (ifma != NULL)
2744				error = EADDRINUSE;
2745			else
2746				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2747		} else {
2748			error = if_delmulti(ifp, &ifr->ifr_addr);
2749		}
2750		if (error == 0)
2751			getmicrotime(&ifp->if_lastchange);
2752		break;
2753
	/*
	 * Privileged requests that are handed to the driver unchanged;
	 * a successful call updates if_lastchange.
	 */
2754	case SIOCSIFPHYADDR:
2755	case SIOCDIFPHYADDR:
2756#ifdef INET6
2757	case SIOCSIFPHYADDR_IN6:
2758#endif
2759	case SIOCSIFMEDIA:
2760	case SIOCSIFGENERIC:
2761		error = priv_check(td, PRIV_NET_HWIOCTL);
2762		if (error)
2763			return (error);
2764		if (ifp->if_ioctl == NULL)
2765			return (EOPNOTSUPP);
2766		error = (*ifp->if_ioctl)(ifp, cmd, data);
2767		if (error == 0)
2768			getmicrotime(&ifp->if_lastchange);
2769		break;
2770
	/* Requests handed to the driver with no privilege check here. */
2771	case SIOCGIFSTATUS:
2772	case SIOCGIFPSRCADDR:
2773	case SIOCGIFPDSTADDR:
2774	case SIOCGIFMEDIA:
2775	case SIOCGIFXMEDIA:
2776	case SIOCGIFGENERIC:
2777	case SIOCGIFRSSKEY:
2778	case SIOCGIFRSSHASH:
2779		if (ifp->if_ioctl == NULL)
2780			return (EOPNOTSUPP);
2781		error = (*ifp->if_ioctl)(ifp, cmd, data);
2782		break;
2783
2784	case SIOCSIFLLADDR:
2785		error = priv_check(td, PRIV_NET_SETLLADDR);
2786		if (error)
2787			return (error);
2788		error = if_setlladdr(ifp,
2789		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2790		break;
2791
2792	case SIOCGHWADDR:
2793		error = if_gethwaddr(ifp, ifr);
2794		break;
2795
2796	case SIOCAIFGROUP:
2797	{
2798		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2799
2800		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2801		if (error)
2802			return (error);
2803		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2804			return (error);
2805		break;
2806	}
2807
2808	case SIOCGIFGROUP:
2809		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
2810			return (error);
2811		break;
2812
2813	case SIOCDIFGROUP:
2814	{
2815		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2816
2817		error = priv_check(td, PRIV_NET_DELIFGROUP);
2818		if (error)
2819			return (error);
2820		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2821			return (error);
2822		break;
2823	}
2824
2825	default:
		/* Not handled here; the caller decides where it goes next. */
2826		error = ENOIOCTL;
2827		break;
2828	}
2829	return (error);
2830}
2831
/* COMPAT_SVR4: old-style interface configuration request. */
#define	OSIOCGIFCONF	_IOWR('i', 20, struct ifconf)

#ifdef COMPAT_FREEBSD32
/*
 * Layout of the interface configuration request used with
 * SIOCGIFCONF32: the buffer/request pointer is carried as a 32-bit
 * integer.
 */
struct ifconf32 {
	int32_t			ifc_len;
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	}			ifc_ifcu;
};
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
#endif /* COMPAT_FREEBSD32 */
2845
2846/*
2847 * Interface ioctls.
2848 */
2849int
2850ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2851{
2852	struct ifnet *ifp;
2853	struct ifreq *ifr;
2854	int error;
2855	int oif_flags;
2856#ifdef VIMAGE
2857	int shutdown;
2858#endif
2859
2860	CURVNET_SET(so->so_vnet);
2861#ifdef VIMAGE
2862	/* Make sure the VNET is stable. */
2863	shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
2864		 so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
2865	if (shutdown) {
2866		CURVNET_RESTORE();
2867		return (EBUSY);
2868	}
2869#endif
2870
2871
2872	switch (cmd) {
2873	case SIOCGIFCONF:
2874	case OSIOCGIFCONF:	/* COMPAT_SVR4 */
2875		error = ifconf(cmd, data);
2876		CURVNET_RESTORE();
2877		return (error);
2878
2879#ifdef COMPAT_FREEBSD32
2880	case SIOCGIFCONF32:
2881		{
2882			struct ifconf32 *ifc32;
2883			struct ifconf ifc;
2884
2885			ifc32 = (struct ifconf32 *)data;
2886			ifc.ifc_len = ifc32->ifc_len;
2887			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
2888
2889			error = ifconf(SIOCGIFCONF, (void *)&ifc);
2890			CURVNET_RESTORE();
2891			if (error == 0)
2892				ifc32->ifc_len = ifc.ifc_len;
2893			return (error);
2894		}
2895#endif
2896	}
2897	ifr = (struct ifreq *)data;
2898
2899	switch (cmd) {
2900#ifdef VIMAGE
2901	case SIOCSIFRVNET:
2902		error = priv_check(td, PRIV_NET_SETIFVNET);
2903		if (error == 0)
2904			error = if_vmove_reclaim(td, ifr->ifr_name,
2905			    ifr->ifr_jid);
2906		CURVNET_RESTORE();
2907		return (error);
2908#endif
2909	case SIOCIFCREATE:
2910	case SIOCIFCREATE2:
2911		error = priv_check(td, PRIV_NET_IFCREATE);
2912		if (error == 0)
2913			error = if_clone_create(ifr->ifr_name,
2914			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
2915			    ifr_data_get_ptr(ifr) : NULL);
2916		CURVNET_RESTORE();
2917		return (error);
2918	case SIOCIFDESTROY:
2919		error = priv_check(td, PRIV_NET_IFDESTROY);
2920		if (error == 0)
2921			error = if_clone_destroy(ifr->ifr_name);
2922		CURVNET_RESTORE();
2923		return (error);
2924
2925	case SIOCIFGCLONERS:
2926		error = if_clone_list((struct if_clonereq *)data);
2927		CURVNET_RESTORE();
2928		return (error);
2929	case SIOCGIFGMEMB:
2930		error = if_getgroupmembers((struct ifgroupreq *)data);
2931		CURVNET_RESTORE();
2932		return (error);
2933#if defined(INET) || defined(INET6)
2934	case SIOCSVH:
2935	case SIOCGVH:
2936		if (carp_ioctl_p == NULL)
2937			error = EPROTONOSUPPORT;
2938		else
2939			error = (*carp_ioctl_p)(ifr, cmd, td);
2940		CURVNET_RESTORE();
2941		return (error);
2942#endif
2943	}
2944
2945	ifp = ifunit_ref(ifr->ifr_name);
2946	if (ifp == NULL) {
2947		CURVNET_RESTORE();
2948		return (ENXIO);
2949	}
2950
2951	error = ifhwioctl(cmd, ifp, data, td);
2952	if (error != ENOIOCTL) {
2953		if_rele(ifp);
2954		CURVNET_RESTORE();
2955		return (error);
2956	}
2957
2958	oif_flags = ifp->if_flags;
2959	if (so->so_proto == NULL) {
2960		if_rele(ifp);
2961		CURVNET_RESTORE();
2962		return (EOPNOTSUPP);
2963	}
2964
2965	/*
2966	 * Pass the request on to the socket control method, and if the
2967	 * latter returns EOPNOTSUPP, directly to the interface.
2968	 *
2969	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
2970	 * trust SIOCSIFADDR et al to come from an already privileged
2971	 * layer, and do not perform any credentials checks or input
2972	 * validation.
2973	 */
2974	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
2975	    ifp, td));
2976	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
2977	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
2978	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
2979		error = (*ifp->if_ioctl)(ifp, cmd, data);
2980
2981	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
2982#ifdef INET6
2983		if (ifp->if_flags & IFF_UP)
2984			in6_if_up(ifp);
2985#endif
2986	}
2987	if_rele(ifp);
2988	CURVNET_RESTORE();
2989	return (error);
2990}
2991
2992/*
2993 * The code common to handling reference counted flags,
2994 * e.g., in ifpromisc() and if_allmulti().
2995 * The "pflag" argument can specify a permanent mode flag to check,
2996 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
2997 *
2998 * Only to be used on stack-owned flags, not driver-owned flags.
2999 */
3000static int
3001if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
3002{
3003	struct ifreq ifr;
3004	int error;
3005	int oldflags, oldcount;
3006
3007	/* Sanity checks to catch programming errors */
3008	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
3009	    ("%s: setting driver-owned flag %d", __func__, flag));
3010
3011	if (onswitch)
3012		KASSERT(*refcount >= 0,
3013		    ("%s: increment negative refcount %d for flag %d",
3014		    __func__, *refcount, flag));
3015	else
3016		KASSERT(*refcount > 0,
3017		    ("%s: decrement non-positive refcount %d for flag %d",
3018		    __func__, *refcount, flag));
3019
3020	/* In case this mode is permanent, just touch refcount */
3021	if (ifp->if_flags & pflag) {
3022		*refcount += onswitch ? 1 : -1;
3023		return (0);
3024	}
3025
3026	/* Save ifnet parameters for if_ioctl() may fail */
3027	oldcount = *refcount;
3028	oldflags = ifp->if_flags;
3029
3030	/*
3031	 * See if we aren't the only and touching refcount is enough.
3032	 * Actually toggle interface flag if we are the first or last.
3033	 */
3034	if (onswitch) {
3035		if ((*refcount)++)
3036			return (0);
3037		ifp->if_flags |= flag;
3038	} else {
3039		if (--(*refcount))
3040			return (0);
3041		ifp->if_flags &= ~flag;
3042	}
3043
3044	/* Call down the driver since we've changed interface flags */
3045	if (ifp->if_ioctl == NULL) {
3046		error = EOPNOTSUPP;
3047		goto recover;
3048	}
3049	ifr.ifr_flags = ifp->if_flags & 0xffff;
3050	ifr.ifr_flagshigh = ifp->if_flags >> 16;
3051	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3052	if (error)
3053		goto recover;
3054	/* Notify userland that interface flags have changed */
3055	rt_ifmsg(ifp);
3056	return (0);
3057
3058recover:
3059	/* Recover after driver error */
3060	*refcount = oldcount;
3061	ifp->if_flags = oldflags;
3062	return (error);
3063}
3064
3065/*
3066 * Set/clear promiscuous mode on interface ifp based on the truth value
3067 * of pswitch.  The calls are reference counted so that only the first
3068 * "on" request actually has an effect, as does the final "off" request.
3069 * Results are undefined if the "off" and "on" requests are not matched.
3070 */
3071int
3072ifpromisc(struct ifnet *ifp, int pswitch)
3073{
3074	int error;
3075	int oldflags = ifp->if_flags;
3076
3077	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
3078			   &ifp->if_pcount, pswitch);
3079	/* If promiscuous mode status has changed, log a message */
3080	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
3081            log_promisc_mode_change)
3082		log(LOG_INFO, "%s: promiscuous mode %s\n",
3083		    ifp->if_xname,
3084		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
3085	return (error);
3086}
3087
3088/*
3089 * Return interface configuration
3090 * of system.  List may be used
3091 * in later ioctl's (above) to get
3092 * other information.
3093 */
3094/*ARGSUSED*/
3095static int
3096ifconf(u_long cmd, caddr_t data)
3097{
3098	struct ifconf *ifc = (struct ifconf *)data;
3099	struct ifnet *ifp;
3100	struct ifaddr *ifa;
3101	struct ifreq ifr;
3102	struct sbuf *sb;
3103	int error, full = 0, valid_len, max_len;
3104
3105	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
3106	max_len = MAXPHYS - 1;
3107
3108	/* Prevent hostile input from being able to crash the system */
3109	if (ifc->ifc_len <= 0)
3110		return (EINVAL);
3111
3112again:
3113	if (ifc->ifc_len <= max_len) {
3114		max_len = ifc->ifc_len;
3115		full = 1;
3116	}
3117	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
3118	max_len = 0;
3119	valid_len = 0;
3120
3121	IFNET_RLOCK();
3122	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
3123		int addrs;
3124
3125		/*
3126		 * Zero the ifr_name buffer to make sure we don't
3127		 * disclose the contents of the stack.
3128		 */
3129		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
3130
3131		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
3132		    >= sizeof(ifr.ifr_name)) {
3133			sbuf_delete(sb);
3134			IFNET_RUNLOCK();
3135			return (ENAMETOOLONG);
3136		}
3137
3138		addrs = 0;
3139		IF_ADDR_RLOCK(ifp);
3140		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3141			struct sockaddr *sa = ifa->ifa_addr;
3142
3143			if (prison_if(curthread->td_ucred, sa) != 0)
3144				continue;
3145			addrs++;
3146			/* COMPAT_SVR4 */
3147			if (cmd == OSIOCGIFCONF) {
3148				struct osockaddr *osa =
3149				    (struct osockaddr *)&ifr.ifr_addr;
3150				ifr.ifr_addr = *sa;
3151				osa->sa_family = sa->sa_family;
3152				sbuf_bcat(sb, &ifr, sizeof(ifr));
3153				max_len += sizeof(ifr);
3154			} else
3155			if (sa->sa_len <= sizeof(*sa)) {
3156				ifr.ifr_addr = *sa;
3157				sbuf_bcat(sb, &ifr, sizeof(ifr));
3158				max_len += sizeof(ifr);
3159			} else {
3160				sbuf_bcat(sb, &ifr,
3161				    offsetof(struct ifreq, ifr_addr));
3162				max_len += offsetof(struct ifreq, ifr_addr);
3163				sbuf_bcat(sb, sa, sa->sa_len);
3164				max_len += sa->sa_len;
3165			}
3166
3167			if (sbuf_error(sb) == 0)
3168				valid_len = sbuf_len(sb);
3169		}
3170		IF_ADDR_RUNLOCK(ifp);
3171		if (addrs == 0) {
3172			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
3173			sbuf_bcat(sb, &ifr, sizeof(ifr));
3174			max_len += sizeof(ifr);
3175
3176			if (sbuf_error(sb) == 0)
3177				valid_len = sbuf_len(sb);
3178		}
3179	}
3180	IFNET_RUNLOCK();
3181
3182	/*
3183	 * If we didn't allocate enough space (uncommon), try again.  If
3184	 * we have already allocated as much space as we are allowed,
3185	 * return what we've got.
3186	 */
3187	if (valid_len != max_len && !full) {
3188		sbuf_delete(sb);
3189		goto again;
3190	}
3191
3192	ifc->ifc_len = valid_len;
3193	sbuf_finish(sb);
3194	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
3195	sbuf_delete(sb);
3196	return (error);
3197}
3198
3199/*
3200 * Just like ifpromisc(), but for all-multicast-reception mode.
3201 */
3202int
3203if_allmulti(struct ifnet *ifp, int onswitch)
3204{
3205
3206	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3207}
3208
3209struct ifmultiaddr *
3210if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3211{
3212	struct ifmultiaddr *ifma;
3213
3214	IF_ADDR_LOCK_ASSERT(ifp);
3215
3216	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3217		if (sa->sa_family == AF_LINK) {
3218			if (sa_dl_equal(ifma->ifma_addr, sa))
3219				break;
3220		} else {
3221			if (sa_equal(ifma->ifma_addr, sa))
3222				break;
3223		}
3224	}
3225
3226	return ifma;
3227}
3228
3229/*
3230 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3231 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3232 * the ifnet multicast address list here, so the caller must do that and
3233 * other setup work (such as notifying the device driver).  The reference
3234 * count is initialized to 1.
3235 */
3236static struct ifmultiaddr *
3237if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3238    int mflags)
3239{
3240	struct ifmultiaddr *ifma;
3241	struct sockaddr *dupsa;
3242
3243	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3244	    M_ZERO);
3245	if (ifma == NULL)
3246		return (NULL);
3247
3248	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3249	if (dupsa == NULL) {
3250		free(ifma, M_IFMADDR);
3251		return (NULL);
3252	}
3253	bcopy(sa, dupsa, sa->sa_len);
3254	ifma->ifma_addr = dupsa;
3255
3256	ifma->ifma_ifp = ifp;
3257	ifma->ifma_refcount = 1;
3258	ifma->ifma_protospec = NULL;
3259
3260	if (llsa == NULL) {
3261		ifma->ifma_lladdr = NULL;
3262		return (ifma);
3263	}
3264
3265	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3266	if (dupsa == NULL) {
3267		free(ifma->ifma_addr, M_IFMADDR);
3268		free(ifma, M_IFMADDR);
3269		return (NULL);
3270	}
3271	bcopy(llsa, dupsa, llsa->sa_len);
3272	ifma->ifma_lladdr = dupsa;
3273
3274	return (ifma);
3275}
3276
3277/*
3278 * if_freemulti: free ifmultiaddr structure and possibly attached related
3279 * addresses.  The caller is responsible for implementing reference
3280 * counting, notifying the driver, handling routing messages, and releasing
3281 * any dependent link layer state.
3282 */
3283static void
3284if_freemulti(struct ifmultiaddr *ifma)
3285{
3286
3287	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3288	    ifma->ifma_refcount));
3289
3290	if (ifma->ifma_lladdr != NULL)
3291		free(ifma->ifma_lladdr, M_IFMADDR);
3292	free(ifma->ifma_addr, M_IFMADDR);
3293	free(ifma, M_IFMADDR);
3294}
3295
3296/*
3297 * Register an additional multicast address with a network interface.
3298 *
3299 * - If the address is already present, bump the reference count on the
3300 *   address and return.
3301 * - If the address is not link-layer, look up a link layer address.
3302 * - Allocate address structures for one or both addresses, and attach to the
3303 *   multicast address list on the interface.  If automatically adding a link
3304 *   layer address, the protocol address will own a reference to the link
3305 *   layer address, to be freed when it is freed.
3306 * - Notify the network device driver of an addition to the multicast address
3307 *   list.
3308 *
3309 * 'sa' points to caller-owned memory with the desired multicast address.
3310 *
3311 * 'retifma' will be used to return a pointer to the resulting multicast
3312 * address reference, if desired.
3313 */
3314int
3315if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
3316    struct ifmultiaddr **retifma)
3317{
3318	struct ifmultiaddr *ifma, *ll_ifma;
3319	struct sockaddr *llsa;
3320	struct sockaddr_dl sdl;
3321	int error;
3322
3323	/*
3324	 * If the address is already present, return a new reference to it;
3325	 * otherwise, allocate storage and set up a new address.
3326	 */
3327	IF_ADDR_WLOCK(ifp);
3328	ifma = if_findmulti(ifp, sa);
3329	if (ifma != NULL) {
3330		ifma->ifma_refcount++;
3331		if (retifma != NULL)
3332			*retifma = ifma;
3333		IF_ADDR_WUNLOCK(ifp);
3334		return (0);
3335	}
3336
3337	/*
3338	 * The address isn't already present; resolve the protocol address
3339	 * into a link layer address, and then look that up, bump its
3340	 * refcount or allocate an ifma for that also.
3341	 * Most link layer resolving functions returns address data which
3342	 * fits inside default sockaddr_dl structure. However callback
3343	 * can allocate another sockaddr structure, in that case we need to
3344	 * free it later.
3345	 */
3346	llsa = NULL;
3347	ll_ifma = NULL;
3348	if (ifp->if_resolvemulti != NULL) {
3349		/* Provide called function with buffer size information */
3350		sdl.sdl_len = sizeof(sdl);
3351		llsa = (struct sockaddr *)&sdl;
3352		error = ifp->if_resolvemulti(ifp, &llsa, sa);
3353		if (error)
3354			goto unlock_out;
3355	}
3356
3357	/*
3358	 * Allocate the new address.  Don't hook it up yet, as we may also
3359	 * need to allocate a link layer multicast address.
3360	 */
3361	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
3362	if (ifma == NULL) {
3363		error = ENOMEM;
3364		goto free_llsa_out;
3365	}
3366
3367	/*
3368	 * If a link layer address is found, we'll need to see if it's
3369	 * already present in the address list, or allocate is as well.
3370	 * When this block finishes, the link layer address will be on the
3371	 * list.
3372	 */
3373	if (llsa != NULL) {
3374		ll_ifma = if_findmulti(ifp, llsa);
3375		if (ll_ifma == NULL) {
3376			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
3377			if (ll_ifma == NULL) {
3378				--ifma->ifma_refcount;
3379				if_freemulti(ifma);
3380				error = ENOMEM;
3381				goto free_llsa_out;
3382			}
3383			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
3384			    ifma_link);
3385		} else
3386			ll_ifma->ifma_refcount++;
3387		ifma->ifma_llifma = ll_ifma;
3388	}
3389
3390	/*
3391	 * We now have a new multicast address, ifma, and possibly a new or
3392	 * referenced link layer address.  Add the primary address to the
3393	 * ifnet address list.
3394	 */
3395	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
3396
3397	if (retifma != NULL)
3398		*retifma = ifma;
3399
3400	/*
3401	 * Must generate the message while holding the lock so that 'ifma'
3402	 * pointer is still valid.
3403	 */
3404	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
3405	IF_ADDR_WUNLOCK(ifp);
3406
3407	/*
3408	 * We are certain we have added something, so call down to the
3409	 * interface to let them know about it.
3410	 */
3411	if (ifp->if_ioctl != NULL) {
3412		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3413	}
3414
3415	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3416		link_free_sdl(llsa);
3417
3418	return (0);
3419
3420free_llsa_out:
3421	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3422		link_free_sdl(llsa);
3423
3424unlock_out:
3425	IF_ADDR_WUNLOCK(ifp);
3426	return (error);
3427}
3428
3429/*
3430 * Delete a multicast group membership by network-layer group address.
3431 *
3432 * Returns ENOENT if the entry could not be found. If ifp no longer
3433 * exists, results are undefined. This entry point should only be used
3434 * from subsystems which do appropriate locking to hold ifp for the
3435 * duration of the call.
3436 * Network-layer protocol domains must use if_delmulti_ifma().
3437 */
3438int
3439if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3440{
3441	struct ifmultiaddr *ifma;
3442	int lastref;
3443#ifdef INVARIANTS
3444	struct ifnet *oifp;
3445
3446	IFNET_RLOCK_NOSLEEP();
3447	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3448		if (ifp == oifp)
3449			break;
3450	if (ifp != oifp)
3451		ifp = NULL;
3452	IFNET_RUNLOCK_NOSLEEP();
3453
3454	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
3455#endif
3456	if (ifp == NULL)
3457		return (ENOENT);
3458
3459	IF_ADDR_WLOCK(ifp);
3460	lastref = 0;
3461	ifma = if_findmulti(ifp, sa);
3462	if (ifma != NULL)
3463		lastref = if_delmulti_locked(ifp, ifma, 0);
3464	IF_ADDR_WUNLOCK(ifp);
3465
3466	if (ifma == NULL)
3467		return (ENOENT);
3468
3469	if (lastref && ifp->if_ioctl != NULL) {
3470		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3471	}
3472
3473	return (0);
3474}
3475
3476/*
3477 * Delete all multicast group membership for an interface.
3478 * Should be used to quickly flush all multicast filters.
3479 */
3480void
3481if_delallmulti(struct ifnet *ifp)
3482{
3483	struct ifmultiaddr *ifma;
3484	struct ifmultiaddr *next;
3485
3486	IF_ADDR_WLOCK(ifp);
3487	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3488		if_delmulti_locked(ifp, ifma, 0);
3489	IF_ADDR_WUNLOCK(ifp);
3490}
3491
3492/*
3493 * Delete a multicast group membership by group membership pointer.
3494 * Network-layer protocol domains must use this routine.
3495 *
3496 * It is safe to call this routine if the ifp disappeared.
3497 */
3498void
3499if_delmulti_ifma(struct ifmultiaddr *ifma)
3500{
3501	struct ifnet *ifp;
3502	int lastref;
3503
3504	ifp = ifma->ifma_ifp;
3505#ifdef DIAGNOSTIC
3506	if (ifp == NULL) {
3507		printf("%s: ifma_ifp seems to be detached\n", __func__);
3508	} else {
3509		struct ifnet *oifp;
3510
3511		IFNET_RLOCK_NOSLEEP();
3512		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3513			if (ifp == oifp)
3514				break;
3515		if (ifp != oifp) {
3516			printf("%s: ifnet %p disappeared\n", __func__, ifp);
3517			ifp = NULL;
3518		}
3519		IFNET_RUNLOCK_NOSLEEP();
3520	}
3521#endif
3522	/*
3523	 * If and only if the ifnet instance exists: Acquire the address lock.
3524	 */
3525	if (ifp != NULL)
3526		IF_ADDR_WLOCK(ifp);
3527
3528	lastref = if_delmulti_locked(ifp, ifma, 0);
3529
3530	if (ifp != NULL) {
3531		/*
3532		 * If and only if the ifnet instance exists:
3533		 *  Release the address lock.
3534		 *  If the group was left: update the hardware hash filter.
3535		 */
3536		IF_ADDR_WUNLOCK(ifp);
3537		if (lastref && ifp->if_ioctl != NULL) {
3538			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3539		}
3540	}
3541}
3542
3543/*
3544 * Perform deletion of network-layer and/or link-layer multicast address.
3545 *
3546 * Return 0 if the reference count was decremented.
3547 * Return 1 if the final reference was released, indicating that the
3548 * hardware hash filter should be reprogrammed.
3549 */
3550static int
3551if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3552{
3553	struct ifmultiaddr *ll_ifma;
3554
3555	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3556		KASSERT(ifma->ifma_ifp == ifp,
3557		    ("%s: inconsistent ifp %p", __func__, ifp));
3558		IF_ADDR_WLOCK_ASSERT(ifp);
3559	}
3560
3561	ifp = ifma->ifma_ifp;
3562
3563	/*
3564	 * If the ifnet is detaching, null out references to ifnet,
3565	 * so that upper protocol layers will notice, and not attempt
3566	 * to obtain locks for an ifnet which no longer exists. The
3567	 * routing socket announcement must happen before the ifnet
3568	 * instance is detached from the system.
3569	 */
3570	if (detaching) {
3571#ifdef DIAGNOSTIC
3572		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3573#endif
3574		/*
3575		 * ifp may already be nulled out if we are being reentered
3576		 * to delete the ll_ifma.
3577		 */
3578		if (ifp != NULL) {
3579			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3580			ifma->ifma_ifp = NULL;
3581		}
3582	}
3583
3584	if (--ifma->ifma_refcount > 0)
3585		return 0;
3586
3587	/*
3588	 * If this ifma is a network-layer ifma, a link-layer ifma may
3589	 * have been associated with it. Release it first if so.
3590	 */
3591	ll_ifma = ifma->ifma_llifma;
3592	if (ll_ifma != NULL) {
3593		KASSERT(ifma->ifma_lladdr != NULL,
3594		    ("%s: llifma w/o lladdr", __func__));
3595		if (detaching)
3596			ll_ifma->ifma_ifp = NULL;	/* XXX */
3597		if (--ll_ifma->ifma_refcount == 0) {
3598			if (ifp != NULL) {
3599				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
3600				    ifma_link);
3601			}
3602			if_freemulti(ll_ifma);
3603		}
3604	}
3605
3606	if (ifp != NULL)
3607		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
3608
3609	if_freemulti(ifma);
3610
3611	/*
3612	 * The last reference to this instance of struct ifmultiaddr
3613	 * was released; the hardware should be notified of this change.
3614	 */
3615	return 1;
3616}
3617
3618/*
3619 * Set the link layer address on an interface.
3620 *
3621 * At this time we only support certain types of interfaces,
3622 * and we don't allow the length of the address to change.
3623 *
3624 * Set noinline to be dtrace-friendly
3625 */
3626__noinline int
3627if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3628{
3629	struct sockaddr_dl *sdl;
3630	struct ifaddr *ifa;
3631	struct ifreq ifr;
3632
3633	IF_ADDR_RLOCK(ifp);
3634	ifa = ifp->if_addr;
3635	if (ifa == NULL) {
3636		IF_ADDR_RUNLOCK(ifp);
3637		return (EINVAL);
3638	}
3639	ifa_ref(ifa);
3640	IF_ADDR_RUNLOCK(ifp);
3641	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3642	if (sdl == NULL) {
3643		ifa_free(ifa);
3644		return (EINVAL);
3645	}
3646	if (len != sdl->sdl_alen) {	/* don't allow length to change */
3647		ifa_free(ifa);
3648		return (EINVAL);
3649	}
3650	switch (ifp->if_type) {
3651	case IFT_ETHER:
3652	case IFT_FDDI:
3653	case IFT_XETHER:
3654	case IFT_ISO88025:
3655	case IFT_L2VLAN:
3656	case IFT_BRIDGE:
3657	case IFT_ARCNET:
3658	case IFT_IEEE8023ADLAG:
3659	case IFT_IEEE80211:
3660		bcopy(lladdr, LLADDR(sdl), len);
3661		ifa_free(ifa);
3662		break;
3663	default:
3664		ifa_free(ifa);
3665		return (ENODEV);
3666	}
3667
3668	/*
3669	 * If the interface is already up, we need
3670	 * to re-init it in order to reprogram its
3671	 * address filter.
3672	 */
3673	if ((ifp->if_flags & IFF_UP) != 0) {
3674		if (ifp->if_ioctl) {
3675			ifp->if_flags &= ~IFF_UP;
3676			ifr.ifr_flags = ifp->if_flags & 0xffff;
3677			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3678			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3679			ifp->if_flags |= IFF_UP;
3680			ifr.ifr_flags = ifp->if_flags & 0xffff;
3681			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3682			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3683		}
3684	}
3685	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3686	return (0);
3687}
3688
3689/*
3690 * Compat function for handling basic encapsulation requests.
3691 * Not converted stacks (FDDI, IB, ..) supports traditional
3692 * output model: ARP (and other similar L2 protocols) are handled
3693 * inside output routine, arpresolve/nd6_resolve() returns MAC
3694 * address instead of full prepend.
3695 *
3696 * This function creates calculated header==MAC for IPv4/IPv6 and
3697 * returns EAFNOSUPPORT (which is then handled in ARP code) for other
3698 * address families.
3699 */
3700static int
3701if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3702{
3703
3704	if (req->rtype != IFENCAP_LL)
3705		return (EOPNOTSUPP);
3706
3707	if (req->bufsize < req->lladdr_len)
3708		return (ENOMEM);
3709
3710	switch (req->family) {
3711	case AF_INET:
3712	case AF_INET6:
3713		break;
3714	default:
3715		return (EAFNOSUPPORT);
3716	}
3717
3718	/* Copy lladdr to storage as is */
3719	memmove(req->buf, req->lladdr, req->lladdr_len);
3720	req->bufsize = req->lladdr_len;
3721	req->lladdr_off = 0;
3722
3723	return (0);
3724}
3725
3726/*
3727 * Get the link layer address that was read from the hardware at attach.
3728 *
3729 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
3730 * their component interfaces as IFT_IEEE8023ADLAG.
3731 */
3732int
3733if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
3734{
3735
3736	if (ifp->if_hw_addr == NULL)
3737		return (ENODEV);
3738
3739	switch (ifp->if_type) {
3740	case IFT_ETHER:
3741	case IFT_IEEE8023ADLAG:
3742		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
3743		return (0);
3744	default:
3745		return (ENODEV);
3746	}
3747}
3748
3749/*
3750 * The name argument must be a pointer to storage which will last as
3751 * long as the interface does.  For physical devices, the result of
3752 * device_get_name(dev) is a good choice and for pseudo-devices a
3753 * static string works well.
3754 */
3755void
3756if_initname(struct ifnet *ifp, const char *name, int unit)
3757{
3758	ifp->if_dname = name;
3759	ifp->if_dunit = unit;
3760	if (unit != IF_DUNIT_NONE)
3761		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3762	else
3763		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3764}
3765
3766int
3767if_printf(struct ifnet *ifp, const char * fmt, ...)
3768{
3769	va_list ap;
3770	int retval;
3771
3772	retval = printf("%s: ", ifp->if_xname);
3773	va_start(ap, fmt);
3774	retval += vprintf(fmt, ap);
3775	va_end(ap);
3776	return (retval);
3777}
3778
3779void
3780if_start(struct ifnet *ifp)
3781{
3782
3783	(*(ifp)->if_start)(ifp);
3784}
3785
/*
 * Backwards compatibility transmit routine for drivers that have not
 * implemented their own: the mbuf is passed to IFQ_HANDOFF() and the
 * error code that macro stores is returned.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int error;

	IFQ_HANDOFF(ifp, m, error);

	return (error);
}
3798
3799static void
3800if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
3801{
3802
3803	m_freem(m);
3804}
3805
3806int
3807if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3808{
3809	int active = 0;
3810
3811	IF_LOCK(ifq);
3812	if (_IF_QFULL(ifq)) {
3813		IF_UNLOCK(ifq);
3814		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3815		m_freem(m);
3816		return (0);
3817	}
3818	if (ifp != NULL) {
3819		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
3820		if (m->m_flags & (M_BCAST|M_MCAST))
3821			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
3822		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3823	}
3824	_IF_ENQUEUE(ifq, m);
3825	IF_UNLOCK(ifq);
3826	if (ifp != NULL && !active)
3827		(*(ifp)->if_start)(ifp);
3828	return (1);
3829}
3830
3831void
3832if_register_com_alloc(u_char type,
3833    if_com_alloc_t *a, if_com_free_t *f)
3834{
3835
3836	KASSERT(if_com_alloc[type] == NULL,
3837	    ("if_register_com_alloc: %d already registered", type));
3838	KASSERT(if_com_free[type] == NULL,
3839	    ("if_register_com_alloc: %d free already registered", type));
3840
3841	if_com_alloc[type] = a;
3842	if_com_free[type] = f;
3843}
3844
3845void
3846if_deregister_com_alloc(u_char type)
3847{
3848
3849	KASSERT(if_com_alloc[type] != NULL,
3850	    ("if_deregister_com_alloc: %d not registered", type));
3851	KASSERT(if_com_free[type] != NULL,
3852	    ("if_deregister_com_alloc: %d free not registered", type));
3853	if_com_alloc[type] = NULL;
3854	if_com_free[type] = NULL;
3855}
3856
3857/* API for driver access to network stack owned ifnet.*/
3858uint64_t
3859if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
3860{
3861	uint64_t oldbrate;
3862
3863	oldbrate = ifp->if_baudrate;
3864	ifp->if_baudrate = baudrate;
3865	return (oldbrate);
3866}
3867
3868uint64_t
3869if_getbaudrate(if_t ifp)
3870{
3871
3872	return (((struct ifnet *)ifp)->if_baudrate);
3873}
3874
3875int
3876if_setcapabilities(if_t ifp, int capabilities)
3877{
3878	((struct ifnet *)ifp)->if_capabilities = capabilities;
3879	return (0);
3880}
3881
3882int
3883if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
3884{
3885	((struct ifnet *)ifp)->if_capabilities |= setbit;
3886	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
3887
3888	return (0);
3889}
3890
3891int
3892if_getcapabilities(if_t ifp)
3893{
3894	return ((struct ifnet *)ifp)->if_capabilities;
3895}
3896
3897int
3898if_setcapenable(if_t ifp, int capabilities)
3899{
3900	((struct ifnet *)ifp)->if_capenable = capabilities;
3901	return (0);
3902}
3903
3904int
3905if_setcapenablebit(if_t ifp, int setcap, int clearcap)
3906{
3907	if(setcap)
3908		((struct ifnet *)ifp)->if_capenable |= setcap;
3909	if(clearcap)
3910		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
3911
3912	return (0);
3913}
3914
3915const char *
3916if_getdname(if_t ifp)
3917{
3918	return ((struct ifnet *)ifp)->if_dname;
3919}
3920
3921int
3922if_togglecapenable(if_t ifp, int togglecap)
3923{
3924	((struct ifnet *)ifp)->if_capenable ^= togglecap;
3925	return (0);
3926}
3927
3928int
3929if_getcapenable(if_t ifp)
3930{
3931	return ((struct ifnet *)ifp)->if_capenable;
3932}
3933
3934/*
3935 * This is largely undesirable because it ties ifnet to a device, but does
3936 * provide flexiblity for an embedded product vendor. Should be used with
3937 * the understanding that it violates the interface boundaries, and should be
3938 * a last resort only.
3939 */
3940int
3941if_setdev(if_t ifp, void *dev)
3942{
3943	return (0);
3944}
3945
3946int
3947if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
3948{
3949	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
3950	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
3951
3952	return (0);
3953}
3954
3955int
3956if_getdrvflags(if_t ifp)
3957{
3958	return ((struct ifnet *)ifp)->if_drv_flags;
3959}
3960
3961int
3962if_setdrvflags(if_t ifp, int flags)
3963{
3964	((struct ifnet *)ifp)->if_drv_flags = flags;
3965	return (0);
3966}
3967
3968
3969int
3970if_setflags(if_t ifp, int flags)
3971{
3972	((struct ifnet *)ifp)->if_flags = flags;
3973	return (0);
3974}
3975
3976int
3977if_setflagbits(if_t ifp, int set, int clear)
3978{
3979	((struct ifnet *)ifp)->if_flags |= set;
3980	((struct ifnet *)ifp)->if_flags &= ~clear;
3981
3982	return (0);
3983}
3984
3985int
3986if_getflags(if_t ifp)
3987{
3988	return ((struct ifnet *)ifp)->if_flags;
3989}
3990
3991int
3992if_clearhwassist(if_t ifp)
3993{
3994	((struct ifnet *)ifp)->if_hwassist = 0;
3995	return (0);
3996}
3997
3998int
3999if_sethwassistbits(if_t ifp, int toset, int toclear)
4000{
4001	((struct ifnet *)ifp)->if_hwassist |= toset;
4002	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
4003
4004	return (0);
4005}
4006
4007int
4008if_sethwassist(if_t ifp, int hwassist_bit)
4009{
4010	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
4011	return (0);
4012}
4013
4014int
4015if_gethwassist(if_t ifp)
4016{
4017	return ((struct ifnet *)ifp)->if_hwassist;
4018}
4019
4020int
4021if_setmtu(if_t ifp, int mtu)
4022{
4023	((struct ifnet *)ifp)->if_mtu = mtu;
4024	return (0);
4025}
4026
4027int
4028if_getmtu(if_t ifp)
4029{
4030	return ((struct ifnet *)ifp)->if_mtu;
4031}
4032
4033int
4034if_getmtu_family(if_t ifp, int family)
4035{
4036	struct domain *dp;
4037
4038	for (dp = domains; dp; dp = dp->dom_next) {
4039		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
4040			return (dp->dom_ifmtu((struct ifnet *)ifp));
4041	}
4042
4043	return (((struct ifnet *)ifp)->if_mtu);
4044}
4045
4046int
4047if_setsoftc(if_t ifp, void *softc)
4048{
4049	((struct ifnet *)ifp)->if_softc = softc;
4050	return (0);
4051}
4052
4053void *
4054if_getsoftc(if_t ifp)
4055{
4056	return ((struct ifnet *)ifp)->if_softc;
4057}
4058
4059void
4060if_setrcvif(struct mbuf *m, if_t ifp)
4061{
4062	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
4063}
4064
4065void
4066if_setvtag(struct mbuf *m, uint16_t tag)
4067{
4068	m->m_pkthdr.ether_vtag = tag;
4069}
4070
4071uint16_t
4072if_getvtag(struct mbuf *m)
4073{
4074
4075	return (m->m_pkthdr.ether_vtag);
4076}
4077
4078int
4079if_sendq_empty(if_t ifp)
4080{
4081	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
4082}
4083
4084struct ifaddr *
4085if_getifaddr(if_t ifp)
4086{
4087	return ((struct ifnet *)ifp)->if_addr;
4088}
4089
4090int
4091if_getamcount(if_t ifp)
4092{
4093	return ((struct ifnet *)ifp)->if_amcount;
4094}
4095
4096
4097int
4098if_setsendqready(if_t ifp)
4099{
4100	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
4101	return (0);
4102}
4103
4104int
4105if_setsendqlen(if_t ifp, int tx_desc_count)
4106{
4107	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
4108	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
4109
4110	return (0);
4111}
4112
4113int
4114if_vlantrunkinuse(if_t ifp)
4115{
4116	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
4117}
4118
4119int
4120if_input(if_t ifp, struct mbuf* sendmp)
4121{
4122	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
4123	return (0);
4124
4125}
4126
4127/* XXX */
4128#ifndef ETH_ADDR_LEN
4129#define ETH_ADDR_LEN 6
4130#endif
4131
4132int
4133if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
4134{
4135	struct ifmultiaddr *ifma;
4136	uint8_t *lmta = (uint8_t *)mta;
4137	int mcnt = 0;
4138
4139	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4140		if (ifma->ifma_addr->sa_family != AF_LINK)
4141			continue;
4142
4143		if (mcnt == max)
4144			break;
4145
4146		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
4147		    &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
4148		mcnt++;
4149	}
4150	*cnt = mcnt;
4151
4152	return (0);
4153}
4154
4155int
4156if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
4157{
4158	int error;
4159
4160	if_maddr_rlock(ifp);
4161	error = if_setupmultiaddr(ifp, mta, cnt, max);
4162	if_maddr_runlock(ifp);
4163	return (error);
4164}
4165
4166int
4167if_multiaddr_count(if_t ifp, int max)
4168{
4169	struct ifmultiaddr *ifma;
4170	int count;
4171
4172	count = 0;
4173	if_maddr_rlock(ifp);
4174	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4175		if (ifma->ifma_addr->sa_family != AF_LINK)
4176			continue;
4177		count++;
4178		if (count == max)
4179			break;
4180	}
4181	if_maddr_runlock(ifp);
4182	return (count);
4183}
4184
4185int
4186if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
4187{
4188	struct ifmultiaddr *ifma;
4189	int cnt = 0;
4190
4191	if_maddr_rlock(ifp);
4192	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4193		cnt += filter(arg, ifma, cnt);
4194	if_maddr_runlock(ifp);
4195	return (cnt);
4196}
4197
4198struct mbuf *
4199if_dequeue(if_t ifp)
4200{
4201	struct mbuf *m;
4202	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
4203
4204	return (m);
4205}
4206
4207int
4208if_sendq_prepend(if_t ifp, struct mbuf *m)
4209{
4210	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
4211	return (0);
4212}
4213
4214int
4215if_setifheaderlen(if_t ifp, int len)
4216{
4217	((struct ifnet *)ifp)->if_hdrlen = len;
4218	return (0);
4219}
4220
4221caddr_t
4222if_getlladdr(if_t ifp)
4223{
4224	return (IF_LLADDR((struct ifnet *)ifp));
4225}
4226
4227void *
4228if_gethandle(u_char type)
4229{
4230	return (if_alloc(type));
4231}
4232
4233void
4234if_bpfmtap(if_t ifh, struct mbuf *m)
4235{
4236	struct ifnet *ifp = (struct ifnet *)ifh;
4237
4238	BPF_MTAP(ifp, m);
4239}
4240
4241void
4242if_etherbpfmtap(if_t ifh, struct mbuf *m)
4243{
4244	struct ifnet *ifp = (struct ifnet *)ifh;
4245
4246	ETHER_BPF_MTAP(ifp, m);
4247}
4248
4249void
4250if_vlancap(if_t ifh)
4251{
4252	struct ifnet *ifp = (struct ifnet *)ifh;
4253	VLAN_CAPABILITIES(ifp);
4254}
4255
4256void
4257if_setinitfn(if_t ifp, void (*init_fn)(void *))
4258{
4259	((struct ifnet *)ifp)->if_init = init_fn;
4260}
4261
4262void
4263if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
4264{
4265	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
4266}
4267
4268void
4269if_setstartfn(if_t ifp, void (*start_fn)(if_t))
4270{
4271	((struct ifnet *)ifp)->if_start = (void *)start_fn;
4272}
4273
4274void
4275if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
4276{
4277	((struct ifnet *)ifp)->if_transmit = start_fn;
4278}
4279
4280void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
4281{
4282	((struct ifnet *)ifp)->if_qflush = flush_fn;
4283
4284}
4285
4286void
4287if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
4288{
4289
4290	ifp->if_get_counter = fn;
4291}
4292
4293/* Revisit these - These are inline functions originally. */
4294int
4295drbr_inuse_drv(if_t ifh, struct buf_ring *br)
4296{
4297	return drbr_inuse(ifh, br);
4298}
4299
4300struct mbuf*
4301drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
4302{
4303	return drbr_dequeue(ifh, br);
4304}
4305
4306int
4307drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
4308{
4309	return drbr_needs_enqueue(ifh, br);
4310}
4311
4312int
4313drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
4314{
4315	return drbr_enqueue(ifh, br, m);
4316
4317}
4318