if.c revision 318397
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: stable/11/sys/net/if.c 318397 2017-05-17 05:53:25Z rpokala $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36
37#include <sys/param.h>
38#include <sys/types.h>
39#include <sys/conf.h>
40#include <sys/malloc.h>
41#include <sys/sbuf.h>
42#include <sys/bus.h>
43#include <sys/mbuf.h>
44#include <sys/systm.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/protosw.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/refcount.h>
53#include <sys/module.h>
54#include <sys/rwlock.h>
55#include <sys/sockio.h>
56#include <sys/syslog.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/domain.h>
60#include <sys/jail.h>
61#include <sys/priv.h>
62
63#include <machine/stdarg.h>
64#include <vm/uma.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_clone.h>
71#include <net/if_dl.h>
72#include <net/if_types.h>
73#include <net/if_var.h>
74#include <net/if_media.h>
75#include <net/if_vlan_var.h>
76#include <net/radix.h>
77#include <net/route.h>
78#include <net/vnet.h>
79
80#if defined(INET) || defined(INET6)
81#include <net/ethernet.h>
82#include <netinet/in.h>
83#include <netinet/in_var.h>
84#include <netinet/ip.h>
85#include <netinet/ip_carp.h>
86#ifdef INET
87#include <netinet/if_ether.h>
88#endif /* INET */
89#ifdef INET6
90#include <netinet6/in6_var.h>
91#include <netinet6/in6_ifattach.h>
92#endif /* INET6 */
93#endif /* INET || INET6 */
94
95#include <security/mac/mac_framework.h>
96
97#ifdef COMPAT_FREEBSD32
98#include <sys/mount.h>
99#include <compat/freebsd32/freebsd32.h>
100#endif
101
102SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
103SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
104
105SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
106    &ifqmaxlen, 0, "max send queue size");
107
108/* Log link state change events */
109static int log_link_state_change = 1;
110
111SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
112	&log_link_state_change, 0,
113	"log interface link state change events");
114
115/* Log promiscuous mode change events */
116static int log_promisc_mode_change = 1;
117
118SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
119	&log_promisc_mode_change, 1,
120	"log promiscuous mode change events");
121
122/* Interface description */
123static unsigned int ifdescr_maxlen = 1024;
124SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
125	&ifdescr_maxlen, 0,
126	"administrative maximum length for interface description");
127
128static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
129
130/* global sx for non-critical path ifdescr */
131static struct sx ifdescr_sx;
132SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
133
134void	(*bridge_linkstate_p)(struct ifnet *ifp);
135void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
136void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
137/* These are external hooks for CARP. */
138void	(*carp_linkstate_p)(struct ifnet *ifp);
139void	(*carp_demote_adj_p)(int, char *);
140int	(*carp_master_p)(struct ifaddr *);
141#if defined(INET) || defined(INET6)
142int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
143int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
144    const struct sockaddr *sa);
145int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
146int	(*carp_attach_p)(struct ifaddr *, int);
147void	(*carp_detach_p)(struct ifaddr *);
148#endif
149#ifdef INET
150int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
151#endif
152#ifdef INET6
153struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
154caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
155    const struct in6_addr *taddr);
156#endif
157
158struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
159
160/*
161 * XXX: Style; these should be sorted alphabetically, and unprototyped
162 * static functions should be prototyped. Currently they are sorted by
163 * declaration order.
164 */
165static void	if_attachdomain(void *);
166static void	if_attachdomain1(struct ifnet *);
167static int	ifconf(u_long, caddr_t);
168static void	if_freemulti(struct ifmultiaddr *);
169static void	if_grow(void);
170static void	if_input_default(struct ifnet *, struct mbuf *);
171static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
172static void	if_route(struct ifnet *, int flag, int fam);
173static int	if_setflag(struct ifnet *, int, int, int *, int);
174static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
175static void	if_unroute(struct ifnet *, int flag, int fam);
176static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
177static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
178static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
179static void	do_link_state_change(void *, int);
180static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
181static int	if_getgroupmembers(struct ifgroupreq *);
182static void	if_delgroups(struct ifnet *);
183static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
184static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
185#ifdef VIMAGE
186static void	if_vmove(struct ifnet *, struct vnet *);
187#endif
188
189#ifdef INET6
190/*
191 * XXX: declared here to avoid including many inet6-related files;
192 * should this be more generalized?
193 */
194extern void	nd6_setmtu(struct ifnet *);
195#endif
196
197/* ipsec helper hooks */
198VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
199VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
200
201VNET_DEFINE(int, if_index);
202int	ifqmaxlen = IFQ_MAXLEN;
203VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
204VNET_DEFINE(struct ifgrouphead, ifg_head);
205
206static VNET_DEFINE(int, if_indexlim) = 8;
207
208/* Table of ifnet by index. */
209VNET_DEFINE(struct ifnet **, ifindex_table);
210
211#define	V_if_indexlim		VNET(if_indexlim)
212#define	V_ifindex_table		VNET(ifindex_table)
213
214/*
215 * The global network interface list (V_ifnet) and related state (such as
216 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
217 * an rwlock.  Either may be acquired shared to stabilize the list, but both
218 * must be acquired writable to modify the list.  This model allows us both
219 * to stabilize the interface list during interrupt thread processing and
220 * to stabilize it over long-running ioctls, without introducing priority
221 * inversions and deadlocks.
222 */
223struct rwlock ifnet_rwlock;
224RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
225struct sx ifnet_sxlock;
226SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
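/*
 * For reference, the IFNET_*LOCK() macros in net/if_var.h wrap these two
 * locks: IFNET_WLOCK() acquires both exclusively, IFNET_RLOCK() takes the
 * sxlock shared (and may sleep), and IFNET_RLOCK_NOSLEEP() takes the rwlock
 * shared for contexts that must not sleep.
 */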
227
228/*
229 * The allocation of network interfaces is a rather non-atomic affair; we
230 * need to select an index before we are ready to expose the interface for
231 * use, so we will use this pointer value to indicate reservation.
232 */
233#define	IFNET_HOLD	(void *)(uintptr_t)(-1)
234
235static	if_com_alloc_t *if_com_alloc[256];
236static	if_com_free_t *if_com_free[256];
237
238static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
239MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
240MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
241
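/*
 * Return the ifnet at the given index, or NULL if the slot is empty or is
 * merely reserved (IFNET_HOLD).  The caller must hold one of the ifnet list
 * locks; use ifnet_byindex_ref() to obtain a reference that outlives the
 * lock.
 */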
242struct ifnet *
243ifnet_byindex_locked(u_short idx)
244{
245
246	if (idx > V_if_index)
247		return (NULL);
248	if (V_ifindex_table[idx] == IFNET_HOLD)
249		return (NULL);
250	return (V_ifindex_table[idx]);
251}
252
253struct ifnet *
254ifnet_byindex(u_short idx)
255{
256	struct ifnet *ifp;
257
258	IFNET_RLOCK_NOSLEEP();
259	ifp = ifnet_byindex_locked(idx);
260	IFNET_RUNLOCK_NOSLEEP();
261	return (ifp);
262}
263
264struct ifnet *
265ifnet_byindex_ref(u_short idx)
266{
267	struct ifnet *ifp;
268
269	IFNET_RLOCK_NOSLEEP();
270	ifp = ifnet_byindex_locked(idx);
271	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
272		IFNET_RUNLOCK_NOSLEEP();
273		return (NULL);
274	}
275	if_ref(ifp);
276	IFNET_RUNLOCK_NOSLEEP();
277	return (ifp);
278}
279
280/*
281 * Allocate an ifindex array entry; return 0 on success or an error on
282 * failure.
283 */
284static u_short
285ifindex_alloc(void)
286{
287	u_short idx;
288
289	IFNET_WLOCK_ASSERT();
290retry:
291	/*
292	 * Try to find an empty slot below V_if_index.  If we fail, take the
293	 * next slot.
294	 */
295	for (idx = 1; idx <= V_if_index; idx++) {
296		if (V_ifindex_table[idx] == NULL)
297			break;
298	}
299
300	/* Catch if_index overflow. */
301	if (idx >= V_if_indexlim) {
302		if_grow();
303		goto retry;
304	}
305	if (idx > V_if_index)
306		V_if_index = idx;
307	return (idx);
308}
309
310static void
311ifindex_free_locked(u_short idx)
312{
313
314	IFNET_WLOCK_ASSERT();
315
316	V_ifindex_table[idx] = NULL;
317	while (V_if_index > 0 &&
318	    V_ifindex_table[V_if_index] == NULL)
319		V_if_index--;
320}
321
322static void
323ifindex_free(u_short idx)
324{
325
326	IFNET_WLOCK();
327	ifindex_free_locked(idx);
328	IFNET_WUNLOCK();
329}
330
331static void
332ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
333{
334
335	IFNET_WLOCK_ASSERT();
336
337	V_ifindex_table[idx] = ifp;
338}
339
340static void
341ifnet_setbyindex(u_short idx, struct ifnet *ifp)
342{
343
344	IFNET_WLOCK();
345	ifnet_setbyindex_locked(idx, ifp);
346	IFNET_WUNLOCK();
347}
348
349struct ifaddr *
350ifaddr_byindex(u_short idx)
351{
352	struct ifnet *ifp;
353	struct ifaddr *ifa = NULL;
354
355	IFNET_RLOCK_NOSLEEP();
356	ifp = ifnet_byindex_locked(idx);
357	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
358		ifa_ref(ifa);
359	IFNET_RUNLOCK_NOSLEEP();
360	return (ifa);
361}
362
363/*
364 * Network interface utility routines.
365 *
366 * Routines with ifa_ifwith* names take sockaddr *'s as
367 * parameters.
368 */
369
370static void
371vnet_if_init(const void *unused __unused)
372{
373
374	TAILQ_INIT(&V_ifnet);
375	TAILQ_INIT(&V_ifg_head);
376	IFNET_WLOCK();
377	if_grow();				/* create initial table */
378	IFNET_WUNLOCK();
379	vnet_if_clone_init();
380}
381VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
382    NULL);
383
384#ifdef VIMAGE
385static void
386vnet_if_uninit(const void *unused __unused)
387{
388
389	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
390	    "not empty", __func__, __LINE__, &V_ifnet));
391	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
392	    "not empty", __func__, __LINE__, &V_ifg_head));
393
394	free((caddr_t)V_ifindex_table, M_IFNET);
395}
396VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
397    vnet_if_uninit, NULL);
398
399static void
400vnet_if_return(const void *unused __unused)
401{
402	struct ifnet *ifp, *nifp;
403
404	/* Return all inherited interfaces to their parent vnets. */
405	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
406		if (ifp->if_home_vnet != ifp->if_vnet)
407			if_vmove(ifp, ifp->if_home_vnet);
408	}
409}
410VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
411    vnet_if_return, NULL);
412#endif
413
414static void
415if_grow(void)
416{
417	int oldlim;
418	u_int n;
419	struct ifnet **e;
420
421	IFNET_WLOCK_ASSERT();
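	/*
	 * The table is doubled with the write lock dropped so that the
	 * M_WAITOK allocation below may sleep.  If another thread grew the
	 * table in the meantime (V_if_indexlim changed), the now-stale
	 * allocation is simply freed after the lock is re-taken.
	 */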
422	oldlim = V_if_indexlim;
423	IFNET_WUNLOCK();
424	n = (oldlim << 1) * sizeof(*e);
425	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
426	IFNET_WLOCK();
427	if (V_if_indexlim != oldlim) {
428		free(e, M_IFNET);
429		return;
430	}
431	if (V_ifindex_table != NULL) {
432		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
433		free((caddr_t)V_ifindex_table, M_IFNET);
434	}
435	V_if_indexlim <<= 1;
436	V_ifindex_table = e;
437}
438
439/*
440 * Allocate a struct ifnet and an index for an interface.  A layer 2
441 * common structure will also be allocated if an allocation routine is
442 * registered for the passed type.
443 */
444struct ifnet *
445if_alloc(u_char type)
446{
447	struct ifnet *ifp;
448	u_short idx;
449
450	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
451	IFNET_WLOCK();
452	idx = ifindex_alloc();
453	ifnet_setbyindex_locked(idx, IFNET_HOLD);
454	IFNET_WUNLOCK();
455	ifp->if_index = idx;
456	ifp->if_type = type;
457	ifp->if_alloctype = type;
458#ifdef VIMAGE
459	ifp->if_vnet = curvnet;
460#endif
461	if (if_com_alloc[type] != NULL) {
462		ifp->if_l2com = if_com_alloc[type](type, ifp);
463		if (ifp->if_l2com == NULL) {
464			free(ifp, M_IFNET);
465			ifindex_free(idx);
466			return (NULL);
467		}
468	}
469
470	IF_ADDR_LOCK_INIT(ifp);
471	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
472	ifp->if_afdata_initialized = 0;
473	IF_AFDATA_LOCK_INIT(ifp);
474	TAILQ_INIT(&ifp->if_addrhead);
475	TAILQ_INIT(&ifp->if_multiaddrs);
476	TAILQ_INIT(&ifp->if_groups);
477#ifdef MAC
478	mac_ifnet_init(ifp);
479#endif
480	ifq_init(&ifp->if_snd, ifp);
481
482	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
483	for (int i = 0; i < IFCOUNTERS; i++)
484		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
485	ifp->if_get_counter = if_get_counter_default;
486	ifnet_setbyindex(ifp->if_index, ifp);
487	return (ifp);
488}
489
490/*
491 * Do the actual work of freeing a struct ifnet, and layer 2 common
492 * structure.  This call is made when the last reference to an
493 * interface is released.
494 */
495static void
496if_free_internal(struct ifnet *ifp)
497{
498
499	KASSERT((ifp->if_flags & IFF_DYING),
500	    ("if_free_internal: interface not dying"));
501
502	if (if_com_free[ifp->if_alloctype] != NULL)
503		if_com_free[ifp->if_alloctype](ifp->if_l2com,
504		    ifp->if_alloctype);
505
506#ifdef MAC
507	mac_ifnet_destroy(ifp);
508#endif /* MAC */
509	if (ifp->if_description != NULL)
510		free(ifp->if_description, M_IFDESCR);
511	IF_AFDATA_DESTROY(ifp);
512	IF_ADDR_LOCK_DESTROY(ifp);
513	ifq_delete(&ifp->if_snd);
514
515	for (int i = 0; i < IFCOUNTERS; i++)
516		counter_u64_free(ifp->if_counters[i]);
517
518	free(ifp, M_IFNET);
519}
520
521/*
522 * Deregister an interface and free the associated storage.
523 */
524void
525if_free(struct ifnet *ifp)
526{
527
528	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
529
530	CURVNET_SET_QUIET(ifp->if_vnet);
531	IFNET_WLOCK();
532	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
533	    ("%s: freeing unallocated ifnet", ifp->if_xname));
534
535	ifindex_free_locked(ifp->if_index);
536	IFNET_WUNLOCK();
537
538	if (refcount_release(&ifp->if_refcount))
539		if_free_internal(ifp);
540	CURVNET_RESTORE();
541}
542
543/*
544 * Interfaces to keep an ifnet type-stable despite the possibility of the
545 * driver calling if_free().  If there are additional references, we defer
546 * freeing the underlying data structure.
547 */
548void
549if_ref(struct ifnet *ifp)
550{
551
552	/* We don't assert the ifnet list lock here, but arguably should. */
553	refcount_acquire(&ifp->if_refcount);
554}
555
556void
557if_rele(struct ifnet *ifp)
558{
559
560	if (!refcount_release(&ifp->if_refcount))
561		return;
562	if_free_internal(ifp);
563}
564
565void
566ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
567{
568
569	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
570
571	if (ifq->ifq_maxlen == 0)
572		ifq->ifq_maxlen = ifqmaxlen;
573
574	ifq->altq_type = 0;
575	ifq->altq_disc = NULL;
576	ifq->altq_flags &= ALTQF_CANTCHANGE;
577	ifq->altq_tbr  = NULL;
578	ifq->altq_ifp  = ifp;
579}
580
581void
582ifq_delete(struct ifaltq *ifq)
583{
584	mtx_destroy(&ifq->ifq_mtx);
585}
586
587/*
588 * Perform generic interface initialization tasks and attach the interface
589 * to the list of "active" interfaces.  If vmove flag is set on entry
590 * to if_attach_internal(), perform only a limited subset of initialization
591 * tasks, given that we are moving an ifnet that has already been fully
592 * initialized from one vnet to another.
593 *
594 * Note that if_detach_internal() removes group membership unconditionally
595 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
596 * Thus, when if_vmove() is applied to a cloned interface, group membership
597 * is lost, even though a cloned interface always joins the group named
598 * ifc->ifc_name.  To recover this after if_detach_internal() and
599 * if_attach_internal(), the cloner should be specified to
600 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
601 * attempts to join a group whose name is ifc->ifc_name.
602 *
603 * XXX:
604 *  - The decision to return void and thus require this function to
605 *    succeed is questionable.
606 *  - We should probably do more sanity checking.  For instance we don't
607 *    do anything to insure if_xname is unique or non-empty.
608 *    do anything to ensure if_xname is unique or non-empty.
609void
610if_attach(struct ifnet *ifp)
611{
612
613	if_attach_internal(ifp, 0, NULL);
614}
615
616/*
617 * Compute the least common TSO limit.
618 */
619void
620if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
621{
622	/*
623	 * 1) If there is no limit currently, take the limit from
624	 * the network adapter.
625	 *
626	 * 2) If the network adapter has a limit below the current
627	 * limit, apply it.
628	 */
629	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
630	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
631		pmax->tsomaxbytes = ifp->if_hw_tsomax;
632	}
633	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
634	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
635		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
636	}
637	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
638	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
639		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
640	}
641}
642
643/*
644 * Update TSO limit of a network adapter.
645 *
646 * Returns zero if no change. Else non-zero.
647 */
648int
649if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
650{
651	int retval = 0;
652	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
653		ifp->if_hw_tsomax = pmax->tsomaxbytes;
654		retval++;
655	}
656	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
657		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
658		retval++;
659	}
660	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
661		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
662		retval++;
663	}
664	return (retval);
665}
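/*
 * Typical usage (sketch; "port_ifp" and "aggr_ifp" are hypothetical names):
 * an aggregating driver such as lagg(4) zeroes a struct ifnet_hw_tsomax,
 * folds each member port into it with if_hw_tsomax_common(), and finally
 * applies the result to its own interface with if_hw_tsomax_update():
 *
 *	struct ifnet_hw_tsomax hw;
 *
 *	memset(&hw, 0, sizeof(hw));
 *	for each member port:
 *		if_hw_tsomax_common(port_ifp, &hw);
 *	if_hw_tsomax_update(aggr_ifp, &hw);
 */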
666
667static void
668if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
669{
670	unsigned socksize, ifasize;
671	int namelen, masklen;
672	struct sockaddr_dl *sdl;
673	struct ifaddr *ifa;
674
675	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
676		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
677		    ifp->if_xname);
678
679#ifdef VIMAGE
680	ifp->if_vnet = curvnet;
681	if (ifp->if_home_vnet == NULL)
682		ifp->if_home_vnet = curvnet;
683#endif
684
685	if_addgroup(ifp, IFG_ALL);
686
687	/* Restore group membership for cloned interfaces. */
688	if (vmove && ifc != NULL)
689		if_clone_addgroup(ifp, ifc);
690
691	getmicrotime(&ifp->if_lastchange);
692	ifp->if_epoch = time_uptime;
693
694	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
695	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
696	    ("transmit and qflush must both either be set or both be NULL"));
697	if (ifp->if_transmit == NULL) {
698		ifp->if_transmit = if_transmit;
699		ifp->if_qflush = if_qflush;
700	}
701	if (ifp->if_input == NULL)
702		ifp->if_input = if_input_default;
703
704	if (ifp->if_requestencap == NULL)
705		ifp->if_requestencap = if_requestencap_default;
706
707	if (!vmove) {
708#ifdef MAC
709		mac_ifnet_create(ifp);
710#endif
711
712		/*
713		 * Create a Link Level name for this device.
714		 */
715		namelen = strlen(ifp->if_xname);
716		/*
717		 * Always save enough space for any possible name so we
718		 * can do a rename in place later.
719		 */
720		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
721		socksize = masklen + ifp->if_addrlen;
722		if (socksize < sizeof(*sdl))
723			socksize = sizeof(*sdl);
724		socksize = roundup2(socksize, sizeof(long));
725		ifasize = sizeof(*ifa) + 2 * socksize;
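		/*
		 * The ifaddr and its two sockaddr_dl's (address and netmask)
		 * come from one allocation: the address immediately follows
		 * the ifaddr and the netmask follows the address.
		 */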
726		ifa = ifa_alloc(ifasize, M_WAITOK);
727		sdl = (struct sockaddr_dl *)(ifa + 1);
728		sdl->sdl_len = socksize;
729		sdl->sdl_family = AF_LINK;
730		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
731		sdl->sdl_nlen = namelen;
732		sdl->sdl_index = ifp->if_index;
733		sdl->sdl_type = ifp->if_type;
734		ifp->if_addr = ifa;
735		ifa->ifa_ifp = ifp;
736		ifa->ifa_rtrequest = link_rtrequest;
737		ifa->ifa_addr = (struct sockaddr *)sdl;
738		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
739		ifa->ifa_netmask = (struct sockaddr *)sdl;
740		sdl->sdl_len = masklen;
741		while (namelen != 0)
742			sdl->sdl_data[--namelen] = 0xff;
743		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
744		/* Reliably crash if used uninitialized. */
745		ifp->if_broadcastaddr = NULL;
746
747		if (ifp->if_type == IFT_ETHER) {
748			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
749			    M_WAITOK | M_ZERO);
750		}
751
752#if defined(INET) || defined(INET6)
753		/* Use defaults for TSO, if nothing is set */
754		if (ifp->if_hw_tsomax == 0 &&
755		    ifp->if_hw_tsomaxsegcount == 0 &&
756		    ifp->if_hw_tsomaxsegsize == 0) {
757			/*
758			 * The TSO defaults need to be such that an
759			 * NFS mbuf list of 35 mbufs totalling just
760			 * below 64K works and that a chain of mbufs
761			 * can be defragged into at most 32 segments:
762			 */
763			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
764			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
765			ifp->if_hw_tsomaxsegcount = 35;
766			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */
767
768			/* XXX some drivers set IFCAP_TSO after ethernet attach */
769			if (ifp->if_capabilities & IFCAP_TSO) {
770				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
771				    ifp->if_hw_tsomax,
772				    ifp->if_hw_tsomaxsegcount,
773				    ifp->if_hw_tsomaxsegsize);
774			}
775		}
776#endif
777	}
778#ifdef VIMAGE
779	else {
780		/*
781		 * Update the interface index in the link layer address
782		 * of the interface.
783		 */
784		for (ifa = ifp->if_addr; ifa != NULL;
785		    ifa = TAILQ_NEXT(ifa, ifa_link)) {
786			if (ifa->ifa_addr->sa_family == AF_LINK) {
787				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
788				sdl->sdl_index = ifp->if_index;
789			}
790		}
791	}
792#endif
793
794	IFNET_WLOCK();
795	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
796#ifdef VIMAGE
797	curvnet->vnet_ifcnt++;
798#endif
799	IFNET_WUNLOCK();
800
801	if (domain_init_status >= 2)
802		if_attachdomain1(ifp);
803
804	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
805	if (IS_DEFAULT_VNET(curvnet))
806		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
807
808	/* Announce the interface. */
809	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
810}
811
812static void
813if_attachdomain(void *dummy)
814{
815	struct ifnet *ifp;
816
817	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
818		if_attachdomain1(ifp);
819}
820SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
821    if_attachdomain, NULL);
822
823static void
824if_attachdomain1(struct ifnet *ifp)
825{
826	struct domain *dp;
827
828	/*
829	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
830	 * cannot fully lock ifp->if_afdata initialization.
831	 */
832	IF_AFDATA_LOCK(ifp);
833	if (ifp->if_afdata_initialized >= domain_init_status) {
834		IF_AFDATA_UNLOCK(ifp);
835		log(LOG_WARNING, "%s called more than once on %s\n",
836		    __func__, ifp->if_xname);
837		return;
838	}
839	ifp->if_afdata_initialized = domain_init_status;
840	IF_AFDATA_UNLOCK(ifp);
841
842	/* address family dependent data region */
843	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
844	for (dp = domains; dp; dp = dp->dom_next) {
845		if (dp->dom_ifattach)
846			ifp->if_afdata[dp->dom_family] =
847			    (*dp->dom_ifattach)(ifp);
848	}
849}
850
851/*
852 * Remove any unicast or broadcast network addresses from an interface.
853 */
854void
855if_purgeaddrs(struct ifnet *ifp)
856{
857	struct ifaddr *ifa, *next;
858
859	/* XXX cannot hold IF_ADDR_WLOCK over called functions. */
860	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
861		if (ifa->ifa_addr->sa_family == AF_LINK)
862			continue;
863#ifdef INET
864		/* XXX: Ugly!! ad hoc just for INET */
865		if (ifa->ifa_addr->sa_family == AF_INET) {
866			struct ifaliasreq ifr;
867
868			bzero(&ifr, sizeof(ifr));
869			ifr.ifra_addr = *ifa->ifa_addr;
870			if (ifa->ifa_dstaddr)
871				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
872			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
873			    NULL) == 0)
874				continue;
875		}
876#endif /* INET */
877#ifdef INET6
878		if (ifa->ifa_addr->sa_family == AF_INET6) {
879			in6_purgeaddr(ifa);
880			/* ifp_addrhead is already updated */
881			continue;
882		}
883#endif /* INET6 */
884		IF_ADDR_WLOCK(ifp);
885		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
886		IF_ADDR_WUNLOCK(ifp);
887		ifa_free(ifa);
888	}
889}
890
891/*
892 * Remove any multicast network addresses from an interface when an ifnet
893 * is going away.
894 */
895static void
896if_purgemaddrs(struct ifnet *ifp)
897{
898	struct ifmultiaddr *ifma;
899	struct ifmultiaddr *next;
900
901	IF_ADDR_WLOCK(ifp);
902	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
903		if_delmulti_locked(ifp, ifma, 1);
904	IF_ADDR_WUNLOCK(ifp);
905}
906
907/*
908 * Detach an interface, removing it from the list of "active" interfaces.
909 * If vmove flag is set on entry to if_detach_internal(), perform only a
910 * limited subset of cleanup tasks, given that we are moving an ifnet from
911 * one vnet to another, where it must be fully operational.
912 *
913 * XXXRW: There are some significant questions about event ordering, and
914 * how to prevent things from starting to use the interface during detach.
915 */
916void
917if_detach(struct ifnet *ifp)
918{
919
920	CURVNET_SET_QUIET(ifp->if_vnet);
921	if_detach_internal(ifp, 0, NULL);
922	CURVNET_RESTORE();
923}
924
925/*
926 * The vmove flag, if set, indicates that we are called from a callpath
927 * that is moving an interface to a different vnet instance.
928 *
929 * The shutdown flag, if set, indicates that we are called in the
930 * process of shutting down a vnet instance.  Currently only the
931 * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
932 * on a vnet instance shutdown without this flag being set, e.g., when
933 * the cloned interfaces are destroyed as the first step of teardown.
934 */
935static int
936if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
937{
938	struct ifaddr *ifa;
939	int i;
940	struct domain *dp;
941 	struct ifnet *iter;
942 	int found = 0;
943#ifdef VIMAGE
944	int shutdown;
945
946	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
947		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
948#endif
949	IFNET_WLOCK();
950	TAILQ_FOREACH(iter, &V_ifnet, if_link)
951		if (iter == ifp) {
952			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
953			found = 1;
954			break;
955		}
956	IFNET_WUNLOCK();
957	if (!found) {
958		/*
959		 * While we would want to panic here, we cannot
960		 * guarantee that the interface is indeed still on
961		 * the list given we don't hold locks all the way.
962		 */
963		return (ENOENT);
964#if 0
965		if (vmove)
966			panic("%s: ifp=%p not on the ifnet tailq %p",
967			    __func__, ifp, &V_ifnet);
968		else
969			return; /* XXX this should panic as well? */
970#endif
971	}
972
973	/*
974	 * At this point we know the interface still was on the ifnet list
975	 * and we removed it so we are in a stable state.
976	 */
977#ifdef VIMAGE
978	curvnet->vnet_ifcnt--;
979#endif
980
981	/*
982	 * In any case (destroy or vmove) detach us from the groups
983	 * and remove/wait for pending events on the taskq.
984	 * XXX-BZ in theory an interface could still enqueue a taskq change?
985	 */
986	if_delgroups(ifp);
987
988	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
989
990	/*
991	 * Check if this is a cloned interface or not.  This must be done even
992	 * when shutting down; otherwise if_vmove_reclaim() would move the ifp
993	 * and if_clone_addgroup() would be handed a corrupted string from a
994	 * gibberish pointer.
995	 */
996	if (vmove && ifcp != NULL)
997		*ifcp = if_clone_findifc(ifp);
998
999	if_down(ifp);
1000
1001#ifdef VIMAGE
1002	/*
1003	 * On VNET shutdown abort here as the stack teardown will do all
1004	 * the work top-down for us.
1005	 */
1006	if (shutdown) {
1007		/*
1008		 * In case of a vmove we are done here without error.
1009		 * Signaling an error here would lead to the same abort as
1010		 * not finding the ifnet at all.
1011		 * if_detach() calls us in a void context and does not care
1012		 * about an early abort notification, so life is splendid :)
1013		 */
1014		goto finish_vnet_shutdown;
1015	}
1016#endif
1017
1018	/*
1019	 * At this point we are not tearing down a VNET and are either
1020	 * going to destroy or vmove the interface and have to cleanup
1021	 * accordingly.
1022	 */
1023
1024	/*
1025	 * Remove routes and flush queues.
1026	 */
1027#ifdef ALTQ
1028	if (ALTQ_IS_ENABLED(&ifp->if_snd))
1029		altq_disable(&ifp->if_snd);
1030	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1031		altq_detach(&ifp->if_snd);
1032#endif
1033
1034	if_purgeaddrs(ifp);
1035
1036#ifdef INET
1037	in_ifdetach(ifp);
1038#endif
1039
1040#ifdef INET6
1041	/*
1042	 * Remove all IPv6 kernel structs related to ifp.  This should be done
1043	 * before removing routing entries below, since IPv6 interface direct
1044	 * routes are expected to be removed by the IPv6-specific kernel API.
1045	 * Otherwise, the kernel will detect the inconsistency and complain about it.
1046	 */
1047	in6_ifdetach(ifp);
1048#endif
1049	if_purgemaddrs(ifp);
1050
1051	/* Announce that the interface is gone. */
1052	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1053	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1054	if (IS_DEFAULT_VNET(curvnet))
1055		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
1056
1057	if (!vmove) {
1058		/*
1059		 * Prevent further calls into the device driver via ifnet.
1060		 */
1061		if_dead(ifp);
1062
1063		/*
1064		 * Remove link ifaddr pointer and maybe decrement if_index.
1065		 * Clean up all addresses.
1066		 */
1067		free(ifp->if_hw_addr, M_IFADDR);
1068		ifp->if_hw_addr = NULL;
1069		ifp->if_addr = NULL;
1070
1071		/* We can now free link ifaddr. */
1072		IF_ADDR_WLOCK(ifp);
1073		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
1074			ifa = TAILQ_FIRST(&ifp->if_addrhead);
1075			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
1076			IF_ADDR_WUNLOCK(ifp);
1077			ifa_free(ifa);
1078		} else
1079			IF_ADDR_WUNLOCK(ifp);
1080	}
1081
1082	rt_flushifroutes(ifp);
1083
1084#ifdef VIMAGE
1085finish_vnet_shutdown:
1086#endif
1087	/*
1088	 * We cannot hold the lock over dom_ifdetach calls as they might
1089	 * sleep, for example trying to drain a callout, thus open up the
1090	 * theoretical race with re-attaching.
1091	 */
1092	IF_AFDATA_LOCK(ifp);
1093	i = ifp->if_afdata_initialized;
1094	ifp->if_afdata_initialized = 0;
1095	IF_AFDATA_UNLOCK(ifp);
1096	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
1097		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
1098			(*dp->dom_ifdetach)(ifp,
1099			    ifp->if_afdata[dp->dom_family]);
1100			ifp->if_afdata[dp->dom_family] = NULL;
1101		}
1102	}
1103
1104	return (0);
1105}
1106
1107#ifdef VIMAGE
1108/*
1109 * if_vmove() performs a limited version of if_detach() in current
1110 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
1111 * An attempt is made to shrink if_index in current vnet, find an
1112 * unused if_index in target vnet and calls if_grow() if necessary,
1113 * and finally find an unused if_xname for the target vnet.
1114 */
1115static void
1116if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
1117{
1118	struct if_clone *ifc;
1119	u_int bif_dlt, bif_hdrlen;
1120	int rc;
1121
1122 	/*
1123	 * if_detach_internal() will call the eventhandler to notify
1124	 * interface departure.  That will detach if_bpf.  We need to
1125	 * safe the dlt and hdrlen so we can re-attach it later.
1126	 * save the dlt and hdrlen so we can re-attach it later.
1127	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
1128
1129	/*
1130	 * Detach from current vnet, but preserve LLADDR info, do not
1131	 * mark as dead etc. so that the ifnet can be reattached later.
1132	 * If we cannot find it, we lost the race to someone else.
1133	 */
1134	rc = if_detach_internal(ifp, 1, &ifc);
1135	if (rc != 0)
1136		return;
1137
1138	/*
1139	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
1140	 * the if_index for that vnet if possible.
1141	 *
1142	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
1143	 * or we'd lock on one vnet and unlock on another.
1144	 */
1145	IFNET_WLOCK();
1146	ifindex_free_locked(ifp->if_index);
1147	IFNET_WUNLOCK();
1148
1149	/*
1150	 * Perform interface-specific reassignment tasks, if provided by
1151	 * the driver.
1152	 */
1153	if (ifp->if_reassign != NULL)
1154		ifp->if_reassign(ifp, new_vnet, NULL);
1155
1156	/*
1157	 * Switch to the context of the target vnet.
1158	 */
1159	CURVNET_SET_QUIET(new_vnet);
1160
1161	IFNET_WLOCK();
1162	ifp->if_index = ifindex_alloc();
1163	ifnet_setbyindex_locked(ifp->if_index, ifp);
1164	IFNET_WUNLOCK();
1165
1166	if_attach_internal(ifp, 1, ifc);
1167
1168	if (ifp->if_bpf == NULL)
1169		bpfattach(ifp, bif_dlt, bif_hdrlen);
1170
1171	CURVNET_RESTORE();
1172}
1173
1174/*
1175 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1176 */
1177static int
1178if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1179{
1180	struct prison *pr;
1181	struct ifnet *difp;
1182	int shutdown;
1183
1184	/* Try to find the prison within our visibility. */
1185	sx_slock(&allprison_lock);
1186	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1187	sx_sunlock(&allprison_lock);
1188	if (pr == NULL)
1189		return (ENXIO);
1190	prison_hold_locked(pr);
1191	mtx_unlock(&pr->pr_mtx);
1192
1193	/* Do not try to move the iface from and to the same prison. */
1194	if (pr->pr_vnet == ifp->if_vnet) {
1195		prison_free(pr);
1196		return (EEXIST);
1197	}
1198
1199	/* Make sure the named iface does not exist in the destination prison/vnet. */
1200	/* XXX Lock interfaces to avoid races. */
1201	CURVNET_SET_QUIET(pr->pr_vnet);
1202	difp = ifunit(ifname);
1203	if (difp != NULL) {
1204		CURVNET_RESTORE();
1205		prison_free(pr);
1206		return (EEXIST);
1207	}
1208
1209	/* Make sure the VNET is stable. */
1210	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1211		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1212	if (shutdown) {
1213		CURVNET_RESTORE();
1214		prison_free(pr);
1215		return (EBUSY);
1216	}
1217	CURVNET_RESTORE();
1218
1219	/* Move the interface into the child jail/vnet. */
1220	if_vmove(ifp, pr->pr_vnet);
1221
1222	/* Report the new if_xname back to userland. */
1223	sprintf(ifname, "%s", ifp->if_xname);
1224
1225	prison_free(pr);
1226	return (0);
1227}
1228
1229static int
1230if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1231{
1232	struct prison *pr;
1233	struct vnet *vnet_dst;
1234	struct ifnet *ifp;
1235 	int shutdown;
1236
1237	/* Try to find the prison within our visibility. */
1238	sx_slock(&allprison_lock);
1239	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1240	sx_sunlock(&allprison_lock);
1241	if (pr == NULL)
1242		return (ENXIO);
1243	prison_hold_locked(pr);
1244	mtx_unlock(&pr->pr_mtx);
1245
1246	/* Make sure the named iface exists in the source prison/vnet. */
1247	CURVNET_SET(pr->pr_vnet);
1248	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1249	if (ifp == NULL) {
1250		CURVNET_RESTORE();
1251		prison_free(pr);
1252		return (ENXIO);
1253	}
1254
1255	/* Do not try to move the iface from and to the same prison. */
1256	vnet_dst = TD_TO_VNET(td);
1257	if (vnet_dst == ifp->if_vnet) {
1258		CURVNET_RESTORE();
1259		prison_free(pr);
1260		return (EEXIST);
1261	}
1262
1263	/* Make sure the VNET is stable. */
1264	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
1265		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
1266	if (shutdown) {
1267		CURVNET_RESTORE();
1268		prison_free(pr);
1269		return (EBUSY);
1270	}
1271
1272	/* Get interface back from child jail/vnet. */
1273	if_vmove(ifp, vnet_dst);
1274	CURVNET_RESTORE();
1275
1276	/* Report the new if_xname back to userland. */
1277	sprintf(ifname, "%s", ifp->if_xname);
1278
1279	prison_free(pr);
1280	return (0);
1281}
1282#endif /* VIMAGE */
1283
1284/*
1285 * Add a group to an interface
1286 */
1287int
1288if_addgroup(struct ifnet *ifp, const char *groupname)
1289{
1290	struct ifg_list		*ifgl;
1291	struct ifg_group	*ifg = NULL;
1292	struct ifg_member	*ifgm;
1293	int 			 new = 0;
1294
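	/*
	 * Group names may not end in a digit: consumers that accept either
	 * an interface name or a group name (pf(4), for instance) could not
	 * otherwise tell a group "foo0" apart from an interface "foo0".
	 */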
1295	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1296	    groupname[strlen(groupname) - 1] <= '9')
1297		return (EINVAL);
1298
1299	IFNET_WLOCK();
1300	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1301		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1302			IFNET_WUNLOCK();
1303			return (EEXIST);
1304		}
1305
1306	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1307	    M_NOWAIT)) == NULL) {
1308	    	IFNET_WUNLOCK();
1309		return (ENOMEM);
1310	}
1311
1312	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1313	    M_TEMP, M_NOWAIT)) == NULL) {
1314		free(ifgl, M_TEMP);
1315		IFNET_WUNLOCK();
1316		return (ENOMEM);
1317	}
1318
1319	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1320		if (!strcmp(ifg->ifg_group, groupname))
1321			break;
1322
1323	if (ifg == NULL) {
1324		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1325		    M_TEMP, M_NOWAIT)) == NULL) {
1326			free(ifgl, M_TEMP);
1327			free(ifgm, M_TEMP);
1328			IFNET_WUNLOCK();
1329			return (ENOMEM);
1330		}
1331		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1332		ifg->ifg_refcnt = 0;
1333		TAILQ_INIT(&ifg->ifg_members);
1334		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1335		new = 1;
1336	}
1337
1338	ifg->ifg_refcnt++;
1339	ifgl->ifgl_group = ifg;
1340	ifgm->ifgm_ifp = ifp;
1341
1342	IF_ADDR_WLOCK(ifp);
1343	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1344	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1345	IF_ADDR_WUNLOCK(ifp);
1346
1347	IFNET_WUNLOCK();
1348
1349	if (new)
1350		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1351	EVENTHANDLER_INVOKE(group_change_event, groupname);
1352
1353	return (0);
1354}
1355
1356/*
1357 * Remove a group from an interface
1358 */
1359int
1360if_delgroup(struct ifnet *ifp, const char *groupname)
1361{
1362	struct ifg_list		*ifgl;
1363	struct ifg_member	*ifgm;
1364
1365	IFNET_WLOCK();
1366	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1367		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1368			break;
1369	if (ifgl == NULL) {
1370		IFNET_WUNLOCK();
1371		return (ENOENT);
1372	}
1373
1374	IF_ADDR_WLOCK(ifp);
1375	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1376	IF_ADDR_WUNLOCK(ifp);
1377
1378	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1379		if (ifgm->ifgm_ifp == ifp)
1380			break;
1381
1382	if (ifgm != NULL) {
1383		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1384		free(ifgm, M_TEMP);
1385	}
1386
1387	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1388		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1389		IFNET_WUNLOCK();
1390		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1391		free(ifgl->ifgl_group, M_TEMP);
1392	} else
1393		IFNET_WUNLOCK();
1394
1395	free(ifgl, M_TEMP);
1396
1397	EVENTHANDLER_INVOKE(group_change_event, groupname);
1398
1399	return (0);
1400}
1401
1402/*
1403 * Remove an interface from all groups
1404 */
1405static void
1406if_delgroups(struct ifnet *ifp)
1407{
1408	struct ifg_list		*ifgl;
1409	struct ifg_member	*ifgm;
1410	char groupname[IFNAMSIZ];
1411
1412	IFNET_WLOCK();
1413	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1414		ifgl = TAILQ_FIRST(&ifp->if_groups);
1415
1416		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1417
1418		IF_ADDR_WLOCK(ifp);
1419		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1420		IF_ADDR_WUNLOCK(ifp);
1421
1422		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1423			if (ifgm->ifgm_ifp == ifp)
1424				break;
1425
1426		if (ifgm != NULL) {
1427			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1428			    ifgm_next);
1429			free(ifgm, M_TEMP);
1430		}
1431
1432		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1433			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1434			IFNET_WUNLOCK();
1435			EVENTHANDLER_INVOKE(group_detach_event,
1436			    ifgl->ifgl_group);
1437			free(ifgl->ifgl_group, M_TEMP);
1438		} else
1439			IFNET_WUNLOCK();
1440
1441		free(ifgl, M_TEMP);
1442
1443		EVENTHANDLER_INVOKE(group_change_event, groupname);
1444
1445		IFNET_WLOCK();
1446	}
1447	IFNET_WUNLOCK();
1448}
1449
1450/*
1451 * Stores all groups from an interface in memory pointed
1452 * to by data
1453 */
1454static int
1455if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
1456{
1457	int			 len, error;
1458	struct ifg_list		*ifgl;
1459	struct ifg_req		 ifgrq, *ifgp;
1460	struct ifgroupreq	*ifgr = data;
1461
1462	if (ifgr->ifgr_len == 0) {
1463		IF_ADDR_RLOCK(ifp);
1464		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1465			ifgr->ifgr_len += sizeof(struct ifg_req);
1466		IF_ADDR_RUNLOCK(ifp);
1467		return (0);
1468	}
1469
1470	len = ifgr->ifgr_len;
1471	ifgp = ifgr->ifgr_groups;
1472	/* XXX: wire */
1473	IF_ADDR_RLOCK(ifp);
1474	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1475		if (len < sizeof(ifgrq)) {
1476			IF_ADDR_RUNLOCK(ifp);
1477			return (EINVAL);
1478		}
1479		bzero(&ifgrq, sizeof ifgrq);
1480		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1481		    sizeof(ifgrq.ifgrq_group));
1482		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1483		    	IF_ADDR_RUNLOCK(ifp);
1484			return (error);
1485		}
1486		len -= sizeof(ifgrq);
1487		ifgp++;
1488	}
1489	IF_ADDR_RUNLOCK(ifp);
1490
1491	return (0);
1492}
1493
1494/*
1495 * Stores all members of a group in memory pointed to by data
1496 */
1497static int
1498if_getgroupmembers(struct ifgroupreq *data)
1499{
1500	struct ifgroupreq	*ifgr = data;
1501	struct ifg_group	*ifg;
1502	struct ifg_member	*ifgm;
1503	struct ifg_req		 ifgrq, *ifgp;
1504	int			 len, error;
1505
1506	IFNET_RLOCK();
1507	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1508		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1509			break;
1510	if (ifg == NULL) {
1511		IFNET_RUNLOCK();
1512		return (ENOENT);
1513	}
1514
1515	if (ifgr->ifgr_len == 0) {
1516		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1517			ifgr->ifgr_len += sizeof(ifgrq);
1518		IFNET_RUNLOCK();
1519		return (0);
1520	}
1521
1522	len = ifgr->ifgr_len;
1523	ifgp = ifgr->ifgr_groups;
1524	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1525		if (len < sizeof(ifgrq)) {
1526			IFNET_RUNLOCK();
1527			return (EINVAL);
1528		}
1529		bzero(&ifgrq, sizeof ifgrq);
1530		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1531		    sizeof(ifgrq.ifgrq_member));
1532		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1533			IFNET_RUNLOCK();
1534			return (error);
1535		}
1536		len -= sizeof(ifgrq);
1537		ifgp++;
1538	}
1539	IFNET_RUNLOCK();
1540
1541	return (0);
1542}
1543
1544/*
1545 * Return counter values from counter(9)s stored in ifnet.
1546 */
1547uint64_t
1548if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
1549{
1550
1551	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1552
1553	return (counter_u64_fetch(ifp->if_counters[cnt]));
1554}
1555
1556/*
1557 * Increase an ifnet counter. Usually used for counters shared
1558 * between the stack and a driver, but the function supports them all.
1559 */
1560void
1561if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
1562{
1563
1564	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1565
1566	counter_u64_add(ifp->if_counters[cnt], inc);
1567}
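/*
 * For example, a driver that must drop a packet on its transmit path would
 * typically account for it with:
 *
 *	if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
 */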
1568
1569/*
1570 * Copy data from ifnet to userland API structure if_data.
1571 */
1572void
1573if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1574{
1575
1576	ifd->ifi_type = ifp->if_type;
1577	ifd->ifi_physical = 0;
1578	ifd->ifi_addrlen = ifp->if_addrlen;
1579	ifd->ifi_hdrlen = ifp->if_hdrlen;
1580	ifd->ifi_link_state = ifp->if_link_state;
1581	ifd->ifi_vhid = 0;
1582	ifd->ifi_datalen = sizeof(struct if_data);
1583	ifd->ifi_mtu = ifp->if_mtu;
1584	ifd->ifi_metric = ifp->if_metric;
1585	ifd->ifi_baudrate = ifp->if_baudrate;
1586	ifd->ifi_hwassist = ifp->if_hwassist;
1587	ifd->ifi_epoch = ifp->if_epoch;
1588	ifd->ifi_lastchange = ifp->if_lastchange;
1589
1590	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1591	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1592	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1593	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1594	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1595	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1596	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1597	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1598	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1599	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1600	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1601	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1602}
1603
1604/*
1605 * Wrapper functions for struct ifnet address list locking macros.  These are
1606 * used by kernel modules to avoid encoding programming interface or binary
1607 * interface assumptions that may be violated when kernel-internal locking
1608 * approaches change.
1609 */
1610void
1611if_addr_rlock(struct ifnet *ifp)
1612{
1613
1614	IF_ADDR_RLOCK(ifp);
1615}
1616
1617void
1618if_addr_runlock(struct ifnet *ifp)
1619{
1620
1621	IF_ADDR_RUNLOCK(ifp);
1622}
1623
1624void
1625if_maddr_rlock(if_t ifp)
1626{
1627
1628	IF_ADDR_RLOCK((struct ifnet *)ifp);
1629}
1630
1631void
1632if_maddr_runlock(if_t ifp)
1633{
1634
1635	IF_ADDR_RUNLOCK((struct ifnet *)ifp);
1636}
1637
1638/*
1639 * Initialization, destruction and refcounting functions for ifaddrs.
1640 */
1641struct ifaddr *
1642ifa_alloc(size_t size, int flags)
1643{
1644	struct ifaddr *ifa;
1645
1646	KASSERT(size >= sizeof(struct ifaddr),
1647	    ("%s: invalid size %zu", __func__, size));
1648
1649	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1650	if (ifa == NULL)
1651		return (NULL);
1652
1653	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1654		goto fail;
1655	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1656		goto fail;
1657	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1658		goto fail;
1659	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1660		goto fail;
1661
1662	refcount_init(&ifa->ifa_refcnt, 1);
1663
1664	return (ifa);
1665
1666fail:
1667	/* free(NULL) is okay */
1668	counter_u64_free(ifa->ifa_opackets);
1669	counter_u64_free(ifa->ifa_ipackets);
1670	counter_u64_free(ifa->ifa_obytes);
1671	counter_u64_free(ifa->ifa_ibytes);
1672	free(ifa, M_IFADDR);
1673
1674	return (NULL);
1675}
1676
1677void
1678ifa_ref(struct ifaddr *ifa)
1679{
1680
1681	refcount_acquire(&ifa->ifa_refcnt);
1682}
1683
1684void
1685ifa_free(struct ifaddr *ifa)
1686{
1687
1688	if (refcount_release(&ifa->ifa_refcnt)) {
1689		counter_u64_free(ifa->ifa_opackets);
1690		counter_u64_free(ifa->ifa_ipackets);
1691		counter_u64_free(ifa->ifa_obytes);
1692		counter_u64_free(ifa->ifa_ibytes);
1693		free(ifa, M_IFADDR);
1694	}
1695}
1696
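/*
 * Install, change or remove the loopback (lo0) host route that points a
 * locally configured address back at the local stack; used by the
 * ifa_{add,del,switch}_loopback_route() wrappers below.
 */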
1697static int
1698ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
1699    struct sockaddr *ia)
1700{
1701	int error;
1702	struct rt_addrinfo info;
1703	struct sockaddr_dl null_sdl;
1704	struct ifnet *ifp;
1705
1706	ifp = ifa->ifa_ifp;
1707
1708	bzero(&info, sizeof(info));
1709	if (cmd != RTM_DELETE)
1710		info.rti_ifp = V_loif;
1711	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
1712	info.rti_info[RTAX_DST] = ia;
1713	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
1714	link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
1715
1716	error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
1717
1718	if (error != 0)
1719		log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
1720		    __func__, otype, if_name(ifp), error);
1721
1722	return (error);
1723}
1724
1725int
1726ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1727{
1728
1729	return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
1730}
1731
1732int
1733ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1734{
1735
1736	return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
1737}
1738
1739int
1740ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
1741{
1742
1743	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
1744}
1745
1746/*
1747 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
1748 * structs used to represent other address families, it is necessary
1749 * to perform a different comparison.
1750 */
1751
1752#define	sa_dl_equal(a1, a2)	\
1753	((((const struct sockaddr_dl *)(a1))->sdl_len ==		\
1754	 ((const struct sockaddr_dl *)(a2))->sdl_len) &&		\
1755	 (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
1756	       CLLADDR((const struct sockaddr_dl *)(a2)),		\
1757	       ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
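/*
 * Note that sa_dl_equal() compares the sdl_len and the link-layer address
 * bytes (CLLADDR(), sdl_alen of them); the interface name portion of
 * sdl_data is deliberately ignored.
 */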
1758
1759/*
1760 * Locate an interface based on a complete address.
1761 */
1762/*ARGSUSED*/
1763static struct ifaddr *
1764ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
1765{
1766	struct ifnet *ifp;
1767	struct ifaddr *ifa;
1768
1769	IFNET_RLOCK_NOSLEEP();
1770	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1771		IF_ADDR_RLOCK(ifp);
1772		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1773			if (ifa->ifa_addr->sa_family != addr->sa_family)
1774				continue;
1775			if (sa_equal(addr, ifa->ifa_addr)) {
1776				if (getref)
1777					ifa_ref(ifa);
1778				IF_ADDR_RUNLOCK(ifp);
1779				goto done;
1780			}
1781			/* IP6 doesn't have broadcast */
1782			if ((ifp->if_flags & IFF_BROADCAST) &&
1783			    ifa->ifa_broadaddr &&
1784			    ifa->ifa_broadaddr->sa_len != 0 &&
1785			    sa_equal(ifa->ifa_broadaddr, addr)) {
1786				if (getref)
1787					ifa_ref(ifa);
1788				IF_ADDR_RUNLOCK(ifp);
1789				goto done;
1790			}
1791		}
1792		IF_ADDR_RUNLOCK(ifp);
1793	}
1794	ifa = NULL;
1795done:
1796	IFNET_RUNLOCK_NOSLEEP();
1797	return (ifa);
1798}
1799
1800struct ifaddr *
1801ifa_ifwithaddr(const struct sockaddr *addr)
1802{
1803
1804	return (ifa_ifwithaddr_internal(addr, 1));
1805}
1806
1807int
1808ifa_ifwithaddr_check(const struct sockaddr *addr)
1809{
1810
1811	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
1812}
1813
1814/*
1815 * Locate an interface based on the broadcast address.
1816 */
1817/* ARGSUSED */
1818struct ifaddr *
1819ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1820{
1821	struct ifnet *ifp;
1822	struct ifaddr *ifa;
1823
1824	IFNET_RLOCK_NOSLEEP();
1825	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1826		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1827			continue;
1828		IF_ADDR_RLOCK(ifp);
1829		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1830			if (ifa->ifa_addr->sa_family != addr->sa_family)
1831				continue;
1832			if ((ifp->if_flags & IFF_BROADCAST) &&
1833			    ifa->ifa_broadaddr &&
1834			    ifa->ifa_broadaddr->sa_len != 0 &&
1835			    sa_equal(ifa->ifa_broadaddr, addr)) {
1836				ifa_ref(ifa);
1837				IF_ADDR_RUNLOCK(ifp);
1838				goto done;
1839			}
1840		}
1841		IF_ADDR_RUNLOCK(ifp);
1842	}
1843	ifa = NULL;
1844done:
1845	IFNET_RUNLOCK_NOSLEEP();
1846	return (ifa);
1847}
1848
1849/*
1850 * Locate the point to point interface with a given destination address.
1851 */
1852/*ARGSUSED*/
1853struct ifaddr *
1854ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1855{
1856	struct ifnet *ifp;
1857	struct ifaddr *ifa;
1858
1859	IFNET_RLOCK_NOSLEEP();
1860	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1861		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1862			continue;
1863		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1864			continue;
1865		IF_ADDR_RLOCK(ifp);
1866		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1867			if (ifa->ifa_addr->sa_family != addr->sa_family)
1868				continue;
1869			if (ifa->ifa_dstaddr != NULL &&
1870			    sa_equal(addr, ifa->ifa_dstaddr)) {
1871				ifa_ref(ifa);
1872				IF_ADDR_RUNLOCK(ifp);
1873				goto done;
1874			}
1875		}
1876		IF_ADDR_RUNLOCK(ifp);
1877	}
1878	ifa = NULL;
1879done:
1880	IFNET_RUNLOCK_NOSLEEP();
1881	return (ifa);
1882}
1883
1884/*
1885 * Find an interface on a specific network.  If there are several matches,
1886 * the most specific one found is chosen.
1887 */
1888struct ifaddr *
1889ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
1890{
1891	struct ifnet *ifp;
1892	struct ifaddr *ifa;
1893	struct ifaddr *ifa_maybe = NULL;
1894	u_int af = addr->sa_family;
1895	const char *addr_data = addr->sa_data, *cplim;
1896
1897	/*
1898	 * AF_LINK addresses can be looked up directly by their index number,
1899	 * so do that if we can.
1900	 */
1901	if (af == AF_LINK) {
1902	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
1903	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
1904		return (ifaddr_byindex(sdl->sdl_index));
1905	}
1906
1907	/*
1908	 * Scan through each interface, looking for ones that have addresses
1909	 * in this address family and the requested fib.  Maintain a reference
1910	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
1911	 * kept it stable when we move onto the next interface.
1912	 */
1913	IFNET_RLOCK_NOSLEEP();
1914	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1915		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1916			continue;
1917		IF_ADDR_RLOCK(ifp);
1918		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1919			const char *cp, *cp2, *cp3;
1920
1921			if (ifa->ifa_addr->sa_family != af)
1922next:				continue;
1923			if (af == AF_INET &&
1924			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
1925				/*
1926				 * This is a bit broken as it doesn't
1927				 * take into account that the remote end may
1928				 * be a single node in the network we are
1929				 * looking for.
1930				 * The trouble is that we don't know the
1931				 * netmask for the remote end.
1932				 */
1933				if (ifa->ifa_dstaddr != NULL &&
1934				    sa_equal(addr, ifa->ifa_dstaddr)) {
1935					ifa_ref(ifa);
1936					IF_ADDR_RUNLOCK(ifp);
1937					goto done;
1938				}
1939			} else {
1940				/*
1941				 * Scan all the bits in the ifa's address.
1942				 * If a bit disagrees with what we are
1943				 * looking for, mask it with the netmask
1944				 * to see if it really matters.
1945				 * (A byte at a time)
1946				 */
1947				if (ifa->ifa_netmask == 0)
1948					continue;
1949				cp = addr_data;
1950				cp2 = ifa->ifa_addr->sa_data;
1951				cp3 = ifa->ifa_netmask->sa_data;
1952				cplim = ifa->ifa_netmask->sa_len
1953					+ (char *)ifa->ifa_netmask;
1954				while (cp3 < cplim)
1955					if ((*cp++ ^ *cp2++) & *cp3++)
1956						goto next; /* next address! */
1957				/*
1958				 * If the netmask of what we just found
1959				 * is more specific than what we had before
1960				 * (if we had one), or if the virtual status
1961				 * of the new prefix is better than that of the old one,
1962				 * then remember the new one before continuing
1963				 * to search for an even better one.
1964				 */
1965				if (ifa_maybe == NULL ||
1966				    ifa_preferred(ifa_maybe, ifa) ||
1967				    rn_refines((caddr_t)ifa->ifa_netmask,
1968				    (caddr_t)ifa_maybe->ifa_netmask)) {
1969					if (ifa_maybe != NULL)
1970						ifa_free(ifa_maybe);
1971					ifa_maybe = ifa;
1972					ifa_ref(ifa_maybe);
1973				}
1974			}
1975		}
1976		IF_ADDR_RUNLOCK(ifp);
1977	}
1978	ifa = ifa_maybe;
1979	ifa_maybe = NULL;
1980done:
1981	IFNET_RUNLOCK_NOSLEEP();
1982	if (ifa_maybe != NULL)
1983		ifa_free(ifa_maybe);
1984	return (ifa);
1985}
1986
1987/*
1988 * Find an interface address specific to an interface best matching
1989 * a given address.
1990 */
1991struct ifaddr *
1992ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
1993{
1994	struct ifaddr *ifa;
1995	const char *cp, *cp2, *cp3;
1996	char *cplim;
1997	struct ifaddr *ifa_maybe = NULL;
1998	u_int af = addr->sa_family;
1999
2000	if (af >= AF_MAX)
2001		return (NULL);
2002	IF_ADDR_RLOCK(ifp);
2003	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2004		if (ifa->ifa_addr->sa_family != af)
2005			continue;
2006		if (ifa_maybe == NULL)
2007			ifa_maybe = ifa;
2008		if (ifa->ifa_netmask == 0) {
2009			if (sa_equal(addr, ifa->ifa_addr) ||
2010			    (ifa->ifa_dstaddr &&
2011			    sa_equal(addr, ifa->ifa_dstaddr)))
2012				goto done;
2013			continue;
2014		}
2015		if (ifp->if_flags & IFF_POINTOPOINT) {
2016			if (sa_equal(addr, ifa->ifa_dstaddr))
2017				goto done;
2018		} else {
2019			cp = addr->sa_data;
2020			cp2 = ifa->ifa_addr->sa_data;
2021			cp3 = ifa->ifa_netmask->sa_data;
2022			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2023			for (; cp3 < cplim; cp3++)
2024				if ((*cp++ ^ *cp2++) & *cp3)
2025					break;
2026			if (cp3 == cplim)
2027				goto done;
2028		}
2029	}
2030	ifa = ifa_maybe;
2031done:
2032	if (ifa != NULL)
2033		ifa_ref(ifa);
2034	IF_ADDR_RUNLOCK(ifp);
2035	return (ifa);
2036}
2037
2038/*
2039 * See whether new ifa is better than current one:
2040 * 1) A non-virtual one is preferred over a virtual one.
2041 * 2) A virtual one in master state is preferred over any other state.
2042 *
2043 * Used in several address selecting functions.
2044 */
2045int
2046ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2047{
2048
2049	return (cur->ifa_carp && (!next->ifa_carp ||
2050	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2051}
2052
2053#include <net/if_llatbl.h>
2054
2055/*
2056 * Default action when installing a route with a Link Level gateway.
2057 * Look up an appropriate real ifa to point to.
2058 * This should be moved to /sys/net/link.c eventually.
2059 */
2060static void
2061link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
2062{
2063	struct ifaddr *ifa, *oifa;
2064	struct sockaddr *dst;
2065	struct ifnet *ifp;
2066
2067	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
2068	    ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
2069		return;
2070	ifa = ifaof_ifpforaddr(dst, ifp);
2071	if (ifa) {
2072		oifa = rt->rt_ifa;
2073		rt->rt_ifa = ifa;
2074		ifa_free(oifa);
2075		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2076			ifa->ifa_rtrequest(cmd, rt, info);
2077	}
2078}
2079
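/*
 * Helpers for temporary link-level sockaddrs: link_alloc_sdl() hands back
 * M_TEMP storage of the requested size (honoring the malloc flags passed
 * in), and link_free_sdl() releases it again.
 */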
2080struct sockaddr_dl *
2081link_alloc_sdl(size_t size, int flags)
2082{
2083
2084	return (malloc(size, M_TEMP, flags));
2085}
2086
2087void
2088link_free_sdl(struct sockaddr *sa)
2089{
2090	free(sa, M_TEMP);
2091}
2092
2093/*
2094 * Fills in given sdl with interface basic info.
2095 * Returns pointer to filled sdl.
2096 */
2097struct sockaddr_dl *
2098link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2099{
2100	struct sockaddr_dl *sdl;
2101
2102	sdl = (struct sockaddr_dl *)paddr;
2103	memset(sdl, 0, sizeof(struct sockaddr_dl));
2104	sdl->sdl_len = sizeof(struct sockaddr_dl);
2105	sdl->sdl_family = AF_LINK;
2106	sdl->sdl_index = ifp->if_index;
2107	sdl->sdl_type = iftype;
2108
2109	return (sdl);
2110}
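
/*
 * Illustrative use of link_init_sdl() (caller-side names assumed): hand it
 * storage at least sizeof(struct sockaddr_dl) bytes long; the function
 * zeroes the buffer and fills in the length, family, interface index and
 * type:
 *
 *	struct sockaddr_dl sdl_buf;
 *	struct sockaddr_dl *sdl;
 *
 *	sdl = link_init_sdl(ifp, (struct sockaddr *)&sdl_buf, IFT_ETHER);
 */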
2111
2112/*
2113 * Mark an interface down and notify protocols of
2114 * the transition.
2115 */
2116static void
2117if_unroute(struct ifnet *ifp, int flag, int fam)
2118{
2119	struct ifaddr *ifa;
2120
2121	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2122
2123	ifp->if_flags &= ~flag;
2124	getmicrotime(&ifp->if_lastchange);
2125	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2126		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2127			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2128	ifp->if_qflush(ifp);
2129
2130	if (ifp->if_carp)
2131		(*carp_linkstate_p)(ifp);
2132	rt_ifmsg(ifp);
2133}
2134
2135/*
2136 * Mark an interface up and notify protocols of
2137 * the transition.
2138 */
2139static void
2140if_route(struct ifnet *ifp, int flag, int fam)
2141{
2142	struct ifaddr *ifa;
2143
2144	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
2145
2146	ifp->if_flags |= flag;
2147	getmicrotime(&ifp->if_lastchange);
2148	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2149		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2150			pfctlinput(PRC_IFUP, ifa->ifa_addr);
2151	if (ifp->if_carp)
2152		(*carp_linkstate_p)(ifp);
2153	rt_ifmsg(ifp);
2154#ifdef INET6
2155	in6_if_up(ifp);
2156#endif
2157}
2158
2159void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
2160void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
2161struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
2162struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
2163int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
2164int	(*vlan_setcookie_p)(struct ifnet *, void *);
2165void	*(*vlan_cookie_p)(struct ifnet *);
2166
2167/*
2168 * Handle a change in the interface link state. To avoid LORs
2169 * between driver lock and upper layer locks, as well as possible
2170 * recursions, we post an event to a taskqueue, and all the work
2171 * is done in the static do_link_state_change().
2172 */
2173void
2174if_link_state_change(struct ifnet *ifp, int link_state)
2175{
2176	/* Return if state hasn't changed. */
2177	if (ifp->if_link_state == link_state)
2178		return;
2179
2180	ifp->if_link_state = link_state;
2181
2182	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2183}
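
/*
 * Illustrative caller (assumed driver-side code): a driver's link interrupt
 * or media status callback only has to report the new state and return,
 * e.g.
 *
 *	if_link_state_change(sc->sc_ifp, LINK_STATE_UP);
 *
 * the rest of the processing then runs from taskqueue_swi in
 * do_link_state_change() below.
 */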
2184
2185static void
2186do_link_state_change(void *arg, int pending)
2187{
2188	struct ifnet *ifp = (struct ifnet *)arg;
2189	int link_state = ifp->if_link_state;
2190	CURVNET_SET(ifp->if_vnet);
2191
2192	/* Notify that the link state has changed. */
2193	rt_ifmsg(ifp);
2194	if (ifp->if_vlantrunk != NULL)
2195		(*vlan_link_state_p)(ifp);
2196
2197	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2198	    ifp->if_l2com != NULL)
2199		(*ng_ether_link_state_p)(ifp, link_state);
2200	if (ifp->if_carp)
2201		(*carp_linkstate_p)(ifp);
2202	if (ifp->if_bridge)
2203		(*bridge_linkstate_p)(ifp);
2204	if (ifp->if_lagg)
2205		(*lagg_linkstate_p)(ifp, link_state);
2206
2207	if (IS_DEFAULT_VNET(curvnet))
2208		devctl_notify("IFNET", ifp->if_xname,
2209		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2210		    NULL);
2211	if (pending > 1)
2212		if_printf(ifp, "%d link states coalesced\n", pending);
2213	if (log_link_state_change)
2214		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
2215		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2216	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2217	CURVNET_RESTORE();
2218}
2219
2220/*
2221 * Mark an interface down and notify protocols of
2222 * the transition.
2223 */
2224void
2225if_down(struct ifnet *ifp)
2226{
2227
2228	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
2229	if_unroute(ifp, IFF_UP, AF_UNSPEC);
2230}
2231
2232/*
2233 * Mark an interface up and notify protocols of
2234 * the transition.
2235 */
2236void
2237if_up(struct ifnet *ifp)
2238{
2239
2240	if_route(ifp, IFF_UP, AF_UNSPEC);
2241	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
2242}
2243
2244/*
2245 * Flush an interface queue.
2246 */
2247void
2248if_qflush(struct ifnet *ifp)
2249{
2250	struct mbuf *m, *n;
2251	struct ifaltq *ifq;
2252
2253	ifq = &ifp->if_snd;
2254	IFQ_LOCK(ifq);
2255#ifdef ALTQ
2256	if (ALTQ_IS_ENABLED(ifq))
2257		ALTQ_PURGE(ifq);
2258#endif
2259	n = ifq->ifq_head;
2260	while ((m = n) != NULL) {
2261		n = m->m_nextpkt;
2262		m_freem(m);
2263	}
2264	ifq->ifq_head = 0;
2265	ifq->ifq_tail = 0;
2266	ifq->ifq_len = 0;
2267	IFQ_UNLOCK(ifq);
2268}
2269
2270/*
2271 * Map interface name to interface structure pointer, with or without
2272 * returning a reference.
2273 */
2274struct ifnet *
2275ifunit_ref(const char *name)
2276{
2277	struct ifnet *ifp;
2278
2279	IFNET_RLOCK_NOSLEEP();
2280	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2281		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2282		    !(ifp->if_flags & IFF_DYING))
2283			break;
2284	}
2285	if (ifp != NULL)
2286		if_ref(ifp);
2287	IFNET_RUNLOCK_NOSLEEP();
2288	return (ifp);
2289}
2290
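/*
 * Unlike ifunit_ref(), the lookup below takes no reference on the ifnet it
 * returns; the pointer is only safe to use while the caller otherwise
 * prevents the interface from being detached.
 */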
2291struct ifnet *
2292ifunit(const char *name)
2293{
2294	struct ifnet *ifp;
2295
2296	IFNET_RLOCK_NOSLEEP();
2297	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2298		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2299			break;
2300	}
2301	IFNET_RUNLOCK_NOSLEEP();
2302	return (ifp);
2303}
2304
2305/*
2306 * Hardware specific interface ioctls.
2307 */
2308static int
2309ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
2310{
2311	struct ifreq *ifr;
2312	int error = 0, do_ifup = 0;
2313	int new_flags, temp_flags;
2314	size_t namelen, onamelen;
2315	size_t descrlen;
2316	char *descrbuf, *odescrbuf;
2317	char new_name[IFNAMSIZ];
2318	struct ifaddr *ifa;
2319	struct sockaddr_dl *sdl;
2320
2321	ifr = (struct ifreq *)data;
2322	switch (cmd) {
2323	case SIOCGIFINDEX:
2324		ifr->ifr_index = ifp->if_index;
2325		break;
2326
2327	case SIOCGIFFLAGS:
2328		temp_flags = ifp->if_flags | ifp->if_drv_flags;
2329		ifr->ifr_flags = temp_flags & 0xffff;
2330		ifr->ifr_flagshigh = temp_flags >> 16;
2331		break;
2332
2333	case SIOCGIFCAP:
2334		ifr->ifr_reqcap = ifp->if_capabilities;
2335		ifr->ifr_curcap = ifp->if_capenable;
2336		break;
2337
2338#ifdef MAC
2339	case SIOCGIFMAC:
2340		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2341		break;
2342#endif
2343
2344	case SIOCGIFMETRIC:
2345		ifr->ifr_metric = ifp->if_metric;
2346		break;
2347
2348	case SIOCGIFMTU:
2349		ifr->ifr_mtu = ifp->if_mtu;
2350		break;
2351
2352	case SIOCGIFPHYS:
2353		/* XXXGL: did this ever work? */
2354		ifr->ifr_phys = 0;
2355		break;
2356
2357	case SIOCGIFDESCR:
2358		error = 0;
2359		sx_slock(&ifdescr_sx);
2360		if (ifp->if_description == NULL)
2361			error = ENOMSG;
2362		else {
2363			/* space for terminating nul */
2364			descrlen = strlen(ifp->if_description) + 1;
2365			if (ifr->ifr_buffer.length < descrlen)
2366				ifr->ifr_buffer.buffer = NULL;
2367			else
2368				error = copyout(ifp->if_description,
2369				    ifr->ifr_buffer.buffer, descrlen);
2370			ifr->ifr_buffer.length = descrlen;
2371		}
2372		sx_sunlock(&ifdescr_sx);
2373		break;
2374
2375	case SIOCSIFDESCR:
2376		error = priv_check(td, PRIV_NET_SETIFDESCR);
2377		if (error)
2378			return (error);
2379
2380		/*
2381		 * Copy only (length-1) bytes to make sure that
2382		 * if_description is always nul terminated.  The
2383		 * length parameter is supposed to include the
2384		 * terminating nul.
2385		 */
2386		if (ifr->ifr_buffer.length > ifdescr_maxlen)
2387			return (ENAMETOOLONG);
2388		else if (ifr->ifr_buffer.length == 0)
2389			descrbuf = NULL;
2390		else {
2391			descrbuf = malloc(ifr->ifr_buffer.length, M_IFDESCR,
2392			    M_WAITOK | M_ZERO);
2393			error = copyin(ifr->ifr_buffer.buffer, descrbuf,
2394			    ifr->ifr_buffer.length - 1);
2395			if (error) {
2396				free(descrbuf, M_IFDESCR);
2397				break;
2398			}
2399		}
2400
2401		sx_xlock(&ifdescr_sx);
2402		odescrbuf = ifp->if_description;
2403		ifp->if_description = descrbuf;
2404		sx_xunlock(&ifdescr_sx);
2405
2406		getmicrotime(&ifp->if_lastchange);
2407		free(odescrbuf, M_IFDESCR);
2408		break;
2409
2410	case SIOCGIFFIB:
2411		ifr->ifr_fib = ifp->if_fib;
2412		break;
2413
2414	case SIOCSIFFIB:
2415		error = priv_check(td, PRIV_NET_SETIFFIB);
2416		if (error)
2417			return (error);
2418		if (ifr->ifr_fib >= rt_numfibs)
2419			return (EINVAL);
2420
2421		ifp->if_fib = ifr->ifr_fib;
2422		break;
2423
2424	case SIOCSIFFLAGS:
2425		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2426		if (error)
2427			return (error);
2428		/*
2429		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2430		 * check, so we don't need special handling here yet.
2431		 */
2432		new_flags = (ifr->ifr_flags & 0xffff) |
2433		    (ifr->ifr_flagshigh << 16);
2434		if (ifp->if_flags & IFF_UP &&
2435		    (new_flags & IFF_UP) == 0) {
2436			if_down(ifp);
2437		} else if (new_flags & IFF_UP &&
2438		    (ifp->if_flags & IFF_UP) == 0) {
2439			do_ifup = 1;
2440		}
2441		/* See if the permanently promiscuous mode bit is about to flip */
2442		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2443			if (new_flags & IFF_PPROMISC)
2444				ifp->if_flags |= IFF_PROMISC;
2445			else if (ifp->if_pcount == 0)
2446				ifp->if_flags &= ~IFF_PROMISC;
2447			if (log_promisc_mode_change)
2448                                log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
2449                                    ifp->if_xname,
2450                                    ((new_flags & IFF_PPROMISC) ?
2451                                     "enabled" : "disabled"));
2452		}
2453		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2454			(new_flags &~ IFF_CANTCHANGE);
2455		if (ifp->if_ioctl) {
2456			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2457		}
2458		if (do_ifup)
2459			if_up(ifp);
2460		getmicrotime(&ifp->if_lastchange);
2461		break;
2462
2463	case SIOCSIFCAP:
2464		error = priv_check(td, PRIV_NET_SETIFCAP);
2465		if (error)
2466			return (error);
2467		if (ifp->if_ioctl == NULL)
2468			return (EOPNOTSUPP);
2469		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2470			return (EINVAL);
2471		error = (*ifp->if_ioctl)(ifp, cmd, data);
2472		if (error == 0)
2473			getmicrotime(&ifp->if_lastchange);
2474		break;
2475
2476#ifdef MAC
2477	case SIOCSIFMAC:
2478		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2479		break;
2480#endif
2481
2482	case SIOCSIFNAME:
2483		error = priv_check(td, PRIV_NET_SETIFNAME);
2484		if (error)
2485			return (error);
2486		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
2487		if (error != 0)
2488			return (error);
2489		if (new_name[0] == '\0')
2490			return (EINVAL);
2491		if (new_name[IFNAMSIZ-1] != '\0') {
2492			new_name[IFNAMSIZ-1] = '\0';
2493			if (strlen(new_name) == IFNAMSIZ-1)
2494				return (EINVAL);
2495		}
2496		if (ifunit(new_name) != NULL)
2497			return (EEXIST);
2498
2499		/*
2500		 * XXX: Locking.  Nothing else seems to lock if_flags,
2501		 * and there are numerous other races with the
2502		 * ifunit() checks not being atomic with namespace
2503		 * changes (renames, vmoves, if_attach, etc).
2504		 */
2505		ifp->if_flags |= IFF_RENAMING;
2506
2507		/* Announce the departure of the interface. */
2508		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2509		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2510
2511		log(LOG_INFO, "%s: changing name to '%s'\n",
2512		    ifp->if_xname, new_name);
2513
2514		IF_ADDR_WLOCK(ifp);
2515		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
2516		ifa = ifp->if_addr;
2517		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2518		namelen = strlen(new_name);
2519		onamelen = sdl->sdl_nlen;
2520		/*
2521		 * Move the address if needed.  This is safe because we
2522		 * allocate space for a name of length IFNAMSIZ when we
2523		 * create this in if_attach().
2524		 */
2525		if (namelen != onamelen) {
2526			bcopy(sdl->sdl_data + onamelen,
2527			    sdl->sdl_data + namelen, sdl->sdl_alen);
2528		}
2529		bcopy(new_name, sdl->sdl_data, namelen);
2530		sdl->sdl_nlen = namelen;
2531		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2532		bzero(sdl->sdl_data, onamelen);
2533		while (namelen != 0)
2534			sdl->sdl_data[--namelen] = 0xff;
2535		IF_ADDR_WUNLOCK(ifp);
2536
2537		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2538		/* Announce the return of the interface. */
2539		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2540
2541		ifp->if_flags &= ~IFF_RENAMING;
2542		break;
2543
2544#ifdef VIMAGE
2545	case SIOCSIFVNET:
2546		error = priv_check(td, PRIV_NET_SETIFVNET);
2547		if (error)
2548			return (error);
2549		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2550		break;
2551#endif
2552
2553	case SIOCSIFMETRIC:
2554		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2555		if (error)
2556			return (error);
2557		ifp->if_metric = ifr->ifr_metric;
2558		getmicrotime(&ifp->if_lastchange);
2559		break;
2560
2561	case SIOCSIFPHYS:
2562		error = priv_check(td, PRIV_NET_SETIFPHYS);
2563		if (error)
2564			return (error);
2565		if (ifp->if_ioctl == NULL)
2566			return (EOPNOTSUPP);
2567		error = (*ifp->if_ioctl)(ifp, cmd, data);
2568		if (error == 0)
2569			getmicrotime(&ifp->if_lastchange);
2570		break;
2571
2572	case SIOCSIFMTU:
2573	{
2574		u_long oldmtu = ifp->if_mtu;
2575
2576		error = priv_check(td, PRIV_NET_SETIFMTU);
2577		if (error)
2578			return (error);
2579		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2580			return (EINVAL);
2581		if (ifp->if_ioctl == NULL)
2582			return (EOPNOTSUPP);
2583		error = (*ifp->if_ioctl)(ifp, cmd, data);
2584		if (error == 0) {
2585			getmicrotime(&ifp->if_lastchange);
2586			rt_ifmsg(ifp);
2587		}
2588		/*
2589		 * If the link MTU changed, do network layer specific procedure.
2590		 */
2591		if (ifp->if_mtu != oldmtu) {
2592#ifdef INET6
2593			nd6_setmtu(ifp);
2594#endif
2595			rt_updatemtu(ifp);
2596		}
2597		break;
2598	}
2599
2600	case SIOCADDMULTI:
2601	case SIOCDELMULTI:
2602		if (cmd == SIOCADDMULTI)
2603			error = priv_check(td, PRIV_NET_ADDMULTI);
2604		else
2605			error = priv_check(td, PRIV_NET_DELMULTI);
2606		if (error)
2607			return (error);
2608
2609		/* Don't allow group membership on non-multicast interfaces. */
2610		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2611			return (EOPNOTSUPP);
2612
2613		/* Don't let users screw up protocols' entries. */
2614		if (ifr->ifr_addr.sa_family != AF_LINK)
2615			return (EINVAL);
2616
2617		if (cmd == SIOCADDMULTI) {
2618			struct ifmultiaddr *ifma;
2619
2620			/*
2621			 * Userland is only permitted to join groups once
2622			 * via the if_addmulti() KPI, because it cannot hold
2623			 * struct ifmultiaddr * between calls. It may also
2624			 * lose a race while we check if the membership
2625			 * already exists.
2626			 */
2627			IF_ADDR_RLOCK(ifp);
2628			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2629			IF_ADDR_RUNLOCK(ifp);
2630			if (ifma != NULL)
2631				error = EADDRINUSE;
2632			else
2633				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2634		} else {
2635			error = if_delmulti(ifp, &ifr->ifr_addr);
2636		}
2637		if (error == 0)
2638			getmicrotime(&ifp->if_lastchange);
2639		break;
2640
2641	case SIOCSIFPHYADDR:
2642	case SIOCDIFPHYADDR:
2643#ifdef INET6
2644	case SIOCSIFPHYADDR_IN6:
2645#endif
2646	case SIOCSIFMEDIA:
2647	case SIOCSIFGENERIC:
2648		error = priv_check(td, PRIV_NET_HWIOCTL);
2649		if (error)
2650			return (error);
2651		if (ifp->if_ioctl == NULL)
2652			return (EOPNOTSUPP);
2653		error = (*ifp->if_ioctl)(ifp, cmd, data);
2654		if (error == 0)
2655			getmicrotime(&ifp->if_lastchange);
2656		break;
2657
2658	case SIOCGIFSTATUS:
2659	case SIOCGIFPSRCADDR:
2660	case SIOCGIFPDSTADDR:
2661	case SIOCGIFMEDIA:
2662	case SIOCGIFXMEDIA:
2663	case SIOCGIFGENERIC:
2664		if (ifp->if_ioctl == NULL)
2665			return (EOPNOTSUPP);
2666		error = (*ifp->if_ioctl)(ifp, cmd, data);
2667		break;
2668
2669	case SIOCSIFLLADDR:
2670		error = priv_check(td, PRIV_NET_SETLLADDR);
2671		if (error)
2672			return (error);
2673		error = if_setlladdr(ifp,
2674		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2675		break;
2676
2677	case SIOCGHWADDR:
2678		error = if_gethwaddr(ifp, ifr);
2679		break;
2680
2681	case SIOCAIFGROUP:
2682	{
2683		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2684
2685		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2686		if (error)
2687			return (error);
2688		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2689			return (error);
2690		break;
2691	}
2692
2693	case SIOCGIFGROUP:
2694		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
2695			return (error);
2696		break;
2697
2698	case SIOCDIFGROUP:
2699	{
2700		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2701
2702		error = priv_check(td, PRIV_NET_DELIFGROUP);
2703		if (error)
2704			return (error);
2705		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2706			return (error);
2707		break;
2708	}
2709
2710	default:
2711		error = ENOIOCTL;
2712		break;
2713	}
2714	return (error);
2715}
2716
2717/* COMPAT_SVR4 */
2718#define	OSIOCGIFCONF	_IOWR('i', 20, struct ifconf)
2719
2720#ifdef COMPAT_FREEBSD32
2721struct ifconf32 {
2722	int32_t	ifc_len;
2723	union {
2724		uint32_t	ifcu_buf;
2725		uint32_t	ifcu_req;
2726	} ifc_ifcu;
2727};
2728#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
2729#endif
2730
2731/*
2732 * Interface ioctls.
2733 */
2734int
2735ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2736{
2737	struct ifnet *ifp;
2738	struct ifreq *ifr;
2739	int error;
2740	int oif_flags;
2741#ifdef VIMAGE
2742	int shutdown;
2743#endif
2744
2745	CURVNET_SET(so->so_vnet);
2746#ifdef VIMAGE
2747	/* Make sure the VNET is stable. */
2748	shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
2749		 so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
2750	if (shutdown) {
2751		CURVNET_RESTORE();
2752		return (EBUSY);
2753	}
2754#endif
2755
2756
2757	switch (cmd) {
2758	case SIOCGIFCONF:
2759	case OSIOCGIFCONF:	/* COMPAT_SVR4 */
2760		error = ifconf(cmd, data);
2761		CURVNET_RESTORE();
2762		return (error);
2763
2764#ifdef COMPAT_FREEBSD32
2765	case SIOCGIFCONF32:
2766		{
2767			struct ifconf32 *ifc32;
2768			struct ifconf ifc;
2769
2770			ifc32 = (struct ifconf32 *)data;
2771			ifc.ifc_len = ifc32->ifc_len;
2772			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
2773
2774			error = ifconf(SIOCGIFCONF, (void *)&ifc);
2775			CURVNET_RESTORE();
2776			if (error == 0)
2777				ifc32->ifc_len = ifc.ifc_len;
2778			return (error);
2779		}
2780#endif
2781	}
2782	ifr = (struct ifreq *)data;
2783
2784	switch (cmd) {
2785#ifdef VIMAGE
2786	case SIOCSIFRVNET:
2787		error = priv_check(td, PRIV_NET_SETIFVNET);
2788		if (error == 0)
2789			error = if_vmove_reclaim(td, ifr->ifr_name,
2790			    ifr->ifr_jid);
2791		CURVNET_RESTORE();
2792		return (error);
2793#endif
2794	case SIOCIFCREATE:
2795	case SIOCIFCREATE2:
2796		error = priv_check(td, PRIV_NET_IFCREATE);
2797		if (error == 0)
2798			error = if_clone_create(ifr->ifr_name,
2799			    sizeof(ifr->ifr_name),
2800			    cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL);
2801		CURVNET_RESTORE();
2802		return (error);
2803	case SIOCIFDESTROY:
2804		error = priv_check(td, PRIV_NET_IFDESTROY);
2805		if (error == 0)
2806			error = if_clone_destroy(ifr->ifr_name);
2807		CURVNET_RESTORE();
2808		return (error);
2809
2810	case SIOCIFGCLONERS:
2811		error = if_clone_list((struct if_clonereq *)data);
2812		CURVNET_RESTORE();
2813		return (error);
2814	case SIOCGIFGMEMB:
2815		error = if_getgroupmembers((struct ifgroupreq *)data);
2816		CURVNET_RESTORE();
2817		return (error);
2818#if defined(INET) || defined(INET6)
2819	case SIOCSVH:
2820	case SIOCGVH:
2821		if (carp_ioctl_p == NULL)
2822			error = EPROTONOSUPPORT;
2823		else
2824			error = (*carp_ioctl_p)(ifr, cmd, td);
2825		CURVNET_RESTORE();
2826		return (error);
2827#endif
2828	}
2829
2830	ifp = ifunit_ref(ifr->ifr_name);
2831	if (ifp == NULL) {
2832		CURVNET_RESTORE();
2833		return (ENXIO);
2834	}
2835
2836	error = ifhwioctl(cmd, ifp, data, td);
2837	if (error != ENOIOCTL) {
2838		if_rele(ifp);
2839		CURVNET_RESTORE();
2840		return (error);
2841	}
2842
2843	oif_flags = ifp->if_flags;
2844	if (so->so_proto == NULL) {
2845		if_rele(ifp);
2846		CURVNET_RESTORE();
2847		return (EOPNOTSUPP);
2848	}
2849
2850	/*
2851	 * Pass the request on to the socket control method, and if the
2852	 * latter returns EOPNOTSUPP, directly to the interface.
2853	 *
2854	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
2855	 * trust SIOCSIFADDR et al to come from an already privileged
2856	 * layer, and do not perform any credentials checks or input
2857	 * validation.
2858	 */
2859	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
2860	    ifp, td));
2861	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
2862	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
2863	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
2864		error = (*ifp->if_ioctl)(ifp, cmd, data);
2865
2866	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
2867#ifdef INET6
2868		if (ifp->if_flags & IFF_UP)
2869			in6_if_up(ifp);
2870#endif
2871	}
2872	if_rele(ifp);
2873	CURVNET_RESTORE();
2874	return (error);
2875}
2876
2877/*
2878 * The code common to handling reference counted flags,
2879 * e.g., in ifpromisc() and if_allmulti().
2880 * The "pflag" argument can specify a permanent mode flag to check,
2881 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
2882 *
2883 * Only to be used on stack-owned flags, not driver-owned flags.
2884 */
2885static int
2886if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
2887{
2888	struct ifreq ifr;
2889	int error;
2890	int oldflags, oldcount;
2891
2892	/* Sanity checks to catch programming errors */
2893	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
2894	    ("%s: setting driver-owned flag %d", __func__, flag));
2895
2896	if (onswitch)
2897		KASSERT(*refcount >= 0,
2898		    ("%s: increment negative refcount %d for flag %d",
2899		    __func__, *refcount, flag));
2900	else
2901		KASSERT(*refcount > 0,
2902		    ("%s: decrement non-positive refcount %d for flag %d",
2903		    __func__, *refcount, flag));
2904
2905	/* In case this mode is permanent, just touch refcount */
2906	if (ifp->if_flags & pflag) {
2907		*refcount += onswitch ? 1 : -1;
2908		return (0);
2909	}
2910
2911	/* Save ifnet parameters in case if_ioctl() fails */
2912	oldcount = *refcount;
2913	oldflags = ifp->if_flags;
2914
2915	/*
2916	 * See if we are not the only one, in which case touching the refcount
2917	 * is enough.  Actually toggle the interface flag only if we are the first or last.
2918	 */
2919	if (onswitch) {
2920		if ((*refcount)++)
2921			return (0);
2922		ifp->if_flags |= flag;
2923	} else {
2924		if (--(*refcount))
2925			return (0);
2926		ifp->if_flags &= ~flag;
2927	}
2928
2929	/* Call down to the driver since we've changed interface flags */
2930	if (ifp->if_ioctl == NULL) {
2931		error = EOPNOTSUPP;
2932		goto recover;
2933	}
2934	ifr.ifr_flags = ifp->if_flags & 0xffff;
2935	ifr.ifr_flagshigh = ifp->if_flags >> 16;
2936	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2937	if (error)
2938		goto recover;
2939	/* Notify userland that interface flags have changed */
2940	rt_ifmsg(ifp);
2941	return (0);
2942
2943recover:
2944	/* Recover after driver error */
2945	*refcount = oldcount;
2946	ifp->if_flags = oldflags;
2947	return (error);
2948}
2949
2950/*
2951 * Set/clear promiscuous mode on interface ifp based on the truth value
2952 * of pswitch.  The calls are reference counted so that only the first
2953 * "on" request actually has an effect, as does the final "off" request.
2954 * Results are undefined if the "off" and "on" requests are not matched.
2955 */
2956int
2957ifpromisc(struct ifnet *ifp, int pswitch)
2958{
2959	int error;
2960	int oldflags = ifp->if_flags;
2961
2962	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
2963			   &ifp->if_pcount, pswitch);
2964	/* If promiscuous mode status has changed, log a message */
2965	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
2966            log_promisc_mode_change)
2967		log(LOG_INFO, "%s: promiscuous mode %s\n",
2968		    ifp->if_xname,
2969		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
2970	return (error);
2971}
2972
2973/*
2974 * Return the interface configuration
2975 * of the system.  The list may be used
2976 * in later ioctls (above) to get
2977 * other information.
2978 */
2979/*ARGSUSED*/
2980static int
2981ifconf(u_long cmd, caddr_t data)
2982{
2983	struct ifconf *ifc = (struct ifconf *)data;
2984	struct ifnet *ifp;
2985	struct ifaddr *ifa;
2986	struct ifreq ifr;
2987	struct sbuf *sb;
2988	int error, full = 0, valid_len, max_len;
2989
2990	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
2991	max_len = MAXPHYS - 1;
2992
2993	/* Prevent hostile input from being able to crash the system */
2994	if (ifc->ifc_len <= 0)
2995		return (EINVAL);
2996
2997again:
2998	if (ifc->ifc_len <= max_len) {
2999		max_len = ifc->ifc_len;
3000		full = 1;
3001	}
3002	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
3003	max_len = 0;
3004	valid_len = 0;
3005
3006	IFNET_RLOCK();
3007	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
3008		int addrs;
3009
3010		/*
3011		 * Zero the ifr_name buffer to make sure we don't
3012		 * disclose the contents of the stack.
3013		 */
3014		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
3015
3016		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
3017		    >= sizeof(ifr.ifr_name)) {
3018			sbuf_delete(sb);
3019			IFNET_RUNLOCK();
3020			return (ENAMETOOLONG);
3021		}
3022
3023		addrs = 0;
3024		IF_ADDR_RLOCK(ifp);
3025		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3026			struct sockaddr *sa = ifa->ifa_addr;
3027
3028			if (prison_if(curthread->td_ucred, sa) != 0)
3029				continue;
3030			addrs++;
3031			/* COMPAT_SVR4 */
3032			if (cmd == OSIOCGIFCONF) {
3033				struct osockaddr *osa =
3034				    (struct osockaddr *)&ifr.ifr_addr;
3035				ifr.ifr_addr = *sa;
3036				osa->sa_family = sa->sa_family;
3037				sbuf_bcat(sb, &ifr, sizeof(ifr));
3038				max_len += sizeof(ifr);
3039			} else
3040			if (sa->sa_len <= sizeof(*sa)) {
3041				ifr.ifr_addr = *sa;
3042				sbuf_bcat(sb, &ifr, sizeof(ifr));
3043				max_len += sizeof(ifr);
3044			} else {
3045				sbuf_bcat(sb, &ifr,
3046				    offsetof(struct ifreq, ifr_addr));
3047				max_len += offsetof(struct ifreq, ifr_addr);
3048				sbuf_bcat(sb, sa, sa->sa_len);
3049				max_len += sa->sa_len;
3050			}
3051
3052			if (sbuf_error(sb) == 0)
3053				valid_len = sbuf_len(sb);
3054		}
3055		IF_ADDR_RUNLOCK(ifp);
3056		if (addrs == 0) {
3057			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
3058			sbuf_bcat(sb, &ifr, sizeof(ifr));
3059			max_len += sizeof(ifr);
3060
3061			if (sbuf_error(sb) == 0)
3062				valid_len = sbuf_len(sb);
3063		}
3064	}
3065	IFNET_RUNLOCK();
3066
3067	/*
3068	 * If we didn't allocate enough space (uncommon), try again.  If
3069	 * we have already allocated as much space as we are allowed,
3070	 * return what we've got.
3071	 */
3072	if (valid_len != max_len && !full) {
3073		sbuf_delete(sb);
3074		goto again;
3075	}
3076
3077	ifc->ifc_len = valid_len;
3078	sbuf_finish(sb);
3079	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
3080	sbuf_delete(sb);
3081	return (error);
3082}
3083
3084/*
3085 * Just like ifpromisc(), but for all-multicast-reception mode.
3086 */
3087int
3088if_allmulti(struct ifnet *ifp, int onswitch)
3089{
3090
3091	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3092}
3093
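/*
 * Look up a multicast address on the interface's multicast list.  Link-layer
 * (AF_LINK) addresses are matched with sa_dl_equal(), everything else with
 * sa_equal().  The caller must hold the interface address lock.
 */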
3094struct ifmultiaddr *
3095if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3096{
3097	struct ifmultiaddr *ifma;
3098
3099	IF_ADDR_LOCK_ASSERT(ifp);
3100
3101	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3102		if (sa->sa_family == AF_LINK) {
3103			if (sa_dl_equal(ifma->ifma_addr, sa))
3104				break;
3105		} else {
3106			if (sa_equal(ifma->ifma_addr, sa))
3107				break;
3108		}
3109	}
3110
3111	return ifma;
3112}
3113
3114/*
3115 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3116 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3117 * the ifnet multicast address list here, so the caller must do that and
3118 * other setup work (such as notifying the device driver).  The reference
3119 * count is initialized to 1.
3120 */
3121static struct ifmultiaddr *
3122if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3123    int mflags)
3124{
3125	struct ifmultiaddr *ifma;
3126	struct sockaddr *dupsa;
3127
3128	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3129	    M_ZERO);
3130	if (ifma == NULL)
3131		return (NULL);
3132
3133	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3134	if (dupsa == NULL) {
3135		free(ifma, M_IFMADDR);
3136		return (NULL);
3137	}
3138	bcopy(sa, dupsa, sa->sa_len);
3139	ifma->ifma_addr = dupsa;
3140
3141	ifma->ifma_ifp = ifp;
3142	ifma->ifma_refcount = 1;
3143	ifma->ifma_protospec = NULL;
3144
3145	if (llsa == NULL) {
3146		ifma->ifma_lladdr = NULL;
3147		return (ifma);
3148	}
3149
3150	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3151	if (dupsa == NULL) {
3152		free(ifma->ifma_addr, M_IFMADDR);
3153		free(ifma, M_IFMADDR);
3154		return (NULL);
3155	}
3156	bcopy(llsa, dupsa, llsa->sa_len);
3157	ifma->ifma_lladdr = dupsa;
3158
3159	return (ifma);
3160}
3161
3162/*
3163 * if_freemulti: free an ifmultiaddr structure and any attached related
3164 * addresses.  The caller is responsible for implementing reference
3165 * counting, notifying the driver, handling routing messages, and releasing
3166 * any dependent link layer state.
3167 */
3168static void
3169if_freemulti(struct ifmultiaddr *ifma)
3170{
3171
3172	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3173	    ifma->ifma_refcount));
3174
3175	if (ifma->ifma_lladdr != NULL)
3176		free(ifma->ifma_lladdr, M_IFMADDR);
3177	free(ifma->ifma_addr, M_IFMADDR);
3178	free(ifma, M_IFMADDR);
3179}
3180
3181/*
3182 * Register an additional multicast address with a network interface.
3183 *
3184 * - If the address is already present, bump the reference count on the
3185 *   address and return.
3186 * - If the address is not link-layer, look up a link layer address.
3187 * - Allocate address structures for one or both addresses, and attach to the
3188 *   multicast address list on the interface.  If automatically adding a link
3189 *   layer address, the protocol address will own a reference to the link
3190 *   layer address, to be freed when it is freed.
3191 * - Notify the network device driver of an addition to the multicast address
3192 *   list.
3193 *
3194 * 'sa' points to caller-owned memory with the desired multicast address.
3195 *
3196 * 'retifma' will be used to return a pointer to the resulting multicast
3197 * address reference, if desired.
3198 */
3199int
3200if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
3201    struct ifmultiaddr **retifma)
3202{
3203	struct ifmultiaddr *ifma, *ll_ifma;
3204	struct sockaddr *llsa;
3205	struct sockaddr_dl sdl;
3206	int error;
3207
3208	/*
3209	 * If the address is already present, return a new reference to it;
3210	 * otherwise, allocate storage and set up a new address.
3211	 */
3212	IF_ADDR_WLOCK(ifp);
3213	ifma = if_findmulti(ifp, sa);
3214	if (ifma != NULL) {
3215		ifma->ifma_refcount++;
3216		if (retifma != NULL)
3217			*retifma = ifma;
3218		IF_ADDR_WUNLOCK(ifp);
3219		return (0);
3220	}
3221
3222	/*
3223	 * The address isn't already present; resolve the protocol address
3224	 * into a link layer address, and then look that up, bump its
3225	 * refcount or allocate an ifma for that also.
3226	 * Most link layer resolving functions return address data which
3227	 * fits inside the default sockaddr_dl structure.  However, the callback
3228	 * can allocate another sockaddr structure, in which case we need to
3229	 * free it later.
3230	 */
3231	llsa = NULL;
3232	ll_ifma = NULL;
3233	if (ifp->if_resolvemulti != NULL) {
3234		/* Provide called function with buffer size information */
3235		sdl.sdl_len = sizeof(sdl);
3236		llsa = (struct sockaddr *)&sdl;
3237		error = ifp->if_resolvemulti(ifp, &llsa, sa);
3238		if (error)
3239			goto unlock_out;
3240	}
3241
3242	/*
3243	 * Allocate the new address.  Don't hook it up yet, as we may also
3244	 * need to allocate a link layer multicast address.
3245	 */
3246	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
3247	if (ifma == NULL) {
3248		error = ENOMEM;
3249		goto free_llsa_out;
3250	}
3251
3252	/*
3253	 * If a link layer address is found, we'll need to see if it's
3254	 * already present in the address list, or allocate it as well.
3255	 * When this block finishes, the link layer address will be on the
3256	 * list.
3257	 */
3258	if (llsa != NULL) {
3259		ll_ifma = if_findmulti(ifp, llsa);
3260		if (ll_ifma == NULL) {
3261			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
3262			if (ll_ifma == NULL) {
3263				--ifma->ifma_refcount;
3264				if_freemulti(ifma);
3265				error = ENOMEM;
3266				goto free_llsa_out;
3267			}
3268			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
3269			    ifma_link);
3270		} else
3271			ll_ifma->ifma_refcount++;
3272		ifma->ifma_llifma = ll_ifma;
3273	}
3274
3275	/*
3276	 * We now have a new multicast address, ifma, and possibly a new or
3277	 * referenced link layer address.  Add the primary address to the
3278	 * ifnet address list.
3279	 */
3280	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
3281
3282	if (retifma != NULL)
3283		*retifma = ifma;
3284
3285	/*
3286	 * Must generate the message while holding the lock so that 'ifma'
3287	 * pointer is still valid.
3288	 */
3289	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
3290	IF_ADDR_WUNLOCK(ifp);
3291
3292	/*
3293	 * We are certain we have added something, so call down to the
3294	 * interface to let them know about it.
3295	 */
3296	if (ifp->if_ioctl != NULL) {
3297		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3298	}
3299
3300	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3301		link_free_sdl(llsa);
3302
3303	return (0);
3304
3305free_llsa_out:
3306	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3307		link_free_sdl(llsa);
3308
3309unlock_out:
3310	IF_ADDR_WUNLOCK(ifp);
3311	return (error);
3312}
3313
3314/*
3315 * Delete a multicast group membership by network-layer group address.
3316 *
3317 * Returns ENOENT if the entry could not be found. If ifp no longer
3318 * exists, results are undefined. This entry point should only be used
3319 * from subsystems which do appropriate locking to hold ifp for the
3320 * duration of the call.
3321 * Network-layer protocol domains must use if_delmulti_ifma().
3322 */
3323int
3324if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3325{
3326	struct ifmultiaddr *ifma;
3327	int lastref;
3328#ifdef INVARIANTS
3329	struct ifnet *oifp;
3330
3331	IFNET_RLOCK_NOSLEEP();
3332	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3333		if (ifp == oifp)
3334			break;
3335	if (ifp != oifp)
3336		ifp = NULL;
3337	IFNET_RUNLOCK_NOSLEEP();
3338
3339	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
3340#endif
3341	if (ifp == NULL)
3342		return (ENOENT);
3343
3344	IF_ADDR_WLOCK(ifp);
3345	lastref = 0;
3346	ifma = if_findmulti(ifp, sa);
3347	if (ifma != NULL)
3348		lastref = if_delmulti_locked(ifp, ifma, 0);
3349	IF_ADDR_WUNLOCK(ifp);
3350
3351	if (ifma == NULL)
3352		return (ENOENT);
3353
3354	if (lastref && ifp->if_ioctl != NULL) {
3355		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3356	}
3357
3358	return (0);
3359}
3360
3361/*
3362 * Delete all multicast group membership for an interface.
3363 * Should be used to quickly flush all multicast filters.
3364 */
3365void
3366if_delallmulti(struct ifnet *ifp)
3367{
3368	struct ifmultiaddr *ifma;
3369	struct ifmultiaddr *next;
3370
3371	IF_ADDR_WLOCK(ifp);
3372	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3373		if_delmulti_locked(ifp, ifma, 0);
3374	IF_ADDR_WUNLOCK(ifp);
3375}
3376
3377/*
3378 * Delete a multicast group membership by group membership pointer.
3379 * Network-layer protocol domains must use this routine.
3380 *
3381 * It is safe to call this routine if the ifp disappeared.
3382 */
3383void
3384if_delmulti_ifma(struct ifmultiaddr *ifma)
3385{
3386	struct ifnet *ifp;
3387	int lastref;
3388
3389	ifp = ifma->ifma_ifp;
3390#ifdef DIAGNOSTIC
3391	if (ifp == NULL) {
3392		printf("%s: ifma_ifp seems to be detached\n", __func__);
3393	} else {
3394		struct ifnet *oifp;
3395
3396		IFNET_RLOCK_NOSLEEP();
3397		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
3398			if (ifp == oifp)
3399				break;
3400		if (ifp != oifp) {
3401			printf("%s: ifnet %p disappeared\n", __func__, ifp);
3402			ifp = NULL;
3403		}
3404		IFNET_RUNLOCK_NOSLEEP();
3405	}
3406#endif
3407	/*
3408	 * If and only if the ifnet instance exists: Acquire the address lock.
3409	 */
3410	if (ifp != NULL)
3411		IF_ADDR_WLOCK(ifp);
3412
3413	lastref = if_delmulti_locked(ifp, ifma, 0);
3414
3415	if (ifp != NULL) {
3416		/*
3417		 * If and only if the ifnet instance exists:
3418		 *  Release the address lock.
3419		 *  If the group was left: update the hardware hash filter.
3420		 */
3421		IF_ADDR_WUNLOCK(ifp);
3422		if (lastref && ifp->if_ioctl != NULL) {
3423			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3424		}
3425	}
3426}
3427
3428/*
3429 * Perform deletion of network-layer and/or link-layer multicast address.
3430 *
3431 * Return 0 if the reference count was decremented.
3432 * Return 1 if the final reference was released, indicating that the
3433 * hardware hash filter should be reprogrammed.
3434 */
3435static int
3436if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3437{
3438	struct ifmultiaddr *ll_ifma;
3439
3440	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3441		KASSERT(ifma->ifma_ifp == ifp,
3442		    ("%s: inconsistent ifp %p", __func__, ifp));
3443		IF_ADDR_WLOCK_ASSERT(ifp);
3444	}
3445
3446	ifp = ifma->ifma_ifp;
3447
3448	/*
3449	 * If the ifnet is detaching, null out references to ifnet,
3450	 * so that upper protocol layers will notice, and not attempt
3451	 * to obtain locks for an ifnet which no longer exists. The
3452	 * routing socket announcement must happen before the ifnet
3453	 * instance is detached from the system.
3454	 */
3455	if (detaching) {
3456#ifdef DIAGNOSTIC
3457		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3458#endif
3459		/*
3460		 * ifp may already be nulled out if we are being reentered
3461		 * to delete the ll_ifma.
3462		 */
3463		if (ifp != NULL) {
3464			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3465			ifma->ifma_ifp = NULL;
3466		}
3467	}
3468
3469	if (--ifma->ifma_refcount > 0)
3470		return 0;
3471
3472	/*
3473	 * If this ifma is a network-layer ifma, a link-layer ifma may
3474	 * have been associated with it. Release it first if so.
3475	 */
3476	ll_ifma = ifma->ifma_llifma;
3477	if (ll_ifma != NULL) {
3478		KASSERT(ifma->ifma_lladdr != NULL,
3479		    ("%s: llifma w/o lladdr", __func__));
3480		if (detaching)
3481			ll_ifma->ifma_ifp = NULL;	/* XXX */
3482		if (--ll_ifma->ifma_refcount == 0) {
3483			if (ifp != NULL) {
3484				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
3485				    ifma_link);
3486			}
3487			if_freemulti(ll_ifma);
3488		}
3489	}
3490
3491	if (ifp != NULL)
3492		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
3493
3494	if_freemulti(ifma);
3495
3496	/*
3497	 * The last reference to this instance of struct ifmultiaddr
3498	 * was released; the hardware should be notified of this change.
3499	 */
3500	return 1;
3501}
3502
3503/*
3504 * Set the link layer address on an interface.
3505 *
3506 * At this time we only support certain types of interfaces,
3507 * and we don't allow the length of the address to change.
3508 *
3509 * Set noinline to be dtrace-friendly
3510 */
3511__noinline int
3512if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3513{
3514	struct sockaddr_dl *sdl;
3515	struct ifaddr *ifa;
3516	struct ifreq ifr;
3517
3518	IF_ADDR_RLOCK(ifp);
3519	ifa = ifp->if_addr;
3520	if (ifa == NULL) {
3521		IF_ADDR_RUNLOCK(ifp);
3522		return (EINVAL);
3523	}
3524	ifa_ref(ifa);
3525	IF_ADDR_RUNLOCK(ifp);
3526	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3527	if (sdl == NULL) {
3528		ifa_free(ifa);
3529		return (EINVAL);
3530	}
3531	if (len != sdl->sdl_alen) {	/* don't allow length to change */
3532		ifa_free(ifa);
3533		return (EINVAL);
3534	}
3535	switch (ifp->if_type) {
3536	case IFT_ETHER:
3537	case IFT_FDDI:
3538	case IFT_XETHER:
3539	case IFT_ISO88025:
3540	case IFT_L2VLAN:
3541	case IFT_BRIDGE:
3542	case IFT_ARCNET:
3543	case IFT_IEEE8023ADLAG:
3544	case IFT_IEEE80211:
3545		bcopy(lladdr, LLADDR(sdl), len);
3546		ifa_free(ifa);
3547		break;
3548	default:
3549		ifa_free(ifa);
3550		return (ENODEV);
3551	}
3552
3553	/*
3554	 * If the interface is already up, we need
3555	 * to re-init it in order to reprogram its
3556	 * address filter.
3557	 */
3558	if ((ifp->if_flags & IFF_UP) != 0) {
3559		if (ifp->if_ioctl) {
3560			ifp->if_flags &= ~IFF_UP;
3561			ifr.ifr_flags = ifp->if_flags & 0xffff;
3562			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3563			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3564			ifp->if_flags |= IFF_UP;
3565			ifr.ifr_flags = ifp->if_flags & 0xffff;
3566			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3567			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3568		}
3569	}
3570	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3571	return (0);
3572}
3573
3574/*
3575 * Compat function for handling basic encapsulation requests.
3576 * Stacks that have not been converted (FDDI, IB, ..) support the
3577 * traditional output model: ARP (and other similar L2 protocols) are handled
3578 * inside the output routine, and arpresolve()/nd6_resolve() return the MAC
3579 * address instead of the full prepend data.
3580 *
3581 * This function builds the calculated header (just the MAC address) for
3582 * IPv4/IPv6 and returns EAFNOSUPPORT (which is then handled in the ARP
3583 * code) for other address families.
3584 */
3585static int
3586if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3587{
3588
3589	if (req->rtype != IFENCAP_LL)
3590		return (EOPNOTSUPP);
3591
3592	if (req->bufsize < req->lladdr_len)
3593		return (ENOMEM);
3594
3595	switch (req->family) {
3596	case AF_INET:
3597	case AF_INET6:
3598		break;
3599	default:
3600		return (EAFNOSUPPORT);
3601	}
3602
3603	/* Copy lladdr to storage as is */
3604	memmove(req->buf, req->lladdr, req->lladdr_len);
3605	req->bufsize = req->lladdr_len;
3606	req->lladdr_off = 0;
3607
3608	return (0);
3609}
3610
3611/*
3612 * Get the link layer address that was read from the hardware at attach.
3613 *
3614 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
3615 * their component interfaces as IFT_IEEE8023ADLAG.
3616 */
3617int
3618if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
3619{
3620
3621	if (ifp->if_hw_addr == NULL)
3622		return (ENODEV);
3623
3624	switch (ifp->if_type) {
3625	case IFT_ETHER:
3626	case IFT_IEEE8023ADLAG:
3627		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
3628		return (0);
3629	default:
3630		return (ENODEV);
3631	}
3632}
3633
3634/*
3635 * The name argument must be a pointer to storage which will last as
3636 * long as the interface does.  For physical devices, the result of
3637 * device_get_name(dev) is a good choice and for pseudo-devices a
3638 * static string works well.
3639 */
3640void
3641if_initname(struct ifnet *ifp, const char *name, int unit)
3642{
3643	ifp->if_dname = name;
3644	ifp->if_dunit = unit;
3645	if (unit != IF_DUNIT_NONE)
3646		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3647	else
3648		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3649}
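
/*
 * Illustrative call from a typical device attach routine (newbus names
 * assumed):
 *
 *	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 */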
3650
3651int
3652if_printf(struct ifnet *ifp, const char * fmt, ...)
3653{
3654	va_list ap;
3655	int retval;
3656
3657	retval = printf("%s: ", ifp->if_xname);
3658	va_start(ap, fmt);
3659	retval += vprintf(fmt, ap);
3660	va_end(ap);
3661	return (retval);
3662}
3663
3664void
3665if_start(struct ifnet *ifp)
3666{
3667
3668	(*(ifp)->if_start)(ifp);
3669}
3670
3671/*
3672 * Backwards compatibility interface for drivers
3673 * that have not implemented if_transmit().
3674 */
3675static int
3676if_transmit(struct ifnet *ifp, struct mbuf *m)
3677{
3678	int error;
3679
3680	IFQ_HANDOFF(ifp, m, error);
3681	return (error);
3682}
3683
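/*
 * Placeholder if_input method: simply discard the packet.
 */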
3684static void
3685if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
3686{
3687
3688	m_freem(m);
3689}
3690
3691int
3692if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3693{
3694	int active = 0;
3695
3696	IF_LOCK(ifq);
3697	if (_IF_QFULL(ifq)) {
3698		IF_UNLOCK(ifq);
3699		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3700		m_freem(m);
3701		return (0);
3702	}
3703	if (ifp != NULL) {
3704		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
3705		if (m->m_flags & (M_BCAST|M_MCAST))
3706			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
3707		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3708	}
3709	_IF_ENQUEUE(ifq, m);
3710	IF_UNLOCK(ifq);
3711	if (ifp != NULL && !active)
3712		(*(ifp)->if_start)(ifp);
3713	return (1);
3714}
3715
3716void
3717if_register_com_alloc(u_char type,
3718    if_com_alloc_t *a, if_com_free_t *f)
3719{
3720
3721	KASSERT(if_com_alloc[type] == NULL,
3722	    ("if_register_com_alloc: %d already registered", type));
3723	KASSERT(if_com_free[type] == NULL,
3724	    ("if_register_com_alloc: %d free already registered", type));
3725
3726	if_com_alloc[type] = a;
3727	if_com_free[type] = f;
3728}
3729
3730void
3731if_deregister_com_alloc(u_char type)
3732{
3733
3734	KASSERT(if_com_alloc[type] != NULL,
3735	    ("if_deregister_com_alloc: %d not registered", type));
3736	KASSERT(if_com_free[type] != NULL,
3737	    ("if_deregister_com_alloc: %d free not registered", type));
3738	if_com_alloc[type] = NULL;
3739	if_com_free[type] = NULL;
3740}
3741
3742/* API for driver access to the network-stack-owned ifnet. */
3743uint64_t
3744if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
3745{
3746	uint64_t oldbrate;
3747
3748	oldbrate = ifp->if_baudrate;
3749	ifp->if_baudrate = baudrate;
3750	return (oldbrate);
3751}
3752
3753uint64_t
3754if_getbaudrate(if_t ifp)
3755{
3756
3757	return (((struct ifnet *)ifp)->if_baudrate);
3758}
3759
3760int
3761if_setcapabilities(if_t ifp, int capabilities)
3762{
3763	((struct ifnet *)ifp)->if_capabilities = capabilities;
3764	return (0);
3765}
3766
3767int
3768if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
3769{
3770	((struct ifnet *)ifp)->if_capabilities |= setbit;
3771	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
3772
3773	return (0);
3774}
3775
3776int
3777if_getcapabilities(if_t ifp)
3778{
3779	return ((struct ifnet *)ifp)->if_capabilities;
3780}
3781
3782int
3783if_setcapenable(if_t ifp, int capabilities)
3784{
3785	((struct ifnet *)ifp)->if_capenable = capabilities;
3786	return (0);
3787}
3788
3789int
3790if_setcapenablebit(if_t ifp, int setcap, int clearcap)
3791{
3792	if(setcap)
3793		((struct ifnet *)ifp)->if_capenable |= setcap;
3794	if(clearcap)
3795		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
3796
3797	return (0);
3798}
3799
3800const char *
3801if_getdname(if_t ifp)
3802{
3803	return ((struct ifnet *)ifp)->if_dname;
3804}
3805
3806int
3807if_togglecapenable(if_t ifp, int togglecap)
3808{
3809	((struct ifnet *)ifp)->if_capenable ^= togglecap;
3810	return (0);
3811}
3812
3813int
3814if_getcapenable(if_t ifp)
3815{
3816	return ((struct ifnet *)ifp)->if_capenable;
3817}
3818
3819/*
3820 * This is largely undesirable because it ties ifnet to a device, but does
3821 * provide flexibility for an embedded product vendor. Should be used with
3822 * the understanding that it violates the interface boundaries, and should be
3823 * a last resort only.
3824 */
3825int
3826if_setdev(if_t ifp, void *dev)
3827{
3828	return (0);
3829}
3830
3831int
3832if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
3833{
3834	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
3835	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
3836
3837	return (0);
3838}
3839
3840int
3841if_getdrvflags(if_t ifp)
3842{
3843	return ((struct ifnet *)ifp)->if_drv_flags;
3844}
3845
3846int
3847if_setdrvflags(if_t ifp, int flags)
3848{
3849	((struct ifnet *)ifp)->if_drv_flags = flags;
3850	return (0);
3851}
3852
3853
3854int
3855if_setflags(if_t ifp, int flags)
3856{
3857	((struct ifnet *)ifp)->if_flags = flags;
3858	return (0);
3859}
3860
3861int
3862if_setflagbits(if_t ifp, int set, int clear)
3863{
3864	((struct ifnet *)ifp)->if_flags |= set;
3865	((struct ifnet *)ifp)->if_flags &= ~clear;
3866
3867	return (0);
3868}
3869
3870int
3871if_getflags(if_t ifp)
3872{
3873	return ((struct ifnet *)ifp)->if_flags;
3874}
3875
3876int
3877if_clearhwassist(if_t ifp)
3878{
3879	((struct ifnet *)ifp)->if_hwassist = 0;
3880	return (0);
3881}
3882
3883int
3884if_sethwassistbits(if_t ifp, int toset, int toclear)
3885{
3886	((struct ifnet *)ifp)->if_hwassist |= toset;
3887	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
3888
3889	return (0);
3890}
3891
3892int
3893if_sethwassist(if_t ifp, int hwassist_bit)
3894{
3895	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
3896	return (0);
3897}
3898
3899int
3900if_gethwassist(if_t ifp)
3901{
3902	return ((struct ifnet *)ifp)->if_hwassist;
3903}
3904
3905int
3906if_setmtu(if_t ifp, int mtu)
3907{
3908	((struct ifnet *)ifp)->if_mtu = mtu;
3909	return (0);
3910}
3911
3912int
3913if_getmtu(if_t ifp)
3914{
3915	return ((struct ifnet *)ifp)->if_mtu;
3916}
3917
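/*
 * Return the MTU to use for the given address family: if the matching
 * domain registers a dom_ifmtu() hook, defer to it, otherwise fall back to
 * the plain interface MTU.
 */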
3918int
3919if_getmtu_family(if_t ifp, int family)
3920{
3921	struct domain *dp;
3922
3923	for (dp = domains; dp; dp = dp->dom_next) {
3924		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
3925			return (dp->dom_ifmtu((struct ifnet *)ifp));
3926	}
3927
3928	return (((struct ifnet *)ifp)->if_mtu);
3929}
3930
3931int
3932if_setsoftc(if_t ifp, void *softc)
3933{
3934	((struct ifnet *)ifp)->if_softc = softc;
3935	return (0);
3936}
3937
3938void *
3939if_getsoftc(if_t ifp)
3940{
3941	return ((struct ifnet *)ifp)->if_softc;
3942}
3943
3944void
3945if_setrcvif(struct mbuf *m, if_t ifp)
3946{
3947	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
3948}
3949
3950void
3951if_setvtag(struct mbuf *m, uint16_t tag)
3952{
3953	m->m_pkthdr.ether_vtag = tag;
3954}
3955
3956uint16_t
3957if_getvtag(struct mbuf *m)
3958{
3959
3960	return (m->m_pkthdr.ether_vtag);
3961}
3962
3963int
3964if_sendq_empty(if_t ifp)
3965{
3966	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
3967}
3968
3969struct ifaddr *
3970if_getifaddr(if_t ifp)
3971{
3972	return ((struct ifnet *)ifp)->if_addr;
3973}
3974
3975int
3976if_getamcount(if_t ifp)
3977{
3978	return ((struct ifnet *)ifp)->if_amcount;
3979}
3980
3981
3982int
3983if_setsendqready(if_t ifp)
3984{
3985	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
3986	return (0);
3987}
3988
3989int
3990if_setsendqlen(if_t ifp, int tx_desc_count)
3991{
3992	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
3993	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
3994
3995	return (0);
3996}
3997
3998int
3999if_vlantrunkinuse(if_t ifp)
4000{
4001	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
4002}
4003
4004int
4005if_input(if_t ifp, struct mbuf* sendmp)
4006{
4007	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
4008	return (0);
4009
4010}
4011
4012/* XXX */
4013#ifndef ETH_ADDR_LEN
4014#define ETH_ADDR_LEN 6
4015#endif
4016
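/*
 * Copy up to 'max' link-layer (AF_LINK) multicast addresses into the
 * caller-supplied 'mta' array, ETH_ADDR_LEN bytes apiece, and report the
 * count via 'cnt'.  The caller is expected to hold the multicast address
 * lock; if_multiaddr_array() below is the locked wrapper.
 */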
4017int
4018if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
4019{
4020	struct ifmultiaddr *ifma;
4021	uint8_t *lmta = (uint8_t *)mta;
4022	int mcnt = 0;
4023
4024	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4025		if (ifma->ifma_addr->sa_family != AF_LINK)
4026			continue;
4027
4028		if (mcnt == max)
4029			break;
4030
4031		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
4032		    &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
4033		mcnt++;
4034	}
4035	*cnt = mcnt;
4036
4037	return (0);
4038}
4039
4040int
4041if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
4042{
4043	int error;
4044
4045	if_maddr_rlock(ifp);
4046	error = if_setupmultiaddr(ifp, mta, cnt, max);
4047	if_maddr_runlock(ifp);
4048	return (error);
4049}
4050
4051int
4052if_multiaddr_count(if_t ifp, int max)
4053{
4054	struct ifmultiaddr *ifma;
4055	int count;
4056
4057	count = 0;
4058	if_maddr_rlock(ifp);
4059	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
4060		if (ifma->ifma_addr->sa_family != AF_LINK)
4061			continue;
4062		count++;
4063		if (count == max)
4064			break;
4065	}
4066	if_maddr_runlock(ifp);
4067	return (count);
4068}
4069
4070int
4071if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
4072{
4073	struct ifmultiaddr *ifma;
4074	int cnt = 0;
4075
4076	if_maddr_rlock(ifp);
4077	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4078		cnt += filter(arg, ifma, cnt);
4079	if_maddr_runlock(ifp);
4080	return (cnt);
4081}
4082
4083struct mbuf *
4084if_dequeue(if_t ifp)
4085{
4086	struct mbuf *m;
4087	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
4088
4089	return (m);
4090}
4091
4092int
4093if_sendq_prepend(if_t ifp, struct mbuf *m)
4094{
4095	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
4096	return (0);
4097}
4098
4099int
4100if_setifheaderlen(if_t ifp, int len)
4101{
4102	((struct ifnet *)ifp)->if_hdrlen = len;
4103	return (0);
4104}
4105
4106caddr_t
4107if_getlladdr(if_t ifp)
4108{
4109	return (IF_LLADDR((struct ifnet *)ifp));
4110}
4111
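/*
 * Thin KPI wrapper around if_alloc(): allocate an ifnet of the given type
 * and return it as an opaque handle.
 */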
4112void *
4113if_gethandle(u_char type)
4114{
4115	return (if_alloc(type));
4116}
4117
4118void
4119if_bpfmtap(if_t ifh, struct mbuf *m)
4120{
4121	struct ifnet *ifp = (struct ifnet *)ifh;
4122
4123	BPF_MTAP(ifp, m);
4124}
4125
4126void
4127if_etherbpfmtap(if_t ifh, struct mbuf *m)
4128{
4129	struct ifnet *ifp = (struct ifnet *)ifh;
4130
4131	ETHER_BPF_MTAP(ifp, m);
4132}
4133
4134void
4135if_vlancap(if_t ifh)
4136{
4137	struct ifnet *ifp = (struct ifnet *)ifh;
4138	VLAN_CAPABILITIES(ifp);
4139}
4140
4141void
4142if_setinitfn(if_t ifp, void (*init_fn)(void *))
4143{
4144	((struct ifnet *)ifp)->if_init = init_fn;
4145}
4146
4147void
4148if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
4149{
4150	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
4151}
4152
4153void
4154if_setstartfn(if_t ifp, void (*start_fn)(if_t))
4155{
4156	((struct ifnet *)ifp)->if_start = (void *)start_fn;
4157}
4158
4159void
4160if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
4161{
4162	((struct ifnet *)ifp)->if_transmit = start_fn;
4163}
4164
4165void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
4166{
4167	((struct ifnet *)ifp)->if_qflush = flush_fn;
4168
4169}
4170
4171void
4172if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
4173{
4174
4175	ifp->if_get_counter = fn;
4176}
4177
4178/* Revisit these - These are inline functions originally. */
4179int
4180drbr_inuse_drv(if_t ifh, struct buf_ring *br)
4181{
4182	return drbr_inuse(ifh, br);
4183}
4184
4185struct mbuf*
4186drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
4187{
4188	return drbr_dequeue(ifh, br);
4189}
4190
4191int
4192drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
4193{
4194	return drbr_needs_enqueue(ifh, br);
4195}
4196
4197int
4198drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
4199{
4200	return drbr_enqueue(ifh, br, m);
4201
4202}
4203