if.c revision 168561
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: head/sys/net/if.c 168561 2007-04-10 00:27:25Z thompsa $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36#include "opt_mac.h"
37#include "opt_carp.h"
38
39#include <sys/param.h>
40#include <sys/types.h>
41#include <sys/conf.h>
42#include <sys/malloc.h>
43#include <sys/sbuf.h>
44#include <sys/bus.h>
45#include <sys/mbuf.h>
46#include <sys/systm.h>
47#include <sys/priv.h>
48#include <sys/proc.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/protosw.h>
52#include <sys/kernel.h>
53#include <sys/sockio.h>
54#include <sys/syslog.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/domain.h>
58#include <sys/jail.h>
59#include <machine/stdarg.h>
60
61#include <net/if.h>
62#include <net/if_clone.h>
63#include <net/if_dl.h>
64#include <net/if_types.h>
65#include <net/if_var.h>
66#include <net/radix.h>
67#include <net/route.h>
68
69#if defined(INET) || defined(INET6)
70/*XXX*/
71#include <netinet/in.h>
72#include <netinet/in_var.h>
73#ifdef INET6
74#include <netinet6/in6_var.h>
75#include <netinet6/in6_ifattach.h>
76#endif
77#endif
78#ifdef INET
79#include <netinet/if_ether.h>
80#endif
81#ifdef DEV_CARP
82#include <netinet/ip_carp.h>
83#endif
84
85#include <security/mac/mac_framework.h>
86
87SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
88SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
89
90/* Log link state change events */
91static int log_link_state_change = 1;
92
93SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
94	&log_link_state_change, 0,
95	"log interface link state change events");
96
97void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
98void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
99void	(*trunk_linkstate_p)(struct ifnet *ifp, int state);
100
101struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
102
103/*
104 * XXX: Style; these should be sorted alphabetically, and unprototyped
105 * static functions should be prototyped. Currently they are sorted by
106 * declaration order.
107 */
108static void	if_attachdomain(void *);
109static void	if_attachdomain1(struct ifnet *);
110static void	if_purgemaddrs(struct ifnet *);
111static int	ifconf(u_long, caddr_t);
112static void	if_freemulti(struct ifmultiaddr *);
113static void	if_grow(void);
114static void	if_init(void *);
115static void	if_check(void *);
116static void	if_qflush(struct ifaltq *);
117static void	if_route(struct ifnet *, int flag, int fam);
118static int	if_setflag(struct ifnet *, int, int, int *, int);
119static void	if_slowtimo(void *);
120static void	if_unroute(struct ifnet *, int flag, int fam);
121static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
122static int	if_rtdel(struct radix_node *, void *);
123static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
124static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
125static void	if_start_deferred(void *context, int pending);
126static void	do_link_state_change(void *, int);
127static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
128static int	if_getgroupmembers(struct ifgroupreq *);
129#ifdef INET6
130/*
131 * XXX: declare here to avoid to include many inet6 related files..
132 * should be more generalized?
133 */
134extern void	nd6_setmtu(struct ifnet *);
135#endif
136
137int	if_index = 0;
138struct	ifindex_entry *ifindex_table = NULL;
139int	ifqmaxlen = IFQ_MAXLEN;
140struct	ifnethead ifnet;	/* depend on static init XXX */
141struct	ifgrouphead ifg_head;
142struct	mtx ifnet_lock;
143static	if_com_alloc_t *if_com_alloc[256];
144static	if_com_free_t *if_com_free[256];
145
146static int	if_indexlim = 8;
147static struct	knlist ifklist;
148
149static void	filt_netdetach(struct knote *kn);
150static int	filt_netdev(struct knote *kn, long hint);
151
152static struct filterops netdev_filtops =
153    { 1, NULL, filt_netdetach, filt_netdev };
154
155/*
156 * System initialization
157 */
158SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
159SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
160
161MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
162MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
163MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
164
165static d_open_t		netopen;
166static d_close_t	netclose;
167static d_ioctl_t	netioctl;
168static d_kqfilter_t	netkqfilter;
169
170static struct cdevsw net_cdevsw = {
171	.d_version =	D_VERSION,
172	.d_flags =	D_NEEDGIANT,
173	.d_open =	netopen,
174	.d_close =	netclose,
175	.d_ioctl =	netioctl,
176	.d_name =	"net",
177	.d_kqfilter =	netkqfilter,
178};
179
/*
 * Open entry point for the net devices.  No per-open state is kept, so
 * every open succeeds and all arguments are ignored.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{

	return (0);
}
185
/*
 * Close entry point for the net devices.  There is nothing to tear down,
 * so every close succeeds and all arguments are ignored.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
191
192static int
193netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
194{
195	struct ifnet *ifp;
196	int error, idx;
197
198	/* only support interface specific ioctls */
199	if (IOCGROUP(cmd) != 'i')
200		return (EOPNOTSUPP);
201	idx = minor(dev);
202	if (idx == 0) {
203		/*
204		 * special network device, not interface.
205		 */
206		if (cmd == SIOCGIFCONF)
207			return (ifconf(cmd, data));	/* XXX remove cmd */
208#ifdef __amd64__
209		if (cmd == SIOCGIFCONF32)
210			return (ifconf(cmd, data));	/* XXX remove cmd */
211#endif
212		return (EOPNOTSUPP);
213	}
214
215	ifp = ifnet_byindex(idx);
216	if (ifp == NULL)
217		return (ENXIO);
218
219	error = ifhwioctl(cmd, ifp, data, td);
220	if (error == ENOIOCTL)
221		error = EOPNOTSUPP;
222	return (error);
223}
224
225static int
226netkqfilter(struct cdev *dev, struct knote *kn)
227{
228	struct knlist *klist;
229	struct ifnet *ifp;
230	int idx;
231
232	switch (kn->kn_filter) {
233	case EVFILT_NETDEV:
234		kn->kn_fop = &netdev_filtops;
235		break;
236	default:
237		return (EINVAL);
238	}
239
240	idx = minor(dev);
241	if (idx == 0) {
242		klist = &ifklist;
243	} else {
244		ifp = ifnet_byindex(idx);
245		if (ifp == NULL)
246			return (1);
247		klist = &ifp->if_klist;
248	}
249
250	kn->kn_hook = (caddr_t)klist;
251
252	knlist_add(klist, kn, 0);
253
254	return (0);
255}
256
257static void
258filt_netdetach(struct knote *kn)
259{
260	struct knlist *klist = (struct knlist *)kn->kn_hook;
261
262	knlist_remove(klist, kn, 0);
263}
264
265static int
266filt_netdev(struct knote *kn, long hint)
267{
268	struct knlist *klist = (struct knlist *)kn->kn_hook;
269
270	/*
271	 * Currently NOTE_EXIT is abused to indicate device detach.
272	 */
273	if (hint == NOTE_EXIT) {
274		kn->kn_data = NOTE_LINKINV;
275		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
276		knlist_remove_inevent(klist, kn);
277		return (1);
278	}
279	if (hint != 0)
280		kn->kn_data = hint;			/* current status */
281	if (kn->kn_sfflags & hint)
282		kn->kn_fflags |= hint;
283	return (kn->kn_fflags != 0);
284}
285
286/*
287 * Network interface utility routines.
288 *
289 * Routines with ifa_ifwith* names take sockaddr *'s as
290 * parameters.
291 */
292/* ARGSUSED*/
293static void
294if_init(void *dummy __unused)
295{
296
297	IFNET_LOCK_INIT();
298	TAILQ_INIT(&ifnet);
299	TAILQ_INIT(&ifg_head);
300	knlist_init(&ifklist, NULL, NULL, NULL, NULL);
301	if_grow();				/* create initial table */
302	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
303	    UID_ROOT, GID_WHEEL, 0600, "network");
304	if_clone_init();
305}
306
307static void
308if_grow(void)
309{
310	u_int n;
311	struct ifindex_entry *e;
312
313	if_indexlim <<= 1;
314	n = if_indexlim * sizeof(*e);
315	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
316	if (ifindex_table != NULL) {
317		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
318		free((caddr_t)ifindex_table, M_IFNET);
319	}
320	ifindex_table = e;
321}
322
323/* ARGSUSED*/
324static void
325if_check(void *dummy __unused)
326{
327	struct ifnet *ifp;
328	int s;
329
330	s = splimp();
331	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
332	TAILQ_FOREACH(ifp, &ifnet, if_link) {
333		if (ifp->if_snd.ifq_maxlen == 0) {
334			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
335			ifp->if_snd.ifq_maxlen = ifqmaxlen;
336		}
337		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
338			if_printf(ifp,
339			    "XXX: driver didn't initialize queue mtx\n");
340			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
341			    MTX_NETWORK_LOCK, MTX_DEF);
342		}
343	}
344	IFNET_RUNLOCK();
345	splx(s);
346	if_slowtimo(0);
347}
348
349/*
350 * Allocate a struct ifnet and in index for an interface.
351 */
352struct ifnet*
353if_alloc(u_char type)
354{
355	struct ifnet *ifp;
356
357	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
358
359	/*
360	 * Try to find an empty slot below if_index.  If we fail, take
361	 * the next slot.
362	 *
363	 * XXX: should be locked!
364	 */
365	for (ifp->if_index = 1; ifp->if_index <= if_index; ifp->if_index++) {
366		if (ifnet_byindex(ifp->if_index) == NULL)
367			break;
368	}
369	/* Catch if_index overflow. */
370	if (ifp->if_index < 1) {
371		free(ifp, M_IFNET);
372		return (NULL);
373	}
374	if (ifp->if_index > if_index)
375		if_index = ifp->if_index;
376	if (if_index >= if_indexlim)
377		if_grow();
378	ifnet_byindex(ifp->if_index) = ifp;
379
380	ifp->if_type = type;
381
382	if (if_com_alloc[type] != NULL) {
383		ifp->if_l2com = if_com_alloc[type](type, ifp);
384		if (ifp->if_l2com == NULL) {
385			free(ifp, M_IFNET);
386			return (NULL);
387		}
388	}
389	IF_ADDR_LOCK_INIT(ifp);
390
391	return (ifp);
392}
393
394void
395if_free(struct ifnet *ifp)
396{
397
398	/* Do not add code to this function!  Add it to if_free_type(). */
399	if_free_type(ifp, ifp->if_type);
400}
401
402void
403if_free_type(struct ifnet *ifp, u_char type)
404{
405
406	if (ifp != ifnet_byindex(ifp->if_index)) {
407		if_printf(ifp, "%s: value was not if_alloced, skipping\n",
408		    __func__);
409		return;
410	}
411
412	IF_ADDR_LOCK_DESTROY(ifp);
413
414	ifnet_byindex(ifp->if_index) = NULL;
415
416	/* XXX: should be locked with if_findindex() */
417	while (if_index > 0 && ifnet_byindex(if_index) == NULL)
418		if_index--;
419
420	if (if_com_free[type] != NULL)
421		if_com_free[type](ifp->if_l2com, type);
422
423	free(ifp, M_IFNET);
424};
425
426/*
427 * Attach an interface to the
428 * list of "active" interfaces.
429 */
430void
431if_attach(struct ifnet *ifp)
432{
433	unsigned socksize, ifasize;
434	int namelen, masklen;
435	struct sockaddr_dl *sdl;
436	struct ifaddr *ifa;
437
438	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
439		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
440		    ifp->if_xname);
441
442	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
443	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
444	IF_AFDATA_LOCK_INIT(ifp);
445	ifp->if_afdata_initialized = 0;
446	/*
447	 * XXX -
448	 * The old code would work if the interface passed a pre-existing
449	 * chain of ifaddrs to this code.  We don't trust our callers to
450	 * properly initialize the tailq, however, so we no longer allow
451	 * this unlikely case.
452	 */
453	TAILQ_INIT(&ifp->if_addrhead);
454	TAILQ_INIT(&ifp->if_prefixhead);
455	TAILQ_INIT(&ifp->if_multiaddrs);
456	TAILQ_INIT(&ifp->if_groups);
457
458	if_addgroup(ifp, IFG_ALL);
459
460	knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
461	getmicrotime(&ifp->if_lastchange);
462	ifp->if_data.ifi_epoch = time_uptime;
463	ifp->if_data.ifi_datalen = sizeof(struct if_data);
464
465#ifdef MAC
466	mac_init_ifnet(ifp);
467	mac_create_ifnet(ifp);
468#endif
469
470	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
471	    unit2minor(ifp->if_index),
472	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
473	    net_cdevsw.d_name, ifp->if_xname);
474	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
475	    net_cdevsw.d_name, ifp->if_index);
476
477	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
478
479	/*
480	 * create a Link Level name for this device
481	 */
482	namelen = strlen(ifp->if_xname);
483	/*
484	 * Always save enough space for any possiable name so we can do
485	 * a rename in place later.
486	 */
487	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
488	socksize = masklen + ifp->if_addrlen;
489	if (socksize < sizeof(*sdl))
490		socksize = sizeof(*sdl);
491	socksize = roundup2(socksize, sizeof(long));
492	ifasize = sizeof(*ifa) + 2 * socksize;
493	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
494	IFA_LOCK_INIT(ifa);
495	sdl = (struct sockaddr_dl *)(ifa + 1);
496	sdl->sdl_len = socksize;
497	sdl->sdl_family = AF_LINK;
498	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
499	sdl->sdl_nlen = namelen;
500	sdl->sdl_index = ifp->if_index;
501	sdl->sdl_type = ifp->if_type;
502	ifp->if_addr = ifa;
503	ifa->ifa_ifp = ifp;
504	ifa->ifa_rtrequest = link_rtrequest;
505	ifa->ifa_addr = (struct sockaddr *)sdl;
506	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
507	ifa->ifa_netmask = (struct sockaddr *)sdl;
508	sdl->sdl_len = masklen;
509	while (namelen != 0)
510		sdl->sdl_data[--namelen] = 0xff;
511	ifa->ifa_refcnt = 1;
512	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
513	ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
514	ifp->if_snd.altq_type = 0;
515	ifp->if_snd.altq_disc = NULL;
516	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
517	ifp->if_snd.altq_tbr  = NULL;
518	ifp->if_snd.altq_ifp  = ifp;
519
520	IFNET_WLOCK();
521	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
522	IFNET_WUNLOCK();
523
524	if (domain_init_status >= 2)
525		if_attachdomain1(ifp);
526
527	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
528	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
529
530	/* Announce the interface. */
531	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
532
533	if (ifp->if_watchdog != NULL)
534		if_printf(ifp, "using obsoleted if_watchdog interface\n");
535}
536
537static void
538if_attachdomain(void *dummy)
539{
540	struct ifnet *ifp;
541	int s;
542
543	s = splnet();
544	TAILQ_FOREACH(ifp, &ifnet, if_link)
545		if_attachdomain1(ifp);
546	splx(s);
547}
548SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
549    if_attachdomain, NULL);
550
551static void
552if_attachdomain1(struct ifnet *ifp)
553{
554	struct domain *dp;
555	int s;
556
557	s = splnet();
558
559	/*
560	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
561	 * cannot lock ifp->if_afdata initialization, entirely.
562	 */
563	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
564		splx(s);
565		return;
566	}
567	if (ifp->if_afdata_initialized >= domain_init_status) {
568		IF_AFDATA_UNLOCK(ifp);
569		splx(s);
570		printf("if_attachdomain called more than once on %s\n",
571		    ifp->if_xname);
572		return;
573	}
574	ifp->if_afdata_initialized = domain_init_status;
575	IF_AFDATA_UNLOCK(ifp);
576
577	/* address family dependent data region */
578	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
579	for (dp = domains; dp; dp = dp->dom_next) {
580		if (dp->dom_ifattach)
581			ifp->if_afdata[dp->dom_family] =
582			    (*dp->dom_ifattach)(ifp);
583	}
584
585	splx(s);
586}
587
588/*
589 * Remove any unicast or broadcast network addresses from an interface.
590 */
591
592void
593if_purgeaddrs(struct ifnet *ifp)
594{
595	struct ifaddr *ifa, *next;
596
597	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
598		if (ifa->ifa_addr->sa_family == AF_LINK)
599			continue;
600#ifdef INET
601		/* XXX: Ugly!! ad hoc just for INET */
602		if (ifa->ifa_addr->sa_family == AF_INET) {
603			struct ifaliasreq ifr;
604
605			bzero(&ifr, sizeof(ifr));
606			ifr.ifra_addr = *ifa->ifa_addr;
607			if (ifa->ifa_dstaddr)
608				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
609			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
610			    NULL) == 0)
611				continue;
612		}
613#endif /* INET */
614#ifdef INET6
615		if (ifa->ifa_addr->sa_family == AF_INET6) {
616			in6_purgeaddr(ifa);
617			/* ifp_addrhead is already updated */
618			continue;
619		}
620#endif /* INET6 */
621		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
622		IFAFREE(ifa);
623	}
624}
625
626/*
627 * Remove any multicast network addresses from an interface.
628 */
629static void
630if_purgemaddrs(struct ifnet *ifp)
631{
632	struct ifmultiaddr *ifma;
633	struct ifmultiaddr *next;
634
635	IF_ADDR_LOCK(ifp);
636	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
637		if_delmulti_locked(ifp, ifma, 1);
638	IF_ADDR_UNLOCK(ifp);
639}
640
641/*
642 * Detach an interface, removing it from the
643 * list of "active" interfaces.
644 *
645 * XXXRW: There are some significant questions about event ordering, and
646 * how to prevent things from starting to use the interface during detach.
647 */
648void
649if_detach(struct ifnet *ifp)
650{
651	struct ifaddr *ifa;
652	struct radix_node_head	*rnh;
653	int s;
654	int i;
655	struct domain *dp;
656 	struct ifnet *iter;
657 	int found = 0;
658
659	IFNET_WLOCK();
660	TAILQ_FOREACH(iter, &ifnet, if_link)
661		if (iter == ifp) {
662			TAILQ_REMOVE(&ifnet, ifp, if_link);
663			found = 1;
664			break;
665		}
666	IFNET_WUNLOCK();
667	if (!found)
668		return;
669
670	/*
671	 * Remove/wait for pending events.
672	 */
673	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
674
675	/*
676	 * Remove routes and flush queues.
677	 */
678	s = splnet();
679	if_down(ifp);
680#ifdef ALTQ
681	if (ALTQ_IS_ENABLED(&ifp->if_snd))
682		altq_disable(&ifp->if_snd);
683	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
684		altq_detach(&ifp->if_snd);
685#endif
686
687	if_purgeaddrs(ifp);
688
689#ifdef INET
690	in_ifdetach(ifp);
691#endif
692
693#ifdef INET6
694	/*
695	 * Remove all IPv6 kernel structs related to ifp.  This should be done
696	 * before removing routing entries below, since IPv6 interface direct
697	 * routes are expected to be removed by the IPv6-specific kernel API.
698	 * Otherwise, the kernel will detect some inconsistency and bark it.
699	 */
700	in6_ifdetach(ifp);
701#endif
702	if_purgemaddrs(ifp);
703
704	/*
705	 * Remove link ifaddr pointer and maybe decrement if_index.
706	 * Clean up all addresses.
707	 */
708	ifp->if_addr = NULL;
709	destroy_dev(ifdev_byindex(ifp->if_index));
710	ifdev_byindex(ifp->if_index) = NULL;
711
712	/* We can now free link ifaddr. */
713	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
714		ifa = TAILQ_FIRST(&ifp->if_addrhead);
715		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
716		IFAFREE(ifa);
717	}
718
719	/*
720	 * Delete all remaining routes using this interface
721	 * Unfortuneatly the only way to do this is to slog through
722	 * the entire routing table looking for routes which point
723	 * to this interface...oh well...
724	 */
725	for (i = 1; i <= AF_MAX; i++) {
726		if ((rnh = rt_tables[i]) == NULL)
727			continue;
728		RADIX_NODE_HEAD_LOCK(rnh);
729		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
730		RADIX_NODE_HEAD_UNLOCK(rnh);
731	}
732
733	/* Announce that the interface is gone. */
734	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
735	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
736	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
737
738	IF_AFDATA_LOCK(ifp);
739	for (dp = domains; dp; dp = dp->dom_next) {
740		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
741			(*dp->dom_ifdetach)(ifp,
742			    ifp->if_afdata[dp->dom_family]);
743	}
744	IF_AFDATA_UNLOCK(ifp);
745
746#ifdef MAC
747	mac_destroy_ifnet(ifp);
748#endif /* MAC */
749	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
750	knlist_clear(&ifp->if_klist, 0);
751	knlist_destroy(&ifp->if_klist);
752	mtx_destroy(&ifp->if_snd.ifq_mtx);
753	IF_AFDATA_DESTROY(ifp);
754	splx(s);
755}
756
757/*
758 * Add a group to an interface
759 */
760int
761if_addgroup(struct ifnet *ifp, const char *groupname)
762{
763	struct ifg_list		*ifgl;
764	struct ifg_group	*ifg = NULL;
765	struct ifg_member	*ifgm;
766
767	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
768	    groupname[strlen(groupname) - 1] <= '9')
769		return (EINVAL);
770
771	IFNET_WLOCK();
772	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
773		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
774			IFNET_WUNLOCK();
775			return (EEXIST);
776		}
777
778	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
779	    M_NOWAIT)) == NULL) {
780	    	IFNET_WUNLOCK();
781		return (ENOMEM);
782	}
783
784	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
785	    M_TEMP, M_NOWAIT)) == NULL) {
786		free(ifgl, M_TEMP);
787		IFNET_WUNLOCK();
788		return (ENOMEM);
789	}
790
791	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
792		if (!strcmp(ifg->ifg_group, groupname))
793			break;
794
795	if (ifg == NULL) {
796		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
797		    M_TEMP, M_NOWAIT)) == NULL) {
798			free(ifgl, M_TEMP);
799			free(ifgm, M_TEMP);
800			IFNET_WUNLOCK();
801			return (ENOMEM);
802		}
803		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
804		ifg->ifg_refcnt = 0;
805		TAILQ_INIT(&ifg->ifg_members);
806		EVENTHANDLER_INVOKE(group_attach_event, ifg);
807		TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
808	}
809
810	ifg->ifg_refcnt++;
811	ifgl->ifgl_group = ifg;
812	ifgm->ifgm_ifp = ifp;
813
814	IF_ADDR_LOCK(ifp);
815	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
816	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
817	IF_ADDR_UNLOCK(ifp);
818
819	IFNET_WUNLOCK();
820
821	EVENTHANDLER_INVOKE(group_change_event, groupname);
822
823	return (0);
824}
825
826/*
827 * Remove a group from an interface
828 */
829int
830if_delgroup(struct ifnet *ifp, const char *groupname)
831{
832	struct ifg_list		*ifgl;
833	struct ifg_member	*ifgm;
834
835	IFNET_WLOCK();
836	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
837		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
838			break;
839	if (ifgl == NULL) {
840		IFNET_WUNLOCK();
841		return (ENOENT);
842	}
843
844	IF_ADDR_LOCK(ifp);
845	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
846	IF_ADDR_UNLOCK(ifp);
847
848	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
849		if (ifgm->ifgm_ifp == ifp)
850			break;
851
852	if (ifgm != NULL) {
853		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
854		free(ifgm, M_TEMP);
855	}
856
857	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
858		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
859		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
860		free(ifgl->ifgl_group, M_TEMP);
861	}
862	IFNET_WUNLOCK();
863
864	free(ifgl, M_TEMP);
865
866	EVENTHANDLER_INVOKE(group_change_event, groupname);
867
868	return (0);
869}
870
871/*
872 * Stores all groups from an interface in memory pointed
873 * to by data
874 */
875static int
876if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
877{
878	int			 len, error;
879	struct ifg_list		*ifgl;
880	struct ifg_req		 ifgrq, *ifgp;
881	struct ifgroupreq	*ifgr = data;
882
883	if (ifgr->ifgr_len == 0) {
884		IF_ADDR_LOCK(ifp);
885		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
886			ifgr->ifgr_len += sizeof(struct ifg_req);
887		IF_ADDR_UNLOCK(ifp);
888		return (0);
889	}
890
891	len = ifgr->ifgr_len;
892	ifgp = ifgr->ifgr_groups;
893	/* XXX: wire */
894	IF_ADDR_LOCK(ifp);
895	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
896		if (len < sizeof(ifgrq)) {
897			IF_ADDR_UNLOCK(ifp);
898			return (EINVAL);
899		}
900		bzero(&ifgrq, sizeof ifgrq);
901		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
902		    sizeof(ifgrq.ifgrq_group));
903		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
904		    	IF_ADDR_UNLOCK(ifp);
905			return (error);
906		}
907		len -= sizeof(ifgrq);
908		ifgp++;
909	}
910	IF_ADDR_UNLOCK(ifp);
911
912	return (0);
913}
914
915/*
916 * Stores all members of a group in memory pointed to by data
917 */
918static int
919if_getgroupmembers(struct ifgroupreq *data)
920{
921	struct ifgroupreq	*ifgr = data;
922	struct ifg_group	*ifg;
923	struct ifg_member	*ifgm;
924	struct ifg_req		 ifgrq, *ifgp;
925	int			 len, error;
926
927	IFNET_RLOCK();
928	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
929		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
930			break;
931	if (ifg == NULL) {
932		IFNET_RUNLOCK();
933		return (ENOENT);
934	}
935
936	if (ifgr->ifgr_len == 0) {
937		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
938			ifgr->ifgr_len += sizeof(ifgrq);
939		IFNET_RUNLOCK();
940		return (0);
941	}
942
943	len = ifgr->ifgr_len;
944	ifgp = ifgr->ifgr_groups;
945	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
946		if (len < sizeof(ifgrq)) {
947			IFNET_RUNLOCK();
948			return (EINVAL);
949		}
950		bzero(&ifgrq, sizeof ifgrq);
951		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
952		    sizeof(ifgrq.ifgrq_member));
953		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
954			IFNET_RUNLOCK();
955			return (error);
956		}
957		len -= sizeof(ifgrq);
958		ifgp++;
959	}
960	IFNET_RUNLOCK();
961
962	return (0);
963}
964
965/*
966 * Delete Routes for a Network Interface
967 *
968 * Called for each routing entry via the rnh->rnh_walktree() call above
969 * to delete all route entries referencing a detaching network interface.
970 *
971 * Arguments:
972 *	rn	pointer to node in the routing table
973 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
974 *
975 * Returns:
976 *	0	successful
977 *	errno	failed - reason indicated
978 *
979 */
980static int
981if_rtdel(struct radix_node *rn, void *arg)
982{
983	struct rtentry	*rt = (struct rtentry *)rn;
984	struct ifnet	*ifp = arg;
985	int		err;
986
987	if (rt->rt_ifp == ifp) {
988
989		/*
990		 * Protect (sorta) against walktree recursion problems
991		 * with cloned routes
992		 */
993		if ((rt->rt_flags & RTF_UP) == 0)
994			return (0);
995
996		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
997				rt_mask(rt), rt->rt_flags,
998				(struct rtentry **) NULL);
999		if (err) {
1000			log(LOG_WARNING, "if_rtdel: error %d\n", err);
1001		}
1002	}
1003
1004	return (0);
1005}
1006
/*
 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
 * structs used to represent other address families, it is necessary
 * to perform a different comparison.
 */

/*
 * True when the first a1->sa_len bytes of the two addresses are equal.
 * Note that a1 is evaluated more than once.
 */
#define	sa_equal(a1, a2)						\
	(bcmp((a1), (a2), (a1)->sa_len) == 0)

/*
 * True when two link-level addresses have the same sdl_len and the same
 * link-layer address bytes (sdl_alen of them, as recorded in a1).
 * Both arguments are evaluated more than once.
 */
#define	sa_dl_equal(a1, a2)						\
	(((struct sockaddr_dl *)(a1))->sdl_len ==			\
	    ((struct sockaddr_dl *)(a2))->sdl_len &&			\
	    bcmp(LLADDR((struct sockaddr_dl *)(a1)),			\
	    LLADDR((struct sockaddr_dl *)(a2)),				\
	    ((struct sockaddr_dl *)(a1))->sdl_alen) == 0)
1022
1023/*
1024 * Locate an interface based on a complete address.
1025 */
1026/*ARGSUSED*/
1027struct ifaddr *
1028ifa_ifwithaddr(struct sockaddr *addr)
1029{
1030	struct ifnet *ifp;
1031	struct ifaddr *ifa;
1032
1033	IFNET_RLOCK();
1034	TAILQ_FOREACH(ifp, &ifnet, if_link)
1035		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1036			if (ifa->ifa_addr->sa_family != addr->sa_family)
1037				continue;
1038			if (sa_equal(addr, ifa->ifa_addr))
1039				goto done;
1040			/* IP6 doesn't have broadcast */
1041			if ((ifp->if_flags & IFF_BROADCAST) &&
1042			    ifa->ifa_broadaddr &&
1043			    ifa->ifa_broadaddr->sa_len != 0 &&
1044			    sa_equal(ifa->ifa_broadaddr, addr))
1045				goto done;
1046		}
1047	ifa = NULL;
1048done:
1049	IFNET_RUNLOCK();
1050	return (ifa);
1051}
1052
1053/*
1054 * Locate an interface based on the broadcast address.
1055 */
1056/* ARGSUSED */
1057struct ifaddr *
1058ifa_ifwithbroadaddr(struct sockaddr *addr)
1059{
1060	struct ifnet *ifp;
1061	struct ifaddr *ifa;
1062
1063	IFNET_RLOCK();
1064	TAILQ_FOREACH(ifp, &ifnet, if_link)
1065		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1066			if (ifa->ifa_addr->sa_family != addr->sa_family)
1067				continue;
1068			if ((ifp->if_flags & IFF_BROADCAST) &&
1069			    ifa->ifa_broadaddr &&
1070			    ifa->ifa_broadaddr->sa_len != 0 &&
1071			    sa_equal(ifa->ifa_broadaddr, addr))
1072				goto done;
1073		}
1074	ifa = NULL;
1075done:
1076	IFNET_RUNLOCK();
1077	return (ifa);
1078}
1079
1080/*
1081 * Locate the point to point interface with a given destination address.
1082 */
1083/*ARGSUSED*/
1084struct ifaddr *
1085ifa_ifwithdstaddr(struct sockaddr *addr)
1086{
1087	struct ifnet *ifp;
1088	struct ifaddr *ifa;
1089
1090	IFNET_RLOCK();
1091	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1092		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1093			continue;
1094		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1095			if (ifa->ifa_addr->sa_family != addr->sa_family)
1096				continue;
1097			if (ifa->ifa_dstaddr &&
1098			    sa_equal(addr, ifa->ifa_dstaddr))
1099				goto done;
1100		}
1101	}
1102	ifa = NULL;
1103done:
1104	IFNET_RUNLOCK();
1105	return (ifa);
1106}
1107
1108/*
1109 * Find an interface on a specific network.  If many, choice
1110 * is most specific found.
1111 */
1112struct ifaddr *
1113ifa_ifwithnet(struct sockaddr *addr)
1114{
1115	struct ifnet *ifp;
1116	struct ifaddr *ifa;
1117	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
1118	u_int af = addr->sa_family;
1119	char *addr_data = addr->sa_data, *cplim;
1120
1121	/*
1122	 * AF_LINK addresses can be looked up directly by their index number,
1123	 * so do that if we can.
1124	 */
1125	if (af == AF_LINK) {
1126	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1127	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
1128		return (ifaddr_byindex(sdl->sdl_index));
1129	}
1130
1131	/*
1132	 * Scan though each interface, looking for ones that have
1133	 * addresses in this address family.
1134	 */
1135	IFNET_RLOCK();
1136	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1137		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1138			char *cp, *cp2, *cp3;
1139
1140			if (ifa->ifa_addr->sa_family != af)
1141next:				continue;
1142			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1143				/*
1144				 * This is a bit broken as it doesn't
1145				 * take into account that the remote end may
1146				 * be a single node in the network we are
1147				 * looking for.
1148				 * The trouble is that we don't know the
1149				 * netmask for the remote end.
1150				 */
1151				if (ifa->ifa_dstaddr != 0 &&
1152				    sa_equal(addr, ifa->ifa_dstaddr))
1153					goto done;
1154			} else {
1155				/*
1156				 * if we have a special address handler,
1157				 * then use it instead of the generic one.
1158				 */
1159				if (ifa->ifa_claim_addr) {
1160					if ((*ifa->ifa_claim_addr)(ifa, addr))
1161						goto done;
1162					continue;
1163				}
1164
1165				/*
1166				 * Scan all the bits in the ifa's address.
1167				 * If a bit dissagrees with what we are
1168				 * looking for, mask it with the netmask
1169				 * to see if it really matters.
1170				 * (A byte at a time)
1171				 */
1172				if (ifa->ifa_netmask == 0)
1173					continue;
1174				cp = addr_data;
1175				cp2 = ifa->ifa_addr->sa_data;
1176				cp3 = ifa->ifa_netmask->sa_data;
1177				cplim = ifa->ifa_netmask->sa_len
1178					+ (char *)ifa->ifa_netmask;
1179				while (cp3 < cplim)
1180					if ((*cp++ ^ *cp2++) & *cp3++)
1181						goto next; /* next address! */
1182				/*
1183				 * If the netmask of what we just found
1184				 * is more specific than what we had before
1185				 * (if we had one) then remember the new one
1186				 * before continuing to search
1187				 * for an even better one.
1188				 */
1189				if (ifa_maybe == 0 ||
1190				    rn_refines((caddr_t)ifa->ifa_netmask,
1191				    (caddr_t)ifa_maybe->ifa_netmask))
1192					ifa_maybe = ifa;
1193			}
1194		}
1195	}
1196	ifa = ifa_maybe;
1197done:
1198	IFNET_RUNLOCK();
1199	return (ifa);
1200}
1201
1202/*
1203 * Find an interface address specific to an interface best matching
1204 * a given address.
1205 */
1206struct ifaddr *
1207ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1208{
1209	struct ifaddr *ifa;
1210	char *cp, *cp2, *cp3;
1211	char *cplim;
1212	struct ifaddr *ifa_maybe = 0;
1213	u_int af = addr->sa_family;
1214
1215	if (af >= AF_MAX)
1216		return (0);
1217	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1218		if (ifa->ifa_addr->sa_family != af)
1219			continue;
1220		if (ifa_maybe == 0)
1221			ifa_maybe = ifa;
1222		if (ifa->ifa_netmask == 0) {
1223			if (sa_equal(addr, ifa->ifa_addr) ||
1224			    (ifa->ifa_dstaddr &&
1225			    sa_equal(addr, ifa->ifa_dstaddr)))
1226				goto done;
1227			continue;
1228		}
1229		if (ifp->if_flags & IFF_POINTOPOINT) {
1230			if (sa_equal(addr, ifa->ifa_dstaddr))
1231				goto done;
1232		} else {
1233			cp = addr->sa_data;
1234			cp2 = ifa->ifa_addr->sa_data;
1235			cp3 = ifa->ifa_netmask->sa_data;
1236			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1237			for (; cp3 < cplim; cp3++)
1238				if ((*cp++ ^ *cp2++) & *cp3)
1239					break;
1240			if (cp3 == cplim)
1241				goto done;
1242		}
1243	}
1244	ifa = ifa_maybe;
1245done:
1246	return (ifa);
1247}
1248
1249#include <net/route.h>
1250
1251/*
1252 * Default action when installing a route with a Link Level gateway.
1253 * Lookup an appropriate real ifa to point to.
1254 * This should be moved to /sys/net/link.c eventually.
1255 */
1256static void
1257link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1258{
1259	struct ifaddr *ifa, *oifa;
1260	struct sockaddr *dst;
1261	struct ifnet *ifp;
1262
1263	RT_LOCK_ASSERT(rt);
1264
1265	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1266	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1267		return;
1268	ifa = ifaof_ifpforaddr(dst, ifp);
1269	if (ifa) {
1270		IFAREF(ifa);		/* XXX */
1271		oifa = rt->rt_ifa;
1272		rt->rt_ifa = ifa;
1273		IFAFREE(oifa);
1274		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1275			ifa->ifa_rtrequest(cmd, rt, info);
1276	}
1277}
1278
1279/*
1280 * Mark an interface down and notify protocols of
1281 * the transition.
1282 * NOTE: must be called at splnet or eqivalent.
1283 */
1284static void
1285if_unroute(struct ifnet *ifp, int flag, int fam)
1286{
1287	struct ifaddr *ifa;
1288
1289	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
1290
1291	ifp->if_flags &= ~flag;
1292	getmicrotime(&ifp->if_lastchange);
1293	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1294		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1295			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1296	if_qflush(&ifp->if_snd);
1297#ifdef DEV_CARP
1298	if (ifp->if_carp)
1299		carp_carpdev_state(ifp->if_carp);
1300#endif
1301	rt_ifmsg(ifp);
1302}
1303
1304/*
1305 * Mark an interface up and notify protocols of
1306 * the transition.
1307 * NOTE: must be called at splnet or eqivalent.
1308 */
1309static void
1310if_route(struct ifnet *ifp, int flag, int fam)
1311{
1312	struct ifaddr *ifa;
1313
1314	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
1315
1316	ifp->if_flags |= flag;
1317	getmicrotime(&ifp->if_lastchange);
1318	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1319		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1320			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1321#ifdef DEV_CARP
1322	if (ifp->if_carp)
1323		carp_carpdev_state(ifp->if_carp);
1324#endif
1325	rt_ifmsg(ifp);
1326#ifdef INET6
1327	in6_if_up(ifp);
1328#endif
1329}
1330
/* XXX: the two hooks below are private from if_vlan. */
void	(*vlan_link_state_p)(struct ifnet *, int);
void	(*vlan_trunk_cap_p)(struct ifnet *);
1333
1334/*
1335 * Handle a change in the interface link state. To avoid LORs
1336 * between driver lock and upper layer locks, as well as possible
1337 * recursions, we post event to taskqueue, and all job
1338 * is done in static do_link_state_change().
1339 */
1340void
1341if_link_state_change(struct ifnet *ifp, int link_state)
1342{
1343	/* Return if state hasn't changed. */
1344	if (ifp->if_link_state == link_state)
1345		return;
1346
1347	ifp->if_link_state = link_state;
1348
1349	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
1350}
1351
1352static void
1353do_link_state_change(void *arg, int pending)
1354{
1355	struct ifnet *ifp = (struct ifnet *)arg;
1356	int link_state = ifp->if_link_state;
1357	int link;
1358
1359	/* Notify that the link state has changed. */
1360	rt_ifmsg(ifp);
1361	if (link_state == LINK_STATE_UP)
1362		link = NOTE_LINKUP;
1363	else if (link_state == LINK_STATE_DOWN)
1364		link = NOTE_LINKDOWN;
1365	else
1366		link = NOTE_LINKINV;
1367	KNOTE_UNLOCKED(&ifp->if_klist, link);
1368	if (ifp->if_vlantrunk != NULL)
1369		(*vlan_link_state_p)(ifp, link);
1370
1371	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
1372	    IFP2AC(ifp)->ac_netgraph != NULL)
1373		(*ng_ether_link_state_p)(ifp, link_state);
1374#ifdef DEV_CARP
1375	if (ifp->if_carp)
1376		carp_carpdev_state(ifp->if_carp);
1377#endif
1378	if (ifp->if_bridge) {
1379		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
1380		(*bstp_linkstate_p)(ifp, link_state);
1381	}
1382	if (ifp->if_trunk) {
1383		KASSERT(trunk_linkstate_p != NULL,("if_trunk not loaded!"));
1384		(*trunk_linkstate_p)(ifp, link_state);
1385	}
1386
1387	devctl_notify("IFNET", ifp->if_xname,
1388	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1389	if (pending > 1)
1390		if_printf(ifp, "%d link states coalesced\n", pending);
1391	if (log_link_state_change)
1392		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
1393		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
1394}
1395
1396/*
1397 * Mark an interface down and notify protocols of
1398 * the transition.
1399 * NOTE: must be called at splnet or eqivalent.
1400 */
1401void
1402if_down(struct ifnet *ifp)
1403{
1404
1405	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1406}
1407
1408/*
1409 * Mark an interface up and notify protocols of
1410 * the transition.
1411 * NOTE: must be called at splnet or eqivalent.
1412 */
1413void
1414if_up(struct ifnet *ifp)
1415{
1416
1417	if_route(ifp, IFF_UP, AF_UNSPEC);
1418}
1419
1420/*
1421 * Flush an interface queue.
1422 */
1423static void
1424if_qflush(struct ifaltq *ifq)
1425{
1426	struct mbuf *m, *n;
1427
1428	IFQ_LOCK(ifq);
1429#ifdef ALTQ
1430	if (ALTQ_IS_ENABLED(ifq))
1431		ALTQ_PURGE(ifq);
1432#endif
1433	n = ifq->ifq_head;
1434	while ((m = n) != 0) {
1435		n = m->m_act;
1436		m_freem(m);
1437	}
1438	ifq->ifq_head = 0;
1439	ifq->ifq_tail = 0;
1440	ifq->ifq_len = 0;
1441	IFQ_UNLOCK(ifq);
1442}
1443
1444/*
1445 * Handle interface watchdog timer routines.  Called
1446 * from softclock, we decrement timers (if set) and
1447 * call the appropriate interface routine on expiration.
1448 *
1449 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1450 * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1451 * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1452 */
1453static void
1454if_slowtimo(void *arg)
1455{
1456	struct ifnet *ifp;
1457	int s = splimp();
1458
1459	IFNET_RLOCK();
1460	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1461		if (ifp->if_timer == 0 || --ifp->if_timer)
1462			continue;
1463		if (ifp->if_watchdog)
1464			(*ifp->if_watchdog)(ifp);
1465	}
1466	IFNET_RUNLOCK();
1467	splx(s);
1468	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1469}
1470
1471/*
1472 * Map interface name to
1473 * interface structure pointer.
1474 */
1475struct ifnet *
1476ifunit(const char *name)
1477{
1478	struct ifnet *ifp;
1479
1480	IFNET_RLOCK();
1481	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1482		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1483			break;
1484	}
1485	IFNET_RUNLOCK();
1486	return (ifp);
1487}
1488
1489/*
1490 * Hardware specific interface ioctls.
1491 */
1492static int
1493ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1494{
1495	struct ifreq *ifr;
1496	struct ifstat *ifs;
1497	int error = 0;
1498	int new_flags, temp_flags;
1499	size_t namelen, onamelen;
1500	char new_name[IFNAMSIZ];
1501	struct ifaddr *ifa;
1502	struct sockaddr_dl *sdl;
1503
1504	ifr = (struct ifreq *)data;
1505	switch (cmd) {
1506	case SIOCGIFINDEX:
1507		ifr->ifr_index = ifp->if_index;
1508		break;
1509
1510	case SIOCGIFFLAGS:
1511		temp_flags = ifp->if_flags | ifp->if_drv_flags;
1512		ifr->ifr_flags = temp_flags & 0xffff;
1513		ifr->ifr_flagshigh = temp_flags >> 16;
1514		break;
1515
1516	case SIOCGIFCAP:
1517		ifr->ifr_reqcap = ifp->if_capabilities;
1518		ifr->ifr_curcap = ifp->if_capenable;
1519		break;
1520
1521#ifdef MAC
1522	case SIOCGIFMAC:
1523		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1524		break;
1525#endif
1526
1527	case SIOCGIFMETRIC:
1528		ifr->ifr_metric = ifp->if_metric;
1529		break;
1530
1531	case SIOCGIFMTU:
1532		ifr->ifr_mtu = ifp->if_mtu;
1533		break;
1534
1535	case SIOCGIFPHYS:
1536		ifr->ifr_phys = ifp->if_physical;
1537		break;
1538
1539	case SIOCSIFFLAGS:
1540		error = priv_check(td, PRIV_NET_SETIFFLAGS);
1541		if (error)
1542			return (error);
1543		/*
1544		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
1545		 * check, so we don't need special handling here yet.
1546		 */
1547		new_flags = (ifr->ifr_flags & 0xffff) |
1548		    (ifr->ifr_flagshigh << 16);
1549		if (ifp->if_flags & IFF_SMART) {
1550			/* Smart drivers twiddle their own routes */
1551		} else if (ifp->if_flags & IFF_UP &&
1552		    (new_flags & IFF_UP) == 0) {
1553			int s = splimp();
1554			if_down(ifp);
1555			splx(s);
1556		} else if (new_flags & IFF_UP &&
1557		    (ifp->if_flags & IFF_UP) == 0) {
1558			int s = splimp();
1559			if_up(ifp);
1560			splx(s);
1561		}
1562		/* See if permanently promiscuous mode bit is about to flip */
1563		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
1564			if (new_flags & IFF_PPROMISC)
1565				ifp->if_flags |= IFF_PROMISC;
1566			else if (ifp->if_pcount == 0)
1567				ifp->if_flags &= ~IFF_PROMISC;
1568			log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
1569			    ifp->if_xname,
1570			    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
1571		}
1572		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1573			(new_flags &~ IFF_CANTCHANGE);
1574		if (ifp->if_ioctl) {
1575			IFF_LOCKGIANT(ifp);
1576			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1577			IFF_UNLOCKGIANT(ifp);
1578		}
1579		getmicrotime(&ifp->if_lastchange);
1580		break;
1581
1582	case SIOCSIFCAP:
1583		error = priv_check(td, PRIV_NET_SETIFCAP);
1584		if (error)
1585			return (error);
1586		if (ifp->if_ioctl == NULL)
1587			return (EOPNOTSUPP);
1588		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1589			return (EINVAL);
1590		IFF_LOCKGIANT(ifp);
1591		error = (*ifp->if_ioctl)(ifp, cmd, data);
1592		IFF_UNLOCKGIANT(ifp);
1593		if (error == 0)
1594			getmicrotime(&ifp->if_lastchange);
1595		break;
1596
1597#ifdef MAC
1598	case SIOCSIFMAC:
1599		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1600		break;
1601#endif
1602
1603	case SIOCSIFNAME:
1604		error = priv_check(td, PRIV_NET_SETIFNAME);
1605		if (error)
1606			return (error);
1607		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1608		if (error != 0)
1609			return (error);
1610		if (new_name[0] == '\0')
1611			return (EINVAL);
1612		if (ifunit(new_name) != NULL)
1613			return (EEXIST);
1614
1615		/* Announce the departure of the interface. */
1616		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1617		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1618
1619		log(LOG_INFO, "%s: changing name to '%s'\n",
1620		    ifp->if_xname, new_name);
1621
1622		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1623		ifa = ifp->if_addr;
1624		IFA_LOCK(ifa);
1625		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1626		namelen = strlen(new_name);
1627		onamelen = sdl->sdl_nlen;
1628		/*
1629		 * Move the address if needed.  This is safe because we
1630		 * allocate space for a name of length IFNAMSIZ when we
1631		 * create this in if_attach().
1632		 */
1633		if (namelen != onamelen) {
1634			bcopy(sdl->sdl_data + onamelen,
1635			    sdl->sdl_data + namelen, sdl->sdl_alen);
1636		}
1637		bcopy(new_name, sdl->sdl_data, namelen);
1638		sdl->sdl_nlen = namelen;
1639		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1640		bzero(sdl->sdl_data, onamelen);
1641		while (namelen != 0)
1642			sdl->sdl_data[--namelen] = 0xff;
1643		IFA_UNLOCK(ifa);
1644
1645		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1646		/* Announce the return of the interface. */
1647		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1648		break;
1649
1650	case SIOCSIFMETRIC:
1651		error = priv_check(td, PRIV_NET_SETIFMETRIC);
1652		if (error)
1653			return (error);
1654		ifp->if_metric = ifr->ifr_metric;
1655		getmicrotime(&ifp->if_lastchange);
1656		break;
1657
1658	case SIOCSIFPHYS:
1659		error = priv_check(td, PRIV_NET_SETIFPHYS);
1660		if (error)
1661			return (error);
1662		if (ifp->if_ioctl == NULL)
1663			return (EOPNOTSUPP);
1664		IFF_LOCKGIANT(ifp);
1665		error = (*ifp->if_ioctl)(ifp, cmd, data);
1666		IFF_UNLOCKGIANT(ifp);
1667		if (error == 0)
1668			getmicrotime(&ifp->if_lastchange);
1669		break;
1670
1671	case SIOCSIFMTU:
1672	{
1673		u_long oldmtu = ifp->if_mtu;
1674
1675		error = priv_check(td, PRIV_NET_SETIFMTU);
1676		if (error)
1677			return (error);
1678		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1679			return (EINVAL);
1680		if (ifp->if_ioctl == NULL)
1681			return (EOPNOTSUPP);
1682		IFF_LOCKGIANT(ifp);
1683		error = (*ifp->if_ioctl)(ifp, cmd, data);
1684		IFF_UNLOCKGIANT(ifp);
1685		if (error == 0) {
1686			getmicrotime(&ifp->if_lastchange);
1687			rt_ifmsg(ifp);
1688		}
1689		/*
1690		 * If the link MTU changed, do network layer specific procedure.
1691		 */
1692		if (ifp->if_mtu != oldmtu) {
1693#ifdef INET6
1694			nd6_setmtu(ifp);
1695#endif
1696		}
1697		break;
1698	}
1699
1700	case SIOCADDMULTI:
1701	case SIOCDELMULTI:
1702		if (cmd == SIOCADDMULTI)
1703			error = priv_check(td, PRIV_NET_ADDMULTI);
1704		else
1705			error = priv_check(td, PRIV_NET_DELMULTI);
1706		if (error)
1707			return (error);
1708
1709		/* Don't allow group membership on non-multicast interfaces. */
1710		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1711			return (EOPNOTSUPP);
1712
1713		/* Don't let users screw up protocols' entries. */
1714		if (ifr->ifr_addr.sa_family != AF_LINK)
1715			return (EINVAL);
1716
1717		if (cmd == SIOCADDMULTI) {
1718			struct ifmultiaddr *ifma;
1719
1720			/*
1721			 * Userland is only permitted to join groups once
1722			 * via the if_addmulti() KPI, because it cannot hold
1723			 * struct ifmultiaddr * between calls. It may also
1724			 * lose a race while we check if the membership
1725			 * already exists.
1726			 */
1727			IF_ADDR_LOCK(ifp);
1728			ifma = if_findmulti(ifp, &ifr->ifr_addr);
1729			IF_ADDR_UNLOCK(ifp);
1730			if (ifma != NULL)
1731				error = EADDRINUSE;
1732			else
1733				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1734		} else {
1735			error = if_delmulti(ifp, &ifr->ifr_addr);
1736		}
1737		if (error == 0)
1738			getmicrotime(&ifp->if_lastchange);
1739		break;
1740
1741	case SIOCSIFPHYADDR:
1742	case SIOCDIFPHYADDR:
1743#ifdef INET6
1744	case SIOCSIFPHYADDR_IN6:
1745#endif
1746	case SIOCSLIFPHYADDR:
1747	case SIOCSIFMEDIA:
1748	case SIOCSIFGENERIC:
1749		error = priv_check(td, PRIV_NET_HWIOCTL);
1750		if (error)
1751			return (error);
1752		if (ifp->if_ioctl == NULL)
1753			return (EOPNOTSUPP);
1754		IFF_LOCKGIANT(ifp);
1755		error = (*ifp->if_ioctl)(ifp, cmd, data);
1756		IFF_UNLOCKGIANT(ifp);
1757		if (error == 0)
1758			getmicrotime(&ifp->if_lastchange);
1759		break;
1760
1761	case SIOCGIFSTATUS:
1762		ifs = (struct ifstat *)data;
1763		ifs->ascii[0] = '\0';
1764
1765	case SIOCGIFPSRCADDR:
1766	case SIOCGIFPDSTADDR:
1767	case SIOCGLIFPHYADDR:
1768	case SIOCGIFMEDIA:
1769	case SIOCGIFGENERIC:
1770		if (ifp->if_ioctl == NULL)
1771			return (EOPNOTSUPP);
1772		IFF_LOCKGIANT(ifp);
1773		error = (*ifp->if_ioctl)(ifp, cmd, data);
1774		IFF_UNLOCKGIANT(ifp);
1775		break;
1776
1777	case SIOCSIFLLADDR:
1778		error = priv_check(td, PRIV_NET_SETLLADDR);
1779		if (error)
1780			return (error);
1781		error = if_setlladdr(ifp,
1782		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1783		break;
1784
1785	case SIOCAIFGROUP:
1786	{
1787		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
1788
1789		error = priv_check(td, PRIV_NET_ADDIFGROUP);
1790		if (error)
1791			return (error);
1792		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
1793			return (error);
1794		break;
1795	}
1796
1797	case SIOCGIFGROUP:
1798		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
1799			return (error);
1800		break;
1801
1802	case SIOCDIFGROUP:
1803	{
1804		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
1805
1806		error = priv_check(td, PRIV_NET_DELIFGROUP);
1807		if (error)
1808			return (error);
1809		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
1810			return (error);
1811		break;
1812	}
1813
1814	default:
1815		error = ENOIOCTL;
1816		break;
1817	}
1818	return (error);
1819}
1820
1821/*
1822 * Interface ioctls.
1823 */
1824int
1825ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1826{
1827	struct ifnet *ifp;
1828	struct ifreq *ifr;
1829	int error;
1830	int oif_flags;
1831
1832	switch (cmd) {
1833	case SIOCGIFCONF:
1834	case OSIOCGIFCONF:
1835#ifdef __amd64__
1836	case SIOCGIFCONF32:
1837#endif
1838		return (ifconf(cmd, data));
1839	}
1840	ifr = (struct ifreq *)data;
1841
1842	switch (cmd) {
1843	case SIOCIFCREATE:
1844	case SIOCIFCREATE2:
1845		error = priv_check(td, PRIV_NET_IFCREATE);
1846		if (error)
1847			return (error);
1848		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1849			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1850	case SIOCIFDESTROY:
1851		error = priv_check(td, PRIV_NET_IFDESTROY);
1852		if (error)
1853			return (error);
1854		return if_clone_destroy(ifr->ifr_name);
1855
1856	case SIOCIFGCLONERS:
1857		return (if_clone_list((struct if_clonereq *)data));
1858	case SIOCGIFGMEMB:
1859		return (if_getgroupmembers((struct ifgroupreq *)data));
1860	}
1861
1862	ifp = ifunit(ifr->ifr_name);
1863	if (ifp == 0)
1864		return (ENXIO);
1865
1866	error = ifhwioctl(cmd, ifp, data, td);
1867	if (error != ENOIOCTL)
1868		return (error);
1869
1870	oif_flags = ifp->if_flags;
1871	if (so->so_proto == 0)
1872		return (EOPNOTSUPP);
1873#ifndef COMPAT_43
1874	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1875								 data,
1876								 ifp, td));
1877#else
1878	{
1879		int ocmd = cmd;
1880
1881		switch (cmd) {
1882
1883		case SIOCSIFDSTADDR:
1884		case SIOCSIFADDR:
1885		case SIOCSIFBRDADDR:
1886		case SIOCSIFNETMASK:
1887#if BYTE_ORDER != BIG_ENDIAN
1888			if (ifr->ifr_addr.sa_family == 0 &&
1889			    ifr->ifr_addr.sa_len < 16) {
1890				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1891				ifr->ifr_addr.sa_len = 16;
1892			}
1893#else
1894			if (ifr->ifr_addr.sa_len == 0)
1895				ifr->ifr_addr.sa_len = 16;
1896#endif
1897			break;
1898
1899		case OSIOCGIFADDR:
1900			cmd = SIOCGIFADDR;
1901			break;
1902
1903		case OSIOCGIFDSTADDR:
1904			cmd = SIOCGIFDSTADDR;
1905			break;
1906
1907		case OSIOCGIFBRDADDR:
1908			cmd = SIOCGIFBRDADDR;
1909			break;
1910
1911		case OSIOCGIFNETMASK:
1912			cmd = SIOCGIFNETMASK;
1913		}
1914		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1915								   cmd,
1916								   data,
1917								   ifp, td));
1918		switch (ocmd) {
1919
1920		case OSIOCGIFADDR:
1921		case OSIOCGIFDSTADDR:
1922		case OSIOCGIFBRDADDR:
1923		case OSIOCGIFNETMASK:
1924			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1925
1926		}
1927	}
1928#endif /* COMPAT_43 */
1929
1930	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1931#ifdef INET6
1932		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1933		if (ifp->if_flags & IFF_UP) {
1934			int s = splimp();
1935			in6_if_up(ifp);
1936			splx(s);
1937		}
1938#endif
1939	}
1940	return (error);
1941}
1942
1943/*
1944 * The code common to handling reference counted flags,
1945 * e.g., in ifpromisc() and if_allmulti().
1946 * The "pflag" argument can specify a permanent mode flag to check,
1947 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
1948 *
1949 * Only to be used on stack-owned flags, not driver-owned flags.
1950 */
1951static int
1952if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
1953{
1954	struct ifreq ifr;
1955	int error;
1956	int oldflags, oldcount;
1957
1958	/* Sanity checks to catch programming errors */
1959	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
1960	    ("%s: setting driver-owned flag %d", __func__, flag));
1961
1962	if (onswitch)
1963		KASSERT(*refcount >= 0,
1964		    ("%s: increment negative refcount %d for flag %d",
1965		    __func__, *refcount, flag));
1966	else
1967		KASSERT(*refcount > 0,
1968		    ("%s: decrement non-positive refcount %d for flag %d",
1969		    __func__, *refcount, flag));
1970
1971	/* In case this mode is permanent, just touch refcount */
1972	if (ifp->if_flags & pflag) {
1973		*refcount += onswitch ? 1 : -1;
1974		return (0);
1975	}
1976
1977	/* Save ifnet parameters for if_ioctl() may fail */
1978	oldcount = *refcount;
1979	oldflags = ifp->if_flags;
1980
1981	/*
1982	 * See if we aren't the only and touching refcount is enough.
1983	 * Actually toggle interface flag if we are the first or last.
1984	 */
1985	if (onswitch) {
1986		if ((*refcount)++)
1987			return (0);
1988		ifp->if_flags |= flag;
1989	} else {
1990		if (--(*refcount))
1991			return (0);
1992		ifp->if_flags &= ~flag;
1993	}
1994
1995	/* Call down the driver since we've changed interface flags */
1996	if (ifp->if_ioctl == NULL) {
1997		error = EOPNOTSUPP;
1998		goto recover;
1999	}
2000	ifr.ifr_flags = ifp->if_flags & 0xffff;
2001	ifr.ifr_flagshigh = ifp->if_flags >> 16;
2002	IFF_LOCKGIANT(ifp);
2003	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2004	IFF_UNLOCKGIANT(ifp);
2005	if (error)
2006		goto recover;
2007	/* Notify userland that interface flags have changed */
2008	rt_ifmsg(ifp);
2009	return (0);
2010
2011recover:
2012	/* Recover after driver error */
2013	*refcount = oldcount;
2014	ifp->if_flags = oldflags;
2015	return (error);
2016}
2017
2018/*
2019 * Set/clear promiscuous mode on interface ifp based on the truth value
2020 * of pswitch.  The calls are reference counted so that only the first
2021 * "on" request actually has an effect, as does the final "off" request.
2022 * Results are undefined if the "off" and "on" requests are not matched.
2023 */
2024int
2025ifpromisc(struct ifnet *ifp, int pswitch)
2026{
2027	int error;
2028	int oldflags = ifp->if_flags;
2029
2030	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
2031			   &ifp->if_pcount, pswitch);
2032	/* If promiscuous mode status has changed, log a message */
2033	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
2034		log(LOG_INFO, "%s: promiscuous mode %s\n",
2035		    ifp->if_xname,
2036		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
2037	return (error);
2038}
2039
2040/*
2041 * Return interface configuration
2042 * of system.  List may be used
2043 * in later ioctl's (above) to get
2044 * other information.
2045 */
2046/*ARGSUSED*/
2047static int
2048ifconf(u_long cmd, caddr_t data)
2049{
2050	struct ifconf *ifc = (struct ifconf *)data;
2051#ifdef __amd64__
2052	struct ifconf32 *ifc32 = (struct ifconf32 *)data;
2053	struct ifconf ifc_swab;
2054#endif
2055	struct ifnet *ifp;
2056	struct ifaddr *ifa;
2057	struct ifreq ifr;
2058	struct sbuf *sb;
2059	int error, full = 0, valid_len, max_len;
2060
2061#ifdef __amd64__
2062	if (cmd == SIOCGIFCONF32) {
2063		ifc_swab.ifc_len = ifc32->ifc_len;
2064		ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
2065		ifc = &ifc_swab;
2066	}
2067#endif
2068	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
2069	max_len = MAXPHYS - 1;
2070
2071	/* Prevent hostile input from being able to crash the system */
2072	if (ifc->ifc_len <= 0)
2073		return (EINVAL);
2074
2075again:
2076	if (ifc->ifc_len <= max_len) {
2077		max_len = ifc->ifc_len;
2078		full = 1;
2079	}
2080	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
2081	max_len = 0;
2082	valid_len = 0;
2083
2084	IFNET_RLOCK();		/* could sleep XXX */
2085	TAILQ_FOREACH(ifp, &ifnet, if_link) {
2086		int addrs;
2087
2088		/*
2089		 * Zero the ifr_name buffer to make sure we don't
2090		 * disclose the contents of the stack.
2091		 */
2092		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
2093
2094		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
2095		    >= sizeof(ifr.ifr_name)) {
2096			sbuf_delete(sb);
2097			IFNET_RUNLOCK();
2098			return (ENAMETOOLONG);
2099		}
2100
2101		addrs = 0;
2102		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2103			struct sockaddr *sa = ifa->ifa_addr;
2104
2105			if (jailed(curthread->td_ucred) &&
2106			    prison_if(curthread->td_ucred, sa))
2107				continue;
2108			addrs++;
2109#ifdef COMPAT_43
2110			if (cmd == OSIOCGIFCONF) {
2111				struct osockaddr *osa =
2112					 (struct osockaddr *)&ifr.ifr_addr;
2113				ifr.ifr_addr = *sa;
2114				osa->sa_family = sa->sa_family;
2115				sbuf_bcat(sb, &ifr, sizeof(ifr));
2116				max_len += sizeof(ifr);
2117			} else
2118#endif
2119			if (sa->sa_len <= sizeof(*sa)) {
2120				ifr.ifr_addr = *sa;
2121				sbuf_bcat(sb, &ifr, sizeof(ifr));
2122				max_len += sizeof(ifr);
2123			} else {
2124				sbuf_bcat(sb, &ifr,
2125				    offsetof(struct ifreq, ifr_addr));
2126				max_len += offsetof(struct ifreq, ifr_addr);
2127				sbuf_bcat(sb, sa, sa->sa_len);
2128				max_len += sa->sa_len;
2129			}
2130
2131			if (!sbuf_overflowed(sb))
2132				valid_len = sbuf_len(sb);
2133		}
2134		if (addrs == 0) {
2135			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2136			sbuf_bcat(sb, &ifr, sizeof(ifr));
2137			max_len += sizeof(ifr);
2138
2139			if (!sbuf_overflowed(sb))
2140				valid_len = sbuf_len(sb);
2141		}
2142	}
2143	IFNET_RUNLOCK();
2144
2145	/*
2146	 * If we didn't allocate enough space (uncommon), try again.  If
2147	 * we have already allocated as much space as we are allowed,
2148	 * return what we've got.
2149	 */
2150	if (valid_len != max_len && !full) {
2151		sbuf_delete(sb);
2152		goto again;
2153	}
2154
2155	ifc->ifc_len = valid_len;
2156#ifdef __amd64__
2157	if (cmd == SIOCGIFCONF32)
2158		ifc32->ifc_len = valid_len;
2159#endif
2160	sbuf_finish(sb);
2161	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
2162	sbuf_delete(sb);
2163	return (error);
2164}
2165
2166/*
2167 * Just like ifpromisc(), but for all-multicast-reception mode.
2168 */
2169int
2170if_allmulti(struct ifnet *ifp, int onswitch)
2171{
2172
2173	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
2174}
2175
2176struct ifmultiaddr *
2177if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
2178{
2179	struct ifmultiaddr *ifma;
2180
2181	IF_ADDR_LOCK_ASSERT(ifp);
2182
2183	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2184		if (sa->sa_family == AF_LINK) {
2185			if (sa_dl_equal(ifma->ifma_addr, sa))
2186				break;
2187		} else {
2188			if (sa_equal(ifma->ifma_addr, sa))
2189				break;
2190		}
2191	}
2192
2193	return ifma;
2194}
2195
2196/*
2197 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
2198 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
2199 * the ifnet multicast address list here, so the caller must do that and
2200 * other setup work (such as notifying the device driver).  The reference
2201 * count is initialized to 1.
2202 */
2203static struct ifmultiaddr *
2204if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
2205    int mflags)
2206{
2207	struct ifmultiaddr *ifma;
2208	struct sockaddr *dupsa;
2209
2210	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, mflags |
2211	    M_ZERO);
2212	if (ifma == NULL)
2213		return (NULL);
2214
2215	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, mflags);
2216	if (dupsa == NULL) {
2217		FREE(ifma, M_IFMADDR);
2218		return (NULL);
2219	}
2220	bcopy(sa, dupsa, sa->sa_len);
2221	ifma->ifma_addr = dupsa;
2222
2223	ifma->ifma_ifp = ifp;
2224	ifma->ifma_refcount = 1;
2225	ifma->ifma_protospec = NULL;
2226
2227	if (llsa == NULL) {
2228		ifma->ifma_lladdr = NULL;
2229		return (ifma);
2230	}
2231
2232	MALLOC(dupsa, struct sockaddr *, llsa->sa_len, M_IFMADDR, mflags);
2233	if (dupsa == NULL) {
2234		FREE(ifma->ifma_addr, M_IFMADDR);
2235		FREE(ifma, M_IFMADDR);
2236		return (NULL);
2237	}
2238	bcopy(llsa, dupsa, llsa->sa_len);
2239	ifma->ifma_lladdr = dupsa;
2240
2241	return (ifma);
2242}
2243
2244/*
2245 * if_freemulti: free ifmultiaddr structure and possibly attached related
2246 * addresses.  The caller is responsible for implementing reference
2247 * counting, notifying the driver, handling routing messages, and releasing
2248 * any dependent link layer state.
2249 */
2250static void
2251if_freemulti(struct ifmultiaddr *ifma)
2252{
2253
2254	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
2255	    ifma->ifma_refcount));
2256	KASSERT(ifma->ifma_protospec == NULL,
2257	    ("if_freemulti: protospec not NULL"));
2258
2259	if (ifma->ifma_lladdr != NULL)
2260		FREE(ifma->ifma_lladdr, M_IFMADDR);
2261	FREE(ifma->ifma_addr, M_IFMADDR);
2262	FREE(ifma, M_IFMADDR);
2263}
2264
2265/*
2266 * Register an additional multicast address with a network interface.
2267 *
2268 * - If the address is already present, bump the reference count on the
2269 *   address and return.
2270 * - If the address is not link-layer, look up a link layer address.
2271 * - Allocate address structures for one or both addresses, and attach to the
2272 *   multicast address list on the interface.  If automatically adding a link
2273 *   layer address, the protocol address will own a reference to the link
2274 *   layer address, to be freed when it is freed.
2275 * - Notify the network device driver of an addition to the multicast address
2276 *   list.
2277 *
2278 * 'sa' points to caller-owned memory with the desired multicast address.
2279 *
2280 * 'retifma' will be used to return a pointer to the resulting multicast
2281 * address reference, if desired.
2282 */
2283int
2284if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
2285    struct ifmultiaddr **retifma)
2286{
2287	struct ifmultiaddr *ifma, *ll_ifma;
2288	struct sockaddr *llsa;
2289	int error;
2290
2291	/*
2292	 * If the address is already present, return a new reference to it;
2293	 * otherwise, allocate storage and set up a new address.
2294	 */
2295	IF_ADDR_LOCK(ifp);
2296	ifma = if_findmulti(ifp, sa);
2297	if (ifma != NULL) {
2298		ifma->ifma_refcount++;
2299		if (retifma != NULL)
2300			*retifma = ifma;
2301		IF_ADDR_UNLOCK(ifp);
2302		return (0);
2303	}
2304
2305	/*
2306	 * The address isn't already present; resolve the protocol address
2307	 * into a link layer address, and then look that up, bump its
2308	 * refcount or allocate an ifma for that also.  If 'llsa' was
2309	 * returned, we will need to free it later.
2310	 */
2311	llsa = NULL;
2312	ll_ifma = NULL;
2313	if (ifp->if_resolvemulti != NULL) {
2314		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2315		if (error)
2316			goto unlock_out;
2317	}
2318
2319	/*
2320	 * Allocate the new address.  Don't hook it up yet, as we may also
2321	 * need to allocate a link layer multicast address.
2322	 */
2323	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
2324	if (ifma == NULL) {
2325		error = ENOMEM;
2326		goto free_llsa_out;
2327	}
2328
2329	/*
2330	 * If a link layer address is found, we'll need to see if it's
2331	 * already present in the address list, or allocate is as well.
2332	 * When this block finishes, the link layer address will be on the
2333	 * list.
2334	 */
2335	if (llsa != NULL) {
2336		ll_ifma = if_findmulti(ifp, llsa);
2337		if (ll_ifma == NULL) {
2338			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
2339			if (ll_ifma == NULL) {
2340				--ifma->ifma_refcount;
2341				if_freemulti(ifma);
2342				error = ENOMEM;
2343				goto free_llsa_out;
2344			}
2345			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
2346			    ifma_link);
2347		} else
2348			ll_ifma->ifma_refcount++;
2349		ifma->ifma_llifma = ll_ifma;
2350	}
2351
2352	/*
2353	 * We now have a new multicast address, ifma, and possibly a new or
2354	 * referenced link layer address.  Add the primary address to the
2355	 * ifnet address list.
2356	 */
2357	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2358
2359	if (retifma != NULL)
2360		*retifma = ifma;
2361
2362	/*
2363	 * Must generate the message while holding the lock so that 'ifma'
2364	 * pointer is still valid.
2365	 */
2366	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2367	IF_ADDR_UNLOCK(ifp);
2368
2369	/*
2370	 * We are certain we have added something, so call down to the
2371	 * interface to let them know about it.
2372	 */
2373	if (ifp->if_ioctl != NULL) {
2374		IFF_LOCKGIANT(ifp);
2375		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
2376		IFF_UNLOCKGIANT(ifp);
2377	}
2378
2379	if (llsa != NULL)
2380		FREE(llsa, M_IFMADDR);
2381
2382	return (0);
2383
2384free_llsa_out:
2385	if (llsa != NULL)
2386		FREE(llsa, M_IFMADDR);
2387
2388unlock_out:
2389	IF_ADDR_UNLOCK(ifp);
2390	return (error);
2391}
2392
2393/*
2394 * Delete a multicast group membership by network-layer group address.
2395 *
2396 * Returns ENOENT if the entry could not be found. If ifp no longer
2397 * exists, results are undefined. This entry point should only be used
2398 * from subsystems which do appropriate locking to hold ifp for the
2399 * duration of the call.
2400 * Network-layer protocol domains must use if_delmulti_ifma().
2401 */
2402int
2403if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2404{
2405	struct ifmultiaddr *ifma;
2406	int lastref;
2407#ifdef INVARIANTS
2408	struct ifnet *oifp;
2409
2410	IFNET_RLOCK();
2411	TAILQ_FOREACH(oifp, &ifnet, if_link)
2412		if (ifp == oifp)
2413			break;
2414	if (ifp != oifp)
2415		ifp = NULL;
2416	IFNET_RUNLOCK();
2417
2418	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
2419#endif
2420	if (ifp == NULL)
2421		return (ENOENT);
2422
2423	IF_ADDR_LOCK(ifp);
2424	lastref = 0;
2425	ifma = if_findmulti(ifp, sa);
2426	if (ifma != NULL)
2427		lastref = if_delmulti_locked(ifp, ifma, 0);
2428	IF_ADDR_UNLOCK(ifp);
2429
2430	if (ifma == NULL)
2431		return (ENOENT);
2432
2433	if (lastref && ifp->if_ioctl != NULL) {
2434		IFF_LOCKGIANT(ifp);
2435		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
2436		IFF_UNLOCKGIANT(ifp);
2437	}
2438
2439	return (0);
2440}
2441
2442/*
2443 * Delete a multicast group membership by group membership pointer.
2444 * Network-layer protocol domains must use this routine.
2445 *
2446 * It is safe to call this routine if the ifp disappeared. Callers should
2447 * hold IFF_LOCKGIANT() to avoid a LOR in case the hardware needs to be
2448 * reconfigured.
2449 */
2450void
2451if_delmulti_ifma(struct ifmultiaddr *ifma)
2452{
2453	struct ifnet *ifp;
2454	int lastref;
2455
2456	ifp = ifma->ifma_ifp;
2457#ifdef DIAGNOSTIC
2458	if (ifp == NULL) {
2459		printf("%s: ifma_ifp seems to be detached\n", __func__);
2460	} else {
2461		struct ifnet *oifp;
2462
2463		IFNET_RLOCK();
2464		TAILQ_FOREACH(oifp, &ifnet, if_link)
2465			if (ifp == oifp)
2466				break;
2467		if (ifp != oifp) {
2468			printf("%s: ifnet %p disappeared\n", __func__, ifp);
2469			ifp = NULL;
2470		}
2471		IFNET_RUNLOCK();
2472	}
2473#endif
2474	/*
2475	 * If and only if the ifnet instance exists: Acquire the address lock.
2476	 */
2477	if (ifp != NULL)
2478		IF_ADDR_LOCK(ifp);
2479
2480	lastref = if_delmulti_locked(ifp, ifma, 0);
2481
2482	if (ifp != NULL) {
2483		/*
2484		 * If and only if the ifnet instance exists:
2485		 *  Release the address lock.
2486		 *  If the group was left: update the hardware hash filter.
2487		 */
2488		IF_ADDR_UNLOCK(ifp);
2489		if (lastref && ifp->if_ioctl != NULL) {
2490			IFF_LOCKGIANT(ifp);
2491			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
2492			IFF_UNLOCKGIANT(ifp);
2493		}
2494	}
2495}
2496
2497/*
2498 * Perform deletion of network-layer and/or link-layer multicast address.
2499 *
2500 * Return 0 if the reference count was decremented.
2501 * Return 1 if the final reference was released, indicating that the
2502 * hardware hash filter should be reprogrammed.
2503 */
2504static int
2505if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
2506{
2507	struct ifmultiaddr *ll_ifma;
2508
2509	if (ifp != NULL && ifma->ifma_ifp != NULL) {
2510		KASSERT(ifma->ifma_ifp == ifp,
2511		    ("%s: inconsistent ifp %p", __func__, ifp));
2512		IF_ADDR_LOCK_ASSERT(ifp);
2513	}
2514
2515	ifp = ifma->ifma_ifp;
2516
2517	/*
2518	 * If the ifnet is detaching, null out references to ifnet,
2519	 * so that upper protocol layers will notice, and not attempt
2520	 * to obtain locks for an ifnet which no longer exists. The
2521	 * routing socket announcement must happen before the ifnet
2522	 * instance is detached from the system.
2523	 */
2524	if (detaching) {
2525#ifdef DIAGNOSTIC
2526		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
2527#endif
2528		/*
2529		 * ifp may already be nulled out if we are being reentered
2530		 * to delete the ll_ifma.
2531		 */
2532		if (ifp != NULL) {
2533			rt_newmaddrmsg(RTM_DELMADDR, ifma);
2534			ifma->ifma_ifp = NULL;
2535		}
2536	}
2537
2538	if (--ifma->ifma_refcount > 0)
2539		return 0;
2540
2541	/*
2542	 * If this ifma is a network-layer ifma, a link-layer ifma may
2543	 * have been associated with it. Release it first if so.
2544	 */
2545	ll_ifma = ifma->ifma_llifma;
2546	if (ll_ifma != NULL) {
2547		KASSERT(ifma->ifma_lladdr != NULL,
2548		    ("%s: llifma w/o lladdr", __func__));
2549		if (detaching)
2550			ll_ifma->ifma_ifp = NULL;	/* XXX */
2551		if (--ll_ifma->ifma_refcount == 0) {
2552			if (ifp != NULL) {
2553				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
2554				    ifma_link);
2555			}
2556			if_freemulti(ll_ifma);
2557		}
2558	}
2559
2560	if (ifp != NULL)
2561		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2562
2563	if_freemulti(ifma);
2564
2565	/*
2566	 * The last reference to this instance of struct ifmultiaddr
2567	 * was released; the hardware should be notified of this change.
2568	 */
2569	return 1;
2570}
2571
2572/*
2573 * Set the link layer address on an interface.
2574 *
2575 * At this time we only support certain types of interfaces,
2576 * and we don't allow the length of the address to change.
2577 */
2578int
2579if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2580{
2581	struct sockaddr_dl *sdl;
2582	struct ifaddr *ifa;
2583	struct ifreq ifr;
2584
2585	ifa = ifp->if_addr;
2586	if (ifa == NULL)
2587		return (EINVAL);
2588	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2589	if (sdl == NULL)
2590		return (EINVAL);
2591	if (len != sdl->sdl_alen)	/* don't allow length to change */
2592		return (EINVAL);
2593	switch (ifp->if_type) {
2594	case IFT_ETHER:
2595	case IFT_FDDI:
2596	case IFT_XETHER:
2597	case IFT_ISO88025:
2598	case IFT_L2VLAN:
2599	case IFT_BRIDGE:
2600	case IFT_ARCNET:
2601	case IFT_IEEE8023ADLAG:
2602		bcopy(lladdr, LLADDR(sdl), len);
2603		break;
2604	default:
2605		return (ENODEV);
2606	}
2607	/*
2608	 * If the interface is already up, we need
2609	 * to re-init it in order to reprogram its
2610	 * address filter.
2611	 */
2612	if ((ifp->if_flags & IFF_UP) != 0) {
2613		if (ifp->if_ioctl) {
2614			IFF_LOCKGIANT(ifp);
2615			ifp->if_flags &= ~IFF_UP;
2616			ifr.ifr_flags = ifp->if_flags & 0xffff;
2617			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2618			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2619			ifp->if_flags |= IFF_UP;
2620			ifr.ifr_flags = ifp->if_flags & 0xffff;
2621			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2622			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2623			IFF_UNLOCKGIANT(ifp);
2624		}
2625#ifdef INET
2626		/*
2627		 * Also send gratuitous ARPs to notify other nodes about
2628		 * the address change.
2629		 */
2630		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2631			if (ifa->ifa_addr->sa_family == AF_INET)
2632				arp_ifinit(ifp, ifa);
2633		}
2634#endif
2635	}
2636	return (0);
2637}
2638
2639/*
2640 * The name argument must be a pointer to storage which will last as
2641 * long as the interface does.  For physical devices, the result of
2642 * device_get_name(dev) is a good choice and for pseudo-devices a
2643 * static string works well.
2644 */
2645void
2646if_initname(struct ifnet *ifp, const char *name, int unit)
2647{
2648	ifp->if_dname = name;
2649	ifp->if_dunit = unit;
2650	if (unit != IF_DUNIT_NONE)
2651		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2652	else
2653		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2654}
2655
2656int
2657if_printf(struct ifnet *ifp, const char * fmt, ...)
2658{
2659	va_list ap;
2660	int retval;
2661
2662	retval = printf("%s: ", ifp->if_xname);
2663	va_start(ap, fmt);
2664	retval += vprintf(fmt, ap);
2665	va_end(ap);
2666	return (retval);
2667}
2668
2669/*
2670 * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
2671 * be called without Giant.  However, we often can't acquire the Giant lock
2672 * at those points; instead, we run it via a task queue that holds Giant via
2673 * if_start_deferred.
2674 *
2675 * XXXRW: We need to make sure that the ifnet isn't fully detached until any
2676 * outstanding if_start_deferred() tasks that will run after the free.  This
2677 * probably means waiting in if_detach().
2678 */
2679void
2680if_start(struct ifnet *ifp)
2681{
2682
2683	NET_ASSERT_GIANT();
2684
2685	if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
2686		if (mtx_owned(&Giant))
2687			(*(ifp)->if_start)(ifp);
2688		else
2689			taskqueue_enqueue(taskqueue_swi_giant,
2690			    &ifp->if_starttask);
2691	} else
2692		(*(ifp)->if_start)(ifp);
2693}
2694
2695static void
2696if_start_deferred(void *context, int pending)
2697{
2698	struct ifnet *ifp;
2699
2700	/*
2701	 * This code must be entered with Giant, and should never run if
2702	 * we're not running with debug.mpsafenet.
2703	 */
2704	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
2705	GIANT_REQUIRED;
2706
2707	ifp = context;
2708	(ifp->if_start)(ifp);
2709}
2710
2711int
2712if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
2713{
2714	int active = 0;
2715
2716	IF_LOCK(ifq);
2717	if (_IF_QFULL(ifq)) {
2718		_IF_DROP(ifq);
2719		IF_UNLOCK(ifq);
2720		m_freem(m);
2721		return (0);
2722	}
2723	if (ifp != NULL) {
2724		ifp->if_obytes += m->m_pkthdr.len + adjust;
2725		if (m->m_flags & (M_BCAST|M_MCAST))
2726			ifp->if_omcasts++;
2727		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
2728	}
2729	_IF_ENQUEUE(ifq, m);
2730	IF_UNLOCK(ifq);
2731	if (ifp != NULL && !active)
2732		if_start(ifp);
2733	return (1);
2734}
2735
2736void
2737if_register_com_alloc(u_char type,
2738    if_com_alloc_t *a, if_com_free_t *f)
2739{
2740
2741	KASSERT(if_com_alloc[type] == NULL,
2742	    ("if_register_com_alloc: %d already registered", type));
2743	KASSERT(if_com_free[type] == NULL,
2744	    ("if_register_com_alloc: %d free already registered", type));
2745
2746	if_com_alloc[type] = a;
2747	if_com_free[type] = f;
2748}
2749
2750void
2751if_deregister_com_alloc(u_char type)
2752{
2753
2754	KASSERT(if_com_alloc[type] != NULL,
2755	    ("if_deregister_com_alloc: %d not registered", type));
2756	KASSERT(if_com_free[type] != NULL,
2757	    ("if_deregister_com_alloc: %d free not registered", type));
2758	if_com_alloc[type] = NULL;
2759	if_com_free[type] = NULL;
2760}
2761