if.c revision 149782
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: head/sys/net/if.c 149782 2005-09-04 17:32:47Z sam $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36#include "opt_mac.h"
37#include "opt_carp.h"
38
39#include <sys/param.h>
40#include <sys/types.h>
41#include <sys/conf.h>
42#include <sys/mac.h>
43#include <sys/malloc.h>
44#include <sys/sbuf.h>
45#include <sys/bus.h>
46#include <sys/mbuf.h>
47#include <sys/systm.h>
48#include <sys/proc.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/protosw.h>
52#include <sys/kernel.h>
53#include <sys/sockio.h>
54#include <sys/syslog.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/domain.h>
58#include <sys/jail.h>
59#include <machine/stdarg.h>
60
61#include <net/if.h>
62#include <net/if_arp.h>
63#include <net/if_clone.h>
64#include <net/if_dl.h>
65#include <net/if_types.h>
66#include <net/if_var.h>
67#include <net/radix.h>
68#include <net/route.h>
69
70#if defined(INET) || defined(INET6)
71/*XXX*/
72#include <netinet/in.h>
73#include <netinet/in_var.h>
74#ifdef INET6
75#include <netinet6/in6_var.h>
76#include <netinet6/in6_ifattach.h>
77#endif
78#endif
79#ifdef INET
80#include <netinet/if_ether.h>
81#endif
82#ifdef DEV_CARP
83#include <netinet/ip_carp.h>
84#endif
85
86SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
87SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
88
89/* Log link state change events */
90static int log_link_state_change = 1;
91
92SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
93	&log_link_state_change, 0,
94	"log interface link state change events");
95
96void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
97void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
98
99struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
100
101static void	if_attachdomain(void *);
102static void	if_attachdomain1(struct ifnet *);
103static int	ifconf(u_long, caddr_t);
104static void	if_grow(void);
105static void	if_init(void *);
106static void	if_check(void *);
107static void	if_qflush(struct ifaltq *);
108static void	if_route(struct ifnet *, int flag, int fam);
109static int	if_setflag(struct ifnet *, int, int, int *, int);
110static void	if_slowtimo(void *);
111static void	if_unroute(struct ifnet *, int flag, int fam);
112static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
113static int	if_rtdel(struct radix_node *, void *);
114static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
115static void	if_start_deferred(void *context, int pending);
116static void	do_link_state_change(void *, int);
117#ifdef INET6
118/*
119 * XXX: declare here to avoid to include many inet6 related files..
120 * should be more generalized?
121 */
122extern void	nd6_setmtu(struct ifnet *);
123#endif
124
125int	if_index = 0;
126struct	ifindex_entry *ifindex_table = NULL;
127int	ifqmaxlen = IFQ_MAXLEN;
128struct	ifnethead ifnet;	/* depend on static init XXX */
129struct	mtx ifnet_lock;
130static	if_com_alloc_t *if_com_alloc[256];
131static	if_com_free_t *if_com_free[256];
132
133static int	if_indexlim = 8;
134static struct	knlist ifklist;
135
136static void	filt_netdetach(struct knote *kn);
137static int	filt_netdev(struct knote *kn, long hint);
138
139static struct filterops netdev_filtops =
140    { 1, NULL, filt_netdetach, filt_netdev };
141
142/*
143 * System initialization
144 */
145SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
146SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
147
148MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
149MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
150MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
151
152static d_open_t		netopen;
153static d_close_t	netclose;
154static d_ioctl_t	netioctl;
155static d_kqfilter_t	netkqfilter;
156
/*
 * Character device switch shared by the "network" control device created
 * in if_init() and the per-interface device nodes created in if_attach().
 * All entry points are the net* handlers defined below.
 */
static struct cdevsw net_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	netopen,
	.d_close =	netclose,
	.d_ioctl =	netioctl,
	.d_name =	"net",
	.d_kqfilter =	netkqfilter,
};
166
/*
 * Open entry point for the net devices.  No per-open state is kept, so
 * every open succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{

	return (0);
}
172
/*
 * Close entry point for the net devices.  Nothing was set up at open
 * time, so there is nothing to release and the call always succeeds.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
178
179static int
180netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
181{
182	struct ifnet *ifp;
183	int error, idx;
184
185	/* only support interface specific ioctls */
186	if (IOCGROUP(cmd) != 'i')
187		return (EOPNOTSUPP);
188	idx = minor(dev);
189	if (idx == 0) {
190		/*
191		 * special network device, not interface.
192		 */
193		if (cmd == SIOCGIFCONF)
194			return (ifconf(cmd, data));	/* XXX remove cmd */
195		return (EOPNOTSUPP);
196	}
197
198	ifp = ifnet_byindex(idx);
199	if (ifp == NULL)
200		return (ENXIO);
201
202	error = ifhwioctl(cmd, ifp, data, td);
203	if (error == ENOIOCTL)
204		error = EOPNOTSUPP;
205	return (error);
206}
207
208static int
209netkqfilter(struct cdev *dev, struct knote *kn)
210{
211	struct knlist *klist;
212	struct ifnet *ifp;
213	int idx;
214
215	switch (kn->kn_filter) {
216	case EVFILT_NETDEV:
217		kn->kn_fop = &netdev_filtops;
218		break;
219	default:
220		return (1);
221	}
222
223	idx = minor(dev);
224	if (idx == 0) {
225		klist = &ifklist;
226	} else {
227		ifp = ifnet_byindex(idx);
228		if (ifp == NULL)
229			return (1);
230		klist = &ifp->if_klist;
231	}
232
233	kn->kn_hook = (caddr_t)klist;
234
235	knlist_add(klist, kn, 0);
236
237	return (0);
238}
239
240static void
241filt_netdetach(struct knote *kn)
242{
243	struct knlist *klist = (struct knlist *)kn->kn_hook;
244
245	knlist_remove(klist, kn, 0);
246}
247
248static int
249filt_netdev(struct knote *kn, long hint)
250{
251	struct knlist *klist = (struct knlist *)kn->kn_hook;
252
253	/*
254	 * Currently NOTE_EXIT is abused to indicate device detach.
255	 */
256	if (hint == NOTE_EXIT) {
257		kn->kn_data = NOTE_LINKINV;
258		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
259		knlist_remove_inevent(klist, kn);
260		return (1);
261	}
262	if (hint != 0)
263		kn->kn_data = hint;			/* current status */
264	if (kn->kn_sfflags & hint)
265		kn->kn_fflags |= hint;
266	return (kn->kn_fflags != 0);
267}
268
269/*
270 * Network interface utility routines.
271 *
272 * Routines with ifa_ifwith* names take sockaddr *'s as
273 * parameters.
274 */
275/* ARGSUSED*/
/*
 * One-time initialization of the interface layer, run via SYSINIT.
 * Sets up the ifnet list lock and list head, the global knote list,
 * the initial ifindex table, the "network" control device (stored in
 * slot 0 of the index table) and the interface cloning code.
 */
static void
if_init(void *dummy __unused)
{

	IFNET_LOCK_INIT();
	TAILQ_INIT(&ifnet);
	knlist_init(&ifklist, NULL, NULL, NULL, NULL);
	if_grow();				/* create initial table */
	/* Minor 0 is the control device that netioctl() treats specially. */
	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
	    UID_ROOT, GID_WHEEL, 0600, "network");
	if_clone_init();
}
288
289static void
290if_grow(void)
291{
292	u_int n;
293	struct ifindex_entry *e;
294
295	if_indexlim <<= 1;
296	n = if_indexlim * sizeof(*e);
297	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
298	if (ifindex_table != NULL) {
299		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
300		free((caddr_t)ifindex_table, M_IFNET);
301	}
302	ifindex_table = e;
303}
304
305/* ARGSUSED*/
306static void
307if_check(void *dummy __unused)
308{
309	struct ifnet *ifp;
310	int s;
311
312	s = splimp();
313	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
314	TAILQ_FOREACH(ifp, &ifnet, if_link) {
315		if (ifp->if_snd.ifq_maxlen == 0) {
316			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
317			ifp->if_snd.ifq_maxlen = ifqmaxlen;
318		}
319		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
320			if_printf(ifp,
321			    "XXX: driver didn't initialize queue mtx\n");
322			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
323			    MTX_NETWORK_LOCK, MTX_DEF);
324		}
325	}
326	IFNET_RUNLOCK();
327	splx(s);
328	if_slowtimo(0);
329}
330
331/*
332 * Allocate a struct ifnet and in index for an interface.
333 */
334struct ifnet*
335if_alloc(u_char type)
336{
337	struct ifnet *ifp;
338
339	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
340
341	/*
342	 * Try to find an empty slot below if_index.  If we fail, take
343	 * the next slot.
344	 *
345	 * XXX: should be locked!
346	 */
347	for (ifp->if_index = 1; ifp->if_index <= if_index; ifp->if_index++) {
348		if (ifnet_byindex(ifp->if_index) == NULL)
349			break;
350	}
351	/* Catch if_index overflow. */
352	if (ifp->if_index < 1) {
353		free(ifp, M_IFNET);
354		return (NULL);
355	}
356	if (ifp->if_index > if_index)
357		if_index = ifp->if_index;
358	if (if_index >= if_indexlim)
359		if_grow();
360	ifnet_byindex(ifp->if_index) = ifp;
361
362	ifp->if_type = type;
363
364	if (if_com_alloc[type] != NULL) {
365		ifp->if_l2com = if_com_alloc[type](type, ifp);
366		if (ifp->if_l2com == NULL) {
367			free(ifp, M_IFNET);
368			return (NULL);
369		}
370	}
371	IF_ADDR_LOCK_INIT(ifp);
372
373	return (ifp);
374}
375
376void
377if_free(struct ifnet *ifp)
378{
379
380	/* Do not add code to this function!  Add it to if_free_type(). */
381	if_free_type(ifp, ifp->if_type);
382}
383
384void
385if_free_type(struct ifnet *ifp, u_char type)
386{
387
388	if (ifp != ifnet_byindex(ifp->if_index)) {
389		if_printf(ifp, "%s: value was not if_alloced, skipping\n",
390		    __func__);
391		return;
392	}
393
394	IF_ADDR_LOCK_DESTROY(ifp);
395
396	ifnet_byindex(ifp->if_index) = NULL;
397
398	/* XXX: should be locked with if_findindex() */
399	while (if_index > 0 && ifnet_byindex(if_index) == NULL)
400		if_index--;
401
402	if (if_com_free[type] != NULL)
403		if_com_free[type](ifp->if_l2com, type);
404
405	free(ifp, M_IFNET);
406};
407
/*
 * Attach an interface to the
 * list of "active" interfaces.
 *
 * The ifnet must come from if_alloc(); anything else panics.  Besides
 * linking the interface into the global ifnet list this initializes its
 * tasks, locks, address lists and knote list, creates its device node
 * and alias, builds the AF_LINK ifaddr (address plus netmask) describing
 * the interface name, resets the ALTQ state of the send queue, attaches
 * the domains if they are far enough initialized, and finally announces
 * the arrival.
 */
void
if_attach(struct ifnet *ifp)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	/* The ifnet must be the one registered at its own index. */
	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
		    ifp->if_xname);

	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
	IF_AFDATA_LOCK_INIT(ifp);
	ifp->if_afdata_initialized = 0;
	IFNET_WLOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
	IFNET_WUNLOCK();
	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	TAILQ_INIT(&ifp->if_addrhead);
	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
	getmicrotime(&ifp->if_lastchange);
	ifp->if_data.ifi_epoch = time_uptime;
	ifp->if_data.ifi_datalen = sizeof(struct if_data);

#ifdef MAC
	mac_init_ifnet(ifp);
	mac_create_ifnet(ifp);
#endif

	/*
	 * Create the device node "net/<xname>" with the interface index as
	 * its unit, plus an alias "net<index>".
	 */
	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
	    unit2minor(ifp->if_index),
	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
	    net_cdevsw.d_name, ifp->if_xname);
	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
	    net_cdevsw.d_name, ifp->if_index);

	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	/*
	 * Always save enough space for any possible name so we can do
	 * a rename in place later.
	 */
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = roundup2(socksize, sizeof(long));
	/* One allocation holds the ifaddr, then the address, then the mask. */
	ifasize = sizeof(*ifa) + 2 * socksize;
	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
	IFA_LOCK_INIT(ifa);
	/* First sockaddr_dl: the link-level address, right after the ifaddr. */
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifaddr_byindex(ifp->if_index) = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* Second sockaddr_dl: the netmask, socksize bytes further on. */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	/* The mask covers exactly the bytes of the interface name. */
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa->ifa_refcnt = 1;
	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
	ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
	ifp->if_snd.altq_type = 0;
	ifp->if_snd.altq_disc = NULL;
	/* Keep only the flag bits covered by ALTQF_CANTCHANGE. */
	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
	ifp->if_snd.altq_tbr  = NULL;
	ifp->if_snd.altq_ifp  = ifp;

	/*
	 * Below this threshold the per-domain attach is left to the
	 * if_attachdomain() SYSINIT, which walks every listed interface.
	 */
	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
509
510static void
511if_attachdomain(void *dummy)
512{
513	struct ifnet *ifp;
514	int s;
515
516	s = splnet();
517	TAILQ_FOREACH(ifp, &ifnet, if_link)
518		if_attachdomain1(ifp);
519	splx(s);
520}
521SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
522    if_attachdomain, NULL);
523
524static void
525if_attachdomain1(struct ifnet *ifp)
526{
527	struct domain *dp;
528	int s;
529
530	s = splnet();
531
532	/*
533	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
534	 * cannot lock ifp->if_afdata initialization, entirely.
535	 */
536	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
537		splx(s);
538		return;
539	}
540	if (ifp->if_afdata_initialized >= domain_init_status) {
541		IF_AFDATA_UNLOCK(ifp);
542		splx(s);
543		printf("if_attachdomain called more than once on %s\n",
544		    ifp->if_xname);
545		return;
546	}
547	ifp->if_afdata_initialized = domain_init_status;
548	IF_AFDATA_UNLOCK(ifp);
549
550	/* address family dependent data region */
551	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
552	for (dp = domains; dp; dp = dp->dom_next) {
553		if (dp->dom_ifattach)
554			ifp->if_afdata[dp->dom_family] =
555			    (*dp->dom_ifattach)(ifp);
556	}
557
558	splx(s);
559}
560
/*
 * Remove any network addresses from an interface.
 *
 * Every address on ifp's address list except AF_LINK ones is removed.
 * AF_INET addresses are deleted through in_control(SIOCDIFADDR) and
 * AF_INET6 addresses through in6_purgeaddr(); anything else, as well as
 * an AF_INET address whose in_control() call failed, is unlinked from
 * the list and released with IFAFREE().
 */

void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa, *next;

	/* The SAFE variant is needed because entries are removed while walking. */
	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {

		/* The link-level address is kept. */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/* On failure fall through to the generic removal below. */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IFAFREE(ifa);
	}
}
599
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces and freeing the struct ifnet.
 *
 * NOTE(review): nothing in this function frees the struct ifnet itself;
 * the visible code only tears down what if_attach() set up.  Presumably
 * the caller follows up with if_free() -- confirm.
 *
 * Order of teardown: drain the pending link-state task, detach CARP,
 * mark the interface down, disable/detach ALTQ, purge addresses, detach
 * IPv6 state, clear the index table address slot and destroy the device
 * node, release the link-level ifaddr, delete routes that use the
 * interface, announce the departure, call the per-domain detach hooks,
 * destroy MAC and knote state, unlink from the ifnet list and destroy
 * the remaining locks.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct radix_node_head	*rnh;
	int s;
	int i;
	struct domain *dp;
 	struct ifnet *iter;
 	int found;

	/*
	 * Remove/wait for pending events.
	 */
	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);

#ifdef DEV_CARP
	/* Maybe hook to the generalized departure handler above?!? */
	if (ifp->if_carp)
		carp_ifdetach(ifp);
#endif

	/*
	 * Remove routes and flush queues.
	 */
	s = splnet();
	if_down(ifp);
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	if_purgeaddrs(ifp);

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif
	/*
	 * Remove address from ifindex_table[] and maybe decrement if_index.
	 * Clean up all addresses.
	 */
	ifaddr_byindex(ifp->if_index) = NULL;
	destroy_dev(ifdev_byindex(ifp->if_index));
	ifdev_byindex(ifp->if_index) = NULL;

	/* We can now free link ifaddr. */
	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
		ifa = TAILQ_FIRST(&ifp->if_addrhead);
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IFAFREE(ifa);
	}

	/*
	 * Delete all remaining routes using this interface
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	for (i = 1; i <= AF_MAX; i++) {
		if ((rnh = rt_tables[i]) == NULL)
			continue;
		RADIX_NODE_HEAD_LOCK(rnh);
		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
		RADIX_NODE_HEAD_UNLOCK(rnh);
	}

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

	/* Let every domain that attached per-interface data release it. */
	IF_AFDATA_LOCK(ifp);
	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}
	IF_AFDATA_UNLOCK(ifp);

#ifdef MAC
	mac_destroy_ifnet(ifp);
#endif /* MAC */
	/* NOTE_EXIT is what filt_netdev() treats as device detach. */
	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
	knlist_clear(&ifp->if_klist, 0);
	knlist_destroy(&ifp->if_klist);
	/* Only unlink the interface if it is really on the ifnet list. */
	IFNET_WLOCK();
 	found = 0;
 	TAILQ_FOREACH(iter, &ifnet, if_link)
 		if (iter == ifp) {
 			found = 1;
 			break;
 		}
 	if (found)
 		TAILQ_REMOVE(&ifnet, ifp, if_link);
	IFNET_WUNLOCK();
	mtx_destroy(&ifp->if_snd.ifq_mtx);
	IF_AFDATA_DESTROY(ifp);
	splx(s);
}
710
711/*
712 * Delete Routes for a Network Interface
713 *
714 * Called for each routing entry via the rnh->rnh_walktree() call above
715 * to delete all route entries referencing a detaching network interface.
716 *
717 * Arguments:
718 *	rn	pointer to node in the routing table
719 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
720 *
721 * Returns:
722 *	0	successful
723 *	errno	failed - reason indicated
724 *
725 */
726static int
727if_rtdel(struct radix_node *rn, void *arg)
728{
729	struct rtentry	*rt = (struct rtentry *)rn;
730	struct ifnet	*ifp = arg;
731	int		err;
732
733	if (rt->rt_ifp == ifp) {
734
735		/*
736		 * Protect (sorta) against walktree recursion problems
737		 * with cloned routes
738		 */
739		if ((rt->rt_flags & RTF_UP) == 0)
740			return (0);
741
742		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
743				rt_mask(rt), rt->rt_flags,
744				(struct rtentry **) NULL);
745		if (err) {
746			log(LOG_WARNING, "if_rtdel: error %d\n", err);
747		}
748	}
749
750	return (0);
751}
752
/*
 * True when the first (a1)->sa_len bytes of the two sockaddrs are equal.
 * The length is always taken from the first argument.
 */
#define	sa_equal(a1, a2)	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
754
755/*
756 * Locate an interface based on a complete address.
757 */
758/*ARGSUSED*/
759struct ifaddr *
760ifa_ifwithaddr(struct sockaddr *addr)
761{
762	struct ifnet *ifp;
763	struct ifaddr *ifa;
764
765	IFNET_RLOCK();
766	TAILQ_FOREACH(ifp, &ifnet, if_link)
767		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
768			if (ifa->ifa_addr->sa_family != addr->sa_family)
769				continue;
770			if (sa_equal(addr, ifa->ifa_addr))
771				goto done;
772			/* IP6 doesn't have broadcast */
773			if ((ifp->if_flags & IFF_BROADCAST) &&
774			    ifa->ifa_broadaddr &&
775			    ifa->ifa_broadaddr->sa_len != 0 &&
776			    sa_equal(ifa->ifa_broadaddr, addr))
777				goto done;
778		}
779	ifa = NULL;
780done:
781	IFNET_RUNLOCK();
782	return (ifa);
783}
784
785/*
786 * Locate the point to point interface with a given destination address.
787 */
788/*ARGSUSED*/
789struct ifaddr *
790ifa_ifwithdstaddr(struct sockaddr *addr)
791{
792	struct ifnet *ifp;
793	struct ifaddr *ifa;
794
795	IFNET_RLOCK();
796	TAILQ_FOREACH(ifp, &ifnet, if_link) {
797		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
798			continue;
799		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
800			if (ifa->ifa_addr->sa_family != addr->sa_family)
801				continue;
802			if (ifa->ifa_dstaddr &&
803			    sa_equal(addr, ifa->ifa_dstaddr))
804				goto done;
805		}
806	}
807	ifa = NULL;
808done:
809	IFNET_RUNLOCK();
810	return (ifa);
811}
812
/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 *
 * Returns the matching ifaddr or NULL.  An exact destination match on a
 * point-to-point AF_INET interface, or an address claimed by an
 * ifa_claim_addr handler, is returned immediately; otherwise the
 * candidate whose netmask refines all others (per rn_refines()) wins.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
		return (ifaddr_byindex(sdl->sdl_index));
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			char *cp, *cp2, *cp3;

			/*
			 * The "next" label sits on this continue so the
			 * mask comparison below can jump here to move on
			 * to the next address.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != 0 &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					goto done;
			} else {
				/*
				 * if we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr))
						goto done;
					continue;
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				/* Compare up to the end of the netmask (its sa_len). */
				cplim = ifa->ifa_netmask->sa_len
					+ (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search
				 * for an even better one.
				 */
				if (ifa_maybe == 0 ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask))
					ifa_maybe = ifa;
			}
		}
	}
	ifa = ifa_maybe;
done:
	IFNET_RUNLOCK();
	return (ifa);
}
906
907/*
908 * Find an interface address specific to an interface best matching
909 * a given address.
910 */
911struct ifaddr *
912ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
913{
914	struct ifaddr *ifa;
915	char *cp, *cp2, *cp3;
916	char *cplim;
917	struct ifaddr *ifa_maybe = 0;
918	u_int af = addr->sa_family;
919
920	if (af >= AF_MAX)
921		return (0);
922	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
923		if (ifa->ifa_addr->sa_family != af)
924			continue;
925		if (ifa_maybe == 0)
926			ifa_maybe = ifa;
927		if (ifa->ifa_netmask == 0) {
928			if (sa_equal(addr, ifa->ifa_addr) ||
929			    (ifa->ifa_dstaddr &&
930			    sa_equal(addr, ifa->ifa_dstaddr)))
931				goto done;
932			continue;
933		}
934		if (ifp->if_flags & IFF_POINTOPOINT) {
935			if (sa_equal(addr, ifa->ifa_dstaddr))
936				goto done;
937		} else {
938			cp = addr->sa_data;
939			cp2 = ifa->ifa_addr->sa_data;
940			cp3 = ifa->ifa_netmask->sa_data;
941			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
942			for (; cp3 < cplim; cp3++)
943				if ((*cp++ ^ *cp2++) & *cp3)
944					break;
945			if (cp3 == cplim)
946				goto done;
947		}
948	}
949	ifa = ifa_maybe;
950done:
951	return (ifa);
952}
953
954#include <net/route.h>
955
956/*
957 * Default action when installing a route with a Link Level gateway.
958 * Lookup an appropriate real ifa to point to.
959 * This should be moved to /sys/net/link.c eventually.
960 */
961static void
962link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
963{
964	struct ifaddr *ifa, *oifa;
965	struct sockaddr *dst;
966	struct ifnet *ifp;
967
968	RT_LOCK_ASSERT(rt);
969
970	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
971	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
972		return;
973	ifa = ifaof_ifpforaddr(dst, ifp);
974	if (ifa) {
975		IFAREF(ifa);		/* XXX */
976		oifa = rt->rt_ifa;
977		rt->rt_ifa = ifa;
978		IFAFREE(oifa);
979		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
980			ifa->ifa_rtrequest(cmd, rt, info);
981	}
982}
983
984/*
985 * Mark an interface down and notify protocols of
986 * the transition.
987 * NOTE: must be called at splnet or eqivalent.
988 */
989static void
990if_unroute(struct ifnet *ifp, int flag, int fam)
991{
992	struct ifaddr *ifa;
993
994	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
995
996	ifp->if_flags &= ~flag;
997	getmicrotime(&ifp->if_lastchange);
998	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
999		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1000			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1001	if_qflush(&ifp->if_snd);
1002#ifdef DEV_CARP
1003	if (ifp->if_carp)
1004		carp_carpdev_state(ifp->if_carp);
1005#endif
1006	rt_ifmsg(ifp);
1007}
1008
1009/*
1010 * Mark an interface up and notify protocols of
1011 * the transition.
1012 * NOTE: must be called at splnet or eqivalent.
1013 */
1014static void
1015if_route(struct ifnet *ifp, int flag, int fam)
1016{
1017	struct ifaddr *ifa;
1018
1019	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
1020
1021	ifp->if_flags |= flag;
1022	getmicrotime(&ifp->if_lastchange);
1023	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1024		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1025			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1026#ifdef DEV_CARP
1027	if (ifp->if_carp)
1028		carp_carpdev_state(ifp->if_carp);
1029#endif
1030	rt_ifmsg(ifp);
1031#ifdef INET6
1032	in6_if_up(ifp);
1033#endif
1034}
1035
1036void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
1037
1038/*
1039 * Handle a change in the interface link state. To avoid LORs
1040 * between driver lock and upper layer locks, as well as possible
1041 * recursions, we post event to taskqueue, and all job
1042 * is done in static do_link_state_change().
1043 */
1044void
1045if_link_state_change(struct ifnet *ifp, int link_state)
1046{
1047	/* Return if state hasn't changed. */
1048	if (ifp->if_link_state == link_state)
1049		return;
1050
1051	ifp->if_link_state = link_state;
1052
1053	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
1054}
1055
1056static void
1057do_link_state_change(void *arg, int pending)
1058{
1059	struct ifnet *ifp = (struct ifnet *)arg;
1060	int link_state = ifp->if_link_state;
1061	int link;
1062
1063	/* Notify that the link state has changed. */
1064	rt_ifmsg(ifp);
1065	if (link_state == LINK_STATE_UP)
1066		link = NOTE_LINKUP;
1067	else if (link_state == LINK_STATE_DOWN)
1068		link = NOTE_LINKDOWN;
1069	else
1070		link = NOTE_LINKINV;
1071	KNOTE_UNLOCKED(&ifp->if_klist, link);
1072	if (ifp->if_nvlans != 0)
1073		(*vlan_link_state_p)(ifp, link);
1074
1075	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
1076	    IFP2AC(ifp)->ac_netgraph != NULL)
1077		(*ng_ether_link_state_p)(ifp, link_state);
1078#ifdef DEV_CARP
1079	if (ifp->if_carp)
1080		carp_carpdev_state(ifp->if_carp);
1081#endif
1082	if (ifp->if_bridge) {
1083		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
1084		(*bstp_linkstate_p)(ifp, link_state);
1085	}
1086
1087	devctl_notify("IFNET", ifp->if_xname,
1088	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1089	if (pending > 1)
1090		if_printf(ifp, "%d link states coalesced\n", pending);
1091	if (log_link_state_change)
1092		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
1093		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
1094}
1095
1096/*
1097 * Mark an interface down and notify protocols of
1098 * the transition.
1099 * NOTE: must be called at splnet or eqivalent.
1100 */
1101void
1102if_down(struct ifnet *ifp)
1103{
1104
1105	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1106}
1107
1108/*
1109 * Mark an interface up and notify protocols of
1110 * the transition.
1111 * NOTE: must be called at splnet or eqivalent.
1112 */
1113void
1114if_up(struct ifnet *ifp)
1115{
1116
1117	if_route(ifp, IFF_UP, AF_UNSPEC);
1118}
1119
1120/*
1121 * Flush an interface queue.
1122 */
1123static void
1124if_qflush(struct ifaltq *ifq)
1125{
1126	struct mbuf *m, *n;
1127
1128	IFQ_LOCK(ifq);
1129#ifdef ALTQ
1130	if (ALTQ_IS_ENABLED(ifq))
1131		ALTQ_PURGE(ifq);
1132#endif
1133	n = ifq->ifq_head;
1134	while ((m = n) != 0) {
1135		n = m->m_act;
1136		m_freem(m);
1137	}
1138	ifq->ifq_head = 0;
1139	ifq->ifq_tail = 0;
1140	ifq->ifq_len = 0;
1141	IFQ_UNLOCK(ifq);
1142}
1143
1144/*
1145 * Handle interface watchdog timer routines.  Called
1146 * from softclock, we decrement timers (if set) and
1147 * call the appropriate interface routine on expiration.
1148 *
1149 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1150 * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1151 * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1152 */
1153static void
1154if_slowtimo(void *arg)
1155{
1156	struct ifnet *ifp;
1157	int s = splimp();
1158
1159	IFNET_RLOCK();
1160	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1161		if (ifp->if_timer == 0 || --ifp->if_timer)
1162			continue;
1163		if (ifp->if_watchdog)
1164			(*ifp->if_watchdog)(ifp);
1165	}
1166	IFNET_RUNLOCK();
1167	splx(s);
1168	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1169}
1170
1171/*
1172 * Map interface name to
1173 * interface structure pointer.
1174 */
1175struct ifnet *
1176ifunit(const char *name)
1177{
1178	struct ifnet *ifp;
1179
1180	IFNET_RLOCK();
1181	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1182		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1183			break;
1184	}
1185	IFNET_RUNLOCK();
1186	return (ifp);
1187}
1188
/*
 * Hardware specific interface ioctls.
 *
 * Handles the interface-level requests that are dispatched before the
 * socket's protocol is consulted.  'cmd' selects the operation, 'ifp' is
 * the target interface, 'data' is the ioctl argument (interpreted as a
 * struct ifreq, or as a struct ifstat for SIOCGIFSTATUS) and 'td' is the
 * calling thread, used for the suser() and MAC checks.
 *
 * Returns 0 on success or an errno value.  ENOIOCTL is returned for any
 * command that is not recognized here.
 */
static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error = 0;
	int new_flags, temp_flags;
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/*
		 * Report the union of if_flags and if_drv_flags, split
		 * into the low and high halves of the request.
		 */
		temp_flags = ifp->if_flags | ifp->if_drv_flags;
		ifr->ifr_flags = temp_flags & 0xffff;
		ifr->ifr_flagshigh = temp_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

#ifdef MAC
	case SIOCGIFMAC:
		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCSIFFLAGS:
		error = suser(td);
		if (error)
			return (error);
		/*
		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
		 * check, so we don't need special handling here yet.
		 */
		/* Reassemble the flag word from its low and high halves. */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			/* IFF_UP is being cleared. */
			int s = splimp();
			if_down(ifp);
			splx(s);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			/* IFF_UP is being set. */
			int s = splimp();
			if_up(ifp);
			splx(s);
		}
		/*
		 * Keep the current value of every IFF_CANTCHANGE bit and
		 * take all other bits from the request.
		 */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		if (new_flags & IFF_PPROMISC) {
			/* Permanently promiscuous mode requested */
			ifp->if_flags |= IFF_PROMISC;
		} else if (ifp->if_pcount == 0) {
			/* No outstanding promiscuous references remain. */
			ifp->if_flags &= ~IFF_PROMISC;
		}
		if (ifp->if_ioctl) {
			/* The driver's return value is discarded here. */
			IFF_LOCKGIANT(ifp);
			(void) (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
		}
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Reject any requested bit not set in if_capabilities. */
		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
			return (EINVAL);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

#ifdef MAC
	case SIOCSIFMAC:
		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCSIFNAME:
		error = suser(td);
		if (error != 0)
			return (error);
		/* Fetch the new name from the address in ifr_data. */
		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
		if (error != 0)
			return (error);
		if (new_name[0] == '\0')
			return (EINVAL);
		/* The new name must not belong to an existing interface. */
		if (ifunit(new_name) != NULL)
			return (EEXIST);

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		log(LOG_INFO, "%s: changing name to '%s'\n",
		    ifp->if_xname, new_name);

		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		/*
		 * The name is also stored in the sockaddr_dl of the address
		 * returned by ifaddr_byindex(); rewrite it there as well.
		 */
		ifa = ifaddr_byindex(ifp->if_index);
		IFA_LOCK(ifa);
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			    sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		/*
		 * Rebuild the netmask: clear the bytes that covered the old
		 * name, then set 0xff over the length of the new name.
		 */
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		IFA_UNLOCK(ifa);

		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
		break;

	case SIOCSIFMETRIC:
		error = suser(td);
		if (error)
			return (error);
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		/* Captured up front so a change can be detected below. */
		u_long oldmtu = ifp->if_mtu;

		error = suser(td);
		if (error)
			return (error);
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
			return (EINVAL);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = suser(td);
		if (error)
			return (error);

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK)
			return (EINVAL);

		if (cmd == SIOCADDMULTI) {
			/* ifma receives the result but is not used here. */
			struct ifmultiaddr *ifma;
			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* Privileged "set" requests that are passed straight to the driver. */
	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
		/* Start from an empty status string. */
		ifs = (struct ifstat *)data;
		ifs->ascii[0] = '\0';
		/* FALLTHROUGH */

	/* "Get" requests passed to the driver without a suser() check. */
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGLIFPHYADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFGENERIC:
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		break;

	case SIOCSIFLLADDR:
		error = suser(td);
		if (error)
			return (error);
		error = if_setlladdr(ifp,
		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
		break;

	default:
		/* Not handled here; ifioctl() keys on this value. */
		error = ENOIOCTL;
		break;
	}
	return (error);
}
1470
1471/*
1472 * Interface ioctls.
1473 */
1474int
1475ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1476{
1477	struct ifnet *ifp;
1478	struct ifreq *ifr;
1479	int error;
1480	int oif_flags;
1481
1482	switch (cmd) {
1483	case SIOCGIFCONF:
1484	case OSIOCGIFCONF:
1485		return (ifconf(cmd, data));
1486	}
1487	ifr = (struct ifreq *)data;
1488
1489	switch (cmd) {
1490	case SIOCIFCREATE:
1491	case SIOCIFDESTROY:
1492		if ((error = suser(td)) != 0)
1493			return (error);
1494		return ((cmd == SIOCIFCREATE) ?
1495			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1496			if_clone_destroy(ifr->ifr_name));
1497
1498	case SIOCIFGCLONERS:
1499		return (if_clone_list((struct if_clonereq *)data));
1500	}
1501
1502	ifp = ifunit(ifr->ifr_name);
1503	if (ifp == 0)
1504		return (ENXIO);
1505
1506	error = ifhwioctl(cmd, ifp, data, td);
1507	if (error != ENOIOCTL)
1508		return (error);
1509
1510	oif_flags = ifp->if_flags;
1511	if (so->so_proto == 0)
1512		return (EOPNOTSUPP);
1513#ifndef COMPAT_43
1514	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1515								 data,
1516								 ifp, td));
1517#else
1518	{
1519		int ocmd = cmd;
1520
1521		switch (cmd) {
1522
1523		case SIOCSIFDSTADDR:
1524		case SIOCSIFADDR:
1525		case SIOCSIFBRDADDR:
1526		case SIOCSIFNETMASK:
1527#if BYTE_ORDER != BIG_ENDIAN
1528			if (ifr->ifr_addr.sa_family == 0 &&
1529			    ifr->ifr_addr.sa_len < 16) {
1530				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1531				ifr->ifr_addr.sa_len = 16;
1532			}
1533#else
1534			if (ifr->ifr_addr.sa_len == 0)
1535				ifr->ifr_addr.sa_len = 16;
1536#endif
1537			break;
1538
1539		case OSIOCGIFADDR:
1540			cmd = SIOCGIFADDR;
1541			break;
1542
1543		case OSIOCGIFDSTADDR:
1544			cmd = SIOCGIFDSTADDR;
1545			break;
1546
1547		case OSIOCGIFBRDADDR:
1548			cmd = SIOCGIFBRDADDR;
1549			break;
1550
1551		case OSIOCGIFNETMASK:
1552			cmd = SIOCGIFNETMASK;
1553		}
1554		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1555								   cmd,
1556								   data,
1557								   ifp, td));
1558		switch (ocmd) {
1559
1560		case OSIOCGIFADDR:
1561		case OSIOCGIFDSTADDR:
1562		case OSIOCGIFBRDADDR:
1563		case OSIOCGIFNETMASK:
1564			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1565
1566		}
1567	}
1568#endif /* COMPAT_43 */
1569
1570	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1571#ifdef INET6
1572		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1573		if (ifp->if_flags & IFF_UP) {
1574			int s = splimp();
1575			in6_if_up(ifp);
1576			splx(s);
1577		}
1578#endif
1579	}
1580	return (error);
1581}
1582
1583/*
1584 * The code common to handling reference counted flags,
1585 * e.g., in ifpromisc() and if_allmulti().
1586 * The "pflag" argument can specify a permanent mode flag,
1587 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
1588 *
1589 * Only to be used on stack-owned flags, not driver-owned flags.
1590 */
1591static int
1592if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
1593{
1594	struct ifreq ifr;
1595	int error;
1596	int oldflags, oldcount;
1597
1598	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
1599	    ("if_setflag: setting driver-ownded flag %d", flag));
1600
1601	/* Sanity checks to catch programming errors */
1602	if (onswitch) {
1603		if (*refcount < 0) {
1604			if_printf(ifp,
1605			    "refusing to increment negative refcount %d "
1606			    "for interface flag %d\n", *refcount, flag);
1607			return (EINVAL);
1608		}
1609	} else {
1610		if (*refcount <= 0) {
1611			if_printf(ifp,
1612			    "refusing to decrement non-positive refcount %d"
1613			    "for interface flag %d\n", *refcount, flag);
1614			return (EINVAL);
1615		}
1616	}
1617
1618	/* In case this mode is permanent, just touch refcount */
1619	if (ifp->if_flags & pflag) {
1620		*refcount += onswitch ? 1 : -1;
1621		return (0);
1622	}
1623
1624	/* Save ifnet parameters for if_ioctl() may fail */
1625	oldcount = *refcount;
1626	oldflags = ifp->if_flags;
1627
1628	/*
1629	 * See if we aren't the only and touching refcount is enough.
1630	 * Actually toggle interface flag if we are the first or last.
1631	 */
1632	if (onswitch) {
1633		if ((*refcount)++)
1634			return (0);
1635		ifp->if_flags |= flag;
1636	} else {
1637		if (--(*refcount))
1638			return (0);
1639		ifp->if_flags &= ~flag;
1640	}
1641
1642	/* Call down the driver since we've changed interface flags */
1643	if (ifp->if_ioctl == NULL) {
1644		error = EOPNOTSUPP;
1645		goto recover;
1646	}
1647	ifr.ifr_flags = ifp->if_flags & 0xffff;
1648	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1649	IFF_LOCKGIANT(ifp);
1650	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1651	IFF_UNLOCKGIANT(ifp);
1652	if (error)
1653		goto recover;
1654	/* Notify userland that interface flags have changed */
1655	rt_ifmsg(ifp);
1656	return (0);
1657
1658recover:
1659	/* Recover after driver error */
1660	*refcount = oldcount;
1661	ifp->if_flags = oldflags;
1662	return (error);
1663}
1664
1665/*
1666 * Set/clear promiscuous mode on interface ifp based on the truth value
1667 * of pswitch.  The calls are reference counted so that only the first
1668 * "on" request actually has an effect, as does the final "off" request.
1669 * Results are undefined if the "off" and "on" requests are not matched.
1670 */
1671int
1672ifpromisc(struct ifnet *ifp, int pswitch)
1673{
1674	int error;
1675	int oldflags = ifp->if_flags;
1676
1677	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
1678			   &ifp->if_pcount, pswitch);
1679	/* If promiscuous mode status has changed, log a message */
1680	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
1681		log(LOG_INFO, "%s: promiscuous mode %s\n",
1682		    ifp->if_xname,
1683		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1684	return (error);
1685}
1686
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * 'cmd' is the ioctl that got us here (it selects the old record layout
 * when it is OSIOCGIFCONF under COMPAT_43) and 'data' is the caller's
 * struct ifconf.  One record is appended to a fixed-length sbuf for every
 * address of every interface; an interface with no reported addresses
 * gets a single record with a zeroed address.  The accumulated bytes are
 * then copied out to ifc->ifc_req and ifc->ifc_len is set to the number
 * of bytes produced.
 *
 * Returns 0 or an errno value: EINVAL for a non-positive ifc_len,
 * ENAMETOOLONG if an interface name does not fit in ifr_name, or the
 * result of copyout().
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
	max_len = MAXPHYS - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	/*
	 * Never allocate more than the caller asked for.  Once the buffer
	 * is capped at the caller's length, 'full' stops further retries.
	 */
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	/*
	 * From here on max_len counts the bytes we tried to append, and
	 * valid_len the sbuf length after the last append that did not
	 * overflow.
	 */
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();		/* could sleep XXX */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		int addrs;

		/*
		 * Zero the ifr_name buffer to make sure we don't
		 * disclose the contents of the stack.
		 */
		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));

		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		/* Count of addresses actually reported for this interface. */
		addrs = 0;
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			/* Skip addresses that prison_if() rejects for a jail. */
			if (jailed(curthread->td_ucred) &&
			    prison_if(curthread->td_ucred, sa))
				continue;
			addrs++;
#ifdef COMPAT_43
			if (cmd == OSIOCGIFCONF) {
				/* Old layout: store the family via osockaddr. */
				struct osockaddr *osa =
					 (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else
#endif
			if (sa->sa_len <= sizeof(*sa)) {
				/* Address fits inside a plain struct ifreq. */
				ifr.ifr_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/*
				 * Longer address: emit the part of the ifreq
				 * that precedes ifr_addr, then the full
				 * sockaddr.
				 */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			if (!sbuf_overflowed(sb))
				valid_len = sbuf_len(sb);
		}
		if (addrs == 0) {
			/* Nothing reported: emit one record with a zeroed address. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (!sbuf_overflowed(sb))
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	ifc->ifc_len = valid_len;
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}
1797
/*
 * Just like ifpromisc(), but for all-multicast-reception mode.
 *
 * Delegates to if_setflag() with IFF_ALLMULTI as the flag, no permanent
 * flag (0) and if_amcount as the reference counter; 'onswitch' is passed
 * through unchanged.  Returns whatever if_setflag() returns.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{

	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
}
1807
1808static struct ifmultiaddr *
1809if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
1810{
1811	struct ifmultiaddr *ifma;
1812
1813	IF_ADDR_LOCK_ASSERT(ifp);
1814
1815	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1816		if (sa_equal(ifma->ifma_addr, sa))
1817			break;
1818	}
1819
1820	return ifma;
1821}
1822
1823/*
1824 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
1825 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
1826 * the ifnet multicast address list here, so the caller must do that and
1827 * other setup work (such as notifying the device driver).  The reference
1828 * count is initialized to 1.
1829 */
1830static struct ifmultiaddr *
1831if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
1832    int mflags)
1833{
1834	struct ifmultiaddr *ifma;
1835	struct sockaddr *dupsa;
1836
1837	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, mflags |
1838	    M_ZERO);
1839	if (ifma == NULL)
1840		return (NULL);
1841
1842	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, mflags);
1843	if (dupsa == NULL) {
1844		FREE(ifma, M_IFMADDR);
1845		return (NULL);
1846	}
1847	bcopy(sa, dupsa, sa->sa_len);
1848	ifma->ifma_addr = dupsa;
1849
1850	ifma->ifma_ifp = ifp;
1851	ifma->ifma_refcount = 1;
1852	ifma->ifma_protospec = NULL;
1853
1854	if (llsa == NULL) {
1855		ifma->ifma_lladdr = NULL;
1856		return (ifma);
1857	}
1858
1859	MALLOC(dupsa, struct sockaddr *, llsa->sa_len, M_IFMADDR, mflags);
1860	if (dupsa == NULL) {
1861		FREE(ifma->ifma_addr, M_IFMADDR);
1862		FREE(ifma, M_IFMADDR);
1863		return (NULL);
1864	}
1865	bcopy(llsa, dupsa, llsa->sa_len);
1866	ifma->ifma_lladdr = dupsa;
1867
1868	return (ifma);
1869}
1870
1871/*
1872 * if_freemulti: free ifmultiaddr structure and possibly attached related
1873 * addresses.  The caller is responsible for implementing reference
1874 * counting, notifying the driver, handling routing messages, and releasing
1875 * any dependent link layer state.
1876 */
1877static void
1878if_freemulti(struct ifmultiaddr *ifma)
1879{
1880
1881	KASSERT(ifma->ifma_refcount == 1, ("if_freemulti: refcount %d",
1882	    ifma->ifma_refcount));
1883	KASSERT(ifma->ifma_protospec == NULL,
1884	    ("if_freemulti: protospec not NULL"));
1885
1886	if (ifma->ifma_lladdr != NULL)
1887		FREE(ifma->ifma_lladdr, M_IFMADDR);
1888	FREE(ifma->ifma_addr, M_IFMADDR);
1889	FREE(ifma, M_IFMADDR);
1890}
1891
1892/*
1893 * Register an additional multicast address with a network interface.
1894 *
1895 * - If the address is already present, bump the reference count on the
1896 *   address and return.
1897 * - If the address is not link-layer, look up a link layer address.
1898 * - Allocate address structures for one or both addresses, and attach to the
1899 *   multicast address list on the interface.  If automatically adding a link
1900 *   layer address, the protocol address will own a reference to the link
1901 *   layer address, to be freed when it is freed.
1902 * - Notify the network device driver of an addition to the multicast address
1903 *   list.
1904 *
1905 * 'sa' points to caller-owned memory with the desired multicast address.
1906 *
1907 * 'retifma' will be used to return a pointer to the resulting multicast
1908 * address reference, if desired.
1909 */
1910int
1911if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
1912    struct ifmultiaddr **retifma)
1913{
1914	struct ifmultiaddr *ifma, *ll_ifma;
1915	struct sockaddr *llsa;
1916	int error;
1917
1918	/*
1919	 * If the address is already present, return a new reference to it;
1920	 * otherwise, allocate storage and set up a new address.
1921	 */
1922	IF_ADDR_LOCK(ifp);
1923	ifma = if_findmulti(ifp, sa);
1924	if (ifma != NULL) {
1925		ifma->ifma_refcount++;
1926		if (retifma != NULL)
1927			*retifma = ifma;
1928		IF_ADDR_UNLOCK(ifp);
1929		return (0);
1930	}
1931
1932	/*
1933	 * The address isn't already present; resolve the protocol address
1934	 * into a link layer address, and then look that up, bump its
1935	 * refcount or allocate an ifma for that also.  If 'llsa' was
1936	 * returned, we will need to free it later.
1937	 */
1938	llsa = NULL;
1939	ll_ifma = NULL;
1940	if (ifp->if_resolvemulti != NULL) {
1941		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1942		if (error)
1943			goto unlock_out;
1944	}
1945
1946	/*
1947	 * Allocate the new address.  Don't hook it up yet, as we may also
1948	 * need to allocate a link layer multicast address.
1949	 */
1950	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
1951	if (ifma == NULL) {
1952		error = ENOMEM;
1953		goto free_llsa_out;
1954	}
1955
1956	/*
1957	 * If a link layer address is found, we'll need to see if it's
1958	 * already present in the address list, or allocate is as well.
1959	 * When this block finishes, the link layer address will be on the
1960	 * list.
1961	 */
1962	if (llsa != NULL) {
1963		ll_ifma = if_findmulti(ifp, llsa);
1964		if (ll_ifma == NULL) {
1965			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
1966			if (ll_ifma == NULL) {
1967				if_freemulti(ifma);
1968				error = ENOMEM;
1969				goto free_llsa_out;
1970			}
1971			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
1972			    ifma_link);
1973		} else
1974			ll_ifma->ifma_refcount++;
1975	}
1976
1977	/*
1978	 * We now have a new multicast address, ifma, and possibly a new or
1979	 * referenced link layer address.  Add the primary address to the
1980	 * ifnet address list.
1981	 */
1982	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1983
1984	if (retifma != NULL)
1985		*retifma = ifma;
1986
1987	/*
1988	 * Must generate the message while holding the lock so that 'ifma'
1989	 * pointer is still valid.
1990	 *
1991	 * XXXRW: How come we don't announce ll_ifma?
1992	 */
1993	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1994	IF_ADDR_UNLOCK(ifp);
1995
1996	/*
1997	 * We are certain we have added something, so call down to the
1998	 * interface to let them know about it.
1999	 */
2000	if (ifp->if_ioctl != NULL) {
2001		IFF_LOCKGIANT(ifp);
2002		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
2003		IFF_UNLOCKGIANT(ifp);
2004	}
2005
2006	if (llsa != NULL)
2007		FREE(llsa, M_IFMADDR);
2008
2009	return (0);
2010
2011free_llsa_out:
2012	if (llsa != NULL)
2013		FREE(llsa, M_IFMADDR);
2014
2015unlock_out:
2016	IF_ADDR_UNLOCK(ifp);
2017	return (error);
2018}
2019
2020/*
2021 * Remove a reference to a multicast address on this interface.  Yell
2022 * if the request does not match an existing membership.
2023 */
2024int
2025if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2026{
2027	struct ifmultiaddr *ifma, *ll_ifma;
2028
2029	IF_ADDR_LOCK(ifp);
2030	ifma = if_findmulti(ifp, sa);
2031	if (ifma == NULL) {
2032		IF_ADDR_UNLOCK(ifp);
2033		return ENOENT;
2034	}
2035
2036	if (ifma->ifma_refcount > 1) {
2037		ifma->ifma_refcount--;
2038		IF_ADDR_UNLOCK(ifp);
2039		return 0;
2040	}
2041
2042	sa = ifma->ifma_lladdr;
2043	if (sa != NULL)
2044		ll_ifma = if_findmulti(ifp, sa);
2045	else
2046		ll_ifma = NULL;
2047
2048	/*
2049	 * XXXRW: How come we don't announce ll_ifma?
2050	 */
2051	rt_newmaddrmsg(RTM_DELMADDR, ifma);
2052
2053	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2054	if_freemulti(ifma);
2055
2056	if (ll_ifma != NULL) {
2057		if (ll_ifma->ifma_refcount == 1) {
2058			TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link);
2059			if_freemulti(ll_ifma);
2060		} else
2061			ll_ifma->ifma_refcount--;
2062	}
2063	IF_ADDR_UNLOCK(ifp);
2064
2065	/*
2066	 * Make sure the interface driver is notified
2067	 * in the case of a link layer mcast group being left.
2068	 */
2069	if (ifp->if_ioctl) {
2070		IFF_LOCKGIANT(ifp);
2071		(void) (*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
2072		IFF_UNLOCKGIANT(ifp);
2073	}
2074
2075	return 0;
2076}
2077
2078/*
2079 * Set the link layer address on an interface.
2080 *
2081 * At this time we only support certain types of interfaces,
2082 * and we don't allow the length of the address to change.
2083 */
2084int
2085if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2086{
2087	struct sockaddr_dl *sdl;
2088	struct ifaddr *ifa;
2089	struct ifreq ifr;
2090
2091	ifa = ifaddr_byindex(ifp->if_index);
2092	if (ifa == NULL)
2093		return (EINVAL);
2094	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2095	if (sdl == NULL)
2096		return (EINVAL);
2097	if (len != sdl->sdl_alen)	/* don't allow length to change */
2098		return (EINVAL);
2099	switch (ifp->if_type) {
2100	case IFT_ETHER:			/* these types use struct arpcom */
2101	case IFT_FDDI:
2102	case IFT_XETHER:
2103	case IFT_ISO88025:
2104	case IFT_L2VLAN:
2105	case IFT_BRIDGE:
2106		bcopy(lladdr, IFP2ENADDR(ifp), len);
2107		/*
2108		 * XXX We also need to store the lladdr in LLADDR(sdl),
2109		 * which is done below. This is a pain because we must
2110		 * remember to keep the info in sync.
2111		 */
2112		/* FALLTHROUGH */
2113	case IFT_ARCNET:
2114		bcopy(lladdr, LLADDR(sdl), len);
2115		break;
2116	default:
2117		return (ENODEV);
2118	}
2119	/*
2120	 * If the interface is already up, we need
2121	 * to re-init it in order to reprogram its
2122	 * address filter.
2123	 */
2124	if ((ifp->if_flags & IFF_UP) != 0) {
2125		if (ifp->if_ioctl) {
2126			IFF_LOCKGIANT(ifp);
2127			ifp->if_flags &= ~IFF_UP;
2128			ifr.ifr_flags = ifp->if_flags & 0xffff;
2129			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2130			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2131			ifp->if_flags |= IFF_UP;
2132			ifr.ifr_flags = ifp->if_flags & 0xffff;
2133			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2134			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2135			IFF_UNLOCKGIANT(ifp);
2136		}
2137#ifdef INET
2138		/*
2139		 * Also send gratuitous ARPs to notify other nodes about
2140		 * the address change.
2141		 */
2142		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2143			if (ifa->ifa_addr != NULL &&
2144			    ifa->ifa_addr->sa_family == AF_INET)
2145				arp_ifinit(ifp, ifa);
2146		}
2147#endif
2148	}
2149	return (0);
2150}
2151
2152/*
2153 * The name argument must be a pointer to storage which will last as
2154 * long as the interface does.  For physical devices, the result of
2155 * device_get_name(dev) is a good choice and for pseudo-devices a
2156 * static string works well.
2157 */
2158void
2159if_initname(struct ifnet *ifp, const char *name, int unit)
2160{
2161	ifp->if_dname = name;
2162	ifp->if_dunit = unit;
2163	if (unit != IF_DUNIT_NONE)
2164		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2165	else
2166		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2167}
2168
2169int
2170if_printf(struct ifnet *ifp, const char * fmt, ...)
2171{
2172	va_list ap;
2173	int retval;
2174
2175	retval = printf("%s: ", ifp->if_xname);
2176	va_start(ap, fmt);
2177	retval += vprintf(fmt, ap);
2178	va_end(ap);
2179	return (retval);
2180}
2181
2182/*
2183 * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
2184 * be called without Giant.  However, we often can't acquire the Giant lock
2185 * at those points; instead, we run it via a task queue that holds Giant via
2186 * if_start_deferred.
2187 *
2188 * XXXRW: We need to make sure that the ifnet isn't fully detached until any
2189 * outstanding if_start_deferred() tasks that will run after the free.  This
2190 * probably means waiting in if_detach().
2191 */
2192void
2193if_start(struct ifnet *ifp)
2194{
2195
2196	NET_ASSERT_GIANT();
2197
2198	if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
2199		if (mtx_owned(&Giant))
2200			(*(ifp)->if_start)(ifp);
2201		else
2202			taskqueue_enqueue(taskqueue_swi_giant,
2203			    &ifp->if_starttask);
2204	} else
2205		(*(ifp)->if_start)(ifp);
2206}
2207
2208static void
2209if_start_deferred(void *context, int pending)
2210{
2211	struct ifnet *ifp;
2212
2213	/*
2214	 * This code must be entered with Giant, and should never run if
2215	 * we're not running with debug.mpsafenet.
2216	 */
2217	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
2218	GIANT_REQUIRED;
2219
2220	ifp = context;
2221	(ifp->if_start)(ifp);
2222}
2223
2224int
2225if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
2226{
2227	int active = 0;
2228
2229	IF_LOCK(ifq);
2230	if (_IF_QFULL(ifq)) {
2231		_IF_DROP(ifq);
2232		IF_UNLOCK(ifq);
2233		m_freem(m);
2234		return (0);
2235	}
2236	if (ifp != NULL) {
2237		ifp->if_obytes += m->m_pkthdr.len + adjust;
2238		if (m->m_flags & (M_BCAST|M_MCAST))
2239			ifp->if_omcasts++;
2240		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
2241	}
2242	_IF_ENQUEUE(ifq, m);
2243	IF_UNLOCK(ifq);
2244	if (ifp != NULL && !active)
2245		if_start(ifp);
2246	return (1);
2247}
2248
2249void
2250if_register_com_alloc(u_char type,
2251    if_com_alloc_t *a, if_com_free_t *f)
2252{
2253
2254	KASSERT(if_com_alloc[type] == NULL,
2255	    ("if_register_com_alloc: %d already registered", type));
2256	KASSERT(if_com_free[type] == NULL,
2257	    ("if_register_com_alloc: %d free already registered", type));
2258
2259	if_com_alloc[type] = a;
2260	if_com_free[type] = f;
2261}
2262
2263void
2264if_deregister_com_alloc(u_char type)
2265{
2266
2267	KASSERT(if_com_alloc[type] == NULL,
2268	    ("if_deregister_com_alloc: %d not registered", type));
2269	KASSERT(if_com_free[type] == NULL,
2270	    ("if_deregister_com_alloc: %d free not registered", type));
2271	if_com_alloc[type] = NULL;
2272	if_com_free[type] = NULL;
2273}
2274