if.c revision 147730
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: head/sys/net/if.c 147730 2005-07-01 16:28:32Z ssouhlal $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36#include "opt_mac.h"
37#include "opt_carp.h"
38
39#include <sys/param.h>
40#include <sys/types.h>
41#include <sys/conf.h>
42#include <sys/mac.h>
43#include <sys/malloc.h>
44#include <sys/sbuf.h>
45#include <sys/bus.h>
46#include <sys/mbuf.h>
47#include <sys/systm.h>
48#include <sys/proc.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/protosw.h>
52#include <sys/kernel.h>
53#include <sys/sockio.h>
54#include <sys/syslog.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/domain.h>
58#include <sys/jail.h>
59#include <machine/stdarg.h>
60
61#include <net/if.h>
62#include <net/if_arp.h>
63#include <net/if_clone.h>
64#include <net/if_dl.h>
65#include <net/if_types.h>
66#include <net/if_var.h>
67#include <net/radix.h>
68#include <net/route.h>
69
70#if defined(INET) || defined(INET6)
71/*XXX*/
72#include <netinet/in.h>
73#include <netinet/in_var.h>
74#ifdef INET6
75#include <netinet6/in6_var.h>
76#include <netinet6/in6_ifattach.h>
77#endif
78#endif
79#ifdef INET
80#include <netinet/if_ether.h>
81#endif
82#ifdef DEV_CARP
83#include <netinet/ip_carp.h>
84#endif
85
86SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
87SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
88
89/* Log link state change events */
90static int log_link_state_change = 1;
91
92SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
93	&log_link_state_change, 0,
94	"log interface link state change events");
95
96void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
97void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
98
99struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
100
101static void	if_attachdomain(void *);
102static void	if_attachdomain1(struct ifnet *);
103static int	ifconf(u_long, caddr_t);
104static void	if_grow(void);
105static void	if_init(void *);
106static void	if_check(void *);
107static int	if_findindex(struct ifnet *);
108static void	if_qflush(struct ifaltq *);
109static void	if_route(struct ifnet *, int flag, int fam);
110static void	if_slowtimo(void *);
111static void	if_unroute(struct ifnet *, int flag, int fam);
112static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
113static int	if_rtdel(struct radix_node *, void *);
114static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
115static void	if_start_deferred(void *context, int pending);
116static void	do_link_state_change(void *, int);
117#ifdef INET6
118/*
119 * XXX: declare here to avoid to include many inet6 related files..
120 * should be more generalized?
121 */
122extern void	nd6_setmtu(struct ifnet *);
123#endif
124
125int	if_index = 0;
126struct	ifindex_entry *ifindex_table = NULL;
127int	ifqmaxlen = IFQ_MAXLEN;
128struct	ifnethead ifnet;	/* depend on static init XXX */
129struct	mtx ifnet_lock;
130static	if_com_alloc_t *if_com_alloc[256];
131static	if_com_free_t *if_com_free[256];
132
133static int	if_indexlim = 8;
134static struct	knlist ifklist;
135
136static void	filt_netdetach(struct knote *kn);
137static int	filt_netdev(struct knote *kn, long hint);
138
139static struct filterops netdev_filtops =
140    { 1, NULL, filt_netdetach, filt_netdev };
141
142/*
143 * System initialization
144 */
145SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
146SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
147
148MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
149MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
150MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
151
152static d_open_t		netopen;
153static d_close_t	netclose;
154static d_ioctl_t	netioctl;
155static d_kqfilter_t	netkqfilter;
156
157static struct cdevsw net_cdevsw = {
158	.d_version =	D_VERSION,
159	.d_flags =	D_NEEDGIANT,
160	.d_open =	netopen,
161	.d_close =	netclose,
162	.d_ioctl =	netioctl,
163	.d_name =	"net",
164	.d_kqfilter =	netkqfilter,
165};
166
/*
 * Open entry point of the "net" devices.  There is no per-open state, so
 * every open succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{

	return (0);
}
172
/*
 * Close entry point of the "net" devices.  Nothing was set up on open, so
 * there is nothing to release.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
178
179static int
180netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
181{
182	struct ifnet *ifp;
183	int error, idx;
184
185	/* only support interface specific ioctls */
186	if (IOCGROUP(cmd) != 'i')
187		return (EOPNOTSUPP);
188	idx = minor(dev);
189	if (idx == 0) {
190		/*
191		 * special network device, not interface.
192		 */
193		if (cmd == SIOCGIFCONF)
194			return (ifconf(cmd, data));	/* XXX remove cmd */
195		return (EOPNOTSUPP);
196	}
197
198	ifp = ifnet_byindex(idx);
199	if (ifp == NULL)
200		return (ENXIO);
201
202	error = ifhwioctl(cmd, ifp, data, td);
203	if (error == ENOIOCTL)
204		error = EOPNOTSUPP;
205	return (error);
206}
207
208static int
209netkqfilter(struct cdev *dev, struct knote *kn)
210{
211	struct knlist *klist;
212	struct ifnet *ifp;
213	int idx;
214
215	switch (kn->kn_filter) {
216	case EVFILT_NETDEV:
217		kn->kn_fop = &netdev_filtops;
218		break;
219	default:
220		return (1);
221	}
222
223	idx = minor(dev);
224	if (idx == 0) {
225		klist = &ifklist;
226	} else {
227		ifp = ifnet_byindex(idx);
228		if (ifp == NULL)
229			return (1);
230		klist = &ifp->if_klist;
231	}
232
233	kn->kn_hook = (caddr_t)klist;
234
235	knlist_add(klist, kn, 0);
236
237	return (0);
238}
239
240static void
241filt_netdetach(struct knote *kn)
242{
243	struct knlist *klist = (struct knlist *)kn->kn_hook;
244
245	knlist_remove(klist, kn, 0);
246}
247
248static int
249filt_netdev(struct knote *kn, long hint)
250{
251	struct knlist *klist = (struct knlist *)kn->kn_hook;
252
253	/*
254	 * Currently NOTE_EXIT is abused to indicate device detach.
255	 */
256	if (hint == NOTE_EXIT) {
257		kn->kn_data = NOTE_LINKINV;
258		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
259		knlist_remove_inevent(klist, kn);
260		return (1);
261	}
262	if (hint != 0)
263		kn->kn_data = hint;			/* current status */
264	if (kn->kn_sfflags & hint)
265		kn->kn_fflags |= hint;
266	return (kn->kn_fflags != 0);
267}
268
269/*
270 * Network interface utility routines.
271 *
272 * Routines with ifa_ifwith* names take sockaddr *'s as
273 * parameters.
274 */
275/* ARGSUSED*/
276static void
277if_init(void *dummy __unused)
278{
279
280	IFNET_LOCK_INIT();
281	TAILQ_INIT(&ifnet);
282	knlist_init(&ifklist, NULL, NULL, NULL, NULL);
283	if_grow();				/* create initial table */
284	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
285	    UID_ROOT, GID_WHEEL, 0600, "network");
286	if_clone_init();
287}
288
289static void
290if_grow(void)
291{
292	u_int n;
293	struct ifindex_entry *e;
294
295	if_indexlim <<= 1;
296	n = if_indexlim * sizeof(*e);
297	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
298	if (ifindex_table != NULL) {
299		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
300		free((caddr_t)ifindex_table, M_IFNET);
301	}
302	ifindex_table = e;
303}
304
305/* ARGSUSED*/
306static void
307if_check(void *dummy __unused)
308{
309	struct ifnet *ifp;
310	int s;
311
312	s = splimp();
313	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
314	TAILQ_FOREACH(ifp, &ifnet, if_link) {
315		if (ifp->if_snd.ifq_maxlen == 0) {
316			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
317			ifp->if_snd.ifq_maxlen = ifqmaxlen;
318		}
319		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
320			if_printf(ifp,
321			    "XXX: driver didn't initialize queue mtx\n");
322			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
323			    MTX_NETWORK_LOCK, MTX_DEF);
324		}
325	}
326	IFNET_RUNLOCK();
327	splx(s);
328	if_slowtimo(0);
329}
330
331/* XXX: should be locked. */
332static int
333if_findindex(struct ifnet *ifp)
334{
335	int i, unit;
336	char eaddr[18], devname[32];
337	const char *name, *p;
338
339	switch (ifp->if_type) {
340	case IFT_ETHER:			/* these types use struct arpcom */
341	case IFT_FDDI:
342	case IFT_XETHER:
343	case IFT_ISO88025:
344	case IFT_L2VLAN:
345	case IFT_BRIDGE:
346		snprintf(eaddr, 18, "%6D", IFP2ENADDR(ifp), ":");
347		break;
348	default:
349		eaddr[0] = '\0';
350		break;
351	}
352	strlcpy(devname, ifp->if_xname, sizeof(devname));
353	name = net_cdevsw.d_name;
354	i = 0;
355	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
356		if (resource_string_value(name, unit, "ether", &p) == 0)
357			if (strcmp(p, eaddr) == 0)
358				goto found;
359		if (resource_string_value(name, unit, "dev", &p) == 0)
360			if (strcmp(p, devname) == 0)
361				goto found;
362	}
363	unit = 0;
364found:
365	if (unit != 0) {
366		if (ifaddr_byindex(unit) == NULL)
367			return (unit);
368		printf("%s%d in use, cannot hardwire it to %s.\n",
369		    name, unit, devname);
370	}
371	for (unit = 1; ; unit++) {
372		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
373			continue;
374		if (resource_string_value(name, unit, "ether", &p) == 0 ||
375		    resource_string_value(name, unit, "dev", &p) == 0)
376			continue;
377		break;
378	}
379	return (unit);
380}
381
382/*
383 * Allocate a struct ifnet and in index for an interface.
384 */
385struct ifnet*
386if_alloc(u_char type)
387{
388	struct ifnet *ifp;
389
390	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
391
392	/* XXX: This should fail if if_index is too big */
393	ifp->if_index = if_findindex(ifp);
394	if (ifp->if_index > if_index)
395		if_index = ifp->if_index;
396	if (if_index >= if_indexlim)
397		if_grow();
398
399	ifnet_byindex(ifp->if_index) = ifp;
400
401	ifp->if_type = type;
402
403	if (if_com_alloc[type] != NULL) {
404		ifp->if_l2com = if_com_alloc[type](type, ifp);
405		if (ifp->if_l2com == NULL) {
406			free(ifp, M_IFNET);
407			return (NULL);
408		}
409	}
410
411	return (ifp);
412}
413
414void
415if_free(struct ifnet *ifp)
416{
417
418	if_free_type(ifp, ifp->if_type);
419}
420
421void
422if_free_type(struct ifnet *ifp, u_char type)
423{
424
425	if (ifp != ifnet_byindex(ifp->if_index)) {
426		if_printf(ifp, "%s: value was not if_alloced, skipping\n",
427		    __func__);
428		return;
429	}
430
431	ifnet_byindex(ifp->if_index) = NULL;
432
433	/* XXX: should be locked with if_findindex() */
434	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
435		if_index--;
436
437	if (if_com_free[type] != NULL)
438		if_com_free[type](ifp->if_l2com, type);
439
440	free(ifp, M_IFNET);
441};
442
443/*
444 * Attach an interface to the
445 * list of "active" interfaces.
446 */
447void
448if_attach(struct ifnet *ifp)
449{
450	unsigned socksize, ifasize;
451	int namelen, masklen;
452	struct sockaddr_dl *sdl;
453	struct ifaddr *ifa;
454
455	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
456		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
457		    ifp->if_xname);
458
459	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
460	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
461	IF_AFDATA_LOCK_INIT(ifp);
462	ifp->if_afdata_initialized = 0;
463	IFNET_WLOCK();
464	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
465	IFNET_WUNLOCK();
466	/*
467	 * XXX -
468	 * The old code would work if the interface passed a pre-existing
469	 * chain of ifaddrs to this code.  We don't trust our callers to
470	 * properly initialize the tailq, however, so we no longer allow
471	 * this unlikely case.
472	 */
473	TAILQ_INIT(&ifp->if_addrhead);
474	TAILQ_INIT(&ifp->if_prefixhead);
475	TAILQ_INIT(&ifp->if_multiaddrs);
476	knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
477	getmicrotime(&ifp->if_lastchange);
478	ifp->if_data.ifi_epoch = time_uptime;
479	ifp->if_data.ifi_datalen = sizeof(struct if_data);
480
481#ifdef MAC
482	mac_init_ifnet(ifp);
483	mac_create_ifnet(ifp);
484#endif
485
486	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
487	    unit2minor(ifp->if_index),
488	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
489	    net_cdevsw.d_name, ifp->if_xname);
490	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
491	    net_cdevsw.d_name, ifp->if_index);
492
493	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
494
495	/*
496	 * create a Link Level name for this device
497	 */
498	namelen = strlen(ifp->if_xname);
499	/*
500	 * Always save enough space for any possiable name so we can do
501	 * a rename in place later.
502	 */
503	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
504	socksize = masklen + ifp->if_addrlen;
505	if (socksize < sizeof(*sdl))
506		socksize = sizeof(*sdl);
507	socksize = roundup2(socksize, sizeof(long));
508	ifasize = sizeof(*ifa) + 2 * socksize;
509	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
510	IFA_LOCK_INIT(ifa);
511	sdl = (struct sockaddr_dl *)(ifa + 1);
512	sdl->sdl_len = socksize;
513	sdl->sdl_family = AF_LINK;
514	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
515	sdl->sdl_nlen = namelen;
516	sdl->sdl_index = ifp->if_index;
517	sdl->sdl_type = ifp->if_type;
518	ifaddr_byindex(ifp->if_index) = ifa;
519	ifa->ifa_ifp = ifp;
520	ifa->ifa_rtrequest = link_rtrequest;
521	ifa->ifa_addr = (struct sockaddr *)sdl;
522	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
523	ifa->ifa_netmask = (struct sockaddr *)sdl;
524	sdl->sdl_len = masklen;
525	while (namelen != 0)
526		sdl->sdl_data[--namelen] = 0xff;
527	ifa->ifa_refcnt = 1;
528	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
529	ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
530	ifp->if_snd.altq_type = 0;
531	ifp->if_snd.altq_disc = NULL;
532	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
533	ifp->if_snd.altq_tbr  = NULL;
534	ifp->if_snd.altq_ifp  = ifp;
535
536	if (domain_init_status >= 2)
537		if_attachdomain1(ifp);
538
539	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
540
541	/* Announce the interface. */
542	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
543}
544
545static void
546if_attachdomain(void *dummy)
547{
548	struct ifnet *ifp;
549	int s;
550
551	s = splnet();
552	TAILQ_FOREACH(ifp, &ifnet, if_link)
553		if_attachdomain1(ifp);
554	splx(s);
555}
556SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
557    if_attachdomain, NULL);
558
559static void
560if_attachdomain1(struct ifnet *ifp)
561{
562	struct domain *dp;
563	int s;
564
565	s = splnet();
566
567	/*
568	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
569	 * cannot lock ifp->if_afdata initialization, entirely.
570	 */
571	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
572		splx(s);
573		return;
574	}
575	if (ifp->if_afdata_initialized >= domain_init_status) {
576		IF_AFDATA_UNLOCK(ifp);
577		splx(s);
578		printf("if_attachdomain called more than once on %s\n",
579		    ifp->if_xname);
580		return;
581	}
582	ifp->if_afdata_initialized = domain_init_status;
583	IF_AFDATA_UNLOCK(ifp);
584
585	/* address family dependent data region */
586	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
587	for (dp = domains; dp; dp = dp->dom_next) {
588		if (dp->dom_ifattach)
589			ifp->if_afdata[dp->dom_family] =
590			    (*dp->dom_ifattach)(ifp);
591	}
592
593	splx(s);
594}
595
596/*
597 * Remove any network addresses from an interface.
598 */
599
600void
601if_purgeaddrs(struct ifnet *ifp)
602{
603	struct ifaddr *ifa, *next;
604
605	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
606
607		if (ifa->ifa_addr->sa_family == AF_LINK)
608			continue;
609#ifdef INET
610		/* XXX: Ugly!! ad hoc just for INET */
611		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
612			struct ifaliasreq ifr;
613
614			bzero(&ifr, sizeof(ifr));
615			ifr.ifra_addr = *ifa->ifa_addr;
616			if (ifa->ifa_dstaddr)
617				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
618			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
619			    NULL) == 0)
620				continue;
621		}
622#endif /* INET */
623#ifdef INET6
624		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
625			in6_purgeaddr(ifa);
626			/* ifp_addrhead is already updated */
627			continue;
628		}
629#endif /* INET6 */
630		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
631		IFAFREE(ifa);
632	}
633}
634
635/*
636 * Detach an interface, removing it from the
637 * list of "active" interfaces and freeing the struct ifnet.
638 */
639void
640if_detach(struct ifnet *ifp)
641{
642	struct ifaddr *ifa;
643	struct radix_node_head	*rnh;
644	int s;
645	int i;
646	struct domain *dp;
647 	struct ifnet *iter;
648 	int found;
649
650	/*
651	 * Remove/wait for pending events.
652	 */
653	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
654
655	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
656#ifdef DEV_CARP
657	/* Maybe hook to the generalized departure handler above?!? */
658	if (ifp->if_carp)
659		carp_ifdetach(ifp);
660#endif
661
662	/*
663	 * Remove routes and flush queues.
664	 */
665	s = splnet();
666	if_down(ifp);
667#ifdef ALTQ
668	if (ALTQ_IS_ENABLED(&ifp->if_snd))
669		altq_disable(&ifp->if_snd);
670	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
671		altq_detach(&ifp->if_snd);
672#endif
673
674	if_purgeaddrs(ifp);
675
676#ifdef INET6
677	/*
678	 * Remove all IPv6 kernel structs related to ifp.  This should be done
679	 * before removing routing entries below, since IPv6 interface direct
680	 * routes are expected to be removed by the IPv6-specific kernel API.
681	 * Otherwise, the kernel will detect some inconsistency and bark it.
682	 */
683	in6_ifdetach(ifp);
684#endif
685	/*
686	 * Remove address from ifindex_table[] and maybe decrement if_index.
687	 * Clean up all addresses.
688	 */
689	ifaddr_byindex(ifp->if_index) = NULL;
690	destroy_dev(ifdev_byindex(ifp->if_index));
691	ifdev_byindex(ifp->if_index) = NULL;
692
693	/* We can now free link ifaddr. */
694	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
695		ifa = TAILQ_FIRST(&ifp->if_addrhead);
696		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
697		IFAFREE(ifa);
698	}
699
700	/*
701	 * Delete all remaining routes using this interface
702	 * Unfortuneatly the only way to do this is to slog through
703	 * the entire routing table looking for routes which point
704	 * to this interface...oh well...
705	 */
706	for (i = 1; i <= AF_MAX; i++) {
707		if ((rnh = rt_tables[i]) == NULL)
708			continue;
709		RADIX_NODE_HEAD_LOCK(rnh);
710		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
711		RADIX_NODE_HEAD_UNLOCK(rnh);
712	}
713
714	/* Announce that the interface is gone. */
715	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
716
717	IF_AFDATA_LOCK(ifp);
718	for (dp = domains; dp; dp = dp->dom_next) {
719		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
720			(*dp->dom_ifdetach)(ifp,
721			    ifp->if_afdata[dp->dom_family]);
722	}
723	IF_AFDATA_UNLOCK(ifp);
724
725#ifdef MAC
726	mac_destroy_ifnet(ifp);
727#endif /* MAC */
728	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
729	knlist_clear(&ifp->if_klist, 0);
730	knlist_destroy(&ifp->if_klist);
731	IFNET_WLOCK();
732 	found = 0;
733 	TAILQ_FOREACH(iter, &ifnet, if_link)
734 		if (iter == ifp) {
735 			found = 1;
736 			break;
737 		}
738 	if (found)
739 		TAILQ_REMOVE(&ifnet, ifp, if_link);
740	IFNET_WUNLOCK();
741	mtx_destroy(&ifp->if_snd.ifq_mtx);
742	IF_AFDATA_DESTROY(ifp);
743	splx(s);
744}
745
746/*
747 * Delete Routes for a Network Interface
748 *
749 * Called for each routing entry via the rnh->rnh_walktree() call above
750 * to delete all route entries referencing a detaching network interface.
751 *
752 * Arguments:
753 *	rn	pointer to node in the routing table
754 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
755 *
756 * Returns:
757 *	0	successful
758 *	errno	failed - reason indicated
759 *
760 */
761static int
762if_rtdel(struct radix_node *rn, void *arg)
763{
764	struct rtentry	*rt = (struct rtentry *)rn;
765	struct ifnet	*ifp = arg;
766	int		err;
767
768	if (rt->rt_ifp == ifp) {
769
770		/*
771		 * Protect (sorta) against walktree recursion problems
772		 * with cloned routes
773		 */
774		if ((rt->rt_flags & RTF_UP) == 0)
775			return (0);
776
777		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
778				rt_mask(rt), rt->rt_flags,
779				(struct rtentry **) NULL);
780		if (err) {
781			log(LOG_WARNING, "if_rtdel: error %d\n", err);
782		}
783	}
784
785	return (0);
786}
787
/*
 * True when the first a1->sa_len bytes of sockaddrs a1 and a2 are the
 * same.  Note that a1 is evaluated twice.
 */
#define	equal(a1, a2)	(bcmp((a1), (a2), (a1)->sa_len) == 0)
789
790/*
791 * Locate an interface based on a complete address.
792 */
793/*ARGSUSED*/
794struct ifaddr *
795ifa_ifwithaddr(struct sockaddr *addr)
796{
797	struct ifnet *ifp;
798	struct ifaddr *ifa;
799
800	IFNET_RLOCK();
801	TAILQ_FOREACH(ifp, &ifnet, if_link)
802		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
803			if (ifa->ifa_addr->sa_family != addr->sa_family)
804				continue;
805			if (equal(addr, ifa->ifa_addr))
806				goto done;
807			/* IP6 doesn't have broadcast */
808			if ((ifp->if_flags & IFF_BROADCAST) &&
809			    ifa->ifa_broadaddr &&
810			    ifa->ifa_broadaddr->sa_len != 0 &&
811			    equal(ifa->ifa_broadaddr, addr))
812				goto done;
813		}
814	ifa = NULL;
815done:
816	IFNET_RUNLOCK();
817	return (ifa);
818}
819
820/*
821 * Locate the point to point interface with a given destination address.
822 */
823/*ARGSUSED*/
824struct ifaddr *
825ifa_ifwithdstaddr(struct sockaddr *addr)
826{
827	struct ifnet *ifp;
828	struct ifaddr *ifa;
829
830	IFNET_RLOCK();
831	TAILQ_FOREACH(ifp, &ifnet, if_link) {
832		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
833			continue;
834		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
835			if (ifa->ifa_addr->sa_family != addr->sa_family)
836				continue;
837			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
838				goto done;
839		}
840	}
841	ifa = NULL;
842done:
843	IFNET_RUNLOCK();
844	return (ifa);
845}
846
847/*
848 * Find an interface on a specific network.  If many, choice
849 * is most specific found.
850 */
851struct ifaddr *
852ifa_ifwithnet(struct sockaddr *addr)
853{
854	struct ifnet *ifp;
855	struct ifaddr *ifa;
856	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
857	u_int af = addr->sa_family;
858	char *addr_data = addr->sa_data, *cplim;
859
860	/*
861	 * AF_LINK addresses can be looked up directly by their index number,
862	 * so do that if we can.
863	 */
864	if (af == AF_LINK) {
865	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
866	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
867		return (ifaddr_byindex(sdl->sdl_index));
868	}
869
870	/*
871	 * Scan though each interface, looking for ones that have
872	 * addresses in this address family.
873	 */
874	IFNET_RLOCK();
875	TAILQ_FOREACH(ifp, &ifnet, if_link) {
876		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
877			char *cp, *cp2, *cp3;
878
879			if (ifa->ifa_addr->sa_family != af)
880next:				continue;
881			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
882				/*
883				 * This is a bit broken as it doesn't
884				 * take into account that the remote end may
885				 * be a single node in the network we are
886				 * looking for.
887				 * The trouble is that we don't know the
888				 * netmask for the remote end.
889				 */
890				if (ifa->ifa_dstaddr != 0
891				    && equal(addr, ifa->ifa_dstaddr))
892					goto done;
893			} else {
894				/*
895				 * if we have a special address handler,
896				 * then use it instead of the generic one.
897				 */
898				if (ifa->ifa_claim_addr) {
899					if ((*ifa->ifa_claim_addr)(ifa, addr))
900						goto done;
901					continue;
902				}
903
904				/*
905				 * Scan all the bits in the ifa's address.
906				 * If a bit dissagrees with what we are
907				 * looking for, mask it with the netmask
908				 * to see if it really matters.
909				 * (A byte at a time)
910				 */
911				if (ifa->ifa_netmask == 0)
912					continue;
913				cp = addr_data;
914				cp2 = ifa->ifa_addr->sa_data;
915				cp3 = ifa->ifa_netmask->sa_data;
916				cplim = ifa->ifa_netmask->sa_len
917					+ (char *)ifa->ifa_netmask;
918				while (cp3 < cplim)
919					if ((*cp++ ^ *cp2++) & *cp3++)
920						goto next; /* next address! */
921				/*
922				 * If the netmask of what we just found
923				 * is more specific than what we had before
924				 * (if we had one) then remember the new one
925				 * before continuing to search
926				 * for an even better one.
927				 */
928				if (ifa_maybe == 0 ||
929				    rn_refines((caddr_t)ifa->ifa_netmask,
930				    (caddr_t)ifa_maybe->ifa_netmask))
931					ifa_maybe = ifa;
932			}
933		}
934	}
935	ifa = ifa_maybe;
936done:
937	IFNET_RUNLOCK();
938	return (ifa);
939}
940
941/*
942 * Find an interface address specific to an interface best matching
943 * a given address.
944 */
945struct ifaddr *
946ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
947{
948	struct ifaddr *ifa;
949	char *cp, *cp2, *cp3;
950	char *cplim;
951	struct ifaddr *ifa_maybe = 0;
952	u_int af = addr->sa_family;
953
954	if (af >= AF_MAX)
955		return (0);
956	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
957		if (ifa->ifa_addr->sa_family != af)
958			continue;
959		if (ifa_maybe == 0)
960			ifa_maybe = ifa;
961		if (ifa->ifa_netmask == 0) {
962			if (equal(addr, ifa->ifa_addr) ||
963			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
964				goto done;
965			continue;
966		}
967		if (ifp->if_flags & IFF_POINTOPOINT) {
968			if (equal(addr, ifa->ifa_dstaddr))
969				goto done;
970		} else {
971			cp = addr->sa_data;
972			cp2 = ifa->ifa_addr->sa_data;
973			cp3 = ifa->ifa_netmask->sa_data;
974			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
975			for (; cp3 < cplim; cp3++)
976				if ((*cp++ ^ *cp2++) & *cp3)
977					break;
978			if (cp3 == cplim)
979				goto done;
980		}
981	}
982	ifa = ifa_maybe;
983done:
984	return (ifa);
985}
986
987#include <net/route.h>
988
989/*
990 * Default action when installing a route with a Link Level gateway.
991 * Lookup an appropriate real ifa to point to.
992 * This should be moved to /sys/net/link.c eventually.
993 */
994static void
995link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
996{
997	struct ifaddr *ifa, *oifa;
998	struct sockaddr *dst;
999	struct ifnet *ifp;
1000
1001	RT_LOCK_ASSERT(rt);
1002
1003	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1004	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1005		return;
1006	ifa = ifaof_ifpforaddr(dst, ifp);
1007	if (ifa) {
1008		IFAREF(ifa);		/* XXX */
1009		oifa = rt->rt_ifa;
1010		rt->rt_ifa = ifa;
1011		IFAFREE(oifa);
1012		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1013			ifa->ifa_rtrequest(cmd, rt, info);
1014	}
1015}
1016
1017/*
1018 * Mark an interface down and notify protocols of
1019 * the transition.
1020 * NOTE: must be called at splnet or eqivalent.
1021 */
1022static void
1023if_unroute(struct ifnet *ifp, int flag, int fam)
1024{
1025	struct ifaddr *ifa;
1026
1027	ifp->if_flags &= ~flag;
1028	getmicrotime(&ifp->if_lastchange);
1029	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1030		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1031			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1032	if_qflush(&ifp->if_snd);
1033#ifdef DEV_CARP
1034	if (ifp->if_carp)
1035		carp_carpdev_state(ifp->if_carp);
1036#endif
1037	rt_ifmsg(ifp);
1038}
1039
1040/*
1041 * Mark an interface up and notify protocols of
1042 * the transition.
1043 * NOTE: must be called at splnet or eqivalent.
1044 */
1045static void
1046if_route(struct ifnet *ifp, int flag, int fam)
1047{
1048	struct ifaddr *ifa;
1049
1050	ifp->if_flags |= flag;
1051	getmicrotime(&ifp->if_lastchange);
1052	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1053		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1054			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1055#ifdef DEV_CARP
1056	if (ifp->if_carp)
1057		carp_carpdev_state(ifp->if_carp);
1058#endif
1059	rt_ifmsg(ifp);
1060#ifdef INET6
1061	in6_if_up(ifp);
1062#endif
1063}
1064
/*
 * XXX: private from if_vlan.  Called from do_link_state_change() when
 * the interface has a non-zero if_nvlans.
 */
void	(*vlan_link_state_p)(struct ifnet *, int);
1066
1067/*
1068 * Handle a change in the interface link state. To avoid LORs
1069 * between driver lock and upper layer locks, as well as possible
1070 * recursions, we post event to taskqueue, and all job
1071 * is done in static do_link_state_change().
1072 */
1073void
1074if_link_state_change(struct ifnet *ifp, int link_state)
1075{
1076	/* Return if state hasn't changed. */
1077	if (ifp->if_link_state == link_state)
1078		return;
1079
1080	ifp->if_link_state = link_state;
1081
1082	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
1083}
1084
1085static void
1086do_link_state_change(void *arg, int pending)
1087{
1088	struct ifnet *ifp = (struct ifnet *)arg;
1089	int link_state = ifp->if_link_state;
1090	int link;
1091
1092	/* Notify that the link state has changed. */
1093	rt_ifmsg(ifp);
1094	if (link_state == LINK_STATE_UP)
1095		link = NOTE_LINKUP;
1096	else if (link_state == LINK_STATE_DOWN)
1097		link = NOTE_LINKDOWN;
1098	else
1099		link = NOTE_LINKINV;
1100	KNOTE_UNLOCKED(&ifp->if_klist, link);
1101	if (ifp->if_nvlans != 0)
1102		(*vlan_link_state_p)(ifp, link);
1103
1104	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
1105	    IFP2AC(ifp)->ac_netgraph != NULL)
1106		(*ng_ether_link_state_p)(ifp, link_state);
1107#ifdef DEV_CARP
1108	if (ifp->if_carp)
1109		carp_carpdev_state(ifp->if_carp);
1110#endif
1111	if (ifp->if_bridge) {
1112		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
1113		(*bstp_linkstate_p)(ifp, link_state);
1114	}
1115
1116	devctl_notify("IFNET", ifp->if_xname,
1117	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1118	if (pending > 1)
1119		if_printf(ifp, "%d link states coalesced\n", pending);
1120	if (log_link_state_change)
1121		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
1122		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
1123}
1124
1125/*
1126 * Mark an interface down and notify protocols of
1127 * the transition.
1128 * NOTE: must be called at splnet or eqivalent.
1129 */
1130void
1131if_down(struct ifnet *ifp)
1132{
1133
1134	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1135}
1136
1137/*
1138 * Mark an interface up and notify protocols of
1139 * the transition.
1140 * NOTE: must be called at splnet or eqivalent.
1141 */
1142void
1143if_up(struct ifnet *ifp)
1144{
1145
1146	if_route(ifp, IFF_UP, AF_UNSPEC);
1147}
1148
1149/*
1150 * Flush an interface queue.
1151 */
1152static void
1153if_qflush(struct ifaltq *ifq)
1154{
1155	struct mbuf *m, *n;
1156
1157	IFQ_LOCK(ifq);
1158#ifdef ALTQ
1159	if (ALTQ_IS_ENABLED(ifq))
1160		ALTQ_PURGE(ifq);
1161#endif
1162	n = ifq->ifq_head;
1163	while ((m = n) != 0) {
1164		n = m->m_act;
1165		m_freem(m);
1166	}
1167	ifq->ifq_head = 0;
1168	ifq->ifq_tail = 0;
1169	ifq->ifq_len = 0;
1170	IFQ_UNLOCK(ifq);
1171}
1172
1173/*
1174 * Handle interface watchdog timer routines.  Called
1175 * from softclock, we decrement timers (if set) and
1176 * call the appropriate interface routine on expiration.
1177 *
1178 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1179 * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1180 * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1181 */
1182static void
1183if_slowtimo(void *arg)
1184{
1185	struct ifnet *ifp;
1186	int s = splimp();
1187
1188	IFNET_RLOCK();
1189	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1190		if (ifp->if_timer == 0 || --ifp->if_timer)
1191			continue;
1192		if (ifp->if_watchdog)
1193			(*ifp->if_watchdog)(ifp);
1194	}
1195	IFNET_RUNLOCK();
1196	splx(s);
1197	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1198}
1199
1200/*
1201 * Map interface name to
1202 * interface structure pointer.
1203 */
1204struct ifnet *
1205ifunit(const char *name)
1206{
1207	struct ifnet *ifp;
1208
1209	IFNET_RLOCK();
1210	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1211		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1212			break;
1213	}
1214	IFNET_RUNLOCK();
1215	return (ifp);
1216}
1217
1218/*
1219 * Hardware specific interface ioctls.
1220 */
1221static int
1222ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1223{
1224	struct ifreq *ifr;
1225	struct ifstat *ifs;
1226	int error = 0;
1227	int new_flags;
1228	size_t namelen, onamelen;
1229	char new_name[IFNAMSIZ];
1230	struct ifaddr *ifa;
1231	struct sockaddr_dl *sdl;
1232
1233	ifr = (struct ifreq *)data;
1234	switch (cmd) {
1235	case SIOCGIFINDEX:
1236		ifr->ifr_index = ifp->if_index;
1237		break;
1238
1239	case SIOCGIFFLAGS:
1240		ifr->ifr_flags = ifp->if_flags & 0xffff;
1241		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1242		break;
1243
1244	case SIOCGIFCAP:
1245		ifr->ifr_reqcap = ifp->if_capabilities;
1246		ifr->ifr_curcap = ifp->if_capenable;
1247		break;
1248
1249#ifdef MAC
1250	case SIOCGIFMAC:
1251		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1252		break;
1253#endif
1254
1255	case SIOCGIFMETRIC:
1256		ifr->ifr_metric = ifp->if_metric;
1257		break;
1258
1259	case SIOCGIFMTU:
1260		ifr->ifr_mtu = ifp->if_mtu;
1261		break;
1262
1263	case SIOCGIFPHYS:
1264		ifr->ifr_phys = ifp->if_physical;
1265		break;
1266
1267	case SIOCSIFFLAGS:
1268		error = suser(td);
1269		if (error)
1270			return (error);
1271		new_flags = (ifr->ifr_flags & 0xffff) |
1272		    (ifr->ifr_flagshigh << 16);
1273		if (ifp->if_flags & IFF_SMART) {
1274			/* Smart drivers twiddle their own routes */
1275		} else if (ifp->if_flags & IFF_UP &&
1276		    (new_flags & IFF_UP) == 0) {
1277			int s = splimp();
1278			if_down(ifp);
1279			splx(s);
1280		} else if (new_flags & IFF_UP &&
1281		    (ifp->if_flags & IFF_UP) == 0) {
1282			int s = splimp();
1283			if_up(ifp);
1284			splx(s);
1285		}
1286		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1287			(new_flags &~ IFF_CANTCHANGE);
1288		if (new_flags & IFF_PPROMISC) {
1289			/* Permanently promiscuous mode requested */
1290			ifp->if_flags |= IFF_PROMISC;
1291		} else if (ifp->if_pcount == 0) {
1292			ifp->if_flags &= ~IFF_PROMISC;
1293		}
1294		if (ifp->if_ioctl) {
1295			IFF_LOCKGIANT(ifp);
1296			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1297			IFF_UNLOCKGIANT(ifp);
1298		}
1299		getmicrotime(&ifp->if_lastchange);
1300		break;
1301
1302	case SIOCSIFCAP:
1303		error = suser(td);
1304		if (error)
1305			return (error);
1306		if (ifp->if_ioctl == NULL)
1307			return (EOPNOTSUPP);
1308		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1309			return (EINVAL);
1310		IFF_LOCKGIANT(ifp);
1311		error = (*ifp->if_ioctl)(ifp, cmd, data);
1312		IFF_UNLOCKGIANT(ifp);
1313		if (error == 0)
1314			getmicrotime(&ifp->if_lastchange);
1315		break;
1316
1317#ifdef MAC
1318	case SIOCSIFMAC:
1319		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1320		break;
1321#endif
1322
1323	case SIOCSIFNAME:
1324		error = suser(td);
1325		if (error != 0)
1326			return (error);
1327		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1328		if (error != 0)
1329			return (error);
1330		if (new_name[0] == '\0')
1331			return (EINVAL);
1332		if (ifunit(new_name) != NULL)
1333			return (EEXIST);
1334
1335		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1336		/* Announce the departure of the interface. */
1337		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1338
1339		log(LOG_INFO, "%s: changing name to '%s'\n",
1340		    ifp->if_xname, new_name);
1341
1342		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1343		ifa = ifaddr_byindex(ifp->if_index);
1344		IFA_LOCK(ifa);
1345		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1346		namelen = strlen(new_name);
1347		onamelen = sdl->sdl_nlen;
1348		/*
1349		 * Move the address if needed.  This is safe because we
1350		 * allocate space for a name of length IFNAMSIZ when we
1351		 * create this in if_attach().
1352		 */
1353		if (namelen != onamelen) {
1354			bcopy(sdl->sdl_data + onamelen,
1355			    sdl->sdl_data + namelen, sdl->sdl_alen);
1356		}
1357		bcopy(new_name, sdl->sdl_data, namelen);
1358		sdl->sdl_nlen = namelen;
1359		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1360		bzero(sdl->sdl_data, onamelen);
1361		while (namelen != 0)
1362			sdl->sdl_data[--namelen] = 0xff;
1363		IFA_UNLOCK(ifa);
1364
1365		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1366		/* Announce the return of the interface. */
1367		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1368		break;
1369
1370	case SIOCSIFMETRIC:
1371		error = suser(td);
1372		if (error)
1373			return (error);
1374		ifp->if_metric = ifr->ifr_metric;
1375		getmicrotime(&ifp->if_lastchange);
1376		break;
1377
1378	case SIOCSIFPHYS:
1379		error = suser(td);
1380		if (error)
1381			return (error);
1382		if (ifp->if_ioctl == NULL)
1383			return (EOPNOTSUPP);
1384		IFF_LOCKGIANT(ifp);
1385		error = (*ifp->if_ioctl)(ifp, cmd, data);
1386		IFF_UNLOCKGIANT(ifp);
1387		if (error == 0)
1388			getmicrotime(&ifp->if_lastchange);
1389		break;
1390
1391	case SIOCSIFMTU:
1392	{
1393		u_long oldmtu = ifp->if_mtu;
1394
1395		error = suser(td);
1396		if (error)
1397			return (error);
1398		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1399			return (EINVAL);
1400		if (ifp->if_ioctl == NULL)
1401			return (EOPNOTSUPP);
1402		IFF_LOCKGIANT(ifp);
1403		error = (*ifp->if_ioctl)(ifp, cmd, data);
1404		IFF_UNLOCKGIANT(ifp);
1405		if (error == 0) {
1406			getmicrotime(&ifp->if_lastchange);
1407			rt_ifmsg(ifp);
1408		}
1409		/*
1410		 * If the link MTU changed, do network layer specific procedure.
1411		 */
1412		if (ifp->if_mtu != oldmtu) {
1413#ifdef INET6
1414			nd6_setmtu(ifp);
1415#endif
1416		}
1417		break;
1418	}
1419
1420	case SIOCADDMULTI:
1421	case SIOCDELMULTI:
1422		error = suser(td);
1423		if (error)
1424			return (error);
1425
1426		/* Don't allow group membership on non-multicast interfaces. */
1427		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1428			return (EOPNOTSUPP);
1429
1430		/* Don't let users screw up protocols' entries. */
1431		if (ifr->ifr_addr.sa_family != AF_LINK)
1432			return (EINVAL);
1433
1434		if (cmd == SIOCADDMULTI) {
1435			struct ifmultiaddr *ifma;
1436			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1437		} else {
1438			error = if_delmulti(ifp, &ifr->ifr_addr);
1439		}
1440		if (error == 0)
1441			getmicrotime(&ifp->if_lastchange);
1442		break;
1443
1444	case SIOCSIFPHYADDR:
1445	case SIOCDIFPHYADDR:
1446#ifdef INET6
1447	case SIOCSIFPHYADDR_IN6:
1448#endif
1449	case SIOCSLIFPHYADDR:
1450	case SIOCSIFMEDIA:
1451	case SIOCSIFGENERIC:
1452		error = suser(td);
1453		if (error)
1454			return (error);
1455		if (ifp->if_ioctl == NULL)
1456			return (EOPNOTSUPP);
1457		IFF_LOCKGIANT(ifp);
1458		error = (*ifp->if_ioctl)(ifp, cmd, data);
1459		IFF_UNLOCKGIANT(ifp);
1460		if (error == 0)
1461			getmicrotime(&ifp->if_lastchange);
1462		break;
1463
1464	case SIOCGIFSTATUS:
1465		ifs = (struct ifstat *)data;
1466		ifs->ascii[0] = '\0';
1467
1468	case SIOCGIFPSRCADDR:
1469	case SIOCGIFPDSTADDR:
1470	case SIOCGLIFPHYADDR:
1471	case SIOCGIFMEDIA:
1472	case SIOCGIFGENERIC:
1473		if (ifp->if_ioctl == NULL)
1474			return (EOPNOTSUPP);
1475		IFF_LOCKGIANT(ifp);
1476		error = (*ifp->if_ioctl)(ifp, cmd, data);
1477		IFF_UNLOCKGIANT(ifp);
1478		break;
1479
1480	case SIOCSIFLLADDR:
1481		error = suser(td);
1482		if (error)
1483			return (error);
1484		error = if_setlladdr(ifp,
1485		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1486		break;
1487
1488	default:
1489		error = ENOIOCTL;
1490		break;
1491	}
1492	return (error);
1493}
1494
1495/*
1496 * Interface ioctls.
1497 */
1498int
1499ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1500{
1501	struct ifnet *ifp;
1502	struct ifreq *ifr;
1503	int error;
1504	int oif_flags;
1505
1506	switch (cmd) {
1507	case SIOCGIFCONF:
1508	case OSIOCGIFCONF:
1509		return (ifconf(cmd, data));
1510	}
1511	ifr = (struct ifreq *)data;
1512
1513	switch (cmd) {
1514	case SIOCIFCREATE:
1515	case SIOCIFDESTROY:
1516		if ((error = suser(td)) != 0)
1517			return (error);
1518		return ((cmd == SIOCIFCREATE) ?
1519			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1520			if_clone_destroy(ifr->ifr_name));
1521
1522	case SIOCIFGCLONERS:
1523		return (if_clone_list((struct if_clonereq *)data));
1524	}
1525
1526	ifp = ifunit(ifr->ifr_name);
1527	if (ifp == 0)
1528		return (ENXIO);
1529
1530	error = ifhwioctl(cmd, ifp, data, td);
1531	if (error != ENOIOCTL)
1532		return (error);
1533
1534	oif_flags = ifp->if_flags;
1535	if (so->so_proto == 0)
1536		return (EOPNOTSUPP);
1537#ifndef COMPAT_43
1538	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1539								 data,
1540								 ifp, td));
1541#else
1542	{
1543		int ocmd = cmd;
1544
1545		switch (cmd) {
1546
1547		case SIOCSIFDSTADDR:
1548		case SIOCSIFADDR:
1549		case SIOCSIFBRDADDR:
1550		case SIOCSIFNETMASK:
1551#if BYTE_ORDER != BIG_ENDIAN
1552			if (ifr->ifr_addr.sa_family == 0 &&
1553			    ifr->ifr_addr.sa_len < 16) {
1554				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1555				ifr->ifr_addr.sa_len = 16;
1556			}
1557#else
1558			if (ifr->ifr_addr.sa_len == 0)
1559				ifr->ifr_addr.sa_len = 16;
1560#endif
1561			break;
1562
1563		case OSIOCGIFADDR:
1564			cmd = SIOCGIFADDR;
1565			break;
1566
1567		case OSIOCGIFDSTADDR:
1568			cmd = SIOCGIFDSTADDR;
1569			break;
1570
1571		case OSIOCGIFBRDADDR:
1572			cmd = SIOCGIFBRDADDR;
1573			break;
1574
1575		case OSIOCGIFNETMASK:
1576			cmd = SIOCGIFNETMASK;
1577		}
1578		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1579								   cmd,
1580								   data,
1581								   ifp, td));
1582		switch (ocmd) {
1583
1584		case OSIOCGIFADDR:
1585		case OSIOCGIFDSTADDR:
1586		case OSIOCGIFBRDADDR:
1587		case OSIOCGIFNETMASK:
1588			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1589
1590		}
1591	}
1592#endif /* COMPAT_43 */
1593
1594	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1595#ifdef INET6
1596		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1597		if (ifp->if_flags & IFF_UP) {
1598			int s = splimp();
1599			in6_if_up(ifp);
1600			splx(s);
1601		}
1602#endif
1603	}
1604	return (error);
1605}
1606
1607/*
1608 * Set/clear promiscuous mode on interface ifp based on the truth value
1609 * of pswitch.  The calls are reference counted so that only the first
1610 * "on" request actually has an effect, as does the final "off" request.
1611 * Results are undefined if the "off" and "on" requests are not matched.
1612 */
1613int
1614ifpromisc(struct ifnet *ifp, int pswitch)
1615{
1616	struct ifreq ifr;
1617	int error;
1618	int oldflags, oldpcount;
1619
1620	oldpcount = ifp->if_pcount;
1621	oldflags = ifp->if_flags;
1622	if (ifp->if_flags & IFF_PPROMISC) {
1623		/* Do nothing if device is in permanently promiscuous mode */
1624		ifp->if_pcount += pswitch ? 1 : -1;
1625		return (0);
1626	}
1627	if (pswitch) {
1628		/*
1629		 * If the device is not configured up, we cannot put it in
1630		 * promiscuous mode.
1631		 */
1632		if ((ifp->if_flags & IFF_UP) == 0)
1633			return (ENETDOWN);
1634		if (ifp->if_pcount++ != 0)
1635			return (0);
1636		ifp->if_flags |= IFF_PROMISC;
1637	} else {
1638		if (--ifp->if_pcount > 0)
1639			return (0);
1640		ifp->if_flags &= ~IFF_PROMISC;
1641	}
1642	ifr.ifr_flags = ifp->if_flags & 0xffff;
1643	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1644	IFF_LOCKGIANT(ifp);
1645	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1646	IFF_UNLOCKGIANT(ifp);
1647	if (error == 0) {
1648		log(LOG_INFO, "%s: promiscuous mode %s\n",
1649		    ifp->if_xname,
1650		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1651		rt_ifmsg(ifp);
1652	} else {
1653		ifp->if_pcount = oldpcount;
1654		ifp->if_flags = oldflags;
1655	}
1656	return error;
1657}
1658
1659/*
1660 * Return interface configuration
1661 * of system.  List may be used
1662 * in later ioctl's (above) to get
1663 * other information.
1664 */
1665/*ARGSUSED*/
1666static int
1667ifconf(u_long cmd, caddr_t data)
1668{
1669	struct ifconf *ifc = (struct ifconf *)data;
1670	struct ifnet *ifp;
1671	struct ifaddr *ifa;
1672	struct ifreq ifr;
1673	struct sbuf *sb;
1674	int error, full = 0, valid_len, max_len;
1675
1676	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
1677	max_len = MAXPHYS - 1;
1678
1679	/* Prevent hostile input from being able to crash the system */
1680	if (ifc->ifc_len <= 0)
1681		return (EINVAL);
1682
1683again:
1684	if (ifc->ifc_len <= max_len) {
1685		max_len = ifc->ifc_len;
1686		full = 1;
1687	}
1688	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
1689	max_len = 0;
1690	valid_len = 0;
1691
1692	IFNET_RLOCK();		/* could sleep XXX */
1693	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1694		int addrs;
1695
1696		/*
1697		 * Zero the ifr_name buffer to make sure we don't
1698		 * disclose the contents of the stack.
1699		 */
1700		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
1701
1702		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1703		    >= sizeof(ifr.ifr_name))
1704			return (ENAMETOOLONG);
1705
1706		addrs = 0;
1707		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1708			struct sockaddr *sa = ifa->ifa_addr;
1709
1710			if (jailed(curthread->td_ucred) &&
1711			    prison_if(curthread->td_ucred, sa))
1712				continue;
1713			addrs++;
1714#ifdef COMPAT_43
1715			if (cmd == OSIOCGIFCONF) {
1716				struct osockaddr *osa =
1717					 (struct osockaddr *)&ifr.ifr_addr;
1718				ifr.ifr_addr = *sa;
1719				osa->sa_family = sa->sa_family;
1720				sbuf_bcat(sb, &ifr, sizeof(ifr));
1721				max_len += sizeof(ifr);
1722			} else
1723#endif
1724			if (sa->sa_len <= sizeof(*sa)) {
1725				ifr.ifr_addr = *sa;
1726				sbuf_bcat(sb, &ifr, sizeof(ifr));
1727				max_len += sizeof(ifr);
1728			} else {
1729				sbuf_bcat(sb, &ifr,
1730				    offsetof(struct ifreq, ifr_addr));
1731				max_len += offsetof(struct ifreq, ifr_addr);
1732				sbuf_bcat(sb, sa, sa->sa_len);
1733				max_len += sa->sa_len;
1734			}
1735
1736			if (!sbuf_overflowed(sb))
1737				valid_len = sbuf_len(sb);
1738		}
1739		if (addrs == 0) {
1740			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1741			sbuf_bcat(sb, &ifr, sizeof(ifr));
1742			max_len += sizeof(ifr);
1743
1744			if (!sbuf_overflowed(sb))
1745				valid_len = sbuf_len(sb);
1746		}
1747	}
1748	IFNET_RUNLOCK();
1749
1750	/*
1751	 * If we didn't allocate enough space (uncommon), try again.  If
1752	 * we have already allocated as much space as we are allowed,
1753	 * return what we've got.
1754	 */
1755	if (valid_len != max_len && !full) {
1756		sbuf_delete(sb);
1757		goto again;
1758	}
1759
1760	ifc->ifc_len = valid_len;
1761	sbuf_finish(sb);
1762	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
1763	sbuf_delete(sb);
1764	return (error);
1765}
1766
1767/*
1768 * Just like ifpromisc(), but for all-multicast-reception mode.
1769 */
1770int
1771if_allmulti(struct ifnet *ifp, int onswitch)
1772{
1773	int error = 0;
1774	int s = splimp();
1775	struct ifreq ifr;
1776
1777	if (onswitch) {
1778		if (ifp->if_amcount++ == 0) {
1779			ifp->if_flags |= IFF_ALLMULTI;
1780			ifr.ifr_flags = ifp->if_flags & 0xffff;
1781			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1782			IFF_LOCKGIANT(ifp);
1783			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1784			IFF_UNLOCKGIANT(ifp);
1785		}
1786	} else {
1787		if (ifp->if_amcount > 1) {
1788			ifp->if_amcount--;
1789		} else {
1790			ifp->if_amcount = 0;
1791			ifp->if_flags &= ~IFF_ALLMULTI;
1792			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1793			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1794			IFF_LOCKGIANT(ifp);
1795			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1796			IFF_UNLOCKGIANT(ifp);
1797		}
1798	}
1799	splx(s);
1800
1801	if (error == 0)
1802		rt_ifmsg(ifp);
1803	return error;
1804}
1805
1806/*
1807 * Add a multicast listenership to the interface in question.
1808 * The link layer provides a routine which converts
1809 */
1810int
1811if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1812{
1813	struct sockaddr *llsa, *dupsa;
1814	int error, s;
1815	struct ifmultiaddr *ifma;
1816
1817	/*
1818	 * If the matching multicast address already exists
1819	 * then don't add a new one, just add a reference
1820	 */
1821	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1822		if (equal(sa, ifma->ifma_addr)) {
1823			ifma->ifma_refcount++;
1824			if (retifma)
1825				*retifma = ifma;
1826			return 0;
1827		}
1828	}
1829
1830	/*
1831	 * Give the link layer a chance to accept/reject it, and also
1832	 * find out which AF_LINK address this maps to, if it isn't one
1833	 * already.
1834	 */
1835	if (ifp->if_resolvemulti) {
1836		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1837		if (error) return error;
1838	} else {
1839		llsa = 0;
1840	}
1841
1842	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1843	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1844	bcopy(sa, dupsa, sa->sa_len);
1845
1846	ifma->ifma_addr = dupsa;
1847	ifma->ifma_lladdr = llsa;
1848	ifma->ifma_ifp = ifp;
1849	ifma->ifma_refcount = 1;
1850	ifma->ifma_protospec = NULL;
1851	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1852
1853	/*
1854	 * Some network interfaces can scan the address list at
1855	 * interrupt time; lock them out.
1856	 */
1857	s = splimp();
1858	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1859	splx(s);
1860	if (retifma != NULL)
1861		*retifma = ifma;
1862
1863	if (llsa != 0) {
1864		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1865			if (equal(ifma->ifma_addr, llsa))
1866				break;
1867		}
1868		if (ifma) {
1869			ifma->ifma_refcount++;
1870		} else {
1871			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1872			       M_IFMADDR, M_WAITOK);
1873			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1874			       M_IFMADDR, M_WAITOK);
1875			bcopy(llsa, dupsa, llsa->sa_len);
1876			ifma->ifma_addr = dupsa;
1877			ifma->ifma_lladdr = NULL;
1878			ifma->ifma_ifp = ifp;
1879			ifma->ifma_refcount = 1;
1880			ifma->ifma_protospec = NULL;
1881			s = splimp();
1882			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1883			splx(s);
1884		}
1885	}
1886	/*
1887	 * We are certain we have added something, so call down to the
1888	 * interface to let them know about it.
1889	 */
1890	s = splimp();
1891	IFF_LOCKGIANT(ifp);
1892	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1893	IFF_UNLOCKGIANT(ifp);
1894	splx(s);
1895
1896	return 0;
1897}
1898
1899/*
1900 * Remove a reference to a multicast address on this interface.  Yell
1901 * if the request does not match an existing membership.
1902 */
1903int
1904if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1905{
1906	struct ifmultiaddr *ifma;
1907	int s;
1908
1909	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1910		if (equal(sa, ifma->ifma_addr))
1911			break;
1912	if (ifma == 0)
1913		return ENOENT;
1914
1915	if (ifma->ifma_refcount > 1) {
1916		ifma->ifma_refcount--;
1917		return 0;
1918	}
1919
1920	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1921	sa = ifma->ifma_lladdr;
1922	s = splimp();
1923	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1924	/*
1925	 * Make sure the interface driver is notified
1926	 * in the case of a link layer mcast group being left.
1927	 */
1928	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0) {
1929		IFF_LOCKGIANT(ifp);
1930		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1931		IFF_UNLOCKGIANT(ifp);
1932	}
1933	splx(s);
1934	free(ifma->ifma_addr, M_IFMADDR);
1935	free(ifma, M_IFMADDR);
1936	if (sa == 0)
1937		return 0;
1938
1939	/*
1940	 * Now look for the link-layer address which corresponds to
1941	 * this network address.  It had been squirreled away in
1942	 * ifma->ifma_lladdr for this purpose (so we don't have
1943	 * to call ifp->if_resolvemulti() again), and we saved that
1944	 * value in sa above.  If some nasty deleted the
1945	 * link-layer address out from underneath us, we can deal because
1946	 * the address we stored was is not the same as the one which was
1947	 * in the record for the link-layer address.  (So we don't complain
1948	 * in that case.)
1949	 */
1950	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1951		if (equal(sa, ifma->ifma_addr))
1952			break;
1953	if (ifma == 0)
1954		return 0;
1955
1956	if (ifma->ifma_refcount > 1) {
1957		ifma->ifma_refcount--;
1958		return 0;
1959	}
1960
1961	s = splimp();
1962	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1963	IFF_LOCKGIANT(ifp);
1964	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1965	IFF_UNLOCKGIANT(ifp);
1966	splx(s);
1967	free(ifma->ifma_addr, M_IFMADDR);
1968	free(sa, M_IFMADDR);
1969	free(ifma, M_IFMADDR);
1970
1971	return 0;
1972}
1973
1974/*
1975 * Set the link layer address on an interface.
1976 *
1977 * At this time we only support certain types of interfaces,
1978 * and we don't allow the length of the address to change.
1979 */
1980int
1981if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1982{
1983	struct sockaddr_dl *sdl;
1984	struct ifaddr *ifa;
1985	struct ifreq ifr;
1986
1987	ifa = ifaddr_byindex(ifp->if_index);
1988	if (ifa == NULL)
1989		return (EINVAL);
1990	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1991	if (sdl == NULL)
1992		return (EINVAL);
1993	if (len != sdl->sdl_alen)	/* don't allow length to change */
1994		return (EINVAL);
1995	switch (ifp->if_type) {
1996	case IFT_ETHER:			/* these types use struct arpcom */
1997	case IFT_FDDI:
1998	case IFT_XETHER:
1999	case IFT_ISO88025:
2000	case IFT_L2VLAN:
2001	case IFT_BRIDGE:
2002		bcopy(lladdr, IFP2ENADDR(ifp), len);
2003		/*
2004		 * XXX We also need to store the lladdr in LLADDR(sdl),
2005		 * which is done below. This is a pain because we must
2006		 * remember to keep the info in sync.
2007		 */
2008		/* FALLTHROUGH */
2009	case IFT_ARCNET:
2010		bcopy(lladdr, LLADDR(sdl), len);
2011		break;
2012	default:
2013		return (ENODEV);
2014	}
2015	/*
2016	 * If the interface is already up, we need
2017	 * to re-init it in order to reprogram its
2018	 * address filter.
2019	 */
2020	if ((ifp->if_flags & IFF_UP) != 0) {
2021		IFF_LOCKGIANT(ifp);
2022		ifp->if_flags &= ~IFF_UP;
2023		ifr.ifr_flags = ifp->if_flags & 0xffff;
2024		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2025		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2026		ifp->if_flags |= IFF_UP;
2027		ifr.ifr_flags = ifp->if_flags & 0xffff;
2028		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2029		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2030		IFF_UNLOCKGIANT(ifp);
2031#ifdef INET
2032		/*
2033		 * Also send gratuitous ARPs to notify other nodes about
2034		 * the address change.
2035		 */
2036		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2037			if (ifa->ifa_addr != NULL &&
2038			    ifa->ifa_addr->sa_family == AF_INET)
2039				arp_ifinit(ifp, ifa);
2040		}
2041#endif
2042	}
2043	return (0);
2044}
2045
2046struct ifmultiaddr *
2047ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2048{
2049	struct ifmultiaddr *ifma;
2050
2051	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2052		if (equal(ifma->ifma_addr, sa))
2053			break;
2054
2055	return ifma;
2056}
2057
2058/*
2059 * The name argument must be a pointer to storage which will last as
2060 * long as the interface does.  For physical devices, the result of
2061 * device_get_name(dev) is a good choice and for pseudo-devices a
2062 * static string works well.
2063 */
2064void
2065if_initname(struct ifnet *ifp, const char *name, int unit)
2066{
2067	ifp->if_dname = name;
2068	ifp->if_dunit = unit;
2069	if (unit != IF_DUNIT_NONE)
2070		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2071	else
2072		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2073}
2074
2075int
2076if_printf(struct ifnet *ifp, const char * fmt, ...)
2077{
2078	va_list ap;
2079	int retval;
2080
2081	retval = printf("%s: ", ifp->if_xname);
2082	va_start(ap, fmt);
2083	retval += vprintf(fmt, ap);
2084	va_end(ap);
2085	return (retval);
2086}
2087
2088/*
2089 * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
2090 * be called without Giant.  However, we often can't acquire the Giant lock
2091 * at those points; instead, we run it via a task queue that holds Giant via
2092 * if_start_deferred.
2093 *
2094 * XXXRW: We need to make sure that the ifnet isn't fully detached until any
2095 * outstanding if_start_deferred() tasks that will run after the free.  This
2096 * probably means waiting in if_detach().
2097 */
2098void
2099if_start(struct ifnet *ifp)
2100{
2101
2102	NET_ASSERT_GIANT();
2103
2104	if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
2105		if (mtx_owned(&Giant))
2106			(*(ifp)->if_start)(ifp);
2107		else
2108			taskqueue_enqueue(taskqueue_swi_giant,
2109			    &ifp->if_starttask);
2110	} else
2111		(*(ifp)->if_start)(ifp);
2112}
2113
2114static void
2115if_start_deferred(void *context, int pending)
2116{
2117	struct ifnet *ifp;
2118
2119	/*
2120	 * This code must be entered with Giant, and should never run if
2121	 * we're not running with debug.mpsafenet.
2122	 */
2123	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
2124	GIANT_REQUIRED;
2125
2126	ifp = context;
2127	(ifp->if_start)(ifp);
2128}
2129
2130int
2131if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
2132{
2133	int active = 0;
2134
2135	IF_LOCK(ifq);
2136	if (_IF_QFULL(ifq)) {
2137		_IF_DROP(ifq);
2138		IF_UNLOCK(ifq);
2139		m_freem(m);
2140		return (0);
2141	}
2142	if (ifp != NULL) {
2143		ifp->if_obytes += m->m_pkthdr.len + adjust;
2144		if (m->m_flags & (M_BCAST|M_MCAST))
2145			ifp->if_omcasts++;
2146		active = ifp->if_flags & IFF_OACTIVE;
2147	}
2148	_IF_ENQUEUE(ifq, m);
2149	IF_UNLOCK(ifq);
2150	if (ifp != NULL && !active)
2151		if_start(ifp);
2152	return (1);
2153}
2154
2155void
2156if_register_com_alloc(u_char type,
2157    if_com_alloc_t *a, if_com_free_t *f)
2158{
2159
2160	KASSERT(if_com_alloc[type] == NULL,
2161	    ("if_register_com_alloc: %d already registered", type));
2162	KASSERT(if_com_free[type] == NULL,
2163	    ("if_register_com_alloc: %d free already registered", type));
2164
2165	if_com_alloc[type] = a;
2166	if_com_free[type] = f;
2167}
2168
2169void
2170if_deregister_com_alloc(u_char type)
2171{
2172
2173	KASSERT(if_com_alloc[type] == NULL,
2174	    ("if_deregister_com_alloc: %d not registered", type));
2175	KASSERT(if_com_free[type] == NULL,
2176	    ("if_deregister_com_alloc: %d free not registered", type));
2177	if_com_alloc[type] = NULL;
2178	if_com_free[type] = NULL;
2179}
2180