if.c revision 194251
1/*-
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: head/sys/net/if.c 194251 2009-06-15 18:59:29Z jamie $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36#include "opt_carp.h"
37
38#include <sys/param.h>
39#include <sys/types.h>
40#include <sys/conf.h>
41#include <sys/malloc.h>
42#include <sys/sbuf.h>
43#include <sys/bus.h>
44#include <sys/mbuf.h>
45#include <sys/systm.h>
46#include <sys/priv.h>
47#include <sys/proc.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/protosw.h>
51#include <sys/kernel.h>
52#include <sys/lock.h>
53#include <sys/refcount.h>
54#include <sys/module.h>
55#include <sys/rwlock.h>
56#include <sys/sockio.h>
57#include <sys/syslog.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/domain.h>
61#include <sys/jail.h>
62#include <sys/vimage.h>
63#include <machine/stdarg.h>
64#include <vm/uma.h>
65
66#include <net/if.h>
67#include <net/if_arp.h>
68#include <net/if_clone.h>
69#include <net/if_dl.h>
70#include <net/if_types.h>
71#include <net/if_var.h>
72#include <net/radix.h>
73#include <net/route.h>
74#include <net/vnet.h>
75
76#if defined(INET) || defined(INET6)
77/*XXX*/
78#include <netinet/in.h>
79#include <netinet/in_var.h>
80#ifdef INET6
81#include <netinet6/in6_var.h>
82#include <netinet6/in6_ifattach.h>
83#endif
84#endif
85#ifdef INET
86#include <netinet/if_ether.h>
87#include <netinet/vinet.h>
88#endif
89#if defined(INET) || defined(INET6)
90#ifdef DEV_CARP
91#include <netinet/ip_carp.h>
92#endif
93#endif
94
95#include <security/mac/mac_framework.h>
96
97#ifndef VIMAGE
98#ifndef VIMAGE_GLOBALS
99struct vnet_net vnet_net_0;
100#endif
101#endif
102
103static int slowtimo_started;
104
105SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
106SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
107
108/* Log link state change events */
109static int log_link_state_change = 1;
110
111SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
112	&log_link_state_change, 0,
113	"log interface link state change events");
114
115void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
116void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
117void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
118
119struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
120
121/*
122 * XXX: Style; these should be sorted alphabetically, and unprototyped
123 * static functions should be prototyped. Currently they are sorted by
124 * declaration order.
125 */
126static void	if_attachdomain(void *);
127static void	if_attachdomain1(struct ifnet *);
128static int	ifconf(u_long, caddr_t);
129static void	if_freemulti(struct ifmultiaddr *);
130static void	if_init(void *);
131static void	if_check(void *);
132static void	if_route(struct ifnet *, int flag, int fam);
133static int	if_setflag(struct ifnet *, int, int, int *, int);
134static void	if_slowtimo(void *);
135static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
136static void	if_unroute(struct ifnet *, int flag, int fam);
137static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
138static int	if_rtdel(struct radix_node *, void *);
139static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
140static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
141static void	do_link_state_change(void *, int);
142static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
143static int	if_getgroupmembers(struct ifgroupreq *);
144static void	if_delgroups(struct ifnet *);
145static void	if_attach_internal(struct ifnet *, int);
146static void	if_detach_internal(struct ifnet *, int);
147
148#ifdef INET6
149/*
150 * XXX: declare here to avoid to include many inet6 related files..
151 * should be more generalized?
152 */
153extern void	nd6_setmtu(struct ifnet *);
154#endif
155
156static int	vnet_net_iattach(const void *);
157#ifdef VIMAGE
158static int	vnet_net_idetach(const void *);
159#endif
160
161#ifdef VIMAGE_GLOBALS
162struct	ifnethead ifnet;	/* depend on static init XXX */
163struct	ifgrouphead ifg_head;
164int	if_index;
165static	int if_indexlim;
166/* Table of ifnet/cdev by index.  Locked with ifnet_lock. */
167static struct ifindex_entry *ifindex_table;
168static struct	knlist ifklist;
169#endif
170
171int	ifqmaxlen = IFQ_MAXLEN;
172struct rwlock ifnet_lock;
173static	if_com_alloc_t *if_com_alloc[256];
174static	if_com_free_t *if_com_free[256];
175
176static void	filt_netdetach(struct knote *kn);
177static int	filt_netdev(struct knote *kn, long hint);
178
179static struct filterops netdev_filtops =
180    { 1, NULL, filt_netdetach, filt_netdev };
181
182#ifndef VIMAGE_GLOBALS
183static struct vnet_symmap vnet_net_symmap[] = {
184	VNET_SYMMAP(net, ifnet),
185	VNET_SYMMAP(net, rt_tables),
186	VNET_SYMMAP(net, rtstat),
187	VNET_SYMMAP(net, rttrash),
188	VNET_SYMMAP_END
189};
190
191static const vnet_modinfo_t vnet_net_modinfo = {
192	.vmi_id		= VNET_MOD_NET,
193	.vmi_name	= "net",
194	.vmi_size	= sizeof(struct vnet_net),
195	.vmi_symmap	= vnet_net_symmap,
196	.vmi_iattach	= vnet_net_iattach,
197#ifdef VIMAGE
198	.vmi_idetach	= vnet_net_idetach
199#endif
200};
201#endif /* !VIMAGE_GLOBALS */
202
203/*
204 * System initialization
205 */
206SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
207SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL);
208
209MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
210MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
211MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
212
213struct ifnet *
214ifnet_byindex_locked(u_short idx)
215{
216	INIT_VNET_NET(curvnet);
217
218	if (idx > V_if_index)
219		return (NULL);
220	return (V_ifindex_table[idx].ife_ifnet);
221}
222
223struct ifnet *
224ifnet_byindex(u_short idx)
225{
226	struct ifnet *ifp;
227
228	IFNET_RLOCK();
229	ifp = ifnet_byindex_locked(idx);
230	IFNET_RUNLOCK();
231	return (ifp);
232}
233
234struct ifnet *
235ifnet_byindex_ref(u_short idx)
236{
237	struct ifnet *ifp;
238
239	IFNET_RLOCK();
240	ifp = ifnet_byindex_locked(idx);
241	if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
242		IFNET_RUNLOCK();
243		return (NULL);
244	}
245	if_ref(ifp);
246	IFNET_RUNLOCK();
247	return (ifp);
248}
249
250static void
251ifnet_setbyindex(u_short idx, struct ifnet *ifp)
252{
253	INIT_VNET_NET(curvnet);
254
255	IFNET_WLOCK_ASSERT();
256
257	V_ifindex_table[idx].ife_ifnet = ifp;
258}
259
260struct ifaddr *
261ifaddr_byindex(u_short idx)
262{
263	struct ifaddr *ifa;
264
265	IFNET_RLOCK();
266	ifa = ifnet_byindex_locked(idx)->if_addr;
267	IFNET_RUNLOCK();
268	return (ifa);
269}
270
271struct cdev *
272ifdev_byindex(u_short idx)
273{
274	INIT_VNET_NET(curvnet);
275	struct cdev *cdev;
276
277	IFNET_RLOCK();
278	cdev = V_ifindex_table[idx].ife_dev;
279	IFNET_RUNLOCK();
280	return (cdev);
281}
282
283static void
284ifdev_setbyindex(u_short idx, struct cdev *cdev)
285{
286	INIT_VNET_NET(curvnet);
287
288	IFNET_WLOCK();
289	V_ifindex_table[idx].ife_dev = cdev;
290	IFNET_WUNLOCK();
291}
292
293static d_open_t		netopen;
294static d_close_t	netclose;
295static d_ioctl_t	netioctl;
296static d_kqfilter_t	netkqfilter;
297
298static struct cdevsw net_cdevsw = {
299	.d_version =	D_VERSION,
300	.d_flags =	D_NEEDGIANT,
301	.d_open =	netopen,
302	.d_close =	netclose,
303	.d_ioctl =	netioctl,
304	.d_name =	"net",
305	.d_kqfilter =	netkqfilter,
306};
307
/*
 * d_open entry point: nothing to set up, always succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{

	return (0);
}
313
/*
 * d_close entry point: nothing to tear down, always succeeds.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
319
320static int
321netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
322{
323	struct ifnet *ifp;
324	int error, idx;
325
326	/* only support interface specific ioctls */
327	if (IOCGROUP(cmd) != 'i')
328		return (EOPNOTSUPP);
329	idx = dev2unit(dev);
330	if (idx == 0) {
331		/*
332		 * special network device, not interface.
333		 */
334		if (cmd == SIOCGIFCONF)
335			return (ifconf(cmd, data));	/* XXX remove cmd */
336#ifdef __amd64__
337		if (cmd == SIOCGIFCONF32)
338			return (ifconf(cmd, data));	/* XXX remove cmd */
339#endif
340		return (EOPNOTSUPP);
341	}
342
343	ifp = ifnet_byindex(idx);
344	if (ifp == NULL)
345		return (ENXIO);
346
347	error = ifhwioctl(cmd, ifp, data, td);
348	if (error == ENOIOCTL)
349		error = EOPNOTSUPP;
350	return (error);
351}
352
353static int
354netkqfilter(struct cdev *dev, struct knote *kn)
355{
356	INIT_VNET_NET(curvnet);
357	struct knlist *klist;
358	struct ifnet *ifp;
359	int idx;
360
361	switch (kn->kn_filter) {
362	case EVFILT_NETDEV:
363		kn->kn_fop = &netdev_filtops;
364		break;
365	default:
366		return (EINVAL);
367	}
368
369	idx = dev2unit(dev);
370	if (idx == 0) {
371		klist = &V_ifklist;
372	} else {
373		ifp = ifnet_byindex(idx);
374		if (ifp == NULL)
375			return (1);
376		klist = &ifp->if_klist;
377	}
378
379	kn->kn_hook = (caddr_t)klist;
380
381	knlist_add(klist, kn, 0);
382
383	return (0);
384}
385
386static void
387filt_netdetach(struct knote *kn)
388{
389	struct knlist *klist = (struct knlist *)kn->kn_hook;
390
391	knlist_remove(klist, kn, 0);
392}
393
394static int
395filt_netdev(struct knote *kn, long hint)
396{
397	struct knlist *klist = (struct knlist *)kn->kn_hook;
398
399	/*
400	 * Currently NOTE_EXIT is abused to indicate device detach.
401	 */
402	if (hint == NOTE_EXIT) {
403		kn->kn_data = NOTE_LINKINV;
404		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
405		knlist_remove_inevent(klist, kn);
406		return (1);
407	}
408	if (hint != 0)
409		kn->kn_data = hint;			/* current status */
410	if (kn->kn_sfflags & hint)
411		kn->kn_fflags |= hint;
412	return (kn->kn_fflags != 0);
413}
414
415/*
416 * Network interface utility routines.
417 *
418 * Routines with ifa_ifwith* names take sockaddr *'s as
419 * parameters.
420 */
421
422/* ARGSUSED*/
423static void
424if_init(void *dummy __unused)
425{
426
427#ifndef VIMAGE_GLOBALS
428	vnet_mod_register(&vnet_net_modinfo);
429#else
430	vnet_net_iattach(NULL);
431#endif
432
433	IFNET_LOCK_INIT();
434	ifdev_setbyindex(0, make_dev(&net_cdevsw, 0, UID_ROOT, GID_WHEEL,
435	    0600, "network"));
436	if_clone_init();
437}
438
439static int
440vnet_net_iattach(const void *unused __unused)
441{
442	INIT_VNET_NET(curvnet);
443
444	V_if_index = 0;
445	V_ifindex_table = NULL;
446	V_if_indexlim = 8;
447
448	TAILQ_INIT(&V_ifnet);
449	TAILQ_INIT(&V_ifg_head);
450	knlist_init_mtx(&V_ifklist, NULL);
451	if_grow();				/* create initial table */
452
453	return (0);
454}
455
#ifdef VIMAGE
/*
 * Release the interface state of the current vnet.  The interface list,
 * group list and knote list are asserted to be empty before the index
 * table is freed.  Always returns 0.
 */
static int
vnet_net_idetach(const void *unused __unused)
{
	INIT_VNET_NET(curvnet);

	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet));
	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head));
	VNET_ASSERT(SLIST_EMPTY(&V_ifklist.kl_list));

	free(V_ifindex_table, M_IFNET);

	return (0);
}
#endif
471
472void
473if_grow(void)
474{
475	INIT_VNET_NET(curvnet);
476	u_int n;
477	struct ifindex_entry *e;
478
479	V_if_indexlim <<= 1;
480	n = V_if_indexlim * sizeof(*e);
481	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
482	if (V_ifindex_table != NULL) {
483		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
484		free((caddr_t)V_ifindex_table, M_IFNET);
485	}
486	V_ifindex_table = e;
487}
488
489static void
490if_check(void *dummy __unused)
491{
492
493	/*
494	 * If at least one interface added during boot uses
495	 * if_watchdog then start the timer.
496	 */
497	if (slowtimo_started)
498		if_slowtimo(0);
499}
500
501/*
502 * Allocate a struct ifnet and an index for an interface.  A layer 2
503 * common structure will also be allocated if an allocation routine is
504 * registered for the passed type.
505 */
506struct ifnet *
507if_alloc(u_char type)
508{
509	INIT_VNET_NET(curvnet);
510	struct ifnet *ifp;
511
512	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
513
514	/*
515	 * Try to find an empty slot below if_index.  If we fail, take
516	 * the next slot.
517	 *
518	 * XXX: should be locked!
519	 */
520	for (ifp->if_index = 1; ifp->if_index <= V_if_index; ifp->if_index++) {
521		if (ifnet_byindex(ifp->if_index) == NULL)
522			break;
523	}
524	/* Catch if_index overflow. */
525	if (ifp->if_index < 1) {
526		free(ifp, M_IFNET);
527		return (NULL);
528	}
529	if (ifp->if_index > V_if_index)
530		V_if_index = ifp->if_index;
531	if (V_if_index >= V_if_indexlim)
532		if_grow();
533
534	ifp->if_type = type;
535	ifp->if_alloctype = type;
536
537	if (if_com_alloc[type] != NULL) {
538		ifp->if_l2com = if_com_alloc[type](type, ifp);
539		if (ifp->if_l2com == NULL) {
540			free(ifp, M_IFNET);
541			return (NULL);
542		}
543	}
544
545	IF_ADDR_LOCK_INIT(ifp);
546	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
547	ifp->if_afdata_initialized = 0;
548	IF_AFDATA_LOCK_INIT(ifp);
549	TAILQ_INIT(&ifp->if_addrhead);
550	TAILQ_INIT(&ifp->if_prefixhead);
551	TAILQ_INIT(&ifp->if_multiaddrs);
552	TAILQ_INIT(&ifp->if_groups);
553	knlist_init_mtx(&ifp->if_klist, NULL);
554#ifdef MAC
555	mac_ifnet_init(ifp);
556#endif
557
558	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
559	IFNET_WLOCK();
560	ifnet_setbyindex(ifp->if_index, ifp);
561	IFNET_WUNLOCK();
562	return (ifp);
563}
564
565/*
566 * Do the actual work of freeing a struct ifnet, associated index, and layer
567 * 2 common structure.  This call is made when the last reference to an
568 * interface is released.
569 */
570static void
571if_free_internal(struct ifnet *ifp)
572{
573	INIT_VNET_NET(curvnet);		/* ifp->if_vnet is already NULL here */
574
575	KASSERT((ifp->if_flags & IFF_DYING),
576	    ("if_free_internal: interface not dying"));
577
578	IFNET_WLOCK();
579	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
580	    ("%s: freeing unallocated ifnet", ifp->if_xname));
581
582	ifnet_setbyindex(ifp->if_index, NULL);
583	while (V_if_index > 0 && ifnet_byindex_locked(V_if_index) == NULL)
584		V_if_index--;
585	IFNET_WUNLOCK();
586
587	if (if_com_free[ifp->if_alloctype] != NULL)
588		if_com_free[ifp->if_alloctype](ifp->if_l2com,
589		    ifp->if_alloctype);
590
591#ifdef MAC
592	mac_ifnet_destroy(ifp);
593#endif /* MAC */
594	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
595	knlist_clear(&ifp->if_klist, 0);
596	knlist_destroy(&ifp->if_klist);
597	IF_AFDATA_DESTROY(ifp);
598	IF_ADDR_LOCK_DESTROY(ifp);
599	ifq_detach(&ifp->if_snd);
600	free(ifp, M_IFNET);
601}
602
603/*
604 * This version should only be called by intefaces that switch their type
605 * after calling if_alloc().  if_free_type() will go away again now that we
606 * have if_alloctype to cache the original allocation type.  For now, assert
607 * that they match, since we require that in practice.
608 */
609void
610if_free_type(struct ifnet *ifp, u_char type)
611{
612
613	KASSERT(ifp->if_alloctype == type,
614	    ("if_free_type: type (%d) != alloctype (%d)", type,
615	    ifp->if_alloctype));
616
617	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
618	if (!refcount_release(&ifp->if_refcount))
619		return;
620	if_free_internal(ifp);
621}
622
623/*
624 * This is the normal version of if_free(), used by device drivers to free a
625 * detached network interface.  The contents of if_free_type() will move into
626 * here when if_free_type() goes away.
627 */
628void
629if_free(struct ifnet *ifp)
630{
631
632	if_free_type(ifp, ifp->if_alloctype);
633}
634
635/*
636 * Interfaces to keep an ifnet type-stable despite the possibility of the
637 * driver calling if_free().  If there are additional references, we defer
638 * freeing the underlying data structure.
639 */
640void
641if_ref(struct ifnet *ifp)
642{
643
644	/* We don't assert the ifnet list lock here, but arguably should. */
645	refcount_acquire(&ifp->if_refcount);
646}
647
648void
649if_rele(struct ifnet *ifp)
650{
651
652	if (!refcount_release(&ifp->if_refcount))
653		return;
654	if_free_internal(ifp);
655}
656
657void
658ifq_attach(struct ifaltq *ifq, struct ifnet *ifp)
659{
660
661	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
662
663	if (ifq->ifq_maxlen == 0)
664		ifq->ifq_maxlen = ifqmaxlen;
665
666	ifq->altq_type = 0;
667	ifq->altq_disc = NULL;
668	ifq->altq_flags &= ALTQF_CANTCHANGE;
669	ifq->altq_tbr  = NULL;
670	ifq->altq_ifp  = ifp;
671}
672
673void
674ifq_detach(struct ifaltq *ifq)
675{
676	mtx_destroy(&ifq->ifq_mtx);
677}
678
/*
 * Perform generic interface initialization tasks and attach the interface
 * to the list of "active" interfaces.  If the vmove flag is set on entry
 * to if_attach_internal(), only a limited subset of the initialization
 * tasks is performed, given that we are moving an already fully
 * initialized ifnet from one vnet to another.
 *
 * XXX:
 *  - The decision to return void and thus require this function to
 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
 */
void
if_attach(struct ifnet *ifp)
{
	const int vmove = 0;	/* full attach, not a vnet move */

	if_attach_internal(ifp, vmove);
}
698
699static void
700if_attach_internal(struct ifnet *ifp, int vmove)
701{
702	INIT_VNET_NET(curvnet);
703	unsigned socksize, ifasize;
704	int namelen, masklen;
705	struct sockaddr_dl *sdl;
706	struct ifaddr *ifa;
707
708	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
709		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
710		    ifp->if_xname);
711
712#ifdef VIMAGE
713	ifp->if_vnet = curvnet;
714	if (ifp->if_home_vnet == NULL)
715		ifp->if_home_vnet = curvnet;
716#endif
717
718	if_addgroup(ifp, IFG_ALL);
719
720	getmicrotime(&ifp->if_lastchange);
721	ifp->if_data.ifi_epoch = time_uptime;
722	ifp->if_data.ifi_datalen = sizeof(struct if_data);
723
724	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
725	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
726	    ("transmit and qflush must both either be set or both be NULL"));
727	if (ifp->if_transmit == NULL) {
728		ifp->if_transmit = if_transmit;
729		ifp->if_qflush = if_qflush;
730	}
731
732	if (!vmove) {
733#ifdef MAC
734		mac_ifnet_create(ifp);
735#endif
736
737		if (IS_DEFAULT_VNET(curvnet)) {
738			ifdev_setbyindex(ifp->if_index, make_dev(&net_cdevsw,
739			    ifp->if_index, UID_ROOT, GID_WHEEL, 0600, "%s/%s",
740			    net_cdevsw.d_name, ifp->if_xname));
741			make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
742			    net_cdevsw.d_name, ifp->if_index);
743		}
744
745		ifq_attach(&ifp->if_snd, ifp);
746
747		/*
748		 * Create a Link Level name for this device.
749		 */
750		namelen = strlen(ifp->if_xname);
751		/*
752		 * Always save enough space for any possiable name so we
753		 * can do a rename in place later.
754		 */
755		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
756		socksize = masklen + ifp->if_addrlen;
757		if (socksize < sizeof(*sdl))
758			socksize = sizeof(*sdl);
759		socksize = roundup2(socksize, sizeof(long));
760		ifasize = sizeof(*ifa) + 2 * socksize;
761		ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
762		IFA_LOCK_INIT(ifa);
763		sdl = (struct sockaddr_dl *)(ifa + 1);
764		sdl->sdl_len = socksize;
765		sdl->sdl_family = AF_LINK;
766		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
767		sdl->sdl_nlen = namelen;
768		sdl->sdl_index = ifp->if_index;
769		sdl->sdl_type = ifp->if_type;
770		ifp->if_addr = ifa;
771		ifa->ifa_ifp = ifp;
772		ifa->ifa_rtrequest = link_rtrequest;
773		ifa->ifa_addr = (struct sockaddr *)sdl;
774		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
775		ifa->ifa_netmask = (struct sockaddr *)sdl;
776		sdl->sdl_len = masklen;
777		while (namelen != 0)
778			sdl->sdl_data[--namelen] = 0xff;
779		ifa->ifa_refcnt = 1;
780		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
781		/* Reliably crash if used uninitialized. */
782		ifp->if_broadcastaddr = NULL;
783	}
784
785	IFNET_WLOCK();
786	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
787#ifdef VIMAGE
788	curvnet->ifcnt++;
789#endif
790	IFNET_WUNLOCK();
791
792	if (domain_init_status >= 2)
793		if_attachdomain1(ifp);
794
795	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
796	if (IS_DEFAULT_VNET(curvnet))
797		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
798
799	/* Announce the interface. */
800	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
801
802	if (!vmove && ifp->if_watchdog != NULL) {
803		if_printf(ifp,
804		    "WARNING: using obsoleted if_watchdog interface\n");
805
806		/*
807		 * Note that we need if_slowtimo().  If this happens after
808		 * boot, then call if_slowtimo() directly.
809		 */
810		if (atomic_cmpset_int(&slowtimo_started, 0, 1) && !cold)
811			if_slowtimo(0);
812	}
813}
814
815static void
816if_attachdomain(void *dummy)
817{
818	INIT_VNET_NET(curvnet);
819	struct ifnet *ifp;
820	int s;
821
822	s = splnet();
823	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
824		if_attachdomain1(ifp);
825	splx(s);
826}
827SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
828    if_attachdomain, NULL);
829
830static void
831if_attachdomain1(struct ifnet *ifp)
832{
833	struct domain *dp;
834	int s;
835
836	s = splnet();
837
838	/*
839	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
840	 * cannot lock ifp->if_afdata initialization, entirely.
841	 */
842	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
843		splx(s);
844		return;
845	}
846	if (ifp->if_afdata_initialized >= domain_init_status) {
847		IF_AFDATA_UNLOCK(ifp);
848		splx(s);
849		printf("if_attachdomain called more than once on %s\n",
850		    ifp->if_xname);
851		return;
852	}
853	ifp->if_afdata_initialized = domain_init_status;
854	IF_AFDATA_UNLOCK(ifp);
855
856	/* address family dependent data region */
857	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
858	for (dp = domains; dp; dp = dp->dom_next) {
859		if (dp->dom_ifattach)
860			ifp->if_afdata[dp->dom_family] =
861			    (*dp->dom_ifattach)(ifp);
862	}
863
864	splx(s);
865}
866
867/*
868 * Remove any unicast or broadcast network addresses from an interface.
869 */
870void
871if_purgeaddrs(struct ifnet *ifp)
872{
873	struct ifaddr *ifa, *next;
874
875	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
876		if (ifa->ifa_addr->sa_family == AF_LINK)
877			continue;
878#ifdef INET
879		/* XXX: Ugly!! ad hoc just for INET */
880		if (ifa->ifa_addr->sa_family == AF_INET) {
881			struct ifaliasreq ifr;
882
883			bzero(&ifr, sizeof(ifr));
884			ifr.ifra_addr = *ifa->ifa_addr;
885			if (ifa->ifa_dstaddr)
886				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
887			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
888			    NULL) == 0)
889				continue;
890		}
891#endif /* INET */
892#ifdef INET6
893		if (ifa->ifa_addr->sa_family == AF_INET6) {
894			in6_purgeaddr(ifa);
895			/* ifp_addrhead is already updated */
896			continue;
897		}
898#endif /* INET6 */
899		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
900		IFAFREE(ifa);
901	}
902}
903
904/*
905 * Remove any multicast network addresses from an interface.
906 */
907void
908if_purgemaddrs(struct ifnet *ifp)
909{
910	struct ifmultiaddr *ifma;
911	struct ifmultiaddr *next;
912
913	IF_ADDR_LOCK(ifp);
914	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
915		if_delmulti_locked(ifp, ifma, 1);
916	IF_ADDR_UNLOCK(ifp);
917}
918
/*
 * Detach an interface, removing it from the list of "active" interfaces.
 * If the vmove flag is set on entry to if_detach_internal(), only a
 * limited subset of the cleanup tasks is performed, given that we are
 * moving an ifnet from one vnet to another, where it must be fully
 * operational.
 *
 * XXXRW: There are some significant questions about event ordering, and
 * how to prevent things from starting to use the interface during detach.
 */
void
if_detach(struct ifnet *ifp)
{
	const int vmove = 0;	/* full detach, not a vnet move */

	if_detach_internal(ifp, vmove);
}
934
935static void
936if_detach_internal(struct ifnet *ifp, int vmove)
937{
938	INIT_VNET_NET(ifp->if_vnet);
939	struct ifaddr *ifa;
940	struct radix_node_head	*rnh;
941	int i, j;
942	struct domain *dp;
943 	struct ifnet *iter;
944 	int found = 0;
945
946	IFNET_WLOCK();
947	TAILQ_FOREACH(iter, &V_ifnet, if_link)
948		if (iter == ifp) {
949			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
950			found = 1;
951			break;
952		}
953#ifdef VIMAGE
954	if (found)
955		curvnet->ifcnt--;
956#endif
957	IFNET_WUNLOCK();
958	if (!found) {
959		if (vmove)
960			panic("interface not in it's own ifnet list");
961		else
962			return; /* XXX this should panic as well? */
963	}
964
965	/*
966	 * Remove/wait for pending events.
967	 */
968	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
969
970	/*
971	 * Remove routes and flush queues.
972	 */
973	if_down(ifp);
974#ifdef ALTQ
975	if (ALTQ_IS_ENABLED(&ifp->if_snd))
976		altq_disable(&ifp->if_snd);
977	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
978		altq_detach(&ifp->if_snd);
979#endif
980
981	if_purgeaddrs(ifp);
982
983#ifdef INET
984	in_ifdetach(ifp);
985#endif
986
987#ifdef INET6
988	/*
989	 * Remove all IPv6 kernel structs related to ifp.  This should be done
990	 * before removing routing entries below, since IPv6 interface direct
991	 * routes are expected to be removed by the IPv6-specific kernel API.
992	 * Otherwise, the kernel will detect some inconsistency and bark it.
993	 */
994	in6_ifdetach(ifp);
995#endif
996	if_purgemaddrs(ifp);
997
998	if (!vmove) {
999		/*
1000		 * Prevent further calls into the device driver via ifnet.
1001		 */
1002		if_dead(ifp);
1003
1004		/*
1005		 * Remove link ifaddr pointer and maybe decrement if_index.
1006		 * Clean up all addresses.
1007		 */
1008		ifp->if_addr = NULL;
1009		if (IS_DEFAULT_VNET(curvnet))
1010			destroy_dev(ifdev_byindex(ifp->if_index));
1011		ifdev_setbyindex(ifp->if_index, NULL);
1012
1013		/* We can now free link ifaddr. */
1014		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
1015			ifa = TAILQ_FIRST(&ifp->if_addrhead);
1016			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
1017			IFAFREE(ifa);
1018		}
1019	}
1020
1021	/*
1022	 * Delete all remaining routes using this interface
1023	 * Unfortuneatly the only way to do this is to slog through
1024	 * the entire routing table looking for routes which point
1025	 * to this interface...oh well...
1026	 */
1027	for (i = 1; i <= AF_MAX; i++) {
1028		for (j = 0; j < rt_numfibs; j++) {
1029			rnh = rt_tables_get_rnh(j, i);
1030			if (rnh == NULL)
1031				continue;
1032			RADIX_NODE_HEAD_LOCK(rnh);
1033			(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
1034			RADIX_NODE_HEAD_UNLOCK(rnh);
1035		}
1036	}
1037
1038	/* Announce that the interface is gone. */
1039	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1040	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1041	if (IS_DEFAULT_VNET(curvnet))
1042		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
1043	if_delgroups(ifp);
1044
1045	IF_AFDATA_LOCK(ifp);
1046	for (dp = domains; dp; dp = dp->dom_next) {
1047		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
1048			(*dp->dom_ifdetach)(ifp,
1049			    ifp->if_afdata[dp->dom_family]);
1050	}
1051	ifp->if_afdata_initialized = 0;
1052	IF_AFDATA_UNLOCK(ifp);
1053}
1054
#ifdef VIMAGE
/*
 * if_vmove() performs a limited version of if_detach() in the current
 * vnet and then if_attach()es the ifnet to the vnet given as the second
 * argument.  Along the way it shrinks if_index in the current vnet where
 * possible, picks an unused if_index in the target vnet and calls
 * if_grow() there if the index table has become full.
 */
void
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{

	/*
	 * Detach from current vnet, but preserve LLADDR info, do not
	 * mark as dead etc. so that the ifnet can be reattached later.
	 */
	if_detach_internal(ifp, 1);

	/*
	 * Unlink the ifnet from ifindex_table[] in current vnet,
	 * and shrink the if_index for that vnet if possible.
	 * The nested block below is needed to confine the scope
	 * of INIT_VNET_NET().
	 */
	{
		INIT_VNET_NET(curvnet);

		IFNET_WLOCK();
		ifnet_setbyindex(ifp->if_index, NULL);
		while (V_if_index > 0 &&
		    ifnet_byindex_locked(V_if_index) == NULL)
			V_if_index--;
		IFNET_WUNLOCK();
	}

	/*
	 * Switch to the context of the target vnet.
	 */
	CURVNET_SET_QUIET(new_vnet);
	INIT_VNET_NET(new_vnet);

	/*
	 * Try to find an empty slot below if_index.  If we fail, take
	 * the next slot.
	 */
	IFNET_WLOCK();
	ifp->if_index = 1;
	while (ifp->if_index <= V_if_index &&
	    ifnet_byindex_locked(ifp->if_index) != NULL)
		ifp->if_index++;

	/* Catch if_index overflow. */
	if (ifp->if_index < 1)
		panic("if_index overflow");

	if (ifp->if_index > V_if_index)
		V_if_index = ifp->if_index;
	if (V_if_index >= V_if_indexlim)
		if_grow();
	ifnet_setbyindex(ifp->if_index, ifp);
	IFNET_WUNLOCK();

	if_attach_internal(ifp, 1);

	CURVNET_RESTORE();
}
#endif /* VIMAGE */
1121
1122/*
1123 * Add a group to an interface
1124 */
1125int
1126if_addgroup(struct ifnet *ifp, const char *groupname)
1127{
1128	INIT_VNET_NET(ifp->if_vnet);
1129	struct ifg_list		*ifgl;
1130	struct ifg_group	*ifg = NULL;
1131	struct ifg_member	*ifgm;
1132
1133	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1134	    groupname[strlen(groupname) - 1] <= '9')
1135		return (EINVAL);
1136
1137	IFNET_WLOCK();
1138	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1139		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1140			IFNET_WUNLOCK();
1141			return (EEXIST);
1142		}
1143
1144	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
1145	    M_NOWAIT)) == NULL) {
1146	    	IFNET_WUNLOCK();
1147		return (ENOMEM);
1148	}
1149
1150	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
1151	    M_TEMP, M_NOWAIT)) == NULL) {
1152		free(ifgl, M_TEMP);
1153		IFNET_WUNLOCK();
1154		return (ENOMEM);
1155	}
1156
1157	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1158		if (!strcmp(ifg->ifg_group, groupname))
1159			break;
1160
1161	if (ifg == NULL) {
1162		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
1163		    M_TEMP, M_NOWAIT)) == NULL) {
1164			free(ifgl, M_TEMP);
1165			free(ifgm, M_TEMP);
1166			IFNET_WUNLOCK();
1167			return (ENOMEM);
1168		}
1169		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1170		ifg->ifg_refcnt = 0;
1171		TAILQ_INIT(&ifg->ifg_members);
1172		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1173		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1174	}
1175
1176	ifg->ifg_refcnt++;
1177	ifgl->ifgl_group = ifg;
1178	ifgm->ifgm_ifp = ifp;
1179
1180	IF_ADDR_LOCK(ifp);
1181	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1182	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1183	IF_ADDR_UNLOCK(ifp);
1184
1185	IFNET_WUNLOCK();
1186
1187	EVENTHANDLER_INVOKE(group_change_event, groupname);
1188
1189	return (0);
1190}
1191
1192/*
1193 * Remove a group from an interface
1194 */
1195int
1196if_delgroup(struct ifnet *ifp, const char *groupname)
1197{
1198	INIT_VNET_NET(ifp->if_vnet);
1199	struct ifg_list		*ifgl;
1200	struct ifg_member	*ifgm;
1201
1202	IFNET_WLOCK();
1203	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1204		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1205			break;
1206	if (ifgl == NULL) {
1207		IFNET_WUNLOCK();
1208		return (ENOENT);
1209	}
1210
1211	IF_ADDR_LOCK(ifp);
1212	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1213	IF_ADDR_UNLOCK(ifp);
1214
1215	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1216		if (ifgm->ifgm_ifp == ifp)
1217			break;
1218
1219	if (ifgm != NULL) {
1220		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1221		free(ifgm, M_TEMP);
1222	}
1223
1224	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1225		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1226		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1227		free(ifgl->ifgl_group, M_TEMP);
1228	}
1229	IFNET_WUNLOCK();
1230
1231	free(ifgl, M_TEMP);
1232
1233	EVENTHANDLER_INVOKE(group_change_event, groupname);
1234
1235	return (0);
1236}
1237
1238/*
1239 * Remove an interface from all groups
1240 */
1241static void
1242if_delgroups(struct ifnet *ifp)
1243{
1244	INIT_VNET_NET(ifp->if_vnet);
1245	struct ifg_list		*ifgl;
1246	struct ifg_member	*ifgm;
1247	char groupname[IFNAMSIZ];
1248
1249	IFNET_WLOCK();
1250	while (!TAILQ_EMPTY(&ifp->if_groups)) {
1251		ifgl = TAILQ_FIRST(&ifp->if_groups);
1252
1253		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1254
1255		IF_ADDR_LOCK(ifp);
1256		TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1257		IF_ADDR_UNLOCK(ifp);
1258
1259		TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1260			if (ifgm->ifgm_ifp == ifp)
1261				break;
1262
1263		if (ifgm != NULL) {
1264			TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1265			    ifgm_next);
1266			free(ifgm, M_TEMP);
1267		}
1268
1269		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1270			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
1271			EVENTHANDLER_INVOKE(group_detach_event,
1272			    ifgl->ifgl_group);
1273			free(ifgl->ifgl_group, M_TEMP);
1274		}
1275		IFNET_WUNLOCK();
1276
1277		free(ifgl, M_TEMP);
1278
1279		EVENTHANDLER_INVOKE(group_change_event, groupname);
1280
1281		IFNET_WLOCK();
1282	}
1283	IFNET_WUNLOCK();
1284}
1285
1286/*
1287 * Stores all groups from an interface in memory pointed
1288 * to by data
1289 */
1290static int
1291if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
1292{
1293	int			 len, error;
1294	struct ifg_list		*ifgl;
1295	struct ifg_req		 ifgrq, *ifgp;
1296	struct ifgroupreq	*ifgr = data;
1297
1298	if (ifgr->ifgr_len == 0) {
1299		IF_ADDR_LOCK(ifp);
1300		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1301			ifgr->ifgr_len += sizeof(struct ifg_req);
1302		IF_ADDR_UNLOCK(ifp);
1303		return (0);
1304	}
1305
1306	len = ifgr->ifgr_len;
1307	ifgp = ifgr->ifgr_groups;
1308	/* XXX: wire */
1309	IF_ADDR_LOCK(ifp);
1310	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1311		if (len < sizeof(ifgrq)) {
1312			IF_ADDR_UNLOCK(ifp);
1313			return (EINVAL);
1314		}
1315		bzero(&ifgrq, sizeof ifgrq);
1316		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1317		    sizeof(ifgrq.ifgrq_group));
1318		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1319		    	IF_ADDR_UNLOCK(ifp);
1320			return (error);
1321		}
1322		len -= sizeof(ifgrq);
1323		ifgp++;
1324	}
1325	IF_ADDR_UNLOCK(ifp);
1326
1327	return (0);
1328}
1329
1330/*
1331 * Stores all members of a group in memory pointed to by data
1332 */
1333static int
1334if_getgroupmembers(struct ifgroupreq *data)
1335{
1336	INIT_VNET_NET(curvnet);
1337	struct ifgroupreq	*ifgr = data;
1338	struct ifg_group	*ifg;
1339	struct ifg_member	*ifgm;
1340	struct ifg_req		 ifgrq, *ifgp;
1341	int			 len, error;
1342
1343	IFNET_RLOCK();
1344	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1345		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1346			break;
1347	if (ifg == NULL) {
1348		IFNET_RUNLOCK();
1349		return (ENOENT);
1350	}
1351
1352	if (ifgr->ifgr_len == 0) {
1353		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1354			ifgr->ifgr_len += sizeof(ifgrq);
1355		IFNET_RUNLOCK();
1356		return (0);
1357	}
1358
1359	len = ifgr->ifgr_len;
1360	ifgp = ifgr->ifgr_groups;
1361	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1362		if (len < sizeof(ifgrq)) {
1363			IFNET_RUNLOCK();
1364			return (EINVAL);
1365		}
1366		bzero(&ifgrq, sizeof ifgrq);
1367		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1368		    sizeof(ifgrq.ifgrq_member));
1369		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1370			IFNET_RUNLOCK();
1371			return (error);
1372		}
1373		len -= sizeof(ifgrq);
1374		ifgp++;
1375	}
1376	IFNET_RUNLOCK();
1377
1378	return (0);
1379}
1380
1381/*
1382 * Delete Routes for a Network Interface
1383 *
1384 * Called for each routing entry via the rnh->rnh_walktree() call above
1385 * to delete all route entries referencing a detaching network interface.
1386 *
1387 * Arguments:
1388 *	rn	pointer to node in the routing table
1389 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
1390 *
1391 * Returns:
1392 *	0	successful
1393 *	errno	failed - reason indicated
1394 *
1395 */
1396static int
1397if_rtdel(struct radix_node *rn, void *arg)
1398{
1399	struct rtentry	*rt = (struct rtentry *)rn;
1400	struct ifnet	*ifp = arg;
1401	int		err;
1402
1403	if (rt->rt_ifp == ifp) {
1404
1405		/*
1406		 * Protect (sorta) against walktree recursion problems
1407		 * with cloned routes
1408		 */
1409		if ((rt->rt_flags & RTF_UP) == 0)
1410			return (0);
1411
1412		err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1413				rt_mask(rt), rt->rt_flags|RTF_RNH_LOCKED,
1414				(struct rtentry **) NULL, rt->rt_fibnum);
1415		if (err) {
1416			log(LOG_WARNING, "if_rtdel: error %d\n", err);
1417		}
1418	}
1419
1420	return (0);
1421}
1422
/*
 * Socket address comparison helpers.
 *
 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
 * structs used to represent other address families, it is necessary
 * to perform a different comparison.
 *
 * sa_equal() compares the first (a1)->sa_len bytes of both addresses.
 * sa_dl_equal() requires equal sdl_len and then compares the link-level
 * address bytes, (a1)'s sdl_alen of them.  Both macros evaluate their
 * arguments more than once.
 */

#define	sa_equal(a1, a2)						\
	(!bcmp((a1), (a2), ((a1))->sa_len))

#define	sa_dl_equal(a1, a2)						\
	(((struct sockaddr_dl *)(a1))->sdl_len ==			\
	    ((struct sockaddr_dl *)(a2))->sdl_len &&			\
	    !bcmp(LLADDR((struct sockaddr_dl *)(a1)),			\
	    LLADDR((struct sockaddr_dl *)(a2)),				\
	    ((struct sockaddr_dl *)(a1))->sdl_alen))
1438
1439/*
1440 * Locate an interface based on a complete address.
1441 */
1442/*ARGSUSED*/
1443struct ifaddr *
1444ifa_ifwithaddr(struct sockaddr *addr)
1445{
1446	INIT_VNET_NET(curvnet);
1447	struct ifnet *ifp;
1448	struct ifaddr *ifa;
1449
1450	IFNET_RLOCK();
1451	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1452		IF_ADDR_LOCK(ifp);
1453		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1454			if (ifa->ifa_addr->sa_family != addr->sa_family)
1455				continue;
1456			if (sa_equal(addr, ifa->ifa_addr)) {
1457				IF_ADDR_UNLOCK(ifp);
1458				goto done;
1459			}
1460			/* IP6 doesn't have broadcast */
1461			if ((ifp->if_flags & IFF_BROADCAST) &&
1462			    ifa->ifa_broadaddr &&
1463			    ifa->ifa_broadaddr->sa_len != 0 &&
1464			    sa_equal(ifa->ifa_broadaddr, addr)) {
1465				IF_ADDR_UNLOCK(ifp);
1466				goto done;
1467			}
1468		}
1469		IF_ADDR_UNLOCK(ifp);
1470	}
1471	ifa = NULL;
1472done:
1473	IFNET_RUNLOCK();
1474	return (ifa);
1475}
1476
1477/*
1478 * Locate an interface based on the broadcast address.
1479 */
1480/* ARGSUSED */
1481struct ifaddr *
1482ifa_ifwithbroadaddr(struct sockaddr *addr)
1483{
1484	INIT_VNET_NET(curvnet);
1485	struct ifnet *ifp;
1486	struct ifaddr *ifa;
1487
1488	IFNET_RLOCK();
1489	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1490		IF_ADDR_LOCK(ifp);
1491		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1492			if (ifa->ifa_addr->sa_family != addr->sa_family)
1493				continue;
1494			if ((ifp->if_flags & IFF_BROADCAST) &&
1495			    ifa->ifa_broadaddr &&
1496			    ifa->ifa_broadaddr->sa_len != 0 &&
1497			    sa_equal(ifa->ifa_broadaddr, addr)) {
1498				IF_ADDR_UNLOCK(ifp);
1499				goto done;
1500			}
1501		}
1502		IF_ADDR_UNLOCK(ifp);
1503	}
1504	ifa = NULL;
1505done:
1506	IFNET_RUNLOCK();
1507	return (ifa);
1508}
1509
1510/*
1511 * Locate the point to point interface with a given destination address.
1512 */
1513/*ARGSUSED*/
1514struct ifaddr *
1515ifa_ifwithdstaddr(struct sockaddr *addr)
1516{
1517	INIT_VNET_NET(curvnet);
1518	struct ifnet *ifp;
1519	struct ifaddr *ifa;
1520
1521	IFNET_RLOCK();
1522	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1523		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1524			continue;
1525		IF_ADDR_LOCK(ifp);
1526		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1527			if (ifa->ifa_addr->sa_family != addr->sa_family)
1528				continue;
1529			if (ifa->ifa_dstaddr != NULL &&
1530			    sa_equal(addr, ifa->ifa_dstaddr)) {
1531				IF_ADDR_UNLOCK(ifp);
1532				goto done;
1533			}
1534		}
1535		IF_ADDR_UNLOCK(ifp);
1536	}
1537	ifa = NULL;
1538done:
1539	IFNET_RUNLOCK();
1540	return (ifa);
1541}
1542
1543/*
1544 * Find an interface on a specific network.  If many, choice
1545 * is most specific found.
1546 */
1547struct ifaddr *
1548ifa_ifwithnet(struct sockaddr *addr)
1549{
1550	INIT_VNET_NET(curvnet);
1551	struct ifnet *ifp;
1552	struct ifaddr *ifa;
1553	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
1554	u_int af = addr->sa_family;
1555	char *addr_data = addr->sa_data, *cplim;
1556
1557	/*
1558	 * AF_LINK addresses can be looked up directly by their index number,
1559	 * so do that if we can.
1560	 */
1561	if (af == AF_LINK) {
1562	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1563	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
1564		return (ifaddr_byindex(sdl->sdl_index));
1565	}
1566
1567	/*
1568	 * Scan though each interface, looking for ones that have
1569	 * addresses in this address family.
1570	 */
1571	IFNET_RLOCK();
1572	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1573		IF_ADDR_LOCK(ifp);
1574		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1575			char *cp, *cp2, *cp3;
1576
1577			if (ifa->ifa_addr->sa_family != af)
1578next:				continue;
1579			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1580				/*
1581				 * This is a bit broken as it doesn't
1582				 * take into account that the remote end may
1583				 * be a single node in the network we are
1584				 * looking for.
1585				 * The trouble is that we don't know the
1586				 * netmask for the remote end.
1587				 */
1588				if (ifa->ifa_dstaddr != NULL &&
1589				    sa_equal(addr, ifa->ifa_dstaddr)) {
1590					IF_ADDR_UNLOCK(ifp);
1591					goto done;
1592				}
1593			} else {
1594				/*
1595				 * if we have a special address handler,
1596				 * then use it instead of the generic one.
1597				 */
1598				if (ifa->ifa_claim_addr) {
1599					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1600						IF_ADDR_UNLOCK(ifp);
1601						goto done;
1602					}
1603					continue;
1604				}
1605
1606				/*
1607				 * Scan all the bits in the ifa's address.
1608				 * If a bit dissagrees with what we are
1609				 * looking for, mask it with the netmask
1610				 * to see if it really matters.
1611				 * (A byte at a time)
1612				 */
1613				if (ifa->ifa_netmask == 0)
1614					continue;
1615				cp = addr_data;
1616				cp2 = ifa->ifa_addr->sa_data;
1617				cp3 = ifa->ifa_netmask->sa_data;
1618				cplim = ifa->ifa_netmask->sa_len
1619					+ (char *)ifa->ifa_netmask;
1620				while (cp3 < cplim)
1621					if ((*cp++ ^ *cp2++) & *cp3++)
1622						goto next; /* next address! */
1623				/*
1624				 * If the netmask of what we just found
1625				 * is more specific than what we had before
1626				 * (if we had one) then remember the new one
1627				 * before continuing to search
1628				 * for an even better one.
1629				 */
1630				if (ifa_maybe == 0 ||
1631				    rn_refines((caddr_t)ifa->ifa_netmask,
1632				    (caddr_t)ifa_maybe->ifa_netmask))
1633					ifa_maybe = ifa;
1634			}
1635		}
1636		IF_ADDR_UNLOCK(ifp);
1637	}
1638	ifa = ifa_maybe;
1639done:
1640	IFNET_RUNLOCK();
1641	return (ifa);
1642}
1643
1644/*
1645 * Find an interface address specific to an interface best matching
1646 * a given address.
1647 */
1648struct ifaddr *
1649ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1650{
1651	struct ifaddr *ifa;
1652	char *cp, *cp2, *cp3;
1653	char *cplim;
1654	struct ifaddr *ifa_maybe = 0;
1655	u_int af = addr->sa_family;
1656
1657	if (af >= AF_MAX)
1658		return (0);
1659	IF_ADDR_LOCK(ifp);
1660	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1661		if (ifa->ifa_addr->sa_family != af)
1662			continue;
1663		if (ifa_maybe == 0)
1664			ifa_maybe = ifa;
1665		if (ifa->ifa_netmask == 0) {
1666			if (sa_equal(addr, ifa->ifa_addr) ||
1667			    (ifa->ifa_dstaddr &&
1668			    sa_equal(addr, ifa->ifa_dstaddr)))
1669				goto done;
1670			continue;
1671		}
1672		if (ifp->if_flags & IFF_POINTOPOINT) {
1673			if (sa_equal(addr, ifa->ifa_dstaddr))
1674				goto done;
1675		} else {
1676			cp = addr->sa_data;
1677			cp2 = ifa->ifa_addr->sa_data;
1678			cp3 = ifa->ifa_netmask->sa_data;
1679			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1680			for (; cp3 < cplim; cp3++)
1681				if ((*cp++ ^ *cp2++) & *cp3)
1682					break;
1683			if (cp3 == cplim)
1684				goto done;
1685		}
1686	}
1687	ifa = ifa_maybe;
1688done:
1689	IF_ADDR_UNLOCK(ifp);
1690	return (ifa);
1691}
1692
1693#include <net/route.h>
1694#include <net/if_llatbl.h>
1695
1696/*
1697 * Default action when installing a route with a Link Level gateway.
1698 * Lookup an appropriate real ifa to point to.
1699 * This should be moved to /sys/net/link.c eventually.
1700 */
1701static void
1702link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1703{
1704	struct ifaddr *ifa, *oifa;
1705	struct sockaddr *dst;
1706	struct ifnet *ifp;
1707
1708	RT_LOCK_ASSERT(rt);
1709
1710	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1711	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1712		return;
1713	ifa = ifaof_ifpforaddr(dst, ifp);
1714	if (ifa) {
1715		IFAREF(ifa);		/* XXX */
1716		oifa = rt->rt_ifa;
1717		rt->rt_ifa = ifa;
1718		IFAFREE(oifa);
1719		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1720			ifa->ifa_rtrequest(cmd, rt, info);
1721	}
1722}
1723
1724/*
1725 * Mark an interface down and notify protocols of
1726 * the transition.
1727 * NOTE: must be called at splnet or eqivalent.
1728 */
1729static void
1730if_unroute(struct ifnet *ifp, int flag, int fam)
1731{
1732	struct ifaddr *ifa;
1733
1734	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
1735
1736	ifp->if_flags &= ~flag;
1737	getmicrotime(&ifp->if_lastchange);
1738	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1739		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1740			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1741	ifp->if_qflush(ifp);
1742
1743#if defined(INET) || defined(INET6)
1744#ifdef DEV_CARP
1745	if (ifp->if_carp)
1746		carp_carpdev_state(ifp->if_carp);
1747#endif
1748#endif
1749	rt_ifmsg(ifp);
1750}
1751
1752/*
1753 * Mark an interface up and notify protocols of
1754 * the transition.
1755 * NOTE: must be called at splnet or eqivalent.
1756 */
1757static void
1758if_route(struct ifnet *ifp, int flag, int fam)
1759{
1760	struct ifaddr *ifa;
1761
1762	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
1763
1764	ifp->if_flags |= flag;
1765	getmicrotime(&ifp->if_lastchange);
1766	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1767		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1768			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1769#if defined(INET) || defined(INET6)
1770#ifdef DEV_CARP
1771	if (ifp->if_carp)
1772		carp_carpdev_state(ifp->if_carp);
1773#endif
1774#endif
1775	rt_ifmsg(ifp);
1776#ifdef INET6
1777	in6_if_up(ifp);
1778#endif
1779}
1780
/*
 * Function-pointer hooks into the vlan code.  vlan_link_state_p is
 * called from do_link_state_change() for interfaces that have a vlan
 * trunk attached (if_vlantrunk != NULL).
 */
1781void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
1782void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
1783
1784/*
1785 * Handle a change in the interface link state. To avoid LORs
1786 * between driver lock and upper layer locks, as well as possible
1787 * recursions, we post event to taskqueue, and all job
1788 * is done in static do_link_state_change().
1789 */
1790void
1791if_link_state_change(struct ifnet *ifp, int link_state)
1792{
1793	/* Return if state hasn't changed. */
1794	if (ifp->if_link_state == link_state)
1795		return;
1796
1797	ifp->if_link_state = link_state;
1798
1799	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
1800}
1801
1802static void
1803do_link_state_change(void *arg, int pending)
1804{
1805	struct ifnet *ifp = (struct ifnet *)arg;
1806	int link_state = ifp->if_link_state;
1807	int link;
1808	CURVNET_SET(ifp->if_vnet);
1809
1810	/* Notify that the link state has changed. */
1811	rt_ifmsg(ifp);
1812	if (link_state == LINK_STATE_UP)
1813		link = NOTE_LINKUP;
1814	else if (link_state == LINK_STATE_DOWN)
1815		link = NOTE_LINKDOWN;
1816	else
1817		link = NOTE_LINKINV;
1818	KNOTE_UNLOCKED(&ifp->if_klist, link);
1819	if (ifp->if_vlantrunk != NULL)
1820		(*vlan_link_state_p)(ifp, link);
1821
1822	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
1823	    IFP2AC(ifp)->ac_netgraph != NULL)
1824		(*ng_ether_link_state_p)(ifp, link_state);
1825#if defined(INET) || defined(INET6)
1826#ifdef DEV_CARP
1827	if (ifp->if_carp)
1828		carp_carpdev_state(ifp->if_carp);
1829#endif
1830#endif
1831	if (ifp->if_bridge) {
1832		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
1833		(*bstp_linkstate_p)(ifp, link_state);
1834	}
1835	if (ifp->if_lagg) {
1836		KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
1837		(*lagg_linkstate_p)(ifp, link_state);
1838	}
1839
1840	if (IS_DEFAULT_VNET(curvnet))
1841		devctl_notify("IFNET", ifp->if_xname,
1842		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
1843		    NULL);
1844	if (pending > 1)
1845		if_printf(ifp, "%d link states coalesced\n", pending);
1846	if (log_link_state_change)
1847		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
1848		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
1849	CURVNET_RESTORE();
1850}
1851
1852/*
1853 * Mark an interface down and notify protocols of
1854 * the transition.
1855 * NOTE: must be called at splnet or eqivalent.
1856 */
1857void
1858if_down(struct ifnet *ifp)
1859{
1860
1861	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1862}
1863
1864/*
1865 * Mark an interface up and notify protocols of
1866 * the transition.
1867 * NOTE: must be called at splnet or eqivalent.
1868 */
1869void
1870if_up(struct ifnet *ifp)
1871{
1872
1873	if_route(ifp, IFF_UP, AF_UNSPEC);
1874}
1875
1876/*
1877 * Flush an interface queue.
1878 */
1879void
1880if_qflush(struct ifnet *ifp)
1881{
1882	struct mbuf *m, *n;
1883	struct ifaltq *ifq;
1884
1885	ifq = &ifp->if_snd;
1886	IFQ_LOCK(ifq);
1887#ifdef ALTQ
1888	if (ALTQ_IS_ENABLED(ifq))
1889		ALTQ_PURGE(ifq);
1890#endif
1891	n = ifq->ifq_head;
1892	while ((m = n) != 0) {
1893		n = m->m_act;
1894		m_freem(m);
1895	}
1896	ifq->ifq_head = 0;
1897	ifq->ifq_tail = 0;
1898	ifq->ifq_len = 0;
1899	IFQ_UNLOCK(ifq);
1900}
1901
1902/*
1903 * Handle interface watchdog timer routines.  Called
1904 * from softclock, we decrement timers (if set) and
1905 * call the appropriate interface routine on expiration.
1906 *
1907 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1908 * holding Giant.
1909 */
1910static void
1911if_slowtimo(void *arg)
1912{
1913	VNET_ITERATOR_DECL(vnet_iter);
1914	struct ifnet *ifp;
1915	int s = splimp();
1916
1917	IFNET_RLOCK();
1918	VNET_LIST_RLOCK();
1919	VNET_FOREACH(vnet_iter) {
1920		CURVNET_SET(vnet_iter);
1921		INIT_VNET_NET(vnet_iter);
1922		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1923			if (ifp->if_timer == 0 || --ifp->if_timer)
1924				continue;
1925			if (ifp->if_watchdog)
1926				(*ifp->if_watchdog)(ifp);
1927		}
1928		CURVNET_RESTORE();
1929	}
1930	VNET_LIST_RUNLOCK();
1931	IFNET_RUNLOCK();
1932	splx(s);
1933	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1934}
1935
1936/*
1937 * Map interface name to interface structure pointer, with or without
1938 * returning a reference.
1939 */
1940struct ifnet *
1941ifunit_ref(const char *name)
1942{
1943	INIT_VNET_NET(curvnet);
1944	struct ifnet *ifp;
1945
1946	IFNET_RLOCK();
1947	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1948		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
1949		    !(ifp->if_flags & IFF_DYING))
1950			break;
1951	}
1952	if (ifp != NULL)
1953		if_ref(ifp);
1954	IFNET_RUNLOCK();
1955	return (ifp);
1956}
1957
1958struct ifnet *
1959ifunit(const char *name)
1960{
1961	INIT_VNET_NET(curvnet);
1962	struct ifnet *ifp;
1963
1964	IFNET_RLOCK();
1965	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1966		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1967			break;
1968	}
1969	IFNET_RUNLOCK();
1970	return (ifp);
1971}
1972
1973/*
1974 * Hardware specific interface ioctls.
 *
 * Handles the interface-level requests listed in the switch below for
 * ifp.  Most requests that modify the interface are first gated by
 * priv_check() with a request-specific privilege, and many are then
 * handed to the driver through ifp->if_ioctl.
 *
 * Arguments:
 *	cmd	ioctl request code
 *	ifp	interface the request applies to
 *	data	request argument; viewed as struct ifreq, struct ifstat
 *		or struct ifgroupreq depending on cmd
 *	td	calling thread, used for the privilege and MAC checks
 *
 * Returns:
 *	0	success
 *	ENOIOCTL cmd is not one of the requests handled here
 *	errno	failed - reason indicated
1975 */
1976static int
1977ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1978{
1979	struct ifreq *ifr;
1980	struct ifstat *ifs;
1981	int error = 0;
1982	int new_flags, temp_flags;
1983	size_t namelen, onamelen;
1984	char new_name[IFNAMSIZ];
1985	struct ifaddr *ifa;
1986	struct sockaddr_dl *sdl;
1987
1988	ifr = (struct ifreq *)data;
1989	switch (cmd) {
1990	case SIOCGIFINDEX:
1991		ifr->ifr_index = ifp->if_index;
1992		break;
1993
1994	case SIOCGIFFLAGS:
		/*
		 * Stack and driver flags are merged and returned split
		 * into a low and a high 16-bit half.
		 */
1995		temp_flags = ifp->if_flags | ifp->if_drv_flags;
1996		ifr->ifr_flags = temp_flags & 0xffff;
1997		ifr->ifr_flagshigh = temp_flags >> 16;
1998		break;
1999
2000	case SIOCGIFCAP:
2001		ifr->ifr_reqcap = ifp->if_capabilities;
2002		ifr->ifr_curcap = ifp->if_capenable;
2003		break;
2004
2005#ifdef MAC
2006	case SIOCGIFMAC:
2007		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2008		break;
2009#endif
2010
2011	case SIOCGIFMETRIC:
2012		ifr->ifr_metric = ifp->if_metric;
2013		break;
2014
2015	case SIOCGIFMTU:
2016		ifr->ifr_mtu = ifp->if_mtu;
2017		break;
2018
2019	case SIOCGIFPHYS:
2020		ifr->ifr_phys = ifp->if_physical;
2021		break;
2022
2023	case SIOCSIFFLAGS:
2024		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2025		if (error)
2026			return (error);
2027		/*
2028		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2029		 * check, so we don't need special handling here yet.
2030		 */
2031		new_flags = (ifr->ifr_flags & 0xffff) |
2032		    (ifr->ifr_flagshigh << 16);
		/* Run the up/down transition when IFF_UP is being flipped. */
2033		if (ifp->if_flags & IFF_SMART) {
2034			/* Smart drivers twiddle their own routes */
2035		} else if (ifp->if_flags & IFF_UP &&
2036		    (new_flags & IFF_UP) == 0) {
2037			int s = splimp();
2038			if_down(ifp);
2039			splx(s);
2040		} else if (new_flags & IFF_UP &&
2041		    (ifp->if_flags & IFF_UP) == 0) {
2042			int s = splimp();
2043			if_up(ifp);
2044			splx(s);
2045		}
2046		/* See if permanently promiscuous mode bit is about to flip */
2047		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2048			if (new_flags & IFF_PPROMISC)
2049				ifp->if_flags |= IFF_PROMISC;
2050			else if (ifp->if_pcount == 0)
2051				ifp->if_flags &= ~IFF_PROMISC;
2052			log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
2053			    ifp->if_xname,
2054			    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
2055		}
		/*
		 * Keep the current IFF_CANTCHANGE bits and take every
		 * other bit from the request.
		 */
2056		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2057			(new_flags &~ IFF_CANTCHANGE);
		/* The driver is told as well; its return value is discarded. */
2058		if (ifp->if_ioctl) {
2059			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2060		}
2061		getmicrotime(&ifp->if_lastchange);
2062		break;
2063
2064	case SIOCSIFCAP:
2065		error = priv_check(td, PRIV_NET_SETIFCAP);
2066		if (error)
2067			return (error);
2068		if (ifp->if_ioctl == NULL)
2069			return (EOPNOTSUPP);
		/* Refuse capabilities the interface does not advertise. */
2070		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2071			return (EINVAL);
2072		error = (*ifp->if_ioctl)(ifp, cmd, data);
2073		if (error == 0)
2074			getmicrotime(&ifp->if_lastchange);
2075		break;
2076
2077#ifdef MAC
2078	case SIOCSIFMAC:
2079		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2080		break;
2081#endif
2082
2083	case SIOCSIFNAME:
2084		error = priv_check(td, PRIV_NET_SETIFNAME);
2085		if (error)
2086			return (error);
		/* Fetch the new name; it must be non-empty and unused. */
2087		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
2088		if (error != 0)
2089			return (error);
2090		if (new_name[0] == '\0')
2091			return (EINVAL);
2092		if (ifunit(new_name) != NULL)
2093			return (EEXIST);
2094
2095		/* Announce the departure of the interface. */
2096		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2097		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2098
2099		log(LOG_INFO, "%s: changing name to '%s'\n",
2100		    ifp->if_xname, new_name);
2101
2102		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		/* The name is also stored in the link-level sockaddr_dl. */
2103		ifa = ifp->if_addr;
2104		IFA_LOCK(ifa);
2105		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2106		namelen = strlen(new_name);
2107		onamelen = sdl->sdl_nlen;
2108		/*
2109		 * Move the address if needed.  This is safe because we
2110		 * allocate space for a name of length IFNAMSIZ when we
2111		 * create this in if_attach().
2112		 */
2113		if (namelen != onamelen) {
2114			bcopy(sdl->sdl_data + onamelen,
2115			    sdl->sdl_data + namelen, sdl->sdl_alen);
2116		}
2117		bcopy(new_name, sdl->sdl_data, namelen);
2118		sdl->sdl_nlen = namelen;
		/*
		 * Rebuild the netmask: clear the bytes that covered the
		 * old name, then set 0xff over the new name's length.
		 */
2119		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2120		bzero(sdl->sdl_data, onamelen);
2121		while (namelen != 0)
2122			sdl->sdl_data[--namelen] = 0xff;
2123		IFA_UNLOCK(ifa);
2124
2125		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2126		/* Announce the return of the interface. */
2127		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2128		break;
2129
2130#ifdef VIMAGE
2131	case SIOCSIFVNET:
2132		error = priv_check(td, PRIV_NET_SETIFVNET);
2133		if (error)
2134			return (error);
2135		error = vi_if_move(td, ifp, ifr->ifr_name, ifr->ifr_jid, NULL);
2136		break;
2137#endif
2138
2139	case SIOCSIFMETRIC:
2140		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2141		if (error)
2142			return (error);
2143		ifp->if_metric = ifr->ifr_metric;
2144		getmicrotime(&ifp->if_lastchange);
2145		break;
2146
2147	case SIOCSIFPHYS:
2148		error = priv_check(td, PRIV_NET_SETIFPHYS);
2149		if (error)
2150			return (error);
2151		if (ifp->if_ioctl == NULL)
2152			return (EOPNOTSUPP);
2153		error = (*ifp->if_ioctl)(ifp, cmd, data);
2154		if (error == 0)
2155			getmicrotime(&ifp->if_lastchange);
2156		break;
2157
2158	case SIOCSIFMTU:
2159	{
		/* Remembered so that a change made by the driver is noticed. */
2160		u_long oldmtu = ifp->if_mtu;
2161
2162		error = priv_check(td, PRIV_NET_SETIFMTU);
2163		if (error)
2164			return (error);
2165		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2166			return (EINVAL);
2167		if (ifp->if_ioctl == NULL)
2168			return (EOPNOTSUPP);
2169		error = (*ifp->if_ioctl)(ifp, cmd, data);
2170		if (error == 0) {
2171			getmicrotime(&ifp->if_lastchange);
2172			rt_ifmsg(ifp);
2173		}
2174		/*
2175		 * If the link MTU changed, do network layer specific procedure.
2176		 */
2177		if (ifp->if_mtu != oldmtu) {
2178#ifdef INET6
2179			nd6_setmtu(ifp);
2180#endif
2181		}
2182		break;
2183	}
2184
2185	case SIOCADDMULTI:
2186	case SIOCDELMULTI:
2187		if (cmd == SIOCADDMULTI)
2188			error = priv_check(td, PRIV_NET_ADDMULTI);
2189		else
2190			error = priv_check(td, PRIV_NET_DELMULTI);
2191		if (error)
2192			return (error);
2193
2194		/* Don't allow group membership on non-multicast interfaces. */
2195		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2196			return (EOPNOTSUPP);
2197
2198		/* Don't let users screw up protocols' entries. */
2199		if (ifr->ifr_addr.sa_family != AF_LINK)
2200			return (EINVAL);
2201
2202		if (cmd == SIOCADDMULTI) {
2203			struct ifmultiaddr *ifma;
2204
2205			/*
2206			 * Userland is only permitted to join groups once
2207			 * via the if_addmulti() KPI, because it cannot hold
2208			 * struct ifmultiaddr * between calls. It may also
2209			 * lose a race while we check if the membership
2210			 * already exists.
2211			 */
2212			IF_ADDR_LOCK(ifp);
2213			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2214			IF_ADDR_UNLOCK(ifp);
2215			if (ifma != NULL)
2216				error = EADDRINUSE;
2217			else
2218				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2219		} else {
2220			error = if_delmulti(ifp, &ifr->ifr_addr);
2221		}
2222		if (error == 0)
2223			getmicrotime(&ifp->if_lastchange);
2224		break;
2225
	/* Privileged set requests that are passed straight to the driver. */
2226	case SIOCSIFPHYADDR:
2227	case SIOCDIFPHYADDR:
2228#ifdef INET6
2229	case SIOCSIFPHYADDR_IN6:
2230#endif
2231	case SIOCSLIFPHYADDR:
2232	case SIOCSIFMEDIA:
2233	case SIOCSIFGENERIC:
2234		error = priv_check(td, PRIV_NET_HWIOCTL);
2235		if (error)
2236			return (error);
2237		if (ifp->if_ioctl == NULL)
2238			return (EOPNOTSUPP);
2239		error = (*ifp->if_ioctl)(ifp, cmd, data);
2240		if (error == 0)
2241			getmicrotime(&ifp->if_lastchange);
2242		break;
2243
2244	case SIOCGIFSTATUS:
		/* Start from an empty status string. */
2245		ifs = (struct ifstat *)data;
2246		ifs->ascii[0] = '\0';
		/* FALLTHROUGH */
2247
	/* Get requests that are passed to the driver without a privilege check. */
2248	case SIOCGIFPSRCADDR:
2249	case SIOCGIFPDSTADDR:
2250	case SIOCGLIFPHYADDR:
2251	case SIOCGIFMEDIA:
2252	case SIOCGIFGENERIC:
2253		if (ifp->if_ioctl == NULL)
2254			return (EOPNOTSUPP);
2255		error = (*ifp->if_ioctl)(ifp, cmd, data);
2256		break;
2257
2258	case SIOCSIFLLADDR:
2259		error = priv_check(td, PRIV_NET_SETLLADDR);
2260		if (error)
2261			return (error);
2262		error = if_setlladdr(ifp,
2263		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2264		break;
2265
2266	case SIOCAIFGROUP:
2267	{
2268		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2269
2270		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2271		if (error)
2272			return (error);
2273		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2274			return (error);
2275		break;
2276	}
2277
2278	case SIOCGIFGROUP:
2279		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
2280			return (error);
2281		break;
2282
2283	case SIOCDIFGROUP:
2284	{
2285		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
2286
2287		error = priv_check(td, PRIV_NET_DELIFGROUP);
2288		if (error)
2289			return (error);
2290		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2291			return (error);
2292		break;
2293	}
2294
2295	default:
		/* Not handled here; reported to the caller as ENOIOCTL. */
2296		error = ENOIOCTL;
2297		break;
2298	}
2299	return (error);
2300}
2301
2302/*
2303 * Interface ioctls.
2304 */
2305int
2306ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2307{
2308	struct ifnet *ifp;
2309	struct ifreq *ifr;
2310	int error;
2311	int oif_flags;
2312
2313	switch (cmd) {
2314	case SIOCGIFCONF:
2315	case OSIOCGIFCONF:
2316#ifdef __amd64__
2317	case SIOCGIFCONF32:
2318#endif
2319		return (ifconf(cmd, data));
2320	}
2321	ifr = (struct ifreq *)data;
2322
2323	switch (cmd) {
2324#ifdef VIMAGE
2325	case SIOCSIFRVNET:
2326		error = priv_check(td, PRIV_NET_SETIFVNET);
2327		if (error)
2328			return (error);
2329		return (vi_if_move(td, NULL, ifr->ifr_name, ifr->ifr_jid,
2330		    NULL));
2331	/*
2332	 * XXX vnet creation will be implemented through the new jail
2333	 * framework - this is just a temporary hack for testing the
2334	 * vnet create / destroy mechanisms.
2335	 */
2336	case SIOCSIFVIMAGE:
2337		error = vi_if_move(td, NULL, NULL, 0, (struct vi_req *) data);
2338		return (error);
2339	case SIOCSPVIMAGE:
2340	case SIOCGPVIMAGE:
2341		error = vi_td_ioctl(cmd, (struct vi_req *) data, td);
2342		return (error);
2343#endif
2344	case SIOCIFCREATE:
2345	case SIOCIFCREATE2:
2346		error = priv_check(td, PRIV_NET_IFCREATE);
2347		if (error)
2348			return (error);
2349		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
2350			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
2351	case SIOCIFDESTROY:
2352		error = priv_check(td, PRIV_NET_IFDESTROY);
2353		if (error)
2354			return (error);
2355		return if_clone_destroy(ifr->ifr_name);
2356
2357	case SIOCIFGCLONERS:
2358		return (if_clone_list((struct if_clonereq *)data));
2359	case SIOCGIFGMEMB:
2360		return (if_getgroupmembers((struct ifgroupreq *)data));
2361	}
2362
2363	ifp = ifunit_ref(ifr->ifr_name);
2364	if (ifp == NULL)
2365		return (ENXIO);
2366
2367	error = ifhwioctl(cmd, ifp, data, td);
2368	if (error != ENOIOCTL) {
2369		if_rele(ifp);
2370		return (error);
2371	}
2372
2373	oif_flags = ifp->if_flags;
2374	if (so->so_proto == NULL) {
2375		if_rele(ifp);
2376		return (EOPNOTSUPP);
2377	}
2378#ifndef COMPAT_43
2379	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
2380								 data,
2381								 ifp, td));
2382	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
2383		error = (*ifp->if_ioctl)(ifp, cmd, data);
2384#else
2385	{
2386		int ocmd = cmd;
2387
2388		switch (cmd) {
2389
2390		case SIOCSIFDSTADDR:
2391		case SIOCSIFADDR:
2392		case SIOCSIFBRDADDR:
2393		case SIOCSIFNETMASK:
2394#if BYTE_ORDER != BIG_ENDIAN
2395			if (ifr->ifr_addr.sa_family == 0 &&
2396			    ifr->ifr_addr.sa_len < 16) {
2397				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
2398				ifr->ifr_addr.sa_len = 16;
2399			}
2400#else
2401			if (ifr->ifr_addr.sa_len == 0)
2402				ifr->ifr_addr.sa_len = 16;
2403#endif
2404			break;
2405
2406		case OSIOCGIFADDR:
2407			cmd = SIOCGIFADDR;
2408			break;
2409
2410		case OSIOCGIFDSTADDR:
2411			cmd = SIOCGIFDSTADDR;
2412			break;
2413
2414		case OSIOCGIFBRDADDR:
2415			cmd = SIOCGIFBRDADDR;
2416			break;
2417
2418		case OSIOCGIFNETMASK:
2419			cmd = SIOCGIFNETMASK;
2420		}
2421		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
2422								   cmd,
2423								   data,
2424								   ifp, td));
2425		if (error == EOPNOTSUPP && ifp != NULL &&
2426		    ifp->if_ioctl != NULL)
2427			error = (*ifp->if_ioctl)(ifp, cmd, data);
2428		switch (ocmd) {
2429
2430		case OSIOCGIFADDR:
2431		case OSIOCGIFDSTADDR:
2432		case OSIOCGIFBRDADDR:
2433		case OSIOCGIFNETMASK:
2434			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
2435
2436		}
2437	}
2438#endif /* COMPAT_43 */
2439
2440	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
2441#ifdef INET6
2442		DELAY(100);/* XXX: temporary workaround for fxp issue*/
2443		if (ifp->if_flags & IFF_UP) {
2444			int s = splimp();
2445			in6_if_up(ifp);
2446			splx(s);
2447		}
2448#endif
2449	}
2450	if_rele(ifp);
2451	return (error);
2452}
2453
2454/*
2455 * The code common to handling reference counted flags,
2456 * e.g., in ifpromisc() and if_allmulti().
2457 * The "pflag" argument can specify a permanent mode flag to check,
2458 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
2459 *
2460 * Only to be used on stack-owned flags, not driver-owned flags.
2461 */
2462static int
2463if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
2464{
2465	struct ifreq ifr;
2466	int error;
2467	int oldflags, oldcount;
2468
2469	/* Sanity checks to catch programming errors */
2470	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
2471	    ("%s: setting driver-owned flag %d", __func__, flag));
2472
2473	if (onswitch)
2474		KASSERT(*refcount >= 0,
2475		    ("%s: increment negative refcount %d for flag %d",
2476		    __func__, *refcount, flag));
2477	else
2478		KASSERT(*refcount > 0,
2479		    ("%s: decrement non-positive refcount %d for flag %d",
2480		    __func__, *refcount, flag));
2481
2482	/* In case this mode is permanent, just touch refcount */
2483	if (ifp->if_flags & pflag) {
2484		*refcount += onswitch ? 1 : -1;
2485		return (0);
2486	}
2487
2488	/* Save ifnet parameters for if_ioctl() may fail */
2489	oldcount = *refcount;
2490	oldflags = ifp->if_flags;
2491
2492	/*
2493	 * See if we aren't the only and touching refcount is enough.
2494	 * Actually toggle interface flag if we are the first or last.
2495	 */
2496	if (onswitch) {
2497		if ((*refcount)++)
2498			return (0);
2499		ifp->if_flags |= flag;
2500	} else {
2501		if (--(*refcount))
2502			return (0);
2503		ifp->if_flags &= ~flag;
2504	}
2505
2506	/* Call down the driver since we've changed interface flags */
2507	if (ifp->if_ioctl == NULL) {
2508		error = EOPNOTSUPP;
2509		goto recover;
2510	}
2511	ifr.ifr_flags = ifp->if_flags & 0xffff;
2512	ifr.ifr_flagshigh = ifp->if_flags >> 16;
2513	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2514	if (error)
2515		goto recover;
2516	/* Notify userland that interface flags have changed */
2517	rt_ifmsg(ifp);
2518	return (0);
2519
2520recover:
2521	/* Recover after driver error */
2522	*refcount = oldcount;
2523	ifp->if_flags = oldflags;
2524	return (error);
2525}
2526
2527/*
2528 * Set/clear promiscuous mode on interface ifp based on the truth value
2529 * of pswitch.  The calls are reference counted so that only the first
2530 * "on" request actually has an effect, as does the final "off" request.
2531 * Results are undefined if the "off" and "on" requests are not matched.
2532 */
2533int
2534ifpromisc(struct ifnet *ifp, int pswitch)
2535{
2536	int error;
2537	int oldflags = ifp->if_flags;
2538
2539	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
2540			   &ifp->if_pcount, pswitch);
2541	/* If promiscuous mode status has changed, log a message */
2542	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
2543		log(LOG_INFO, "%s: promiscuous mode %s\n",
2544		    ifp->if_xname,
2545		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
2546	return (error);
2547}
2548
2549/*
2550 * Return interface configuration
2551 * of system.  List may be used
2552 * in later ioctl's (above) to get
2553 * other information.
2554 */
2555/*ARGSUSED*/
2556static int
2557ifconf(u_long cmd, caddr_t data)
2558{
2559	INIT_VNET_NET(curvnet);
2560	struct ifconf *ifc = (struct ifconf *)data;
2561#ifdef __amd64__
2562	struct ifconf32 *ifc32 = (struct ifconf32 *)data;
2563	struct ifconf ifc_swab;
2564#endif
2565	struct ifnet *ifp;
2566	struct ifaddr *ifa;
2567	struct ifreq ifr;
2568	struct sbuf *sb;
2569	int error, full = 0, valid_len, max_len;
2570
2571#ifdef __amd64__
2572	if (cmd == SIOCGIFCONF32) {
2573		ifc_swab.ifc_len = ifc32->ifc_len;
2574		ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
2575		ifc = &ifc_swab;
2576	}
2577#endif
2578	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
2579	max_len = MAXPHYS - 1;
2580
2581	/* Prevent hostile input from being able to crash the system */
2582	if (ifc->ifc_len <= 0)
2583		return (EINVAL);
2584
2585again:
2586	if (ifc->ifc_len <= max_len) {
2587		max_len = ifc->ifc_len;
2588		full = 1;
2589	}
2590	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
2591	max_len = 0;
2592	valid_len = 0;
2593
2594	IFNET_RLOCK();		/* could sleep XXX */
2595	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2596		int addrs;
2597
2598		/*
2599		 * Zero the ifr_name buffer to make sure we don't
2600		 * disclose the contents of the stack.
2601		 */
2602		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
2603
2604		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
2605		    >= sizeof(ifr.ifr_name)) {
2606			sbuf_delete(sb);
2607			IFNET_RUNLOCK();
2608			return (ENAMETOOLONG);
2609		}
2610
2611		addrs = 0;
2612		IF_ADDR_LOCK(ifp);
2613		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2614			struct sockaddr *sa = ifa->ifa_addr;
2615
2616			if (prison_if(curthread->td_ucred, sa) != 0)
2617				continue;
2618			addrs++;
2619#ifdef COMPAT_43
2620			if (cmd == OSIOCGIFCONF) {
2621				struct osockaddr *osa =
2622					 (struct osockaddr *)&ifr.ifr_addr;
2623				ifr.ifr_addr = *sa;
2624				osa->sa_family = sa->sa_family;
2625				sbuf_bcat(sb, &ifr, sizeof(ifr));
2626				max_len += sizeof(ifr);
2627			} else
2628#endif
2629			if (sa->sa_len <= sizeof(*sa)) {
2630				ifr.ifr_addr = *sa;
2631				sbuf_bcat(sb, &ifr, sizeof(ifr));
2632				max_len += sizeof(ifr);
2633			} else {
2634				sbuf_bcat(sb, &ifr,
2635				    offsetof(struct ifreq, ifr_addr));
2636				max_len += offsetof(struct ifreq, ifr_addr);
2637				sbuf_bcat(sb, sa, sa->sa_len);
2638				max_len += sa->sa_len;
2639			}
2640
2641			if (!sbuf_overflowed(sb))
2642				valid_len = sbuf_len(sb);
2643		}
2644		IF_ADDR_UNLOCK(ifp);
2645		if (addrs == 0) {
2646			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2647			sbuf_bcat(sb, &ifr, sizeof(ifr));
2648			max_len += sizeof(ifr);
2649
2650			if (!sbuf_overflowed(sb))
2651				valid_len = sbuf_len(sb);
2652		}
2653	}
2654	IFNET_RUNLOCK();
2655
2656	/*
2657	 * If we didn't allocate enough space (uncommon), try again.  If
2658	 * we have already allocated as much space as we are allowed,
2659	 * return what we've got.
2660	 */
2661	if (valid_len != max_len && !full) {
2662		sbuf_delete(sb);
2663		goto again;
2664	}
2665
2666	ifc->ifc_len = valid_len;
2667#ifdef __amd64__
2668	if (cmd == SIOCGIFCONF32)
2669		ifc32->ifc_len = valid_len;
2670#endif
2671	sbuf_finish(sb);
2672	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
2673	sbuf_delete(sb);
2674	return (error);
2675}
2676
2677/*
2678 * Just like ifpromisc(), but for all-multicast-reception mode.
2679 */
2680int
2681if_allmulti(struct ifnet *ifp, int onswitch)
2682{
2683
2684	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
2685}
2686
2687struct ifmultiaddr *
2688if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
2689{
2690	struct ifmultiaddr *ifma;
2691
2692	IF_ADDR_LOCK_ASSERT(ifp);
2693
2694	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2695		if (sa->sa_family == AF_LINK) {
2696			if (sa_dl_equal(ifma->ifma_addr, sa))
2697				break;
2698		} else {
2699			if (sa_equal(ifma->ifma_addr, sa))
2700				break;
2701		}
2702	}
2703
2704	return ifma;
2705}
2706
2707/*
2708 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
2709 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
2710 * the ifnet multicast address list here, so the caller must do that and
2711 * other setup work (such as notifying the device driver).  The reference
2712 * count is initialized to 1.
2713 */
2714static struct ifmultiaddr *
2715if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
2716    int mflags)
2717{
2718	struct ifmultiaddr *ifma;
2719	struct sockaddr *dupsa;
2720
2721	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
2722	    M_ZERO);
2723	if (ifma == NULL)
2724		return (NULL);
2725
2726	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
2727	if (dupsa == NULL) {
2728		free(ifma, M_IFMADDR);
2729		return (NULL);
2730	}
2731	bcopy(sa, dupsa, sa->sa_len);
2732	ifma->ifma_addr = dupsa;
2733
2734	ifma->ifma_ifp = ifp;
2735	ifma->ifma_refcount = 1;
2736	ifma->ifma_protospec = NULL;
2737
2738	if (llsa == NULL) {
2739		ifma->ifma_lladdr = NULL;
2740		return (ifma);
2741	}
2742
2743	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
2744	if (dupsa == NULL) {
2745		free(ifma->ifma_addr, M_IFMADDR);
2746		free(ifma, M_IFMADDR);
2747		return (NULL);
2748	}
2749	bcopy(llsa, dupsa, llsa->sa_len);
2750	ifma->ifma_lladdr = dupsa;
2751
2752	return (ifma);
2753}
2754
2755/*
2756 * if_freemulti: free ifmultiaddr structure and possibly attached related
2757 * addresses.  The caller is responsible for implementing reference
2758 * counting, notifying the driver, handling routing messages, and releasing
2759 * any dependent link layer state.
2760 */
2761static void
2762if_freemulti(struct ifmultiaddr *ifma)
2763{
2764
2765	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
2766	    ifma->ifma_refcount));
2767	KASSERT(ifma->ifma_protospec == NULL,
2768	    ("if_freemulti: protospec not NULL"));
2769
2770	if (ifma->ifma_lladdr != NULL)
2771		free(ifma->ifma_lladdr, M_IFMADDR);
2772	free(ifma->ifma_addr, M_IFMADDR);
2773	free(ifma, M_IFMADDR);
2774}
2775
2776/*
2777 * Register an additional multicast address with a network interface.
2778 *
2779 * - If the address is already present, bump the reference count on the
2780 *   address and return.
2781 * - If the address is not link-layer, look up a link layer address.
2782 * - Allocate address structures for one or both addresses, and attach to the
2783 *   multicast address list on the interface.  If automatically adding a link
2784 *   layer address, the protocol address will own a reference to the link
2785 *   layer address, to be freed when it is freed.
2786 * - Notify the network device driver of an addition to the multicast address
2787 *   list.
2788 *
2789 * 'sa' points to caller-owned memory with the desired multicast address.
2790 *
2791 * 'retifma' will be used to return a pointer to the resulting multicast
2792 * address reference, if desired.
2793 */
2794int
2795if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
2796    struct ifmultiaddr **retifma)
2797{
2798	struct ifmultiaddr *ifma, *ll_ifma;
2799	struct sockaddr *llsa;
2800	int error;
2801
2802	/*
2803	 * If the address is already present, return a new reference to it;
2804	 * otherwise, allocate storage and set up a new address.
2805	 */
2806	IF_ADDR_LOCK(ifp);
2807	ifma = if_findmulti(ifp, sa);
2808	if (ifma != NULL) {
2809		ifma->ifma_refcount++;
2810		if (retifma != NULL)
2811			*retifma = ifma;
2812		IF_ADDR_UNLOCK(ifp);
2813		return (0);
2814	}
2815
2816	/*
2817	 * The address isn't already present; resolve the protocol address
2818	 * into a link layer address, and then look that up, bump its
2819	 * refcount or allocate an ifma for that also.  If 'llsa' was
2820	 * returned, we will need to free it later.
2821	 */
2822	llsa = NULL;
2823	ll_ifma = NULL;
2824	if (ifp->if_resolvemulti != NULL) {
2825		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2826		if (error)
2827			goto unlock_out;
2828	}
2829
2830	/*
2831	 * Allocate the new address.  Don't hook it up yet, as we may also
2832	 * need to allocate a link layer multicast address.
2833	 */
2834	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
2835	if (ifma == NULL) {
2836		error = ENOMEM;
2837		goto free_llsa_out;
2838	}
2839
2840	/*
2841	 * If a link layer address is found, we'll need to see if it's
2842	 * already present in the address list, or allocate is as well.
2843	 * When this block finishes, the link layer address will be on the
2844	 * list.
2845	 */
2846	if (llsa != NULL) {
2847		ll_ifma = if_findmulti(ifp, llsa);
2848		if (ll_ifma == NULL) {
2849			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
2850			if (ll_ifma == NULL) {
2851				--ifma->ifma_refcount;
2852				if_freemulti(ifma);
2853				error = ENOMEM;
2854				goto free_llsa_out;
2855			}
2856			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
2857			    ifma_link);
2858		} else
2859			ll_ifma->ifma_refcount++;
2860		ifma->ifma_llifma = ll_ifma;
2861	}
2862
2863	/*
2864	 * We now have a new multicast address, ifma, and possibly a new or
2865	 * referenced link layer address.  Add the primary address to the
2866	 * ifnet address list.
2867	 */
2868	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2869
2870	if (retifma != NULL)
2871		*retifma = ifma;
2872
2873	/*
2874	 * Must generate the message while holding the lock so that 'ifma'
2875	 * pointer is still valid.
2876	 */
2877	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2878	IF_ADDR_UNLOCK(ifp);
2879
2880	/*
2881	 * We are certain we have added something, so call down to the
2882	 * interface to let them know about it.
2883	 */
2884	if (ifp->if_ioctl != NULL) {
2885		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
2886	}
2887
2888	if (llsa != NULL)
2889		free(llsa, M_IFMADDR);
2890
2891	return (0);
2892
2893free_llsa_out:
2894	if (llsa != NULL)
2895		free(llsa, M_IFMADDR);
2896
2897unlock_out:
2898	IF_ADDR_UNLOCK(ifp);
2899	return (error);
2900}
2901
2902/*
2903 * Delete a multicast group membership by network-layer group address.
2904 *
2905 * Returns ENOENT if the entry could not be found. If ifp no longer
2906 * exists, results are undefined. This entry point should only be used
2907 * from subsystems which do appropriate locking to hold ifp for the
2908 * duration of the call.
2909 * Network-layer protocol domains must use if_delmulti_ifma().
2910 */
2911int
2912if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2913{
2914	struct ifmultiaddr *ifma;
2915	int lastref;
2916#ifdef INVARIANTS
2917	struct ifnet *oifp;
2918	INIT_VNET_NET(ifp->if_vnet);
2919
2920	IFNET_RLOCK();
2921	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
2922		if (ifp == oifp)
2923			break;
2924	if (ifp != oifp)
2925		ifp = NULL;
2926	IFNET_RUNLOCK();
2927
2928	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
2929#endif
2930	if (ifp == NULL)
2931		return (ENOENT);
2932
2933	IF_ADDR_LOCK(ifp);
2934	lastref = 0;
2935	ifma = if_findmulti(ifp, sa);
2936	if (ifma != NULL)
2937		lastref = if_delmulti_locked(ifp, ifma, 0);
2938	IF_ADDR_UNLOCK(ifp);
2939
2940	if (ifma == NULL)
2941		return (ENOENT);
2942
2943	if (lastref && ifp->if_ioctl != NULL) {
2944		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
2945	}
2946
2947	return (0);
2948}
2949
2950/*
2951 * Delete a multicast group membership by group membership pointer.
2952 * Network-layer protocol domains must use this routine.
2953 *
2954 * It is safe to call this routine if the ifp disappeared.
2955 */
2956void
2957if_delmulti_ifma(struct ifmultiaddr *ifma)
2958{
2959#ifdef DIAGNOSTIC
2960	INIT_VNET_NET(curvnet);
2961#endif
2962	struct ifnet *ifp;
2963	int lastref;
2964
2965	ifp = ifma->ifma_ifp;
2966#ifdef DIAGNOSTIC
2967	if (ifp == NULL) {
2968		printf("%s: ifma_ifp seems to be detached\n", __func__);
2969	} else {
2970		struct ifnet *oifp;
2971
2972		IFNET_RLOCK();
2973		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
2974			if (ifp == oifp)
2975				break;
2976		if (ifp != oifp) {
2977			printf("%s: ifnet %p disappeared\n", __func__, ifp);
2978			ifp = NULL;
2979		}
2980		IFNET_RUNLOCK();
2981	}
2982#endif
2983	/*
2984	 * If and only if the ifnet instance exists: Acquire the address lock.
2985	 */
2986	if (ifp != NULL)
2987		IF_ADDR_LOCK(ifp);
2988
2989	lastref = if_delmulti_locked(ifp, ifma, 0);
2990
2991	if (ifp != NULL) {
2992		/*
2993		 * If and only if the ifnet instance exists:
2994		 *  Release the address lock.
2995		 *  If the group was left: update the hardware hash filter.
2996		 */
2997		IF_ADDR_UNLOCK(ifp);
2998		if (lastref && ifp->if_ioctl != NULL) {
2999			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3000		}
3001	}
3002}
3003
3004/*
3005 * Perform deletion of network-layer and/or link-layer multicast address.
3006 *
3007 * Return 0 if the reference count was decremented.
3008 * Return 1 if the final reference was released, indicating that the
3009 * hardware hash filter should be reprogrammed.
3010 */
3011static int
3012if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3013{
3014	struct ifmultiaddr *ll_ifma;
3015
3016	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3017		KASSERT(ifma->ifma_ifp == ifp,
3018		    ("%s: inconsistent ifp %p", __func__, ifp));
3019		IF_ADDR_LOCK_ASSERT(ifp);
3020	}
3021
3022	ifp = ifma->ifma_ifp;
3023
3024	/*
3025	 * If the ifnet is detaching, null out references to ifnet,
3026	 * so that upper protocol layers will notice, and not attempt
3027	 * to obtain locks for an ifnet which no longer exists. The
3028	 * routing socket announcement must happen before the ifnet
3029	 * instance is detached from the system.
3030	 */
3031	if (detaching) {
3032#ifdef DIAGNOSTIC
3033		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3034#endif
3035		/*
3036		 * ifp may already be nulled out if we are being reentered
3037		 * to delete the ll_ifma.
3038		 */
3039		if (ifp != NULL) {
3040			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3041			ifma->ifma_ifp = NULL;
3042		}
3043	}
3044
3045	if (--ifma->ifma_refcount > 0)
3046		return 0;
3047
3048	/*
3049	 * If this ifma is a network-layer ifma, a link-layer ifma may
3050	 * have been associated with it. Release it first if so.
3051	 */
3052	ll_ifma = ifma->ifma_llifma;
3053	if (ll_ifma != NULL) {
3054		KASSERT(ifma->ifma_lladdr != NULL,
3055		    ("%s: llifma w/o lladdr", __func__));
3056		if (detaching)
3057			ll_ifma->ifma_ifp = NULL;	/* XXX */
3058		if (--ll_ifma->ifma_refcount == 0) {
3059			if (ifp != NULL) {
3060				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
3061				    ifma_link);
3062			}
3063			if_freemulti(ll_ifma);
3064		}
3065	}
3066
3067	if (ifp != NULL)
3068		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
3069
3070	if_freemulti(ifma);
3071
3072	/*
3073	 * The last reference to this instance of struct ifmultiaddr
3074	 * was released; the hardware should be notified of this change.
3075	 */
3076	return 1;
3077}
3078
3079/*
3080 * Set the link layer address on an interface.
3081 *
3082 * At this time we only support certain types of interfaces,
3083 * and we don't allow the length of the address to change.
3084 */
3085int
3086if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3087{
3088	struct sockaddr_dl *sdl;
3089	struct ifaddr *ifa;
3090	struct ifreq ifr;
3091
3092	ifa = ifp->if_addr;
3093	if (ifa == NULL)
3094		return (EINVAL);
3095	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3096	if (sdl == NULL)
3097		return (EINVAL);
3098	if (len != sdl->sdl_alen)	/* don't allow length to change */
3099		return (EINVAL);
3100	switch (ifp->if_type) {
3101	case IFT_ETHER:
3102	case IFT_FDDI:
3103	case IFT_XETHER:
3104	case IFT_ISO88025:
3105	case IFT_L2VLAN:
3106	case IFT_BRIDGE:
3107	case IFT_ARCNET:
3108	case IFT_IEEE8023ADLAG:
3109	case IFT_IEEE80211:
3110		bcopy(lladdr, LLADDR(sdl), len);
3111		break;
3112	default:
3113		return (ENODEV);
3114	}
3115	/*
3116	 * If the interface is already up, we need
3117	 * to re-init it in order to reprogram its
3118	 * address filter.
3119	 */
3120	if ((ifp->if_flags & IFF_UP) != 0) {
3121		if (ifp->if_ioctl) {
3122			ifp->if_flags &= ~IFF_UP;
3123			ifr.ifr_flags = ifp->if_flags & 0xffff;
3124			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3125			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3126			ifp->if_flags |= IFF_UP;
3127			ifr.ifr_flags = ifp->if_flags & 0xffff;
3128			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3129			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3130		}
3131#ifdef INET
3132		/*
3133		 * Also send gratuitous ARPs to notify other nodes about
3134		 * the address change.
3135		 */
3136		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3137			if (ifa->ifa_addr->sa_family == AF_INET)
3138				arp_ifinit(ifp, ifa);
3139		}
3140#endif
3141	}
3142	return (0);
3143}
3144
3145/*
3146 * The name argument must be a pointer to storage which will last as
3147 * long as the interface does.  For physical devices, the result of
3148 * device_get_name(dev) is a good choice and for pseudo-devices a
3149 * static string works well.
3150 */
3151void
3152if_initname(struct ifnet *ifp, const char *name, int unit)
3153{
3154	ifp->if_dname = name;
3155	ifp->if_dunit = unit;
3156	if (unit != IF_DUNIT_NONE)
3157		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3158	else
3159		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3160}
3161
3162int
3163if_printf(struct ifnet *ifp, const char * fmt, ...)
3164{
3165	va_list ap;
3166	int retval;
3167
3168	retval = printf("%s: ", ifp->if_xname);
3169	va_start(ap, fmt);
3170	retval += vprintf(fmt, ap);
3171	va_end(ap);
3172	return (retval);
3173}
3174
3175void
3176if_start(struct ifnet *ifp)
3177{
3178
3179	(*(ifp)->if_start)(ifp);
3180}
3181
/*
 * Backwards compatibility transmit routine for drivers that have not
 * implemented their own: hands the mbuf to IFQ_HANDOFF() and returns
 * the error code that macro stores.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int rc;

	IFQ_HANDOFF(ifp, m, rc);
	return (rc);
}
3194
3195int
3196if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3197{
3198	int active = 0;
3199
3200	IF_LOCK(ifq);
3201	if (_IF_QFULL(ifq)) {
3202		_IF_DROP(ifq);
3203		IF_UNLOCK(ifq);
3204		m_freem(m);
3205		return (0);
3206	}
3207	if (ifp != NULL) {
3208		ifp->if_obytes += m->m_pkthdr.len + adjust;
3209		if (m->m_flags & (M_BCAST|M_MCAST))
3210			ifp->if_omcasts++;
3211		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
3212	}
3213	_IF_ENQUEUE(ifq, m);
3214	IF_UNLOCK(ifq);
3215	if (ifp != NULL && !active)
3216		(*(ifp)->if_start)(ifp);
3217	return (1);
3218}
3219
3220void
3221if_register_com_alloc(u_char type,
3222    if_com_alloc_t *a, if_com_free_t *f)
3223{
3224
3225	KASSERT(if_com_alloc[type] == NULL,
3226	    ("if_register_com_alloc: %d already registered", type));
3227	KASSERT(if_com_free[type] == NULL,
3228	    ("if_register_com_alloc: %d free already registered", type));
3229
3230	if_com_alloc[type] = a;
3231	if_com_free[type] = f;
3232}
3233
3234void
3235if_deregister_com_alloc(u_char type)
3236{
3237
3238	KASSERT(if_com_alloc[type] != NULL,
3239	    ("if_deregister_com_alloc: %d not registered", type));
3240	KASSERT(if_com_free[type] != NULL,
3241	    ("if_deregister_com_alloc: %d free not registered", type));
3242	if_com_alloc[type] = NULL;
3243	if_com_free[type] = NULL;
3244}
3245