if.c revision 134399
1/*
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: head/sys/net/if.c 134399 2004-08-27 19:42:40Z brooks $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36#include "opt_mac.h"
37
38#include <sys/param.h>
39#include <sys/conf.h>
40#include <sys/mac.h>
41#include <sys/malloc.h>
42#include <sys/bus.h>
43#include <sys/mbuf.h>
44#include <sys/systm.h>
45#include <sys/proc.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48#include <sys/protosw.h>
49#include <sys/kernel.h>
50#include <sys/sockio.h>
51#include <sys/syslog.h>
52#include <sys/sysctl.h>
53#include <sys/taskqueue.h>
54#include <sys/domain.h>
55#include <sys/jail.h>
56#include <machine/stdarg.h>
57
58#include <net/if.h>
59#include <net/if_arp.h>
60#include <net/if_clone.h>
61#include <net/if_dl.h>
62#include <net/if_types.h>
63#include <net/if_var.h>
64#include <net/radix.h>
65#include <net/route.h>
66
67#if defined(INET) || defined(INET6)
68/*XXX*/
69#include <netinet/in.h>
70#include <netinet/in_var.h>
71#ifdef INET6
72#include <netinet6/in6_var.h>
73#include <netinet6/in6_ifattach.h>
74#endif
75#endif
76#ifdef INET
77#include <netinet/if_ether.h>
78#endif
79
80struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
81
82static void	if_attachdomain(void *);
83static void	if_attachdomain1(struct ifnet *);
84static int	ifconf(u_long, caddr_t);
85static void	if_grow(void);
86static void	if_init(void *);
87static void	if_check(void *);
88static int	if_findindex(struct ifnet *);
89static void	if_qflush(struct ifaltq *);
90static void	if_route(struct ifnet *, int flag, int fam);
91static void	if_slowtimo(void *);
92static void	if_unroute(struct ifnet *, int flag, int fam);
93static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
94static int	if_rtdel(struct radix_node *, void *);
95static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
96static void	if_start_deferred(void *context, int pending);
97#ifdef INET6
98/*
99 * XXX: declare here to avoid to include many inet6 related files..
100 * should be more generalized?
101 */
102extern void	nd6_setmtu(struct ifnet *);
103#endif
104
105int	if_index = 0;
106struct	ifindex_entry *ifindex_table = NULL;
107int	ifqmaxlen = IFQ_MAXLEN;
108struct	ifnethead ifnet;	/* depend on static init XXX */
109struct	mtx ifnet_lock;
110
111static int	if_indexlim = 8;
112static struct	knlist ifklist;
113
114static void	filt_netdetach(struct knote *kn);
115static int	filt_netdev(struct knote *kn, long hint);
116
117static struct filterops netdev_filtops =
118    { 1, NULL, filt_netdetach, filt_netdev };
119
120/*
121 * System initialization
122 */
123SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
124SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
125
126MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
127MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
128
129static d_open_t		netopen;
130static d_close_t	netclose;
131static d_ioctl_t	netioctl;
132static d_kqfilter_t	netkqfilter;
133
134static struct cdevsw net_cdevsw = {
135	.d_version =	D_VERSION,
136	.d_flags =	D_NEEDGIANT,
137	.d_open =	netopen,
138	.d_close =	netclose,
139	.d_ioctl =	netioctl,
140	.d_name =	"net",
141	.d_kqfilter =	netkqfilter,
142};
143
/*
 * Open entry point for the net devices.  No per-open state is kept, so
 * every open simply succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{

	return (0);
}
149
/*
 * Close entry point for the net devices.  There is nothing to tear down,
 * so the call always reports success.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
155
156static int
157netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
158{
159	struct ifnet *ifp;
160	int error, idx;
161
162	/* only support interface specific ioctls */
163	if (IOCGROUP(cmd) != 'i')
164		return (EOPNOTSUPP);
165	idx = minor(dev);
166	if (idx == 0) {
167		/*
168		 * special network device, not interface.
169		 */
170		if (cmd == SIOCGIFCONF)
171			return (ifconf(cmd, data));	/* XXX remove cmd */
172		return (EOPNOTSUPP);
173	}
174
175	ifp = ifnet_byindex(idx);
176	if (ifp == NULL)
177		return (ENXIO);
178
179	error = ifhwioctl(cmd, ifp, data, td);
180	if (error == ENOIOCTL)
181		error = EOPNOTSUPP;
182	return (error);
183}
184
185static int
186netkqfilter(struct cdev *dev, struct knote *kn)
187{
188	struct knlist *klist;
189	struct ifnet *ifp;
190	int idx;
191
192	switch (kn->kn_filter) {
193	case EVFILT_NETDEV:
194		kn->kn_fop = &netdev_filtops;
195		break;
196	default:
197		return (1);
198	}
199
200	idx = minor(dev);
201	if (idx == 0) {
202		klist = &ifklist;
203	} else {
204		ifp = ifnet_byindex(idx);
205		if (ifp == NULL)
206			return (1);
207		klist = &ifp->if_klist;
208	}
209
210	kn->kn_hook = (caddr_t)klist;
211
212	knlist_add(klist, kn, 0);
213
214	return (0);
215}
216
217static void
218filt_netdetach(struct knote *kn)
219{
220	struct knlist *klist = (struct knlist *)kn->kn_hook;
221
222	if (kn->kn_status & KN_DETACHED)
223		return;
224
225	knlist_remove(klist, kn, 0);
226}
227
228static int
229filt_netdev(struct knote *kn, long hint)
230{
231	struct knlist *klist = (struct knlist *)kn->kn_hook;
232
233	/*
234	 * Currently NOTE_EXIT is abused to indicate device detach.
235	 */
236	if (hint == NOTE_EXIT) {
237		kn->kn_data = NOTE_LINKINV;
238		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
239		knlist_remove_inevent(klist, kn);
240		return (1);
241	}
242	if (hint != 0)
243		kn->kn_data = hint;			/* current status */
244	if (kn->kn_sfflags & hint)
245		kn->kn_fflags |= hint;
246	return (kn->kn_fflags != 0);
247}
248
249/*
250 * Network interface utility routines.
251 *
252 * Routines with ifa_ifwith* names take sockaddr *'s as
253 * parameters.
254 */
255/* ARGSUSED*/
256static void
257if_init(void *dummy __unused)
258{
259
260	IFNET_LOCK_INIT();
261	TAILQ_INIT(&ifnet);
262	knlist_init(&ifklist, NULL);
263	if_grow();				/* create initial table */
264	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
265	    UID_ROOT, GID_WHEEL, 0600, "network");
266	if_clone_init();
267}
268
269static void
270if_grow(void)
271{
272	u_int n;
273	struct ifindex_entry *e;
274
275	if_indexlim <<= 1;
276	n = if_indexlim * sizeof(*e);
277	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
278	if (ifindex_table != NULL) {
279		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
280		free((caddr_t)ifindex_table, M_IFADDR);
281	}
282	ifindex_table = e;
283}
284
285/* ARGSUSED*/
286static void
287if_check(void *dummy __unused)
288{
289	struct ifnet *ifp;
290	int s;
291
292	s = splimp();
293	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
294	TAILQ_FOREACH(ifp, &ifnet, if_link) {
295		if (ifp->if_snd.ifq_maxlen == 0) {
296			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
297			ifp->if_snd.ifq_maxlen = ifqmaxlen;
298		}
299		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
300			if_printf(ifp,
301			    "XXX: driver didn't initialize queue mtx\n");
302			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
303			    MTX_NETWORK_LOCK, MTX_DEF);
304		}
305	}
306	IFNET_RUNLOCK();
307	splx(s);
308	if_slowtimo(0);
309}
310
311static int
312if_findindex(struct ifnet *ifp)
313{
314	int i, unit;
315	char eaddr[18], devname[32];
316	const char *name, *p;
317
318	switch (ifp->if_type) {
319	case IFT_ETHER:			/* these types use struct arpcom */
320	case IFT_FDDI:
321	case IFT_XETHER:
322	case IFT_ISO88025:
323	case IFT_L2VLAN:
324		snprintf(eaddr, 18, "%6D", IFP2AC(ifp)->ac_enaddr, ":");
325		break;
326	default:
327		eaddr[0] = '\0';
328		break;
329	}
330	strlcpy(devname, ifp->if_xname, sizeof(devname));
331	name = net_cdevsw.d_name;
332	i = 0;
333	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
334		if (resource_string_value(name, unit, "ether", &p) == 0)
335			if (strcmp(p, eaddr) == 0)
336				goto found;
337		if (resource_string_value(name, unit, "dev", &p) == 0)
338			if (strcmp(p, devname) == 0)
339				goto found;
340	}
341	unit = 0;
342found:
343	if (unit != 0) {
344		if (ifaddr_byindex(unit) == NULL)
345			return (unit);
346		printf("%s%d in use, cannot hardwire it to %s.\n",
347		    name, unit, devname);
348	}
349	for (unit = 1; ; unit++) {
350		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
351			continue;
352		if (resource_string_value(name, unit, "ether", &p) == 0 ||
353		    resource_string_value(name, unit, "dev", &p) == 0)
354			continue;
355		break;
356	}
357	return (unit);
358}
359
360/*
361 * Attach an interface to the
362 * list of "active" interfaces.
363 */
364void
365if_attach(struct ifnet *ifp)
366{
367	unsigned socksize, ifasize;
368	int namelen, masklen;
369	struct sockaddr_dl *sdl;
370	struct ifaddr *ifa;
371
372	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
373	IF_AFDATA_LOCK_INIT(ifp);
374	ifp->if_afdata_initialized = 0;
375	IFNET_WLOCK();
376	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
377	IFNET_WUNLOCK();
378	/*
379	 * XXX -
380	 * The old code would work if the interface passed a pre-existing
381	 * chain of ifaddrs to this code.  We don't trust our callers to
382	 * properly initialize the tailq, however, so we no longer allow
383	 * this unlikely case.
384	 */
385	TAILQ_INIT(&ifp->if_addrhead);
386	TAILQ_INIT(&ifp->if_prefixhead);
387	TAILQ_INIT(&ifp->if_multiaddrs);
388	knlist_init(&ifp->if_klist, NULL);
389	getmicrotime(&ifp->if_lastchange);
390
391#ifdef MAC
392	mac_init_ifnet(ifp);
393	mac_create_ifnet(ifp);
394#endif
395
396	ifp->if_index = if_findindex(ifp);
397	if (ifp->if_index > if_index)
398		if_index = ifp->if_index;
399	if (if_index >= if_indexlim)
400		if_grow();
401
402	ifnet_byindex(ifp->if_index) = ifp;
403	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
404	    unit2minor(ifp->if_index),
405	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
406	    net_cdevsw.d_name, ifp->if_xname);
407	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
408	    net_cdevsw.d_name, ifp->if_index);
409
410	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
411
412	/*
413	 * create a Link Level name for this device
414	 */
415	namelen = strlen(ifp->if_xname);
416	/*
417	 * Always save enough space for any possiable name so we can do
418	 * a rename in place later.
419	 */
420	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
421	socksize = masklen + ifp->if_addrlen;
422	if (socksize < sizeof(*sdl))
423		socksize = sizeof(*sdl);
424	socksize = roundup2(socksize, sizeof(long));
425	ifasize = sizeof(*ifa) + 2 * socksize;
426	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
427	IFA_LOCK_INIT(ifa);
428	sdl = (struct sockaddr_dl *)(ifa + 1);
429	sdl->sdl_len = socksize;
430	sdl->sdl_family = AF_LINK;
431	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
432	sdl->sdl_nlen = namelen;
433	sdl->sdl_index = ifp->if_index;
434	sdl->sdl_type = ifp->if_type;
435	ifaddr_byindex(ifp->if_index) = ifa;
436	ifa->ifa_ifp = ifp;
437	ifa->ifa_rtrequest = link_rtrequest;
438	ifa->ifa_addr = (struct sockaddr *)sdl;
439	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
440	ifa->ifa_netmask = (struct sockaddr *)sdl;
441	sdl->sdl_len = masklen;
442	while (namelen != 0)
443		sdl->sdl_data[--namelen] = 0xff;
444	ifa->ifa_refcnt = 1;
445	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
446	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
447	ifp->if_snd.altq_type = 0;
448	ifp->if_snd.altq_disc = NULL;
449	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
450	ifp->if_snd.altq_tbr  = NULL;
451	ifp->if_snd.altq_ifp  = ifp;
452
453	if (domains)
454		if_attachdomain1(ifp);
455
456	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
457
458	/* Announce the interface. */
459	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
460}
461
462static void
463if_attachdomain(void *dummy)
464{
465	struct ifnet *ifp;
466	int s;
467
468	s = splnet();
469	TAILQ_FOREACH(ifp, &ifnet, if_link)
470		if_attachdomain1(ifp);
471	splx(s);
472}
473SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
474    if_attachdomain, NULL);
475
476static void
477if_attachdomain1(struct ifnet *ifp)
478{
479	struct domain *dp;
480	int s;
481
482	s = splnet();
483
484	/*
485	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
486	 * cannot lock ifp->if_afdata initialization, entirely.
487	 */
488	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
489		splx(s);
490		return;
491	}
492	if (ifp->if_afdata_initialized) {
493		IF_AFDATA_UNLOCK(ifp);
494		splx(s);
495		return;
496	}
497	ifp->if_afdata_initialized = 1;
498	IF_AFDATA_UNLOCK(ifp);
499
500	/* address family dependent data region */
501	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
502	for (dp = domains; dp; dp = dp->dom_next) {
503		if (dp->dom_ifattach)
504			ifp->if_afdata[dp->dom_family] =
505			    (*dp->dom_ifattach)(ifp);
506	}
507
508	splx(s);
509}
510
511/*
512 * Detach an interface, removing it from the
513 * list of "active" interfaces.
514 */
515void
516if_detach(struct ifnet *ifp)
517{
518	struct ifaddr *ifa, *next;
519	struct radix_node_head	*rnh;
520	int s;
521	int i;
522	struct domain *dp;
523 	struct ifnet *iter;
524 	int found;
525
526	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
527	/*
528	 * Remove routes and flush queues.
529	 */
530	s = splnet();
531	if_down(ifp);
532#ifdef ALTQ
533	if (ALTQ_IS_ENABLED(&ifp->if_snd))
534		altq_disable(&ifp->if_snd);
535	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
536		altq_detach(&ifp->if_snd);
537#endif
538
539	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = next) {
540		next = TAILQ_NEXT(ifa, ifa_link);
541
542		if (ifa->ifa_addr->sa_family == AF_LINK)
543			continue;
544#ifdef INET
545		/* XXX: Ugly!! ad hoc just for INET */
546		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
547			struct ifaliasreq ifr;
548
549			bzero(&ifr, sizeof(ifr));
550			ifr.ifra_addr = *ifa->ifa_addr;
551			if (ifa->ifa_dstaddr)
552				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
553			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
554			    NULL) == 0)
555				continue;
556		}
557#endif /* INET */
558#ifdef INET6
559		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
560			in6_purgeaddr(ifa);
561			/* ifp_addrhead is already updated */
562			continue;
563		}
564#endif /* INET6 */
565		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
566		IFAFREE(ifa);
567	}
568
569#ifdef INET6
570	/*
571	 * Remove all IPv6 kernel structs related to ifp.  This should be done
572	 * before removing routing entries below, since IPv6 interface direct
573	 * routes are expected to be removed by the IPv6-specific kernel API.
574	 * Otherwise, the kernel will detect some inconsistency and bark it.
575	 */
576	in6_ifdetach(ifp);
577#endif
578	/*
579	 * Remove address from ifindex_table[] and maybe decrement if_index.
580	 * Clean up all addresses.
581	 */
582	ifnet_byindex(ifp->if_index) = NULL;
583	ifaddr_byindex(ifp->if_index) = NULL;
584	destroy_dev(ifdev_byindex(ifp->if_index));
585	ifdev_byindex(ifp->if_index) = NULL;
586
587	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
588		if_index--;
589
590
591	/* We can now free link ifaddr. */
592	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
593		ifa = TAILQ_FIRST(&ifp->if_addrhead);
594		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
595		IFAFREE(ifa);
596	}
597
598	/*
599	 * Delete all remaining routes using this interface
600	 * Unfortuneatly the only way to do this is to slog through
601	 * the entire routing table looking for routes which point
602	 * to this interface...oh well...
603	 */
604	for (i = 1; i <= AF_MAX; i++) {
605		if ((rnh = rt_tables[i]) == NULL)
606			continue;
607		RADIX_NODE_HEAD_LOCK(rnh);
608		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
609		RADIX_NODE_HEAD_UNLOCK(rnh);
610	}
611
612	/* Announce that the interface is gone. */
613	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
614
615	IF_AFDATA_LOCK(ifp);
616	for (dp = domains; dp; dp = dp->dom_next) {
617		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
618			(*dp->dom_ifdetach)(ifp,
619			    ifp->if_afdata[dp->dom_family]);
620	}
621	IF_AFDATA_UNLOCK(ifp);
622
623#ifdef MAC
624	mac_destroy_ifnet(ifp);
625#endif /* MAC */
626	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
627	knlist_clear(&ifp->if_klist, 0);
628	knlist_destroy(&ifp->if_klist);
629	IFNET_WLOCK();
630 	found = 0;
631 	TAILQ_FOREACH(iter, &ifnet, if_link)
632 		if (iter == ifp) {
633 			found = 1;
634 			break;
635 		}
636 	if (found)
637 		TAILQ_REMOVE(&ifnet, ifp, if_link);
638	IFNET_WUNLOCK();
639	mtx_destroy(&ifp->if_snd.ifq_mtx);
640	IF_AFDATA_DESTROY(ifp);
641	splx(s);
642}
643
644/*
645 * Delete Routes for a Network Interface
646 *
647 * Called for each routing entry via the rnh->rnh_walktree() call above
648 * to delete all route entries referencing a detaching network interface.
649 *
650 * Arguments:
651 *	rn	pointer to node in the routing table
652 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
653 *
654 * Returns:
655 *	0	successful
656 *	errno	failed - reason indicated
657 *
658 */
659static int
660if_rtdel(struct radix_node *rn, void *arg)
661{
662	struct rtentry	*rt = (struct rtentry *)rn;
663	struct ifnet	*ifp = arg;
664	int		err;
665
666	if (rt->rt_ifp == ifp) {
667
668		/*
669		 * Protect (sorta) against walktree recursion problems
670		 * with cloned routes
671		 */
672		if ((rt->rt_flags & RTF_UP) == 0)
673			return (0);
674
675		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
676				rt_mask(rt), rt->rt_flags,
677				(struct rtentry **) NULL);
678		if (err) {
679			log(LOG_WARNING, "if_rtdel: error %d\n", err);
680		}
681	}
682
683	return (0);
684}
685
/*
 * True when the first a1->sa_len bytes of the two sockaddrs are the same.
 * a1 is expanded twice, so it must not have side effects.
 */
#define	equal(a1, a2)							\
	(bcmp((a1), (a2), (a1)->sa_len) == 0)
687
688/*
689 * Locate an interface based on a complete address.
690 */
691/*ARGSUSED*/
692struct ifaddr *
693ifa_ifwithaddr(struct sockaddr *addr)
694{
695	struct ifnet *ifp;
696	struct ifaddr *ifa;
697
698	IFNET_RLOCK();
699	TAILQ_FOREACH(ifp, &ifnet, if_link)
700		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
701			if (ifa->ifa_addr->sa_family != addr->sa_family)
702				continue;
703			if (equal(addr, ifa->ifa_addr))
704				goto done;
705			/* IP6 doesn't have broadcast */
706			if ((ifp->if_flags & IFF_BROADCAST) &&
707			    ifa->ifa_broadaddr &&
708			    ifa->ifa_broadaddr->sa_len != 0 &&
709			    equal(ifa->ifa_broadaddr, addr))
710				goto done;
711		}
712	ifa = NULL;
713done:
714	IFNET_RUNLOCK();
715	return (ifa);
716}
717
718/*
719 * Locate the point to point interface with a given destination address.
720 */
721/*ARGSUSED*/
722struct ifaddr *
723ifa_ifwithdstaddr(struct sockaddr *addr)
724{
725	struct ifnet *ifp;
726	struct ifaddr *ifa;
727
728	IFNET_RLOCK();
729	TAILQ_FOREACH(ifp, &ifnet, if_link) {
730		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
731			continue;
732		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
733			if (ifa->ifa_addr->sa_family != addr->sa_family)
734				continue;
735			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
736				goto done;
737		}
738	}
739	ifa = NULL;
740done:
741	IFNET_RUNLOCK();
742	return (ifa);
743}
744
745/*
746 * Find an interface on a specific network.  If many, choice
747 * is most specific found.
748 */
749struct ifaddr *
750ifa_ifwithnet(struct sockaddr *addr)
751{
752	struct ifnet *ifp;
753	struct ifaddr *ifa;
754	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
755	u_int af = addr->sa_family;
756	char *addr_data = addr->sa_data, *cplim;
757
758	/*
759	 * AF_LINK addresses can be looked up directly by their index number,
760	 * so do that if we can.
761	 */
762	if (af == AF_LINK) {
763	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
764	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
765		return (ifaddr_byindex(sdl->sdl_index));
766	}
767
768	/*
769	 * Scan though each interface, looking for ones that have
770	 * addresses in this address family.
771	 */
772	IFNET_RLOCK();
773	TAILQ_FOREACH(ifp, &ifnet, if_link) {
774		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
775			char *cp, *cp2, *cp3;
776
777			if (ifa->ifa_addr->sa_family != af)
778next:				continue;
779			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
780				/*
781				 * This is a bit broken as it doesn't
782				 * take into account that the remote end may
783				 * be a single node in the network we are
784				 * looking for.
785				 * The trouble is that we don't know the
786				 * netmask for the remote end.
787				 */
788				if (ifa->ifa_dstaddr != 0
789				    && equal(addr, ifa->ifa_dstaddr))
790					goto done;
791			} else {
792				/*
793				 * if we have a special address handler,
794				 * then use it instead of the generic one.
795				 */
796				if (ifa->ifa_claim_addr) {
797					if ((*ifa->ifa_claim_addr)(ifa, addr))
798						goto done;
799					continue;
800				}
801
802				/*
803				 * Scan all the bits in the ifa's address.
804				 * If a bit dissagrees with what we are
805				 * looking for, mask it with the netmask
806				 * to see if it really matters.
807				 * (A byte at a time)
808				 */
809				if (ifa->ifa_netmask == 0)
810					continue;
811				cp = addr_data;
812				cp2 = ifa->ifa_addr->sa_data;
813				cp3 = ifa->ifa_netmask->sa_data;
814				cplim = ifa->ifa_netmask->sa_len
815					+ (char *)ifa->ifa_netmask;
816				while (cp3 < cplim)
817					if ((*cp++ ^ *cp2++) & *cp3++)
818						goto next; /* next address! */
819				/*
820				 * If the netmask of what we just found
821				 * is more specific than what we had before
822				 * (if we had one) then remember the new one
823				 * before continuing to search
824				 * for an even better one.
825				 */
826				if (ifa_maybe == 0 ||
827				    rn_refines((caddr_t)ifa->ifa_netmask,
828				    (caddr_t)ifa_maybe->ifa_netmask))
829					ifa_maybe = ifa;
830			}
831		}
832	}
833	ifa = ifa_maybe;
834done:
835	IFNET_RUNLOCK();
836	return (ifa);
837}
838
839/*
840 * Find an interface address specific to an interface best matching
841 * a given address.
842 */
843struct ifaddr *
844ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
845{
846	struct ifaddr *ifa;
847	char *cp, *cp2, *cp3;
848	char *cplim;
849	struct ifaddr *ifa_maybe = 0;
850	u_int af = addr->sa_family;
851
852	if (af >= AF_MAX)
853		return (0);
854	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
855		if (ifa->ifa_addr->sa_family != af)
856			continue;
857		if (ifa_maybe == 0)
858			ifa_maybe = ifa;
859		if (ifa->ifa_netmask == 0) {
860			if (equal(addr, ifa->ifa_addr) ||
861			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
862				goto done;
863			continue;
864		}
865		if (ifp->if_flags & IFF_POINTOPOINT) {
866			if (equal(addr, ifa->ifa_dstaddr))
867				goto done;
868		} else {
869			cp = addr->sa_data;
870			cp2 = ifa->ifa_addr->sa_data;
871			cp3 = ifa->ifa_netmask->sa_data;
872			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
873			for (; cp3 < cplim; cp3++)
874				if ((*cp++ ^ *cp2++) & *cp3)
875					break;
876			if (cp3 == cplim)
877				goto done;
878		}
879	}
880	ifa = ifa_maybe;
881done:
882	return (ifa);
883}
884
885#include <net/route.h>
886
887/*
888 * Default action when installing a route with a Link Level gateway.
889 * Lookup an appropriate real ifa to point to.
890 * This should be moved to /sys/net/link.c eventually.
891 */
892static void
893link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
894{
895	struct ifaddr *ifa, *oifa;
896	struct sockaddr *dst;
897	struct ifnet *ifp;
898
899	RT_LOCK_ASSERT(rt);
900
901	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
902	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
903		return;
904	ifa = ifaof_ifpforaddr(dst, ifp);
905	if (ifa) {
906		IFAREF(ifa);		/* XXX */
907		oifa = rt->rt_ifa;
908		rt->rt_ifa = ifa;
909		IFAFREE(oifa);
910		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
911			ifa->ifa_rtrequest(cmd, rt, info);
912	}
913}
914
915/*
916 * Mark an interface down and notify protocols of
917 * the transition.
918 * NOTE: must be called at splnet or eqivalent.
919 */
920static void
921if_unroute(struct ifnet *ifp, int flag, int fam)
922{
923	struct ifaddr *ifa;
924
925	ifp->if_flags &= ~flag;
926	getmicrotime(&ifp->if_lastchange);
927	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
928		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
929			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
930	if_qflush(&ifp->if_snd);
931	rt_ifmsg(ifp);
932}
933
934/*
935 * Mark an interface up and notify protocols of
936 * the transition.
937 * NOTE: must be called at splnet or eqivalent.
938 */
939static void
940if_route(struct ifnet *ifp, int flag, int fam)
941{
942	struct ifaddr *ifa;
943
944	ifp->if_flags |= flag;
945	getmicrotime(&ifp->if_lastchange);
946	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
947		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
948			pfctlinput(PRC_IFUP, ifa->ifa_addr);
949	rt_ifmsg(ifp);
950#ifdef INET6
951	in6_if_up(ifp);
952#endif
953}
954
955/*
956 * Mark an interface down and notify protocols of
957 * the transition.
958 * NOTE: must be called at splnet or eqivalent.
959 */
960void
961if_down(struct ifnet *ifp)
962{
963
964	if_unroute(ifp, IFF_UP, AF_UNSPEC);
965}
966
967/*
968 * Mark an interface up and notify protocols of
969 * the transition.
970 * NOTE: must be called at splnet or eqivalent.
971 */
972void
973if_up(struct ifnet *ifp)
974{
975
976	if_route(ifp, IFF_UP, AF_UNSPEC);
977}
978
979/*
980 * Flush an interface queue.
981 */
982static void
983if_qflush(struct ifaltq *ifq)
984{
985	struct mbuf *m, *n;
986
987#ifdef ALTQ
988	if (ALTQ_IS_ENABLED(ifq))
989		ALTQ_PURGE(ifq);
990#endif
991	n = ifq->ifq_head;
992	while ((m = n) != 0) {
993		n = m->m_act;
994		m_freem(m);
995	}
996	ifq->ifq_head = 0;
997	ifq->ifq_tail = 0;
998	ifq->ifq_len = 0;
999}
1000
1001/*
1002 * Handle interface watchdog timer routines.  Called
1003 * from softclock, we decrement timers (if set) and
1004 * call the appropriate interface routine on expiration.
1005 *
1006 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
1007 * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
1008 * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
1009 */
1010static void
1011if_slowtimo(void *arg)
1012{
1013	struct ifnet *ifp;
1014	int s = splimp();
1015
1016	IFNET_RLOCK();
1017	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1018		if (ifp->if_timer == 0 || --ifp->if_timer)
1019			continue;
1020		if (ifp->if_watchdog)
1021			(*ifp->if_watchdog)(ifp);
1022	}
1023	IFNET_RUNLOCK();
1024	splx(s);
1025	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1026}
1027
1028/*
1029 * Map interface name to
1030 * interface structure pointer.
1031 */
1032struct ifnet *
1033ifunit(const char *name)
1034{
1035	struct ifnet *ifp;
1036
1037	IFNET_RLOCK();
1038	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1039		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1040			break;
1041	}
1042	IFNET_RUNLOCK();
1043	return (ifp);
1044}
1045
1046/*
1047 * Hardware specific interface ioctls.
1048 */
1049static int
1050ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1051{
1052	struct ifreq *ifr;
1053	struct ifstat *ifs;
1054	int error = 0;
1055	int new_flags;
1056	size_t namelen, onamelen;
1057	char new_name[IFNAMSIZ];
1058	struct ifaddr *ifa;
1059	struct sockaddr_dl *sdl;
1060
1061	ifr = (struct ifreq *)data;
1062	switch (cmd) {
1063	case SIOCGIFINDEX:
1064		ifr->ifr_index = ifp->if_index;
1065		break;
1066
1067	case SIOCGIFFLAGS:
1068		ifr->ifr_flags = ifp->if_flags & 0xffff;
1069		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1070		break;
1071
1072	case SIOCGIFCAP:
1073		ifr->ifr_reqcap = ifp->if_capabilities;
1074		ifr->ifr_curcap = ifp->if_capenable;
1075		break;
1076
1077#ifdef MAC
1078	case SIOCGIFMAC:
1079		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1080		break;
1081#endif
1082
1083	case SIOCGIFMETRIC:
1084		ifr->ifr_metric = ifp->if_metric;
1085		break;
1086
1087	case SIOCGIFMTU:
1088		ifr->ifr_mtu = ifp->if_mtu;
1089		break;
1090
1091	case SIOCGIFPHYS:
1092		ifr->ifr_phys = ifp->if_physical;
1093		break;
1094
1095	case SIOCSIFFLAGS:
1096		error = suser(td);
1097		if (error)
1098			return (error);
1099		new_flags = (ifr->ifr_flags & 0xffff) |
1100		    (ifr->ifr_flagshigh << 16);
1101		if (ifp->if_flags & IFF_SMART) {
1102			/* Smart drivers twiddle their own routes */
1103		} else if (ifp->if_flags & IFF_UP &&
1104		    (new_flags & IFF_UP) == 0) {
1105			int s = splimp();
1106			if_down(ifp);
1107			splx(s);
1108		} else if (new_flags & IFF_UP &&
1109		    (ifp->if_flags & IFF_UP) == 0) {
1110			int s = splimp();
1111			if_up(ifp);
1112			splx(s);
1113		}
1114		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1115			(new_flags &~ IFF_CANTCHANGE);
1116		if (new_flags & IFF_PPROMISC) {
1117			/* Permanently promiscuous mode requested */
1118			ifp->if_flags |= IFF_PROMISC;
1119		} else if (ifp->if_pcount == 0) {
1120			ifp->if_flags &= ~IFF_PROMISC;
1121		}
1122		if (ifp->if_ioctl)
1123			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1124		getmicrotime(&ifp->if_lastchange);
1125		break;
1126
1127	case SIOCSIFCAP:
1128		error = suser(td);
1129		if (error)
1130			return (error);
1131		if (ifp->if_ioctl == NULL)
1132			return (EOPNOTSUPP);
1133		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1134			return (EINVAL);
1135		error = (*ifp->if_ioctl)(ifp, cmd, data);
1136		if (error == 0)
1137			getmicrotime(&ifp->if_lastchange);
1138		break;
1139
1140#ifdef MAC
1141	case SIOCSIFMAC:
1142		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1143		break;
1144#endif
1145
1146	case SIOCSIFNAME:
1147		error = suser(td);
1148		if (error != 0)
1149			return (error);
1150		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1151		if (error != 0)
1152			return (error);
1153		if (new_name[0] == '\0')
1154			return (EINVAL);
1155		if (ifunit(new_name) != NULL)
1156			return (EEXIST);
1157
1158		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1159		/* Announce the departure of the interface. */
1160		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1161
1162		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1163		ifa = ifaddr_byindex(ifp->if_index);
1164		IFA_LOCK(ifa);
1165		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1166		namelen = strlen(new_name);
1167		onamelen = sdl->sdl_nlen;
1168		/*
1169		 * Move the address if needed.  This is safe because we
1170		 * allocate space for a name of length IFNAMSIZ when we
1171		 * create this in if_attach().
1172		 */
1173		if (namelen != onamelen) {
1174			bcopy(sdl->sdl_data + onamelen,
1175			    sdl->sdl_data + namelen, sdl->sdl_alen);
1176		}
1177		bcopy(new_name, sdl->sdl_data, namelen);
1178		sdl->sdl_nlen = namelen;
1179		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1180		bzero(sdl->sdl_data, onamelen);
1181		while (namelen != 0)
1182			sdl->sdl_data[--namelen] = 0xff;
1183		IFA_UNLOCK(ifa);
1184
1185		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1186		/* Announce the return of the interface. */
1187		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1188		break;
1189
1190	case SIOCSIFMETRIC:
1191		error = suser(td);
1192		if (error)
1193			return (error);
1194		ifp->if_metric = ifr->ifr_metric;
1195		getmicrotime(&ifp->if_lastchange);
1196		break;
1197
1198	case SIOCSIFPHYS:
1199		error = suser(td);
1200		if (error)
1201			return (error);
1202		if (ifp->if_ioctl == NULL)
1203			return (EOPNOTSUPP);
1204		error = (*ifp->if_ioctl)(ifp, cmd, data);
1205		if (error == 0)
1206			getmicrotime(&ifp->if_lastchange);
1207		break;
1208
1209	case SIOCSIFMTU:
1210	{
1211		u_long oldmtu = ifp->if_mtu;
1212
1213		error = suser(td);
1214		if (error)
1215			return (error);
1216		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1217			return (EINVAL);
1218		if (ifp->if_ioctl == NULL)
1219			return (EOPNOTSUPP);
1220		error = (*ifp->if_ioctl)(ifp, cmd, data);
1221		if (error == 0) {
1222			getmicrotime(&ifp->if_lastchange);
1223			rt_ifmsg(ifp);
1224		}
1225		/*
1226		 * If the link MTU changed, do network layer specific procedure.
1227		 */
1228		if (ifp->if_mtu != oldmtu) {
1229#ifdef INET6
1230			nd6_setmtu(ifp);
1231#endif
1232		}
1233		break;
1234	}
1235
1236	case SIOCADDMULTI:
1237	case SIOCDELMULTI:
1238		error = suser(td);
1239		if (error)
1240			return (error);
1241
1242		/* Don't allow group membership on non-multicast interfaces. */
1243		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1244			return (EOPNOTSUPP);
1245
1246		/* Don't let users screw up protocols' entries. */
1247		if (ifr->ifr_addr.sa_family != AF_LINK)
1248			return (EINVAL);
1249
1250		if (cmd == SIOCADDMULTI) {
1251			struct ifmultiaddr *ifma;
1252			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1253		} else {
1254			error = if_delmulti(ifp, &ifr->ifr_addr);
1255		}
1256		if (error == 0)
1257			getmicrotime(&ifp->if_lastchange);
1258		break;
1259
1260	case SIOCSIFPHYADDR:
1261	case SIOCDIFPHYADDR:
1262#ifdef INET6
1263	case SIOCSIFPHYADDR_IN6:
1264#endif
1265	case SIOCSLIFPHYADDR:
1266	case SIOCSIFMEDIA:
1267	case SIOCSIFGENERIC:
1268		error = suser(td);
1269		if (error)
1270			return (error);
1271		if (ifp->if_ioctl == NULL)
1272			return (EOPNOTSUPP);
1273		error = (*ifp->if_ioctl)(ifp, cmd, data);
1274		if (error == 0)
1275			getmicrotime(&ifp->if_lastchange);
1276		break;
1277
1278	case SIOCGIFSTATUS:
1279		ifs = (struct ifstat *)data;
1280		ifs->ascii[0] = '\0';
1281
1282	case SIOCGIFPSRCADDR:
1283	case SIOCGIFPDSTADDR:
1284	case SIOCGLIFPHYADDR:
1285	case SIOCGIFMEDIA:
1286	case SIOCGIFGENERIC:
1287		if (ifp->if_ioctl == NULL)
1288			return (EOPNOTSUPP);
1289		error = (*ifp->if_ioctl)(ifp, cmd, data);
1290		break;
1291
1292	case SIOCSIFLLADDR:
1293		error = suser(td);
1294		if (error)
1295			return (error);
1296		error = if_setlladdr(ifp,
1297		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1298		break;
1299
1300	default:
1301		error = ENOIOCTL;
1302		break;
1303	}
1304	return (error);
1305}
1306
1307/*
1308 * Interface ioctls.
1309 */
1310int
1311ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1312{
1313	struct ifnet *ifp;
1314	struct ifreq *ifr;
1315	int error;
1316	int oif_flags;
1317
1318	switch (cmd) {
1319	case SIOCGIFCONF:
1320	case OSIOCGIFCONF:
1321		return (ifconf(cmd, data));
1322	}
1323	ifr = (struct ifreq *)data;
1324
1325	switch (cmd) {
1326	case SIOCIFCREATE:
1327	case SIOCIFDESTROY:
1328		if ((error = suser(td)) != 0)
1329			return (error);
1330		return ((cmd == SIOCIFCREATE) ?
1331			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1332			if_clone_destroy(ifr->ifr_name));
1333
1334	case SIOCIFGCLONERS:
1335		return (if_clone_list((struct if_clonereq *)data));
1336	}
1337
1338	ifp = ifunit(ifr->ifr_name);
1339	if (ifp == 0)
1340		return (ENXIO);
1341
1342	error = ifhwioctl(cmd, ifp, data, td);
1343	if (error != ENOIOCTL)
1344		return (error);
1345
1346	oif_flags = ifp->if_flags;
1347	if (so->so_proto == 0)
1348		return (EOPNOTSUPP);
1349#ifndef COMPAT_43
1350	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1351								 data,
1352								 ifp, td));
1353#else
1354	{
1355		int ocmd = cmd;
1356
1357		switch (cmd) {
1358
1359		case SIOCSIFDSTADDR:
1360		case SIOCSIFADDR:
1361		case SIOCSIFBRDADDR:
1362		case SIOCSIFNETMASK:
1363#if BYTE_ORDER != BIG_ENDIAN
1364			if (ifr->ifr_addr.sa_family == 0 &&
1365			    ifr->ifr_addr.sa_len < 16) {
1366				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1367				ifr->ifr_addr.sa_len = 16;
1368			}
1369#else
1370			if (ifr->ifr_addr.sa_len == 0)
1371				ifr->ifr_addr.sa_len = 16;
1372#endif
1373			break;
1374
1375		case OSIOCGIFADDR:
1376			cmd = SIOCGIFADDR;
1377			break;
1378
1379		case OSIOCGIFDSTADDR:
1380			cmd = SIOCGIFDSTADDR;
1381			break;
1382
1383		case OSIOCGIFBRDADDR:
1384			cmd = SIOCGIFBRDADDR;
1385			break;
1386
1387		case OSIOCGIFNETMASK:
1388			cmd = SIOCGIFNETMASK;
1389		}
1390		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1391								   cmd,
1392								   data,
1393								   ifp, td));
1394		switch (ocmd) {
1395
1396		case OSIOCGIFADDR:
1397		case OSIOCGIFDSTADDR:
1398		case OSIOCGIFBRDADDR:
1399		case OSIOCGIFNETMASK:
1400			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1401
1402		}
1403	}
1404#endif /* COMPAT_43 */
1405
1406	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1407#ifdef INET6
1408		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1409		if (ifp->if_flags & IFF_UP) {
1410			int s = splimp();
1411			in6_if_up(ifp);
1412			splx(s);
1413		}
1414#endif
1415	}
1416	return (error);
1417}
1418
1419/*
1420 * Set/clear promiscuous mode on interface ifp based on the truth value
1421 * of pswitch.  The calls are reference counted so that only the first
1422 * "on" request actually has an effect, as does the final "off" request.
1423 * Results are undefined if the "off" and "on" requests are not matched.
1424 */
1425int
1426ifpromisc(struct ifnet *ifp, int pswitch)
1427{
1428	struct ifreq ifr;
1429	int error;
1430	int oldflags, oldpcount;
1431
1432	oldpcount = ifp->if_pcount;
1433	oldflags = ifp->if_flags;
1434	if (ifp->if_flags & IFF_PPROMISC) {
1435		/* Do nothing if device is in permanently promiscuous mode */
1436		ifp->if_pcount += pswitch ? 1 : -1;
1437		return (0);
1438	}
1439	if (pswitch) {
1440		/*
1441		 * If the device is not configured up, we cannot put it in
1442		 * promiscuous mode.
1443		 */
1444		if ((ifp->if_flags & IFF_UP) == 0)
1445			return (ENETDOWN);
1446		if (ifp->if_pcount++ != 0)
1447			return (0);
1448		ifp->if_flags |= IFF_PROMISC;
1449	} else {
1450		if (--ifp->if_pcount > 0)
1451			return (0);
1452		ifp->if_flags &= ~IFF_PROMISC;
1453	}
1454	ifr.ifr_flags = ifp->if_flags & 0xffff;
1455	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1456	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1457	if (error == 0) {
1458		log(LOG_INFO, "%s: promiscuous mode %s\n",
1459		    ifp->if_xname,
1460		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1461		rt_ifmsg(ifp);
1462	} else {
1463		ifp->if_pcount = oldpcount;
1464		ifp->if_flags = oldflags;
1465	}
1466	return error;
1467}
1468
1469/*
1470 * Return interface configuration
1471 * of system.  List may be used
1472 * in later ioctl's (above) to get
1473 * other information.
1474 */
1475/*ARGSUSED*/
1476static int
1477ifconf(u_long cmd, caddr_t data)
1478{
1479	struct ifconf *ifc = (struct ifconf *)data;
1480	struct ifnet *ifp;
1481	struct ifaddr *ifa;
1482	struct ifreq ifr, *ifrp;
1483	int space = ifc->ifc_len, error = 0;
1484
1485	ifrp = ifc->ifc_req;
1486	IFNET_RLOCK();		/* could sleep XXX */
1487	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1488		int addrs;
1489
1490		if (space < sizeof(ifr))
1491			break;
1492		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1493		    >= sizeof(ifr.ifr_name)) {
1494			error = ENAMETOOLONG;
1495			break;
1496		}
1497
1498		addrs = 0;
1499		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1500			struct sockaddr *sa = ifa->ifa_addr;
1501
1502			if (space < sizeof(ifr))
1503				break;
1504			if (jailed(curthread->td_ucred) &&
1505			    prison_if(curthread->td_ucred, sa))
1506				continue;
1507			addrs++;
1508#ifdef COMPAT_43
1509			if (cmd == OSIOCGIFCONF) {
1510				struct osockaddr *osa =
1511					 (struct osockaddr *)&ifr.ifr_addr;
1512				ifr.ifr_addr = *sa;
1513				osa->sa_family = sa->sa_family;
1514				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1515						sizeof (ifr));
1516				ifrp++;
1517			} else
1518#endif
1519			if (sa->sa_len <= sizeof(*sa)) {
1520				ifr.ifr_addr = *sa;
1521				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1522						sizeof (ifr));
1523				ifrp++;
1524			} else {
1525				if (space < sizeof (ifr) + sa->sa_len -
1526					    sizeof(*sa))
1527					break;
1528				space -= sa->sa_len - sizeof(*sa);
1529				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1530						sizeof (ifr.ifr_name));
1531				if (error == 0)
1532				    error = copyout((caddr_t)sa,
1533				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1534				ifrp = (struct ifreq *)
1535					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1536			}
1537			if (error)
1538				break;
1539			space -= sizeof (ifr);
1540		}
1541		if (error)
1542			break;
1543		if (!addrs) {
1544			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1545			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1546			    sizeof (ifr));
1547			if (error)
1548				break;
1549			space -= sizeof (ifr);
1550			ifrp++;
1551		}
1552	}
1553	IFNET_RUNLOCK();
1554	ifc->ifc_len -= space;
1555	return (error);
1556}
1557
1558/*
1559 * Just like if_promisc(), but for all-multicast-reception mode.
1560 */
1561int
1562if_allmulti(struct ifnet *ifp, int onswitch)
1563{
1564	int error = 0;
1565	int s = splimp();
1566	struct ifreq ifr;
1567
1568	if (onswitch) {
1569		if (ifp->if_amcount++ == 0) {
1570			ifp->if_flags |= IFF_ALLMULTI;
1571			ifr.ifr_flags = ifp->if_flags & 0xffff;
1572			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1573			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1574		}
1575	} else {
1576		if (ifp->if_amcount > 1) {
1577			ifp->if_amcount--;
1578		} else {
1579			ifp->if_amcount = 0;
1580			ifp->if_flags &= ~IFF_ALLMULTI;
1581			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1582			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1583			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1584		}
1585	}
1586	splx(s);
1587
1588	if (error == 0)
1589		rt_ifmsg(ifp);
1590	return error;
1591}
1592
1593/*
1594 * Add a multicast listenership to the interface in question.
1595 * The link layer provides a routine which converts
1596 */
1597int
1598if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1599{
1600	struct sockaddr *llsa, *dupsa;
1601	int error, s;
1602	struct ifmultiaddr *ifma;
1603
1604	/*
1605	 * If the matching multicast address already exists
1606	 * then don't add a new one, just add a reference
1607	 */
1608	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1609		if (equal(sa, ifma->ifma_addr)) {
1610			ifma->ifma_refcount++;
1611			if (retifma)
1612				*retifma = ifma;
1613			return 0;
1614		}
1615	}
1616
1617	/*
1618	 * Give the link layer a chance to accept/reject it, and also
1619	 * find out which AF_LINK address this maps to, if it isn't one
1620	 * already.
1621	 */
1622	if (ifp->if_resolvemulti) {
1623		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1624		if (error) return error;
1625	} else {
1626		llsa = 0;
1627	}
1628
1629	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1630	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1631	bcopy(sa, dupsa, sa->sa_len);
1632
1633	ifma->ifma_addr = dupsa;
1634	ifma->ifma_lladdr = llsa;
1635	ifma->ifma_ifp = ifp;
1636	ifma->ifma_refcount = 1;
1637	ifma->ifma_protospec = 0;
1638	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1639
1640	/*
1641	 * Some network interfaces can scan the address list at
1642	 * interrupt time; lock them out.
1643	 */
1644	s = splimp();
1645	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1646	splx(s);
1647	if (retifma != NULL)
1648		*retifma = ifma;
1649
1650	if (llsa != 0) {
1651		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1652			if (equal(ifma->ifma_addr, llsa))
1653				break;
1654		}
1655		if (ifma) {
1656			ifma->ifma_refcount++;
1657		} else {
1658			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1659			       M_IFMADDR, M_WAITOK);
1660			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1661			       M_IFMADDR, M_WAITOK);
1662			bcopy(llsa, dupsa, llsa->sa_len);
1663			ifma->ifma_addr = dupsa;
1664			ifma->ifma_lladdr = NULL;
1665			ifma->ifma_ifp = ifp;
1666			ifma->ifma_refcount = 1;
1667			ifma->ifma_protospec = 0;
1668			s = splimp();
1669			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1670			splx(s);
1671		}
1672	}
1673	/*
1674	 * We are certain we have added something, so call down to the
1675	 * interface to let them know about it.
1676	 */
1677	s = splimp();
1678	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1679	splx(s);
1680
1681	return 0;
1682}
1683
1684/*
1685 * Remove a reference to a multicast address on this interface.  Yell
1686 * if the request does not match an existing membership.
1687 */
1688int
1689if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1690{
1691	struct ifmultiaddr *ifma;
1692	int s;
1693
1694	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1695		if (equal(sa, ifma->ifma_addr))
1696			break;
1697	if (ifma == 0)
1698		return ENOENT;
1699
1700	if (ifma->ifma_refcount > 1) {
1701		ifma->ifma_refcount--;
1702		return 0;
1703	}
1704
1705	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1706	sa = ifma->ifma_lladdr;
1707	s = splimp();
1708	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1709	/*
1710	 * Make sure the interface driver is notified
1711	 * in the case of a link layer mcast group being left.
1712	 */
1713	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1714		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1715	splx(s);
1716	free(ifma->ifma_addr, M_IFMADDR);
1717	free(ifma, M_IFMADDR);
1718	if (sa == 0)
1719		return 0;
1720
1721	/*
1722	 * Now look for the link-layer address which corresponds to
1723	 * this network address.  It had been squirreled away in
1724	 * ifma->ifma_lladdr for this purpose (so we don't have
1725	 * to call ifp->if_resolvemulti() again), and we saved that
1726	 * value in sa above.  If some nasty deleted the
1727	 * link-layer address out from underneath us, we can deal because
1728	 * the address we stored was is not the same as the one which was
1729	 * in the record for the link-layer address.  (So we don't complain
1730	 * in that case.)
1731	 */
1732	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1733		if (equal(sa, ifma->ifma_addr))
1734			break;
1735	if (ifma == 0)
1736		return 0;
1737
1738	if (ifma->ifma_refcount > 1) {
1739		ifma->ifma_refcount--;
1740		return 0;
1741	}
1742
1743	s = splimp();
1744	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1745	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1746	splx(s);
1747	free(ifma->ifma_addr, M_IFMADDR);
1748	free(sa, M_IFMADDR);
1749	free(ifma, M_IFMADDR);
1750
1751	return 0;
1752}
1753
1754/*
1755 * Set the link layer address on an interface.
1756 *
1757 * At this time we only support certain types of interfaces,
1758 * and we don't allow the length of the address to change.
1759 */
1760int
1761if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1762{
1763	struct sockaddr_dl *sdl;
1764	struct ifaddr *ifa;
1765	struct ifreq ifr;
1766
1767	ifa = ifaddr_byindex(ifp->if_index);
1768	if (ifa == NULL)
1769		return (EINVAL);
1770	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1771	if (sdl == NULL)
1772		return (EINVAL);
1773	if (len != sdl->sdl_alen)	/* don't allow length to change */
1774		return (EINVAL);
1775	switch (ifp->if_type) {
1776	case IFT_ETHER:			/* these types use struct arpcom */
1777	case IFT_FDDI:
1778	case IFT_XETHER:
1779	case IFT_ISO88025:
1780	case IFT_L2VLAN:
1781		bcopy(lladdr, IFP2AC(ifp)->ac_enaddr, len);
1782		/*
1783		 * XXX We also need to store the lladdr in LLADDR(sdl),
1784		 * which is done below. This is a pain because we must
1785		 * remember to keep the info in sync.
1786		 */
1787		/* FALLTHROUGH */
1788	case IFT_ARCNET:
1789		bcopy(lladdr, LLADDR(sdl), len);
1790		break;
1791	default:
1792		return (ENODEV);
1793	}
1794	/*
1795	 * If the interface is already up, we need
1796	 * to re-init it in order to reprogram its
1797	 * address filter.
1798	 */
1799	if ((ifp->if_flags & IFF_UP) != 0) {
1800		ifp->if_flags &= ~IFF_UP;
1801		ifr.ifr_flags = ifp->if_flags & 0xffff;
1802		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1803		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1804		ifp->if_flags |= IFF_UP;
1805		ifr.ifr_flags = ifp->if_flags & 0xffff;
1806		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1807		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1808#ifdef INET
1809		/*
1810		 * Also send gratuitous ARPs to notify other nodes about
1811		 * the address change.
1812		 */
1813		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1814			if (ifa->ifa_addr != NULL &&
1815			    ifa->ifa_addr->sa_family == AF_INET)
1816				arp_ifinit(ifp, ifa);
1817		}
1818#endif
1819	}
1820	return (0);
1821}
1822
1823struct ifmultiaddr *
1824ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
1825{
1826	struct ifmultiaddr *ifma;
1827
1828	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1829		if (equal(ifma->ifma_addr, sa))
1830			break;
1831
1832	return ifma;
1833}
1834
1835/*
1836 * The name argument must be a pointer to storage which will last as
1837 * long as the interface does.  For physical devices, the result of
1838 * device_get_name(dev) is a good choice and for pseudo-devices a
1839 * static string works well.
1840 */
1841void
1842if_initname(struct ifnet *ifp, const char *name, int unit)
1843{
1844	ifp->if_dname = name;
1845	ifp->if_dunit = unit;
1846	if (unit != IF_DUNIT_NONE)
1847		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1848	else
1849		strlcpy(ifp->if_xname, name, IFNAMSIZ);
1850}
1851
1852int
1853if_printf(struct ifnet *ifp, const char * fmt, ...)
1854{
1855	va_list ap;
1856	int retval;
1857
1858	retval = printf("%s: ", ifp->if_xname);
1859	va_start(ap, fmt);
1860	retval += vprintf(fmt, ap);
1861	va_end(ap);
1862	return (retval);
1863}
1864
1865/*
1866 * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
1867 * be called without Giant.  However, we often can't acquire the Giant lock
1868 * at those points; instead, we run it via a task queue that holds Giant via
1869 * if_start_deferred.
1870 *
1871 * XXXRW: We need to make sure that the ifnet isn't fully detached until any
1872 * outstanding if_start_deferred() tasks that will run after the free.  This
1873 * probably means waiting in if_detach().
1874 */
1875void
1876if_start(struct ifnet *ifp)
1877{
1878
1879	NET_ASSERT_GIANT();
1880
1881        if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
1882                if (mtx_owned(&Giant))
1883                        (*(ifp)->if_start)(ifp);
1884                else
1885			taskqueue_enqueue(taskqueue_swi_giant,
1886			    &ifp->if_starttask);
1887        } else
1888                (*(ifp)->if_start)(ifp);
1889}
1890
1891static void
1892if_start_deferred(void *context, int pending)
1893{
1894	struct ifnet *ifp;
1895
1896	/*
1897	 * This code must be entered with Giant, and should never run if
1898	 * we're not running with debug.mpsafenet.
1899	 */
1900	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
1901	GIANT_REQUIRED;
1902
1903	ifp = (struct ifnet *)context;
1904	(ifp->if_start)(ifp);
1905}
1906
1907SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
1908SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1909