if.c revision 126900
1/*
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)if.c	8.5 (Berkeley) 1/9/95
34 * $FreeBSD: head/sys/net/if.c 126900 2004-03-13 02:31:40Z brooks $
35 */
36
37#include "opt_compat.h"
38#include "opt_inet6.h"
39#include "opt_inet.h"
40#include "opt_mac.h"
41
42#include <sys/param.h>
43#include <sys/conf.h>
44#include <sys/mac.h>
45#include <sys/malloc.h>
46#include <sys/bus.h>
47#include <sys/mbuf.h>
48#include <sys/systm.h>
49#include <sys/proc.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/protosw.h>
53#include <sys/kernel.h>
54#include <sys/sockio.h>
55#include <sys/syslog.h>
56#include <sys/sysctl.h>
57#include <sys/domain.h>
58#include <sys/jail.h>
59#include <machine/stdarg.h>
60
61#include <net/if.h>
62#include <net/if_arp.h>
63#include <net/if_dl.h>
64#include <net/if_types.h>
65#include <net/if_var.h>
66#include <net/radix.h>
67#include <net/route.h>
68
69#if defined(INET) || defined(INET6)
70/*XXX*/
71#include <netinet/in.h>
72#include <netinet/in_var.h>
73#ifdef INET6
74#include <netinet6/in6_var.h>
75#include <netinet6/in6_ifattach.h>
76#endif
77#endif
78#ifdef INET
79#include <netinet/if_ether.h>
80#endif
81
82static void	if_attachdomain(void *);
83static void	if_attachdomain1(struct ifnet *);
84static int	ifconf(u_long, caddr_t);
85static void	if_grow(void);
86static void	if_init(void *);
87static void	if_check(void *);
88static int	if_findindex(struct ifnet *);
89static void	if_qflush(struct ifqueue *);
90static void	if_slowtimo(void *);
91static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
92static int	if_rtdel(struct radix_node *, void *);
93static struct	if_clone *if_clone_lookup(const char *, int *);
94static int	if_clone_list(struct if_clonereq *);
95static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
96#ifdef INET6
97/*
98 * XXX: declare here to avoid to include many inet6 related files..
99 * should be more generalized?
100 */
101extern void	nd6_setmtu(struct ifnet *);
102#endif
103
104int	if_index = 0;
105struct	ifindex_entry *ifindex_table = NULL;
106int	ifqmaxlen = IFQ_MAXLEN;
107struct	ifnethead ifnet;	/* depend on static init XXX */
108struct	mtx ifnet_lock;
109static int	if_cloners_count;
110LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
111
112static int	if_indexlim = 8;
113static struct	klist ifklist;
114
115static void	filt_netdetach(struct knote *kn);
116static int	filt_netdev(struct knote *kn, long hint);
117
118static struct filterops netdev_filtops =
119    { 1, NULL, filt_netdetach, filt_netdev };
120
121/*
122 * System initialization
123 */
124SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
125SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
126
127MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
128MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
129MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
130
131static d_open_t		netopen;
132static d_close_t	netclose;
133static d_ioctl_t	netioctl;
134static d_kqfilter_t	netkqfilter;
135
136static struct cdevsw net_cdevsw = {
137	.d_version =	D_VERSION,
138	.d_flags =	D_NEEDGIANT,
139	.d_open =	netopen,
140	.d_close =	netclose,
141	.d_ioctl =	netioctl,
142	.d_name =	"net",
143	.d_kqfilter =	netkqfilter,
144};
145
/*
 * Open handler for the net devices: nothing to set up, always succeeds.
 */
static int
netopen(dev_t dev, int flag, int mode, struct thread *td)
{

	return (0);
}
151
/*
 * Close handler for the net devices: nothing to tear down, always succeeds.
 */
static int
netclose(dev_t dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
157
158static int
159netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
160{
161	struct ifnet *ifp;
162	int error, idx;
163
164	/* only support interface specific ioctls */
165	if (IOCGROUP(cmd) != 'i')
166		return (EOPNOTSUPP);
167	idx = minor(dev);
168	if (idx == 0) {
169		/*
170		 * special network device, not interface.
171		 */
172		if (cmd == SIOCGIFCONF)
173			return (ifconf(cmd, data));	/* XXX remove cmd */
174		return (EOPNOTSUPP);
175	}
176
177	ifp = ifnet_byindex(idx);
178	if (ifp == NULL)
179		return (ENXIO);
180
181	error = ifhwioctl(cmd, ifp, data, td);
182	if (error == ENOIOCTL)
183		error = EOPNOTSUPP;
184	return (error);
185}
186
187static int
188netkqfilter(dev_t dev, struct knote *kn)
189{
190	struct klist *klist;
191	struct ifnet *ifp;
192	int idx;
193
194	idx = minor(dev);
195	if (idx == 0) {
196		klist = &ifklist;
197	} else {
198		ifp = ifnet_byindex(idx);
199		if (ifp == NULL)
200			return (1);
201		klist = &ifp->if_klist;
202	}
203
204	switch (kn->kn_filter) {
205	case EVFILT_NETDEV:
206		kn->kn_fop = &netdev_filtops;
207		break;
208	default:
209		return (1);
210	}
211
212	kn->kn_hook = (caddr_t)klist;
213
214	/* XXX locking? */
215	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
216
217	return (0);
218}
219
220static void
221filt_netdetach(struct knote *kn)
222{
223	struct klist *klist = (struct klist *)kn->kn_hook;
224
225	if (kn->kn_status & KN_DETACHED)
226		return;
227	SLIST_REMOVE(klist, kn, knote, kn_selnext);
228}
229
230static int
231filt_netdev(struct knote *kn, long hint)
232{
233
234	/*
235	 * Currently NOTE_EXIT is abused to indicate device detach.
236	 */
237	if (hint == NOTE_EXIT) {
238		kn->kn_data = NOTE_LINKINV;
239		kn->kn_status |= KN_DETACHED;
240		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
241		return (1);
242	}
243	kn->kn_data = hint;			/* current status */
244	if (kn->kn_sfflags & hint)
245		kn->kn_fflags |= hint;
246	return (kn->kn_fflags != 0);
247}
248
249/*
250 * Network interface utility routines.
251 *
252 * Routines with ifa_ifwith* names take sockaddr *'s as
253 * parameters.
254 */
255/* ARGSUSED*/
256static void
257if_init(void *dummy __unused)
258{
259
260	IFNET_LOCK_INIT();
261	TAILQ_INIT(&ifnet);
262	SLIST_INIT(&ifklist);
263	if_grow();				/* create initial table */
264	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
265	    UID_ROOT, GID_WHEEL, 0600, "network");
266}
267
268static void
269if_grow(void)
270{
271	u_int n;
272	struct ifindex_entry *e;
273
274	if_indexlim <<= 1;
275	n = if_indexlim * sizeof(*e);
276	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
277	if (ifindex_table != NULL) {
278		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
279		free((caddr_t)ifindex_table, M_IFADDR);
280	}
281	ifindex_table = e;
282}
283
284/* ARGSUSED*/
285static void
286if_check(void *dummy __unused)
287{
288	struct ifnet *ifp;
289	int s;
290
291	s = splimp();
292	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
293	TAILQ_FOREACH(ifp, &ifnet, if_link) {
294		if (ifp->if_snd.ifq_maxlen == 0) {
295			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
296			ifp->if_snd.ifq_maxlen = ifqmaxlen;
297		}
298		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
299			if_printf(ifp,
300			    "XXX: driver didn't initialize queue mtx\n");
301			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
302			    MTX_NETWORK_LOCK, MTX_DEF);
303		}
304	}
305	IFNET_RUNLOCK();
306	splx(s);
307	if_slowtimo(0);
308}
309
310static int
311if_findindex(struct ifnet *ifp)
312{
313	int i, unit;
314	char eaddr[18], devname[32];
315	const char *name, *p;
316
317	switch (ifp->if_type) {
318	case IFT_ETHER:			/* these types use struct arpcom */
319	case IFT_FDDI:
320	case IFT_XETHER:
321	case IFT_ISO88025:
322	case IFT_L2VLAN:
323		snprintf(eaddr, 18, "%6D",
324		    ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":");
325		break;
326	default:
327		eaddr[0] = '\0';
328		break;
329	}
330	strlcpy(devname, ifp->if_xname, sizeof(devname));
331	name = net_cdevsw.d_name;
332	i = 0;
333	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
334		if (resource_string_value(name, unit, "ether", &p) == 0)
335			if (strcmp(p, eaddr) == 0)
336				goto found;
337		if (resource_string_value(name, unit, "dev", &p) == 0)
338			if (strcmp(p, devname) == 0)
339				goto found;
340	}
341	unit = 0;
342found:
343	if (unit != 0) {
344		if (ifaddr_byindex(unit) == NULL)
345			return (unit);
346		printf("%s%d in use, cannot hardwire it to %s.\n",
347		    name, unit, devname);
348	}
349	for (unit = 1; ; unit++) {
350		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
351			continue;
352		if (resource_string_value(name, unit, "ether", &p) == 0 ||
353		    resource_string_value(name, unit, "dev", &p) == 0)
354			continue;
355		break;
356	}
357	return (unit);
358}
359
360/*
361 * Attach an interface to the
362 * list of "active" interfaces.
363 */
364void
365if_attach(struct ifnet *ifp)
366{
367	unsigned socksize, ifasize;
368	int namelen, masklen;
369	struct sockaddr_dl *sdl;
370	struct ifaddr *ifa;
371
372	IF_AFDATA_LOCK_INIT(ifp);
373	ifp->if_afdata_initialized = 0;
374	IFNET_WLOCK();
375	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
376	IFNET_WUNLOCK();
377	/*
378	 * XXX -
379	 * The old code would work if the interface passed a pre-existing
380	 * chain of ifaddrs to this code.  We don't trust our callers to
381	 * properly initialize the tailq, however, so we no longer allow
382	 * this unlikely case.
383	 */
384	TAILQ_INIT(&ifp->if_addrhead);
385	TAILQ_INIT(&ifp->if_prefixhead);
386	TAILQ_INIT(&ifp->if_multiaddrs);
387	SLIST_INIT(&ifp->if_klist);
388	getmicrotime(&ifp->if_lastchange);
389
390#ifdef MAC
391	mac_init_ifnet(ifp);
392	mac_create_ifnet(ifp);
393#endif
394
395	ifp->if_index = if_findindex(ifp);
396	if (ifp->if_index > if_index)
397		if_index = ifp->if_index;
398	if (if_index >= if_indexlim)
399		if_grow();
400
401	ifnet_byindex(ifp->if_index) = ifp;
402	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
403	    unit2minor(ifp->if_index),
404	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
405	    net_cdevsw.d_name, ifp->if_xname);
406	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
407	    net_cdevsw.d_name, ifp->if_index);
408
409	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
410
411	/*
412	 * create a Link Level name for this device
413	 */
414	namelen = strlen(ifp->if_xname);
415	/*
416	 * Always save enough space for any possiable name so we can do
417	 * a rename in place later.
418	 */
419	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
420	socksize = masklen + ifp->if_addrlen;
421	if (socksize < sizeof(*sdl))
422		socksize = sizeof(*sdl);
423	socksize = roundup2(socksize, sizeof(long));
424	ifasize = sizeof(*ifa) + 2 * socksize;
425	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
426	IFA_LOCK_INIT(ifa);
427	sdl = (struct sockaddr_dl *)(ifa + 1);
428	sdl->sdl_len = socksize;
429	sdl->sdl_family = AF_LINK;
430	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
431	sdl->sdl_nlen = namelen;
432	sdl->sdl_index = ifp->if_index;
433	sdl->sdl_type = ifp->if_type;
434	ifaddr_byindex(ifp->if_index) = ifa;
435	ifa->ifa_ifp = ifp;
436	ifa->ifa_rtrequest = link_rtrequest;
437	ifa->ifa_addr = (struct sockaddr *)sdl;
438	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
439	ifa->ifa_netmask = (struct sockaddr *)sdl;
440	sdl->sdl_len = masklen;
441	while (namelen != 0)
442		sdl->sdl_data[--namelen] = 0xff;
443	ifa->ifa_refcnt = 1;
444	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
445	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
446
447	if (domains)
448		if_attachdomain1(ifp);
449
450	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
451
452	/* Announce the interface. */
453	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
454}
455
456static void
457if_attachdomain(void *dummy)
458{
459	struct ifnet *ifp;
460	int s;
461
462	s = splnet();
463	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
464		if_attachdomain1(ifp);
465	splx(s);
466}
467SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
468    if_attachdomain, NULL);
469
470static void
471if_attachdomain1(struct ifnet *ifp)
472{
473	struct domain *dp;
474	int s;
475
476	s = splnet();
477
478	/*
479	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
480	 * cannot lock ifp->if_afdata initialization, entirely.
481	 */
482	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
483		splx(s);
484		return;
485	}
486	if (ifp->if_afdata_initialized) {
487		IF_AFDATA_UNLOCK(ifp);
488		splx(s);
489		return;
490	}
491	ifp->if_afdata_initialized = 1;
492	IF_AFDATA_UNLOCK(ifp);
493
494	/* address family dependent data region */
495	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
496	for (dp = domains; dp; dp = dp->dom_next) {
497		if (dp->dom_ifattach)
498			ifp->if_afdata[dp->dom_family] =
499			    (*dp->dom_ifattach)(ifp);
500	}
501
502	splx(s);
503}
504
505/*
506 * Detach an interface, removing it from the
507 * list of "active" interfaces.
508 */
509void
510if_detach(struct ifnet *ifp)
511{
512	struct ifaddr *ifa, *next;
513	struct radix_node_head	*rnh;
514	int s;
515	int i;
516	struct domain *dp;
517
518	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
519	/*
520	 * Remove routes and flush queues.
521	 */
522	s = splnet();
523	if_down(ifp);
524
525	/*
526	 * Remove address from ifindex_table[] and maybe decrement if_index.
527	 * Clean up all addresses.
528	 */
529	ifaddr_byindex(ifp->if_index) = NULL;
530	destroy_dev(ifdev_byindex(ifp->if_index));
531	ifdev_byindex(ifp->if_index) = NULL;
532
533	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
534		if_index--;
535
536	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = next) {
537		next = TAILQ_NEXT(ifa, ifa_link);
538
539		if (ifa->ifa_addr->sa_family == AF_LINK)
540			continue;
541#ifdef INET
542		/* XXX: Ugly!! ad hoc just for INET */
543		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
544			struct ifaliasreq ifr;
545
546			bzero(&ifr, sizeof(ifr));
547			ifr.ifra_addr = *ifa->ifa_addr;
548			if (ifa->ifa_dstaddr)
549				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
550			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
551			    NULL) == 0)
552				continue;
553		}
554#endif /* INET */
555#ifdef INET6
556		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
557			in6_purgeaddr(ifa);
558			/* ifp_addrhead is already updated */
559			continue;
560		}
561#endif /* INET6 */
562		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
563		IFAFREE(ifa);
564	}
565
566#ifdef INET6
567	/*
568	 * Remove all IPv6 kernel structs related to ifp.  This should be done
569	 * before removing routing entries below, since IPv6 interface direct
570	 * routes are expected to be removed by the IPv6-specific kernel API.
571	 * Otherwise, the kernel will detect some inconsistency and bark it.
572	 */
573	in6_ifdetach(ifp);
574#endif
575
576	/* We can now free link ifaddr. */
577	ifa = TAILQ_FIRST(&ifp->if_addrhead);
578	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
579	IFAFREE(ifa);
580
581	/*
582	 * Delete all remaining routes using this interface
583	 * Unfortuneatly the only way to do this is to slog through
584	 * the entire routing table looking for routes which point
585	 * to this interface...oh well...
586	 */
587	for (i = 1; i <= AF_MAX; i++) {
588		if ((rnh = rt_tables[i]) == NULL)
589			continue;
590		RADIX_NODE_HEAD_LOCK(rnh);
591		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
592		RADIX_NODE_HEAD_UNLOCK(rnh);
593	}
594
595	/* Announce that the interface is gone. */
596	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
597
598	IF_AFDATA_LOCK(ifp);
599	for (dp = domains; dp; dp = dp->dom_next) {
600		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
601			(*dp->dom_ifdetach)(ifp,
602			    ifp->if_afdata[dp->dom_family]);
603	}
604	IF_AFDATA_UNLOCK(ifp);
605
606#ifdef MAC
607	mac_destroy_ifnet(ifp);
608#endif /* MAC */
609	KNOTE(&ifp->if_klist, NOTE_EXIT);
610	IFNET_WLOCK();
611	TAILQ_REMOVE(&ifnet, ifp, if_link);
612	IFNET_WUNLOCK();
613	mtx_destroy(&ifp->if_snd.ifq_mtx);
614	IF_AFDATA_DESTROY(ifp);
615	splx(s);
616}
617
618/*
619 * Delete Routes for a Network Interface
620 *
621 * Called for each routing entry via the rnh->rnh_walktree() call above
622 * to delete all route entries referencing a detaching network interface.
623 *
624 * Arguments:
625 *	rn	pointer to node in the routing table
626 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
627 *
628 * Returns:
629 *	0	successful
630 *	errno	failed - reason indicated
631 *
632 */
633static int
634if_rtdel(struct radix_node *rn, void *arg)
635{
636	struct rtentry	*rt = (struct rtentry *)rn;
637	struct ifnet	*ifp = arg;
638	int		err;
639
640	if (rt->rt_ifp == ifp) {
641
642		/*
643		 * Protect (sorta) against walktree recursion problems
644		 * with cloned routes
645		 */
646		if ((rt->rt_flags & RTF_UP) == 0)
647			return (0);
648
649		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
650				rt_mask(rt), rt->rt_flags,
651				(struct rtentry **) NULL);
652		if (err) {
653			log(LOG_WARNING, "if_rtdel: error %d\n", err);
654		}
655	}
656
657	return (0);
658}
659
660/*
661 * Create a clone network interface.
662 */
663int
664if_clone_create(char *name, int len)
665{
666	struct if_clone *ifc;
667	char *dp;
668	int wildcard, bytoff, bitoff;
669	int unit;
670	int err;
671
672	ifc = if_clone_lookup(name, &unit);
673	if (ifc == NULL)
674		return (EINVAL);
675
676	if (ifunit(name) != NULL)
677		return (EEXIST);
678
679	bytoff = bitoff = 0;
680	wildcard = (unit < 0);
681	/*
682	 * Find a free unit if none was given.
683	 */
684	if (wildcard) {
685		while ((bytoff < ifc->ifc_bmlen)
686		    && (ifc->ifc_units[bytoff] == 0xff))
687			bytoff++;
688		if (bytoff >= ifc->ifc_bmlen)
689			return (ENOSPC);
690		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
691			bitoff++;
692		unit = (bytoff << 3) + bitoff;
693	}
694
695	if (unit > ifc->ifc_maxunit)
696		return (ENXIO);
697
698	err = (*ifc->ifc_create)(ifc, unit);
699	if (err != 0)
700		return (err);
701
702	if (!wildcard) {
703		bytoff = unit >> 3;
704		bitoff = unit - (bytoff << 3);
705	}
706
707	/*
708	 * Allocate the unit in the bitmap.
709	 */
710	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
711	    ("%s: bit is already set", __func__));
712	ifc->ifc_units[bytoff] |= (1 << bitoff);
713
714	/* In the wildcard case, we need to update the name. */
715	if (wildcard) {
716		for (dp = name; *dp != '\0'; dp++);
717		if (snprintf(dp, len - (dp-name), "%d", unit) >
718		    len - (dp-name) - 1) {
719			/*
720			 * This can only be a programmer error and
721			 * there's no straightforward way to recover if
722			 * it happens.
723			 */
724			panic("if_clone_create(): interface name too long");
725		}
726
727	}
728
729	return (0);
730}
731
732/*
733 * Destroy a clone network interface.
734 */
735int
736if_clone_destroy(const char *name)
737{
738	struct if_clone *ifc;
739	struct ifnet *ifp;
740	int bytoff, bitoff;
741	int unit;
742
743	ifp = ifunit(name);
744	if (ifp == NULL)
745		return (ENXIO);
746
747	unit = ifp->if_dunit;
748
749	ifc = if_clone_lookup(ifp->if_dname, NULL);
750	if (ifc == NULL)
751		return (EINVAL);
752
753	if (ifc->ifc_destroy == NULL)
754		return (EOPNOTSUPP);
755
756	(*ifc->ifc_destroy)(ifp);
757
758	/*
759	 * Compute offset in the bitmap and deallocate the unit.
760	 */
761	bytoff = unit >> 3;
762	bitoff = unit - (bytoff << 3);
763	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
764	    ("%s: bit is already cleared", __func__));
765	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
766	return (0);
767}
768
769/*
770 * Look up a network interface cloner.
771 */
772static struct if_clone *
773if_clone_lookup(const char *name, int *unitp)
774{
775	struct if_clone *ifc;
776	const char *cp;
777	int i;
778
779	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) {
780		for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) {
781			if (ifc->ifc_name[i] != *cp)
782				goto next_ifc;
783		}
784		goto found_name;
785 next_ifc:
786		ifc = LIST_NEXT(ifc, ifc_list);
787	}
788
789	/* No match. */
790	return ((struct if_clone *)NULL);
791
792 found_name:
793	if (*cp == '\0') {
794		i = -1;
795	} else {
796		for (i = 0; *cp != '\0'; cp++) {
797			if (*cp < '0' || *cp > '9') {
798				/* Bogus unit number. */
799				return (NULL);
800			}
801			i = (i * 10) + (*cp - '0');
802		}
803	}
804
805	if (unitp != NULL)
806		*unitp = i;
807	return (ifc);
808}
809
810/*
811 * Register a network interface cloner.
812 */
813void
814if_clone_attach(struct if_clone *ifc)
815{
816	int bytoff, bitoff;
817	int err;
818	int len, maxclone;
819	int unit;
820
821	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
822	    ("%s: %s requested more units then allowed (%d > %d)",
823	    __func__, ifc->ifc_name, ifc->ifc_minifs,
824	    ifc->ifc_maxunit + 1));
825	/*
826	 * Compute bitmap size and allocate it.
827	 */
828	maxclone = ifc->ifc_maxunit + 1;
829	len = maxclone >> 3;
830	if ((len << 3) < maxclone)
831		len++;
832	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
833	ifc->ifc_bmlen = len;
834
835	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
836	if_cloners_count++;
837
838	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
839		err = (*ifc->ifc_create)(ifc, unit);
840		KASSERT(err == 0,
841		    ("%s: failed to create required interface %s%d",
842		    __func__, ifc->ifc_name, unit));
843
844		/* Allocate the unit in the bitmap. */
845		bytoff = unit >> 3;
846		bitoff = unit - (bytoff << 3);
847		ifc->ifc_units[bytoff] |= (1 << bitoff);
848	}
849	EVENTHANDLER_INVOKE(if_clone_event, ifc);
850}
851
852/*
853 * Unregister a network interface cloner.
854 */
855void
856if_clone_detach(struct if_clone *ifc)
857{
858
859	LIST_REMOVE(ifc, ifc_list);
860	free(ifc->ifc_units, M_CLONE);
861	if_cloners_count--;
862}
863
864/*
865 * Provide list of interface cloners to userspace.
866 */
867static int
868if_clone_list(struct if_clonereq *ifcr)
869{
870	char outbuf[IFNAMSIZ], *dst;
871	struct if_clone *ifc;
872	int count, error = 0;
873
874	ifcr->ifcr_total = if_cloners_count;
875	if ((dst = ifcr->ifcr_buffer) == NULL) {
876		/* Just asking how many there are. */
877		return (0);
878	}
879
880	if (ifcr->ifcr_count < 0)
881		return (EINVAL);
882
883	count = (if_cloners_count < ifcr->ifcr_count) ?
884	    if_cloners_count : ifcr->ifcr_count;
885
886	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
887	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
888		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
889		error = copyout(outbuf, dst, IFNAMSIZ);
890		if (error)
891			break;
892	}
893
894	return (error);
895}
896
/*
 * True when the first (a1)->sa_len bytes of the two sockaddrs are
 * identical.  Note that a1 is evaluated twice.
 */
#define	equal(a1, a2)	(bcmp((a1), (a2), (a1)->sa_len) == 0)
898
899/*
900 * Locate an interface based on a complete address.
901 */
902/*ARGSUSED*/
903struct ifaddr *
904ifa_ifwithaddr(struct sockaddr *addr)
905{
906	struct ifnet *ifp;
907	struct ifaddr *ifa;
908
909	IFNET_RLOCK();
910	TAILQ_FOREACH(ifp, &ifnet, if_link)
911		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
912			if (ifa->ifa_addr->sa_family != addr->sa_family)
913				continue;
914			if (equal(addr, ifa->ifa_addr))
915				goto done;
916			/* IP6 doesn't have broadcast */
917			if ((ifp->if_flags & IFF_BROADCAST) &&
918			    ifa->ifa_broadaddr &&
919			    ifa->ifa_broadaddr->sa_len != 0 &&
920			    equal(ifa->ifa_broadaddr, addr))
921				goto done;
922		}
923	ifa = NULL;
924done:
925	IFNET_RUNLOCK();
926	return (ifa);
927}
928
929/*
930 * Locate the point to point interface with a given destination address.
931 */
932/*ARGSUSED*/
933struct ifaddr *
934ifa_ifwithdstaddr(struct sockaddr *addr)
935{
936	struct ifnet *ifp;
937	struct ifaddr *ifa;
938
939	IFNET_RLOCK();
940	TAILQ_FOREACH(ifp, &ifnet, if_link) {
941		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
942			continue;
943		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
944			if (ifa->ifa_addr->sa_family != addr->sa_family)
945				continue;
946			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
947				goto done;
948		}
949	}
950	ifa = NULL;
951done:
952	IFNET_RUNLOCK();
953	return (ifa);
954}
955
956/*
957 * Find an interface on a specific network.  If many, choice
958 * is most specific found.
959 */
960struct ifaddr *
961ifa_ifwithnet(struct sockaddr *addr)
962{
963	struct ifnet *ifp;
964	struct ifaddr *ifa;
965	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
966	u_int af = addr->sa_family;
967	char *addr_data = addr->sa_data, *cplim;
968
969	/*
970	 * AF_LINK addresses can be looked up directly by their index number,
971	 * so do that if we can.
972	 */
973	if (af == AF_LINK) {
974	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
975	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
976		return (ifaddr_byindex(sdl->sdl_index));
977	}
978
979	/*
980	 * Scan though each interface, looking for ones that have
981	 * addresses in this address family.
982	 */
983	IFNET_RLOCK();
984	TAILQ_FOREACH(ifp, &ifnet, if_link) {
985		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
986			char *cp, *cp2, *cp3;
987
988			if (ifa->ifa_addr->sa_family != af)
989next:				continue;
990			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
991				/*
992				 * This is a bit broken as it doesn't
993				 * take into account that the remote end may
994				 * be a single node in the network we are
995				 * looking for.
996				 * The trouble is that we don't know the
997				 * netmask for the remote end.
998				 */
999				if (ifa->ifa_dstaddr != 0
1000				    && equal(addr, ifa->ifa_dstaddr))
1001					goto done;
1002			} else {
1003				/*
1004				 * if we have a special address handler,
1005				 * then use it instead of the generic one.
1006				 */
1007				if (ifa->ifa_claim_addr) {
1008					if ((*ifa->ifa_claim_addr)(ifa, addr))
1009						goto done;
1010					continue;
1011				}
1012
1013				/*
1014				 * Scan all the bits in the ifa's address.
1015				 * If a bit dissagrees with what we are
1016				 * looking for, mask it with the netmask
1017				 * to see if it really matters.
1018				 * (A byte at a time)
1019				 */
1020				if (ifa->ifa_netmask == 0)
1021					continue;
1022				cp = addr_data;
1023				cp2 = ifa->ifa_addr->sa_data;
1024				cp3 = ifa->ifa_netmask->sa_data;
1025				cplim = ifa->ifa_netmask->sa_len
1026					+ (char *)ifa->ifa_netmask;
1027				while (cp3 < cplim)
1028					if ((*cp++ ^ *cp2++) & *cp3++)
1029						goto next; /* next address! */
1030				/*
1031				 * If the netmask of what we just found
1032				 * is more specific than what we had before
1033				 * (if we had one) then remember the new one
1034				 * before continuing to search
1035				 * for an even better one.
1036				 */
1037				if (ifa_maybe == 0 ||
1038				    rn_refines((caddr_t)ifa->ifa_netmask,
1039				    (caddr_t)ifa_maybe->ifa_netmask))
1040					ifa_maybe = ifa;
1041			}
1042		}
1043	}
1044	ifa = ifa_maybe;
1045done:
1046	IFNET_RUNLOCK();
1047	return (ifa);
1048}
1049
1050/*
1051 * Find an interface address specific to an interface best matching
1052 * a given address.
1053 */
1054struct ifaddr *
1055ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1056{
1057	struct ifaddr *ifa;
1058	char *cp, *cp2, *cp3;
1059	char *cplim;
1060	struct ifaddr *ifa_maybe = 0;
1061	u_int af = addr->sa_family;
1062
1063	if (af >= AF_MAX)
1064		return (0);
1065	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1066		if (ifa->ifa_addr->sa_family != af)
1067			continue;
1068		if (ifa_maybe == 0)
1069			ifa_maybe = ifa;
1070		if (ifa->ifa_netmask == 0) {
1071			if (equal(addr, ifa->ifa_addr) ||
1072			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1073				goto done;
1074			continue;
1075		}
1076		if (ifp->if_flags & IFF_POINTOPOINT) {
1077			if (equal(addr, ifa->ifa_dstaddr))
1078				goto done;
1079		} else {
1080			cp = addr->sa_data;
1081			cp2 = ifa->ifa_addr->sa_data;
1082			cp3 = ifa->ifa_netmask->sa_data;
1083			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1084			for (; cp3 < cplim; cp3++)
1085				if ((*cp++ ^ *cp2++) & *cp3)
1086					break;
1087			if (cp3 == cplim)
1088				goto done;
1089		}
1090	}
1091	ifa = ifa_maybe;
1092done:
1093	return (ifa);
1094}
1095
1096#include <net/route.h>
1097
1098/*
1099 * Default action when installing a route with a Link Level gateway.
1100 * Lookup an appropriate real ifa to point to.
1101 * This should be moved to /sys/net/link.c eventually.
1102 */
1103static void
1104link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1105{
1106	struct ifaddr *ifa, *oifa;
1107	struct sockaddr *dst;
1108	struct ifnet *ifp;
1109
1110	RT_LOCK_ASSERT(rt);
1111
1112	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1113	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1114		return;
1115	ifa = ifaof_ifpforaddr(dst, ifp);
1116	if (ifa) {
1117		IFAREF(ifa);		/* XXX */
1118		oifa = rt->rt_ifa;
1119		rt->rt_ifa = ifa;
1120		IFAFREE(oifa);
1121		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1122			ifa->ifa_rtrequest(cmd, rt, info);
1123	}
1124}
1125
1126/*
1127 * Mark an interface down and notify protocols of
1128 * the transition.
1129 * NOTE: must be called at splnet or eqivalent.
1130 */
1131void
1132if_unroute(struct ifnet *ifp, int flag, int fam)
1133{
1134	struct ifaddr *ifa;
1135
1136	ifp->if_flags &= ~flag;
1137	getmicrotime(&ifp->if_lastchange);
1138	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1139		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1140			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1141	if_qflush(&ifp->if_snd);
1142	rt_ifmsg(ifp);
1143}
1144
1145/*
1146 * Mark an interface up and notify protocols of
1147 * the transition.
1148 * NOTE: must be called at splnet or eqivalent.
1149 */
1150void
1151if_route(struct ifnet *ifp, int flag, int fam)
1152{
1153	struct ifaddr *ifa;
1154
1155	ifp->if_flags |= flag;
1156	getmicrotime(&ifp->if_lastchange);
1157	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1158		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1159			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1160	rt_ifmsg(ifp);
1161#ifdef INET6
1162	in6_if_up(ifp);
1163#endif
1164}
1165
1166/*
1167 * Mark an interface down and notify protocols of
1168 * the transition.
1169 * NOTE: must be called at splnet or eqivalent.
1170 */
1171void
1172if_down(struct ifnet *ifp)
1173{
1174
1175	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1176}
1177
1178/*
1179 * Mark an interface up and notify protocols of
1180 * the transition.
1181 * NOTE: must be called at splnet or eqivalent.
1182 */
1183void
1184if_up(struct ifnet *ifp)
1185{
1186
1187	if_route(ifp, IFF_UP, AF_UNSPEC);
1188}
1189
1190/*
1191 * Flush an interface queue.
1192 */
1193static void
1194if_qflush(struct ifqueue *ifq)
1195{
1196	struct mbuf *m, *n;
1197
1198	n = ifq->ifq_head;
1199	while ((m = n) != 0) {
1200		n = m->m_act;
1201		m_freem(m);
1202	}
1203	ifq->ifq_head = 0;
1204	ifq->ifq_tail = 0;
1205	ifq->ifq_len = 0;
1206}
1207
1208/*
1209 * Handle interface watchdog timer routines.  Called
1210 * from softclock, we decrement timers (if set) and
1211 * call the appropriate interface routine on expiration.
1212 */
1213static void
1214if_slowtimo(void *arg)
1215{
1216	struct ifnet *ifp;
1217	int s = splimp();
1218
1219	IFNET_RLOCK();
1220	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1221		if (ifp->if_timer == 0 || --ifp->if_timer)
1222			continue;
1223		if (ifp->if_watchdog)
1224			(*ifp->if_watchdog)(ifp);
1225	}
1226	IFNET_RUNLOCK();
1227	splx(s);
1228	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1229}
1230
1231/*
1232 * Map interface name to
1233 * interface structure pointer.
1234 */
1235struct ifnet *
1236ifunit(const char *name)
1237{
1238	struct ifnet *ifp;
1239
1240	IFNET_RLOCK();
1241	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1242		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1243			break;
1244	}
1245	IFNET_RUNLOCK();
1246	return (ifp);
1247}
1248
1249/*
1250 * Hardware specific interface ioctls.
1251 */
1252static int
1253ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1254{
1255	struct ifreq *ifr;
1256	struct ifstat *ifs;
1257	int error = 0;
1258	int new_flags;
1259	size_t namelen, onamelen;
1260	char new_name[IFNAMSIZ];
1261	struct ifaddr *ifa;
1262	struct sockaddr_dl *sdl;
1263
1264	ifr = (struct ifreq *)data;
1265	switch (cmd) {
1266	case SIOCGIFINDEX:
1267		ifr->ifr_index = ifp->if_index;
1268		break;
1269
1270	case SIOCGIFFLAGS:
1271		ifr->ifr_flags = ifp->if_flags & 0xffff;
1272		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1273		break;
1274
1275	case SIOCGIFCAP:
1276		ifr->ifr_reqcap = ifp->if_capabilities;
1277		ifr->ifr_curcap = ifp->if_capenable;
1278		break;
1279
1280#ifdef MAC
1281	case SIOCGIFMAC:
1282		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1283		break;
1284#endif
1285
1286	case SIOCGIFMETRIC:
1287		ifr->ifr_metric = ifp->if_metric;
1288		break;
1289
1290	case SIOCGIFMTU:
1291		ifr->ifr_mtu = ifp->if_mtu;
1292		break;
1293
1294	case SIOCGIFPHYS:
1295		ifr->ifr_phys = ifp->if_physical;
1296		break;
1297
1298	case SIOCSIFFLAGS:
1299		error = suser(td);
1300		if (error)
1301			return (error);
1302		new_flags = (ifr->ifr_flags & 0xffff) |
1303		    (ifr->ifr_flagshigh << 16);
1304		if (ifp->if_flags & IFF_SMART) {
1305			/* Smart drivers twiddle their own routes */
1306		} else if (ifp->if_flags & IFF_UP &&
1307		    (new_flags & IFF_UP) == 0) {
1308			int s = splimp();
1309			if_down(ifp);
1310			splx(s);
1311		} else if (new_flags & IFF_UP &&
1312		    (ifp->if_flags & IFF_UP) == 0) {
1313			int s = splimp();
1314			if_up(ifp);
1315			splx(s);
1316		}
1317		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1318			(new_flags &~ IFF_CANTCHANGE);
1319		if (new_flags & IFF_PPROMISC) {
1320			/* Permanently promiscuous mode requested */
1321			ifp->if_flags |= IFF_PROMISC;
1322		} else if (ifp->if_pcount == 0) {
1323			ifp->if_flags &= ~IFF_PROMISC;
1324		}
1325		if (ifp->if_ioctl)
1326			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1327		getmicrotime(&ifp->if_lastchange);
1328		break;
1329
1330	case SIOCSIFCAP:
1331		error = suser(td);
1332		if (error)
1333			return (error);
1334		if (ifp->if_ioctl == NULL)
1335			return (EOPNOTSUPP);
1336		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1337			return (EINVAL);
1338		error = (*ifp->if_ioctl)(ifp, cmd, data);
1339		if (error == 0)
1340			getmicrotime(&ifp->if_lastchange);
1341		break;
1342
1343#ifdef MAC
1344	case SIOCSIFMAC:
1345		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1346		break;
1347#endif
1348
1349	case SIOCSIFNAME:
1350		error = suser(td);
1351		if (error)
1352			return (error);
1353		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1354		if (error)
1355			return (error);
1356		if (ifunit(new_name) != NULL)
1357			return (EEXIST);
1358
1359		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1360		/* Announce the departure of the interface. */
1361		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1362
1363		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1364		ifa = TAILQ_FIRST(&ifp->if_addrhead);
1365		IFA_LOCK(ifa);
1366		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1367		namelen = strlen(new_name);
1368		onamelen = sdl->sdl_nlen;
1369		/*
1370		 * Move the address if needed.  This is safe because we
1371		 * allocate space for a name of length IFNAMSIZ when we
1372		 * create this in if_attach().
1373		 */
1374		if (namelen != onamelen) {
1375			bcopy(sdl->sdl_data + onamelen,
1376			    sdl->sdl_data + namelen, sdl->sdl_alen);
1377		}
1378		bcopy(new_name, sdl->sdl_data, namelen);
1379		sdl->sdl_nlen = namelen;
1380		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1381		bzero(sdl->sdl_data, onamelen);
1382		while (namelen != 0)
1383			sdl->sdl_data[--namelen] = 0xff;
1384		IFA_UNLOCK(ifa);
1385
1386		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1387		/* Announce the return of the interface. */
1388		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1389		break;
1390
1391	case SIOCSIFMETRIC:
1392		error = suser(td);
1393		if (error)
1394			return (error);
1395		ifp->if_metric = ifr->ifr_metric;
1396		getmicrotime(&ifp->if_lastchange);
1397		break;
1398
1399	case SIOCSIFPHYS:
1400		error = suser(td);
1401		if (error)
1402			return (error);
1403		if (ifp->if_ioctl == NULL)
1404			return (EOPNOTSUPP);
1405		error = (*ifp->if_ioctl)(ifp, cmd, data);
1406		if (error == 0)
1407			getmicrotime(&ifp->if_lastchange);
1408		break;
1409
1410	case SIOCSIFMTU:
1411	{
1412		u_long oldmtu = ifp->if_mtu;
1413
1414		error = suser(td);
1415		if (error)
1416			return (error);
1417		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1418			return (EINVAL);
1419		if (ifp->if_ioctl == NULL)
1420			return (EOPNOTSUPP);
1421		error = (*ifp->if_ioctl)(ifp, cmd, data);
1422		if (error == 0) {
1423			getmicrotime(&ifp->if_lastchange);
1424			rt_ifmsg(ifp);
1425		}
1426		/*
1427		 * If the link MTU changed, do network layer specific procedure.
1428		 */
1429		if (ifp->if_mtu != oldmtu) {
1430#ifdef INET6
1431			nd6_setmtu(ifp);
1432#endif
1433		}
1434		break;
1435	}
1436
1437	case SIOCADDMULTI:
1438	case SIOCDELMULTI:
1439		error = suser(td);
1440		if (error)
1441			return (error);
1442
1443		/* Don't allow group membership on non-multicast interfaces. */
1444		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1445			return (EOPNOTSUPP);
1446
1447		/* Don't let users screw up protocols' entries. */
1448		if (ifr->ifr_addr.sa_family != AF_LINK)
1449			return (EINVAL);
1450
1451		if (cmd == SIOCADDMULTI) {
1452			struct ifmultiaddr *ifma;
1453			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1454		} else {
1455			error = if_delmulti(ifp, &ifr->ifr_addr);
1456		}
1457		if (error == 0)
1458			getmicrotime(&ifp->if_lastchange);
1459		break;
1460
1461	case SIOCSIFPHYADDR:
1462	case SIOCDIFPHYADDR:
1463#ifdef INET6
1464	case SIOCSIFPHYADDR_IN6:
1465#endif
1466	case SIOCSLIFPHYADDR:
1467	case SIOCSIFMEDIA:
1468	case SIOCSIFGENERIC:
1469		error = suser(td);
1470		if (error)
1471			return (error);
1472		if (ifp->if_ioctl == NULL)
1473			return (EOPNOTSUPP);
1474		error = (*ifp->if_ioctl)(ifp, cmd, data);
1475		if (error == 0)
1476			getmicrotime(&ifp->if_lastchange);
1477		break;
1478
1479	case SIOCGIFSTATUS:
1480		ifs = (struct ifstat *)data;
1481		ifs->ascii[0] = '\0';
1482
1483	case SIOCGIFPSRCADDR:
1484	case SIOCGIFPDSTADDR:
1485	case SIOCGLIFPHYADDR:
1486	case SIOCGIFMEDIA:
1487	case SIOCGIFGENERIC:
1488		if (ifp->if_ioctl == NULL)
1489			return (EOPNOTSUPP);
1490		error = (*ifp->if_ioctl)(ifp, cmd, data);
1491		break;
1492
1493	case SIOCSIFLLADDR:
1494		error = suser(td);
1495		if (error)
1496			return (error);
1497		error = if_setlladdr(ifp,
1498		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1499		break;
1500
1501	default:
1502		error = ENOIOCTL;
1503		break;
1504	}
1505	return (error);
1506}
1507
1508/*
1509 * Interface ioctls.
1510 */
1511int
1512ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1513{
1514	struct ifnet *ifp;
1515	struct ifreq *ifr;
1516	int error;
1517	int oif_flags;
1518
1519	switch (cmd) {
1520	case SIOCGIFCONF:
1521	case OSIOCGIFCONF:
1522		return (ifconf(cmd, data));
1523	}
1524	ifr = (struct ifreq *)data;
1525
1526	switch (cmd) {
1527	case SIOCIFCREATE:
1528	case SIOCIFDESTROY:
1529		if ((error = suser(td)) != 0)
1530			return (error);
1531		return ((cmd == SIOCIFCREATE) ?
1532			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1533			if_clone_destroy(ifr->ifr_name));
1534
1535	case SIOCIFGCLONERS:
1536		return (if_clone_list((struct if_clonereq *)data));
1537	}
1538
1539	ifp = ifunit(ifr->ifr_name);
1540	if (ifp == 0)
1541		return (ENXIO);
1542
1543	error = ifhwioctl(cmd, ifp, data, td);
1544	if (error != ENOIOCTL)
1545		return (error);
1546
1547	oif_flags = ifp->if_flags;
1548	if (so->so_proto == 0)
1549		return (EOPNOTSUPP);
1550#ifndef COMPAT_43
1551	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1552								 data,
1553								 ifp, td));
1554#else
1555	{
1556		int ocmd = cmd;
1557
1558		switch (cmd) {
1559
1560		case SIOCSIFDSTADDR:
1561		case SIOCSIFADDR:
1562		case SIOCSIFBRDADDR:
1563		case SIOCSIFNETMASK:
1564#if BYTE_ORDER != BIG_ENDIAN
1565			if (ifr->ifr_addr.sa_family == 0 &&
1566			    ifr->ifr_addr.sa_len < 16) {
1567				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1568				ifr->ifr_addr.sa_len = 16;
1569			}
1570#else
1571			if (ifr->ifr_addr.sa_len == 0)
1572				ifr->ifr_addr.sa_len = 16;
1573#endif
1574			break;
1575
1576		case OSIOCGIFADDR:
1577			cmd = SIOCGIFADDR;
1578			break;
1579
1580		case OSIOCGIFDSTADDR:
1581			cmd = SIOCGIFDSTADDR;
1582			break;
1583
1584		case OSIOCGIFBRDADDR:
1585			cmd = SIOCGIFBRDADDR;
1586			break;
1587
1588		case OSIOCGIFNETMASK:
1589			cmd = SIOCGIFNETMASK;
1590		}
1591		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1592								   cmd,
1593								   data,
1594								   ifp, td));
1595		switch (ocmd) {
1596
1597		case OSIOCGIFADDR:
1598		case OSIOCGIFDSTADDR:
1599		case OSIOCGIFBRDADDR:
1600		case OSIOCGIFNETMASK:
1601			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1602
1603		}
1604	}
1605#endif /* COMPAT_43 */
1606
1607	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1608#ifdef INET6
1609		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1610		if (ifp->if_flags & IFF_UP) {
1611			int s = splimp();
1612			in6_if_up(ifp);
1613			splx(s);
1614		}
1615#endif
1616	}
1617	return (error);
1618}
1619
1620/*
1621 * Set/clear promiscuous mode on interface ifp based on the truth value
1622 * of pswitch.  The calls are reference counted so that only the first
1623 * "on" request actually has an effect, as does the final "off" request.
1624 * Results are undefined if the "off" and "on" requests are not matched.
1625 */
1626int
1627ifpromisc(struct ifnet *ifp, int pswitch)
1628{
1629	struct ifreq ifr;
1630	int error;
1631	int oldflags, oldpcount;
1632
1633	oldpcount = ifp->if_pcount;
1634	oldflags = ifp->if_flags;
1635	if (ifp->if_flags & IFF_PPROMISC) {
1636		/* Do nothing if device is in permanently promiscuous mode */
1637		ifp->if_pcount += pswitch ? 1 : -1;
1638		return (0);
1639	}
1640	if (pswitch) {
1641		/*
1642		 * If the device is not configured up, we cannot put it in
1643		 * promiscuous mode.
1644		 */
1645		if ((ifp->if_flags & IFF_UP) == 0)
1646			return (ENETDOWN);
1647		if (ifp->if_pcount++ != 0)
1648			return (0);
1649		ifp->if_flags |= IFF_PROMISC;
1650	} else {
1651		if (--ifp->if_pcount > 0)
1652			return (0);
1653		ifp->if_flags &= ~IFF_PROMISC;
1654	}
1655	ifr.ifr_flags = ifp->if_flags & 0xffff;
1656	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1657	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1658	if (error == 0) {
1659		log(LOG_INFO, "%s: promiscuous mode %s\n",
1660		    ifp->if_xname,
1661		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1662		rt_ifmsg(ifp);
1663	} else {
1664		ifp->if_pcount = oldpcount;
1665		ifp->if_flags = oldflags;
1666	}
1667	return error;
1668}
1669
1670/*
1671 * Return interface configuration
1672 * of system.  List may be used
1673 * in later ioctl's (above) to get
1674 * other information.
1675 */
1676/*ARGSUSED*/
1677static int
1678ifconf(u_long cmd, caddr_t data)
1679{
1680	struct ifconf *ifc = (struct ifconf *)data;
1681	struct ifnet *ifp;
1682	struct ifaddr *ifa;
1683	struct ifreq ifr, *ifrp;
1684	int space = ifc->ifc_len, error = 0;
1685
1686	ifrp = ifc->ifc_req;
1687	IFNET_RLOCK();		/* could sleep XXX */
1688	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1689		int addrs;
1690
1691		if (space < sizeof(ifr))
1692			break;
1693		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1694		    >= sizeof(ifr.ifr_name)) {
1695			error = ENAMETOOLONG;
1696			break;
1697		}
1698
1699		addrs = 0;
1700		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1701			struct sockaddr *sa = ifa->ifa_addr;
1702
1703			if (space < sizeof(ifr))
1704				break;
1705			if (jailed(curthread->td_ucred) &&
1706			    prison_if(curthread->td_ucred, sa))
1707				continue;
1708			addrs++;
1709#ifdef COMPAT_43
1710			if (cmd == OSIOCGIFCONF) {
1711				struct osockaddr *osa =
1712					 (struct osockaddr *)&ifr.ifr_addr;
1713				ifr.ifr_addr = *sa;
1714				osa->sa_family = sa->sa_family;
1715				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1716						sizeof (ifr));
1717				ifrp++;
1718			} else
1719#endif
1720			if (sa->sa_len <= sizeof(*sa)) {
1721				ifr.ifr_addr = *sa;
1722				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1723						sizeof (ifr));
1724				ifrp++;
1725			} else {
1726				if (space < sizeof (ifr) + sa->sa_len -
1727					    sizeof(*sa))
1728					break;
1729				space -= sa->sa_len - sizeof(*sa);
1730				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1731						sizeof (ifr.ifr_name));
1732				if (error == 0)
1733				    error = copyout((caddr_t)sa,
1734				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1735				ifrp = (struct ifreq *)
1736					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1737			}
1738			if (error)
1739				break;
1740			space -= sizeof (ifr);
1741		}
1742		if (error)
1743			break;
1744		if (!addrs) {
1745			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1746			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1747			    sizeof (ifr));
1748			if (error)
1749				break;
1750			space -= sizeof (ifr);
1751			ifrp++;
1752		}
1753	}
1754	IFNET_RUNLOCK();
1755	ifc->ifc_len -= space;
1756	return (error);
1757}
1758
1759/*
1760 * Just like if_promisc(), but for all-multicast-reception mode.
1761 */
1762int
1763if_allmulti(struct ifnet *ifp, int onswitch)
1764{
1765	int error = 0;
1766	int s = splimp();
1767	struct ifreq ifr;
1768
1769	if (onswitch) {
1770		if (ifp->if_amcount++ == 0) {
1771			ifp->if_flags |= IFF_ALLMULTI;
1772			ifr.ifr_flags = ifp->if_flags & 0xffff;
1773			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1774			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1775		}
1776	} else {
1777		if (ifp->if_amcount > 1) {
1778			ifp->if_amcount--;
1779		} else {
1780			ifp->if_amcount = 0;
1781			ifp->if_flags &= ~IFF_ALLMULTI;
1782			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1783			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1784			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1785		}
1786	}
1787	splx(s);
1788
1789	if (error == 0)
1790		rt_ifmsg(ifp);
1791	return error;
1792}
1793
1794/*
1795 * Add a multicast listenership to the interface in question.
1796 * The link layer provides a routine which converts
1797 */
1798int
1799if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1800{
1801	struct sockaddr *llsa, *dupsa;
1802	int error, s;
1803	struct ifmultiaddr *ifma;
1804
1805	/*
1806	 * If the matching multicast address already exists
1807	 * then don't add a new one, just add a reference
1808	 */
1809	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1810		if (equal(sa, ifma->ifma_addr)) {
1811			ifma->ifma_refcount++;
1812			if (retifma)
1813				*retifma = ifma;
1814			return 0;
1815		}
1816	}
1817
1818	/*
1819	 * Give the link layer a chance to accept/reject it, and also
1820	 * find out which AF_LINK address this maps to, if it isn't one
1821	 * already.
1822	 */
1823	if (ifp->if_resolvemulti) {
1824		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1825		if (error) return error;
1826	} else {
1827		llsa = 0;
1828	}
1829
1830	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1831	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1832	bcopy(sa, dupsa, sa->sa_len);
1833
1834	ifma->ifma_addr = dupsa;
1835	ifma->ifma_lladdr = llsa;
1836	ifma->ifma_ifp = ifp;
1837	ifma->ifma_refcount = 1;
1838	ifma->ifma_protospec = 0;
1839	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1840
1841	/*
1842	 * Some network interfaces can scan the address list at
1843	 * interrupt time; lock them out.
1844	 */
1845	s = splimp();
1846	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1847	splx(s);
1848	if (retifma != NULL)
1849		*retifma = ifma;
1850
1851	if (llsa != 0) {
1852		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1853			if (equal(ifma->ifma_addr, llsa))
1854				break;
1855		}
1856		if (ifma) {
1857			ifma->ifma_refcount++;
1858		} else {
1859			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1860			       M_IFMADDR, M_WAITOK);
1861			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1862			       M_IFMADDR, M_WAITOK);
1863			bcopy(llsa, dupsa, llsa->sa_len);
1864			ifma->ifma_addr = dupsa;
1865			ifma->ifma_lladdr = NULL;
1866			ifma->ifma_ifp = ifp;
1867			ifma->ifma_refcount = 1;
1868			ifma->ifma_protospec = 0;
1869			s = splimp();
1870			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1871			splx(s);
1872		}
1873	}
1874	/*
1875	 * We are certain we have added something, so call down to the
1876	 * interface to let them know about it.
1877	 */
1878	s = splimp();
1879	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1880	splx(s);
1881
1882	return 0;
1883}
1884
1885/*
1886 * Remove a reference to a multicast address on this interface.  Yell
1887 * if the request does not match an existing membership.
1888 */
1889int
1890if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1891{
1892	struct ifmultiaddr *ifma;
1893	int s;
1894
1895	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1896		if (equal(sa, ifma->ifma_addr))
1897			break;
1898	if (ifma == 0)
1899		return ENOENT;
1900
1901	if (ifma->ifma_refcount > 1) {
1902		ifma->ifma_refcount--;
1903		return 0;
1904	}
1905
1906	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1907	sa = ifma->ifma_lladdr;
1908	s = splimp();
1909	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1910	/*
1911	 * Make sure the interface driver is notified
1912	 * in the case of a link layer mcast group being left.
1913	 */
1914	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1915		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1916	splx(s);
1917	free(ifma->ifma_addr, M_IFMADDR);
1918	free(ifma, M_IFMADDR);
1919	if (sa == 0)
1920		return 0;
1921
1922	/*
1923	 * Now look for the link-layer address which corresponds to
1924	 * this network address.  It had been squirreled away in
1925	 * ifma->ifma_lladdr for this purpose (so we don't have
1926	 * to call ifp->if_resolvemulti() again), and we saved that
1927	 * value in sa above.  If some nasty deleted the
1928	 * link-layer address out from underneath us, we can deal because
1929	 * the address we stored was is not the same as the one which was
1930	 * in the record for the link-layer address.  (So we don't complain
1931	 * in that case.)
1932	 */
1933	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1934		if (equal(sa, ifma->ifma_addr))
1935			break;
1936	if (ifma == 0)
1937		return 0;
1938
1939	if (ifma->ifma_refcount > 1) {
1940		ifma->ifma_refcount--;
1941		return 0;
1942	}
1943
1944	s = splimp();
1945	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1946	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1947	splx(s);
1948	free(ifma->ifma_addr, M_IFMADDR);
1949	free(sa, M_IFMADDR);
1950	free(ifma, M_IFMADDR);
1951
1952	return 0;
1953}
1954
1955/*
1956 * Set the link layer address on an interface.
1957 *
1958 * At this time we only support certain types of interfaces,
1959 * and we don't allow the length of the address to change.
1960 */
1961int
1962if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1963{
1964	struct sockaddr_dl *sdl;
1965	struct ifaddr *ifa;
1966	struct ifreq ifr;
1967
1968	ifa = ifaddr_byindex(ifp->if_index);
1969	if (ifa == NULL)
1970		return (EINVAL);
1971	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1972	if (sdl == NULL)
1973		return (EINVAL);
1974	if (len != sdl->sdl_alen)	/* don't allow length to change */
1975		return (EINVAL);
1976	switch (ifp->if_type) {
1977	case IFT_ETHER:			/* these types use struct arpcom */
1978	case IFT_FDDI:
1979	case IFT_XETHER:
1980	case IFT_ISO88025:
1981	case IFT_L2VLAN:
1982		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1983		/* FALLTHROUGH */
1984	case IFT_ARCNET:
1985		bcopy(lladdr, LLADDR(sdl), len);
1986		break;
1987	default:
1988		return (ENODEV);
1989	}
1990	/*
1991	 * If the interface is already up, we need
1992	 * to re-init it in order to reprogram its
1993	 * address filter.
1994	 */
1995	if ((ifp->if_flags & IFF_UP) != 0) {
1996		ifp->if_flags &= ~IFF_UP;
1997		ifr.ifr_flags = ifp->if_flags & 0xffff;
1998		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1999		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2000		ifp->if_flags |= IFF_UP;
2001		ifr.ifr_flags = ifp->if_flags & 0xffff;
2002		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2003		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2004#ifdef INET
2005		/*
2006		 * Also send gratuitous ARPs to notify other nodes about
2007		 * the address change.
2008		 */
2009		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2010			if (ifa->ifa_addr != NULL &&
2011			    ifa->ifa_addr->sa_family == AF_INET)
2012				arp_ifinit(ifp, ifa);
2013		}
2014#endif
2015	}
2016	return (0);
2017}
2018
2019struct ifmultiaddr *
2020ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2021{
2022	struct ifmultiaddr *ifma;
2023
2024	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2025		if (equal(ifma->ifma_addr, sa))
2026			break;
2027
2028	return ifma;
2029}
2030
2031/*
2032 * The name argument must be a pointer to storage which will last as
2033 * long as the interface does.  For physical devices, the result of
2034 * device_get_name(dev) is a good choice and for pseudo-devices a
2035 * static string works well.
2036 */
2037void
2038if_initname(struct ifnet *ifp, const char *name, int unit)
2039{
2040	ifp->if_dname = name;
2041	ifp->if_dunit = unit;
2042	if (unit != IF_DUNIT_NONE)
2043		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2044	else
2045		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2046}
2047
2048int
2049if_printf(struct ifnet *ifp, const char * fmt, ...)
2050{
2051	va_list ap;
2052	int retval;
2053
2054	retval = printf("%s: ", ifp->if_xname);
2055	va_start(ap, fmt);
2056	retval += vprintf(fmt, ap);
2057	va_end(ap);
2058	return (retval);
2059}
2060
/*
 * Sysctl tree nodes for the link layer: a read/write "link" node under
 * _net, numbered PF_LINK, and a "generic" child node (number 0) under
 * it.  Presumably these surface as net.link and net.link.generic --
 * confirm against the SYSCTL_NODE definition.
 */
2061	SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
2062	SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
2063