if.c revision 128316
1/*
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)if.c	8.5 (Berkeley) 1/9/95
30 * $FreeBSD: head/sys/net/if.c 128316 2004-04-16 10:32:13Z luigi $
31 */
32
33#include "opt_compat.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36#include "opt_mac.h"
37
38#include <sys/param.h>
39#include <sys/conf.h>
40#include <sys/mac.h>
41#include <sys/malloc.h>
42#include <sys/bus.h>
43#include <sys/mbuf.h>
44#include <sys/systm.h>
45#include <sys/proc.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48#include <sys/protosw.h>
49#include <sys/kernel.h>
50#include <sys/sockio.h>
51#include <sys/syslog.h>
52#include <sys/sysctl.h>
53#include <sys/domain.h>
54#include <sys/jail.h>
55#include <machine/stdarg.h>
56
57#include <net/if.h>
58#include <net/if_arp.h>
59#include <net/if_dl.h>
60#include <net/if_types.h>
61#include <net/if_var.h>
62#include <net/radix.h>
63#include <net/route.h>
64
65#if defined(INET) || defined(INET6)
66/*XXX*/
67#include <netinet/in.h>
68#include <netinet/in_var.h>
69#ifdef INET6
70#include <netinet6/in6_var.h>
71#include <netinet6/in6_ifattach.h>
72#endif
73#endif
74#ifdef INET
75#include <netinet/if_ether.h>
76#endif
77
78static void	if_attachdomain(void *);
79static void	if_attachdomain1(struct ifnet *);
80static int	ifconf(u_long, caddr_t);
81static void	if_grow(void);
82static void	if_init(void *);
83static void	if_check(void *);
84static int	if_findindex(struct ifnet *);
85static void	if_qflush(struct ifqueue *);
86static void	if_slowtimo(void *);
87static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
88static int	if_rtdel(struct radix_node *, void *);
89static struct	if_clone *if_clone_lookup(const char *, int *);
90static int	if_clone_list(struct if_clonereq *);
91static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
92#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related files.
 * Should this be more generalized?
 */
97extern void	nd6_setmtu(struct ifnet *);
98#endif
99
100int	if_index = 0;
101struct	ifindex_entry *ifindex_table = NULL;
102int	ifqmaxlen = IFQ_MAXLEN;
103struct	ifnethead ifnet;	/* depend on static init XXX */
104struct	mtx ifnet_lock;
105static int	if_cloners_count;
106LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
107
108static int	if_indexlim = 8;
109static struct	klist ifklist;
110
111static void	filt_netdetach(struct knote *kn);
112static int	filt_netdev(struct knote *kn, long hint);
113
114static struct filterops netdev_filtops =
115    { 1, NULL, filt_netdetach, filt_netdev };
116
117/*
118 * System initialization
119 */
120SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
121SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
122
123MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
124MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
125MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
126
127static d_open_t		netopen;
128static d_close_t	netclose;
129static d_ioctl_t	netioctl;
130static d_kqfilter_t	netkqfilter;
131
132static struct cdevsw net_cdevsw = {
133	.d_version =	D_VERSION,
134	.d_flags =	D_NEEDGIANT,
135	.d_open =	netopen,
136	.d_close =	netclose,
137	.d_ioctl =	netioctl,
138	.d_name =	"net",
139	.d_kqfilter =	netkqfilter,
140};
141
/*
 * Open entry point for the network device nodes.  No per-open state is
 * kept, so every open succeeds.
 */
static int
netopen(dev_t dev, int flag, int mode, struct thread *td)
{
	const int error = 0;

	return (error);
}
147
/*
 * Close entry point for the network device nodes.  There is nothing to
 * tear down, so every close succeeds.
 */
static int
netclose(dev_t dev, int flags, int fmt, struct thread *td)
{
	const int error = 0;

	return (error);
}
153
154static int
155netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
156{
157	struct ifnet *ifp;
158	int error, idx;
159
160	/* only support interface specific ioctls */
161	if (IOCGROUP(cmd) != 'i')
162		return (EOPNOTSUPP);
163	idx = minor(dev);
164	if (idx == 0) {
165		/*
166		 * special network device, not interface.
167		 */
168		if (cmd == SIOCGIFCONF)
169			return (ifconf(cmd, data));	/* XXX remove cmd */
170		return (EOPNOTSUPP);
171	}
172
173	ifp = ifnet_byindex(idx);
174	if (ifp == NULL)
175		return (ENXIO);
176
177	error = ifhwioctl(cmd, ifp, data, td);
178	if (error == ENOIOCTL)
179		error = EOPNOTSUPP;
180	return (error);
181}
182
183static int
184netkqfilter(dev_t dev, struct knote *kn)
185{
186	struct klist *klist;
187	struct ifnet *ifp;
188	int idx;
189
190	idx = minor(dev);
191	if (idx == 0) {
192		klist = &ifklist;
193	} else {
194		ifp = ifnet_byindex(idx);
195		if (ifp == NULL)
196			return (1);
197		klist = &ifp->if_klist;
198	}
199
200	switch (kn->kn_filter) {
201	case EVFILT_NETDEV:
202		kn->kn_fop = &netdev_filtops;
203		break;
204	default:
205		return (1);
206	}
207
208	kn->kn_hook = (caddr_t)klist;
209
210	/* XXX locking? */
211	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
212
213	return (0);
214}
215
216static void
217filt_netdetach(struct knote *kn)
218{
219	struct klist *klist = (struct klist *)kn->kn_hook;
220
221	if (kn->kn_status & KN_DETACHED)
222		return;
223	SLIST_REMOVE(klist, kn, knote, kn_selnext);
224}
225
226static int
227filt_netdev(struct knote *kn, long hint)
228{
229
230	/*
231	 * Currently NOTE_EXIT is abused to indicate device detach.
232	 */
233	if (hint == NOTE_EXIT) {
234		kn->kn_data = NOTE_LINKINV;
235		kn->kn_status |= KN_DETACHED;
236		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
237		return (1);
238	}
239	kn->kn_data = hint;			/* current status */
240	if (kn->kn_sfflags & hint)
241		kn->kn_fflags |= hint;
242	return (kn->kn_fflags != 0);
243}
244
245/*
246 * Network interface utility routines.
247 *
248 * Routines with ifa_ifwith* names take sockaddr *'s as
249 * parameters.
250 */
251/* ARGSUSED*/
252static void
253if_init(void *dummy __unused)
254{
255
256	IFNET_LOCK_INIT();
257	TAILQ_INIT(&ifnet);
258	SLIST_INIT(&ifklist);
259	if_grow();				/* create initial table */
260	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
261	    UID_ROOT, GID_WHEEL, 0600, "network");
262}
263
264static void
265if_grow(void)
266{
267	u_int n;
268	struct ifindex_entry *e;
269
270	if_indexlim <<= 1;
271	n = if_indexlim * sizeof(*e);
272	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
273	if (ifindex_table != NULL) {
274		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
275		free((caddr_t)ifindex_table, M_IFADDR);
276	}
277	ifindex_table = e;
278}
279
280/* ARGSUSED*/
281static void
282if_check(void *dummy __unused)
283{
284	struct ifnet *ifp;
285	int s;
286
287	s = splimp();
288	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
289	TAILQ_FOREACH(ifp, &ifnet, if_link) {
290		if (ifp->if_snd.ifq_maxlen == 0) {
291			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
292			ifp->if_snd.ifq_maxlen = ifqmaxlen;
293		}
294		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
295			if_printf(ifp,
296			    "XXX: driver didn't initialize queue mtx\n");
297			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
298			    MTX_NETWORK_LOCK, MTX_DEF);
299		}
300	}
301	IFNET_RUNLOCK();
302	splx(s);
303	if_slowtimo(0);
304}
305
306static int
307if_findindex(struct ifnet *ifp)
308{
309	int i, unit;
310	char eaddr[18], devname[32];
311	const char *name, *p;
312
313	switch (ifp->if_type) {
314	case IFT_ETHER:			/* these types use struct arpcom */
315	case IFT_FDDI:
316	case IFT_XETHER:
317	case IFT_ISO88025:
318	case IFT_L2VLAN:
319		snprintf(eaddr, 18, "%6D",
320		    ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":");
321		break;
322	default:
323		eaddr[0] = '\0';
324		break;
325	}
326	strlcpy(devname, ifp->if_xname, sizeof(devname));
327	name = net_cdevsw.d_name;
328	i = 0;
329	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
330		if (resource_string_value(name, unit, "ether", &p) == 0)
331			if (strcmp(p, eaddr) == 0)
332				goto found;
333		if (resource_string_value(name, unit, "dev", &p) == 0)
334			if (strcmp(p, devname) == 0)
335				goto found;
336	}
337	unit = 0;
338found:
339	if (unit != 0) {
340		if (ifaddr_byindex(unit) == NULL)
341			return (unit);
342		printf("%s%d in use, cannot hardwire it to %s.\n",
343		    name, unit, devname);
344	}
345	for (unit = 1; ; unit++) {
346		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
347			continue;
348		if (resource_string_value(name, unit, "ether", &p) == 0 ||
349		    resource_string_value(name, unit, "dev", &p) == 0)
350			continue;
351		break;
352	}
353	return (unit);
354}
355
356/*
357 * Attach an interface to the
358 * list of "active" interfaces.
359 */
360void
361if_attach(struct ifnet *ifp)
362{
363	unsigned socksize, ifasize;
364	int namelen, masklen;
365	struct sockaddr_dl *sdl;
366	struct ifaddr *ifa;
367
368	IF_AFDATA_LOCK_INIT(ifp);
369	ifp->if_afdata_initialized = 0;
370	IFNET_WLOCK();
371	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
372	IFNET_WUNLOCK();
373	/*
374	 * XXX -
375	 * The old code would work if the interface passed a pre-existing
376	 * chain of ifaddrs to this code.  We don't trust our callers to
377	 * properly initialize the tailq, however, so we no longer allow
378	 * this unlikely case.
379	 */
380	TAILQ_INIT(&ifp->if_addrhead);
381	TAILQ_INIT(&ifp->if_prefixhead);
382	TAILQ_INIT(&ifp->if_multiaddrs);
383	SLIST_INIT(&ifp->if_klist);
384	getmicrotime(&ifp->if_lastchange);
385
386#ifdef MAC
387	mac_init_ifnet(ifp);
388	mac_create_ifnet(ifp);
389#endif
390
391	ifp->if_index = if_findindex(ifp);
392	if (ifp->if_index > if_index)
393		if_index = ifp->if_index;
394	if (if_index >= if_indexlim)
395		if_grow();
396
397	ifnet_byindex(ifp->if_index) = ifp;
398	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
399	    unit2minor(ifp->if_index),
400	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
401	    net_cdevsw.d_name, ifp->if_xname);
402	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
403	    net_cdevsw.d_name, ifp->if_index);
404
405	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
406
407	/*
408	 * create a Link Level name for this device
409	 */
410	namelen = strlen(ifp->if_xname);
411	/*
412	 * Always save enough space for any possiable name so we can do
413	 * a rename in place later.
414	 */
415	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
416	socksize = masklen + ifp->if_addrlen;
417	if (socksize < sizeof(*sdl))
418		socksize = sizeof(*sdl);
419	socksize = roundup2(socksize, sizeof(long));
420	ifasize = sizeof(*ifa) + 2 * socksize;
421	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
422	IFA_LOCK_INIT(ifa);
423	sdl = (struct sockaddr_dl *)(ifa + 1);
424	sdl->sdl_len = socksize;
425	sdl->sdl_family = AF_LINK;
426	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
427	sdl->sdl_nlen = namelen;
428	sdl->sdl_index = ifp->if_index;
429	sdl->sdl_type = ifp->if_type;
430	ifaddr_byindex(ifp->if_index) = ifa;
431	ifa->ifa_ifp = ifp;
432	ifa->ifa_rtrequest = link_rtrequest;
433	ifa->ifa_addr = (struct sockaddr *)sdl;
434	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
435	ifa->ifa_netmask = (struct sockaddr *)sdl;
436	sdl->sdl_len = masklen;
437	while (namelen != 0)
438		sdl->sdl_data[--namelen] = 0xff;
439	ifa->ifa_refcnt = 1;
440	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
441	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
442
443	if (domains)
444		if_attachdomain1(ifp);
445
446	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
447
448	/* Announce the interface. */
449	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
450}
451
452static void
453if_attachdomain(void *dummy)
454{
455	struct ifnet *ifp;
456	int s;
457
458	s = splnet();
459	TAILQ_FOREACH(ifp, &ifnet, if_link)
460		if_attachdomain1(ifp);
461	splx(s);
462}
463SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
464    if_attachdomain, NULL);
465
466static void
467if_attachdomain1(struct ifnet *ifp)
468{
469	struct domain *dp;
470	int s;
471
472	s = splnet();
473
474	/*
475	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
476	 * cannot lock ifp->if_afdata initialization, entirely.
477	 */
478	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
479		splx(s);
480		return;
481	}
482	if (ifp->if_afdata_initialized) {
483		IF_AFDATA_UNLOCK(ifp);
484		splx(s);
485		return;
486	}
487	ifp->if_afdata_initialized = 1;
488	IF_AFDATA_UNLOCK(ifp);
489
490	/* address family dependent data region */
491	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
492	for (dp = domains; dp; dp = dp->dom_next) {
493		if (dp->dom_ifattach)
494			ifp->if_afdata[dp->dom_family] =
495			    (*dp->dom_ifattach)(ifp);
496	}
497
498	splx(s);
499}
500
501/*
502 * Detach an interface, removing it from the
503 * list of "active" interfaces.
504 */
505void
506if_detach(struct ifnet *ifp)
507{
508	struct ifaddr *ifa, *next;
509	struct radix_node_head	*rnh;
510	int s;
511	int i;
512	struct domain *dp;
513
514	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
515	/*
516	 * Remove routes and flush queues.
517	 */
518	s = splnet();
519	if_down(ifp);
520
521	/*
522	 * Remove address from ifindex_table[] and maybe decrement if_index.
523	 * Clean up all addresses.
524	 */
525	ifaddr_byindex(ifp->if_index) = NULL;
526	destroy_dev(ifdev_byindex(ifp->if_index));
527	ifdev_byindex(ifp->if_index) = NULL;
528
529	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
530		if_index--;
531
532	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = next) {
533		next = TAILQ_NEXT(ifa, ifa_link);
534
535		if (ifa->ifa_addr->sa_family == AF_LINK)
536			continue;
537#ifdef INET
538		/* XXX: Ugly!! ad hoc just for INET */
539		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
540			struct ifaliasreq ifr;
541
542			bzero(&ifr, sizeof(ifr));
543			ifr.ifra_addr = *ifa->ifa_addr;
544			if (ifa->ifa_dstaddr)
545				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
546			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
547			    NULL) == 0)
548				continue;
549		}
550#endif /* INET */
551#ifdef INET6
552		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
553			in6_purgeaddr(ifa);
554			/* ifp_addrhead is already updated */
555			continue;
556		}
557#endif /* INET6 */
558		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
559		IFAFREE(ifa);
560	}
561
562#ifdef INET6
563	/*
564	 * Remove all IPv6 kernel structs related to ifp.  This should be done
565	 * before removing routing entries below, since IPv6 interface direct
566	 * routes are expected to be removed by the IPv6-specific kernel API.
567	 * Otherwise, the kernel will detect some inconsistency and bark it.
568	 */
569	in6_ifdetach(ifp);
570#endif
571
572	/* We can now free link ifaddr. */
573	ifa = TAILQ_FIRST(&ifp->if_addrhead);
574	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
575	IFAFREE(ifa);
576
577	/*
578	 * Delete all remaining routes using this interface
579	 * Unfortuneatly the only way to do this is to slog through
580	 * the entire routing table looking for routes which point
581	 * to this interface...oh well...
582	 */
583	for (i = 1; i <= AF_MAX; i++) {
584		if ((rnh = rt_tables[i]) == NULL)
585			continue;
586		RADIX_NODE_HEAD_LOCK(rnh);
587		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
588		RADIX_NODE_HEAD_UNLOCK(rnh);
589	}
590
591	/* Announce that the interface is gone. */
592	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
593
594	IF_AFDATA_LOCK(ifp);
595	for (dp = domains; dp; dp = dp->dom_next) {
596		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
597			(*dp->dom_ifdetach)(ifp,
598			    ifp->if_afdata[dp->dom_family]);
599	}
600	IF_AFDATA_UNLOCK(ifp);
601
602#ifdef MAC
603	mac_destroy_ifnet(ifp);
604#endif /* MAC */
605	KNOTE(&ifp->if_klist, NOTE_EXIT);
606	IFNET_WLOCK();
607	TAILQ_REMOVE(&ifnet, ifp, if_link);
608	IFNET_WUNLOCK();
609	mtx_destroy(&ifp->if_snd.ifq_mtx);
610	IF_AFDATA_DESTROY(ifp);
611	splx(s);
612}
613
614/*
615 * Delete Routes for a Network Interface
616 *
617 * Called for each routing entry via the rnh->rnh_walktree() call above
618 * to delete all route entries referencing a detaching network interface.
619 *
620 * Arguments:
621 *	rn	pointer to node in the routing table
622 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
623 *
624 * Returns:
625 *	0	successful
626 *	errno	failed - reason indicated
627 *
628 */
629static int
630if_rtdel(struct radix_node *rn, void *arg)
631{
632	struct rtentry	*rt = (struct rtentry *)rn;
633	struct ifnet	*ifp = arg;
634	int		err;
635
636	if (rt->rt_ifp == ifp) {
637
638		/*
639		 * Protect (sorta) against walktree recursion problems
640		 * with cloned routes
641		 */
642		if ((rt->rt_flags & RTF_UP) == 0)
643			return (0);
644
645		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
646				rt_mask(rt), rt->rt_flags,
647				(struct rtentry **) NULL);
648		if (err) {
649			log(LOG_WARNING, "if_rtdel: error %d\n", err);
650		}
651	}
652
653	return (0);
654}
655
656/*
657 * Create a clone network interface.
658 */
659int
660if_clone_create(char *name, int len)
661{
662	struct if_clone *ifc;
663	char *dp;
664	int wildcard, bytoff, bitoff;
665	int unit;
666	int err;
667
668	ifc = if_clone_lookup(name, &unit);
669	if (ifc == NULL)
670		return (EINVAL);
671
672	if (ifunit(name) != NULL)
673		return (EEXIST);
674
675	bytoff = bitoff = 0;
676	wildcard = (unit < 0);
677	/*
678	 * Find a free unit if none was given.
679	 */
680	if (wildcard) {
681		while ((bytoff < ifc->ifc_bmlen)
682		    && (ifc->ifc_units[bytoff] == 0xff))
683			bytoff++;
684		if (bytoff >= ifc->ifc_bmlen)
685			return (ENOSPC);
686		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
687			bitoff++;
688		unit = (bytoff << 3) + bitoff;
689	}
690
691	if (unit > ifc->ifc_maxunit)
692		return (ENXIO);
693
694	err = (*ifc->ifc_create)(ifc, unit);
695	if (err != 0)
696		return (err);
697
698	if (!wildcard) {
699		bytoff = unit >> 3;
700		bitoff = unit - (bytoff << 3);
701	}
702
703	/*
704	 * Allocate the unit in the bitmap.
705	 */
706	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
707	    ("%s: bit is already set", __func__));
708	ifc->ifc_units[bytoff] |= (1 << bitoff);
709
710	/* In the wildcard case, we need to update the name. */
711	if (wildcard) {
712		for (dp = name; *dp != '\0'; dp++);
713		if (snprintf(dp, len - (dp-name), "%d", unit) >
714		    len - (dp-name) - 1) {
715			/*
716			 * This can only be a programmer error and
717			 * there's no straightforward way to recover if
718			 * it happens.
719			 */
720			panic("if_clone_create(): interface name too long");
721		}
722
723	}
724
725	return (0);
726}
727
728/*
729 * Destroy a clone network interface.
730 */
731int
732if_clone_destroy(const char *name)
733{
734	struct if_clone *ifc;
735	struct ifnet *ifp;
736	int bytoff, bitoff;
737	int unit;
738
739	ifp = ifunit(name);
740	if (ifp == NULL)
741		return (ENXIO);
742
743	unit = ifp->if_dunit;
744
745	ifc = if_clone_lookup(ifp->if_dname, NULL);
746	if (ifc == NULL)
747		return (EINVAL);
748
749	if (ifc->ifc_destroy == NULL)
750		return (EOPNOTSUPP);
751
752	(*ifc->ifc_destroy)(ifp);
753
754	/*
755	 * Compute offset in the bitmap and deallocate the unit.
756	 */
757	bytoff = unit >> 3;
758	bitoff = unit - (bytoff << 3);
759	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
760	    ("%s: bit is already cleared", __func__));
761	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
762	return (0);
763}
764
765/*
766 * Look up a network interface cloner.
767 */
768static struct if_clone *
769if_clone_lookup(const char *name, int *unitp)
770{
771	struct if_clone *ifc;
772	const char *cp;
773	int i;
774
775	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) {
776		for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) {
777			if (ifc->ifc_name[i] != *cp)
778				goto next_ifc;
779		}
780		goto found_name;
781 next_ifc:
782		ifc = LIST_NEXT(ifc, ifc_list);
783	}
784
785	/* No match. */
786	return ((struct if_clone *)NULL);
787
788 found_name:
789	if (*cp == '\0') {
790		i = -1;
791	} else {
792		for (i = 0; *cp != '\0'; cp++) {
793			if (*cp < '0' || *cp > '9') {
794				/* Bogus unit number. */
795				return (NULL);
796			}
797			i = (i * 10) + (*cp - '0');
798		}
799	}
800
801	if (unitp != NULL)
802		*unitp = i;
803	return (ifc);
804}
805
806/*
807 * Register a network interface cloner.
808 */
809void
810if_clone_attach(struct if_clone *ifc)
811{
812	int bytoff, bitoff;
813	int err;
814	int len, maxclone;
815	int unit;
816
817	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
818	    ("%s: %s requested more units then allowed (%d > %d)",
819	    __func__, ifc->ifc_name, ifc->ifc_minifs,
820	    ifc->ifc_maxunit + 1));
821	/*
822	 * Compute bitmap size and allocate it.
823	 */
824	maxclone = ifc->ifc_maxunit + 1;
825	len = maxclone >> 3;
826	if ((len << 3) < maxclone)
827		len++;
828	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
829	ifc->ifc_bmlen = len;
830
831	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
832	if_cloners_count++;
833
834	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
835		err = (*ifc->ifc_create)(ifc, unit);
836		KASSERT(err == 0,
837		    ("%s: failed to create required interface %s%d",
838		    __func__, ifc->ifc_name, unit));
839
840		/* Allocate the unit in the bitmap. */
841		bytoff = unit >> 3;
842		bitoff = unit - (bytoff << 3);
843		ifc->ifc_units[bytoff] |= (1 << bitoff);
844	}
845	EVENTHANDLER_INVOKE(if_clone_event, ifc);
846}
847
848/*
849 * Unregister a network interface cloner.
850 */
851void
852if_clone_detach(struct if_clone *ifc)
853{
854
855	LIST_REMOVE(ifc, ifc_list);
856	free(ifc->ifc_units, M_CLONE);
857	if_cloners_count--;
858}
859
860/*
861 * Provide list of interface cloners to userspace.
862 */
863static int
864if_clone_list(struct if_clonereq *ifcr)
865{
866	char outbuf[IFNAMSIZ], *dst;
867	struct if_clone *ifc;
868	int count, error = 0;
869
870	ifcr->ifcr_total = if_cloners_count;
871	if ((dst = ifcr->ifcr_buffer) == NULL) {
872		/* Just asking how many there are. */
873		return (0);
874	}
875
876	if (ifcr->ifcr_count < 0)
877		return (EINVAL);
878
879	count = (if_cloners_count < ifcr->ifcr_count) ?
880	    if_cloners_count : ifcr->ifcr_count;
881
882	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
883	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
884		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
885		error = copyout(outbuf, dst, IFNAMSIZ);
886		if (error)
887			break;
888	}
889
890	return (error);
891}
892
/*
 * True when the first (a1)->sa_len bytes of the two sockaddrs are
 * identical.  Note that a1 is evaluated twice.
 */
#define	equal(a1, a2)							\
	(bcmp((a1), (a2), (a1)->sa_len) == 0)
894
895/*
896 * Locate an interface based on a complete address.
897 */
898/*ARGSUSED*/
899struct ifaddr *
900ifa_ifwithaddr(struct sockaddr *addr)
901{
902	struct ifnet *ifp;
903	struct ifaddr *ifa;
904
905	IFNET_RLOCK();
906	TAILQ_FOREACH(ifp, &ifnet, if_link)
907		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
908			if (ifa->ifa_addr->sa_family != addr->sa_family)
909				continue;
910			if (equal(addr, ifa->ifa_addr))
911				goto done;
912			/* IP6 doesn't have broadcast */
913			if ((ifp->if_flags & IFF_BROADCAST) &&
914			    ifa->ifa_broadaddr &&
915			    ifa->ifa_broadaddr->sa_len != 0 &&
916			    equal(ifa->ifa_broadaddr, addr))
917				goto done;
918		}
919	ifa = NULL;
920done:
921	IFNET_RUNLOCK();
922	return (ifa);
923}
924
925/*
926 * Locate the point to point interface with a given destination address.
927 */
928/*ARGSUSED*/
929struct ifaddr *
930ifa_ifwithdstaddr(struct sockaddr *addr)
931{
932	struct ifnet *ifp;
933	struct ifaddr *ifa;
934
935	IFNET_RLOCK();
936	TAILQ_FOREACH(ifp, &ifnet, if_link) {
937		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
938			continue;
939		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
940			if (ifa->ifa_addr->sa_family != addr->sa_family)
941				continue;
942			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
943				goto done;
944		}
945	}
946	ifa = NULL;
947done:
948	IFNET_RUNLOCK();
949	return (ifa);
950}
951
952/*
953 * Find an interface on a specific network.  If many, choice
954 * is most specific found.
955 */
956struct ifaddr *
957ifa_ifwithnet(struct sockaddr *addr)
958{
959	struct ifnet *ifp;
960	struct ifaddr *ifa;
961	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
962	u_int af = addr->sa_family;
963	char *addr_data = addr->sa_data, *cplim;
964
965	/*
966	 * AF_LINK addresses can be looked up directly by their index number,
967	 * so do that if we can.
968	 */
969	if (af == AF_LINK) {
970	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
971	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
972		return (ifaddr_byindex(sdl->sdl_index));
973	}
974
975	/*
976	 * Scan though each interface, looking for ones that have
977	 * addresses in this address family.
978	 */
979	IFNET_RLOCK();
980	TAILQ_FOREACH(ifp, &ifnet, if_link) {
981		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
982			char *cp, *cp2, *cp3;
983
984			if (ifa->ifa_addr->sa_family != af)
985next:				continue;
986			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
987				/*
988				 * This is a bit broken as it doesn't
989				 * take into account that the remote end may
990				 * be a single node in the network we are
991				 * looking for.
992				 * The trouble is that we don't know the
993				 * netmask for the remote end.
994				 */
995				if (ifa->ifa_dstaddr != 0
996				    && equal(addr, ifa->ifa_dstaddr))
997					goto done;
998			} else {
999				/*
1000				 * if we have a special address handler,
1001				 * then use it instead of the generic one.
1002				 */
1003				if (ifa->ifa_claim_addr) {
1004					if ((*ifa->ifa_claim_addr)(ifa, addr))
1005						goto done;
1006					continue;
1007				}
1008
1009				/*
1010				 * Scan all the bits in the ifa's address.
1011				 * If a bit dissagrees with what we are
1012				 * looking for, mask it with the netmask
1013				 * to see if it really matters.
1014				 * (A byte at a time)
1015				 */
1016				if (ifa->ifa_netmask == 0)
1017					continue;
1018				cp = addr_data;
1019				cp2 = ifa->ifa_addr->sa_data;
1020				cp3 = ifa->ifa_netmask->sa_data;
1021				cplim = ifa->ifa_netmask->sa_len
1022					+ (char *)ifa->ifa_netmask;
1023				while (cp3 < cplim)
1024					if ((*cp++ ^ *cp2++) & *cp3++)
1025						goto next; /* next address! */
1026				/*
1027				 * If the netmask of what we just found
1028				 * is more specific than what we had before
1029				 * (if we had one) then remember the new one
1030				 * before continuing to search
1031				 * for an even better one.
1032				 */
1033				if (ifa_maybe == 0 ||
1034				    rn_refines((caddr_t)ifa->ifa_netmask,
1035				    (caddr_t)ifa_maybe->ifa_netmask))
1036					ifa_maybe = ifa;
1037			}
1038		}
1039	}
1040	ifa = ifa_maybe;
1041done:
1042	IFNET_RUNLOCK();
1043	return (ifa);
1044}
1045
1046/*
1047 * Find an interface address specific to an interface best matching
1048 * a given address.
1049 */
1050struct ifaddr *
1051ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1052{
1053	struct ifaddr *ifa;
1054	char *cp, *cp2, *cp3;
1055	char *cplim;
1056	struct ifaddr *ifa_maybe = 0;
1057	u_int af = addr->sa_family;
1058
1059	if (af >= AF_MAX)
1060		return (0);
1061	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1062		if (ifa->ifa_addr->sa_family != af)
1063			continue;
1064		if (ifa_maybe == 0)
1065			ifa_maybe = ifa;
1066		if (ifa->ifa_netmask == 0) {
1067			if (equal(addr, ifa->ifa_addr) ||
1068			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1069				goto done;
1070			continue;
1071		}
1072		if (ifp->if_flags & IFF_POINTOPOINT) {
1073			if (equal(addr, ifa->ifa_dstaddr))
1074				goto done;
1075		} else {
1076			cp = addr->sa_data;
1077			cp2 = ifa->ifa_addr->sa_data;
1078			cp3 = ifa->ifa_netmask->sa_data;
1079			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1080			for (; cp3 < cplim; cp3++)
1081				if ((*cp++ ^ *cp2++) & *cp3)
1082					break;
1083			if (cp3 == cplim)
1084				goto done;
1085		}
1086	}
1087	ifa = ifa_maybe;
1088done:
1089	return (ifa);
1090}
1091
1092#include <net/route.h>
1093
1094/*
1095 * Default action when installing a route with a Link Level gateway.
1096 * Lookup an appropriate real ifa to point to.
1097 * This should be moved to /sys/net/link.c eventually.
1098 */
1099static void
1100link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1101{
1102	struct ifaddr *ifa, *oifa;
1103	struct sockaddr *dst;
1104	struct ifnet *ifp;
1105
1106	RT_LOCK_ASSERT(rt);
1107
1108	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1109	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1110		return;
1111	ifa = ifaof_ifpforaddr(dst, ifp);
1112	if (ifa) {
1113		IFAREF(ifa);		/* XXX */
1114		oifa = rt->rt_ifa;
1115		rt->rt_ifa = ifa;
1116		IFAFREE(oifa);
1117		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1118			ifa->ifa_rtrequest(cmd, rt, info);
1119	}
1120}
1121
1122/*
1123 * Mark an interface down and notify protocols of
1124 * the transition.
1125 * NOTE: must be called at splnet or eqivalent.
1126 */
1127void
1128if_unroute(struct ifnet *ifp, int flag, int fam)
1129{
1130	struct ifaddr *ifa;
1131
1132	ifp->if_flags &= ~flag;
1133	getmicrotime(&ifp->if_lastchange);
1134	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1135		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1136			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1137	if_qflush(&ifp->if_snd);
1138	rt_ifmsg(ifp);
1139}
1140
1141/*
1142 * Mark an interface up and notify protocols of
1143 * the transition.
1144 * NOTE: must be called at splnet or eqivalent.
1145 */
1146void
1147if_route(struct ifnet *ifp, int flag, int fam)
1148{
1149	struct ifaddr *ifa;
1150
1151	ifp->if_flags |= flag;
1152	getmicrotime(&ifp->if_lastchange);
1153	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1154		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1155			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1156	rt_ifmsg(ifp);
1157#ifdef INET6
1158	in6_if_up(ifp);
1159#endif
1160}
1161
1162/*
1163 * Mark an interface down and notify protocols of
1164 * the transition.
1165 * NOTE: must be called at splnet or eqivalent.
1166 */
1167void
1168if_down(struct ifnet *ifp)
1169{
1170
1171	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1172}
1173
1174/*
1175 * Mark an interface up and notify protocols of
1176 * the transition.
1177 * NOTE: must be called at splnet or eqivalent.
1178 */
1179void
1180if_up(struct ifnet *ifp)
1181{
1182
1183	if_route(ifp, IFF_UP, AF_UNSPEC);
1184}
1185
1186/*
1187 * Flush an interface queue.
1188 */
1189static void
1190if_qflush(struct ifqueue *ifq)
1191{
1192	struct mbuf *m, *n;
1193
1194	n = ifq->ifq_head;
1195	while ((m = n) != 0) {
1196		n = m->m_act;
1197		m_freem(m);
1198	}
1199	ifq->ifq_head = 0;
1200	ifq->ifq_tail = 0;
1201	ifq->ifq_len = 0;
1202}
1203
1204/*
1205 * Handle interface watchdog timer routines.  Called
1206 * from softclock, we decrement timers (if set) and
1207 * call the appropriate interface routine on expiration.
1208 */
1209static void
1210if_slowtimo(void *arg)
1211{
1212	struct ifnet *ifp;
1213	int s = splimp();
1214
1215	IFNET_RLOCK();
1216	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1217		if (ifp->if_timer == 0 || --ifp->if_timer)
1218			continue;
1219		if (ifp->if_watchdog)
1220			(*ifp->if_watchdog)(ifp);
1221	}
1222	IFNET_RUNLOCK();
1223	splx(s);
1224	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1225}
1226
1227/*
1228 * Map interface name to
1229 * interface structure pointer.
1230 */
1231struct ifnet *
1232ifunit(const char *name)
1233{
1234	struct ifnet *ifp;
1235
1236	IFNET_RLOCK();
1237	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1238		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1239			break;
1240	}
1241	IFNET_RUNLOCK();
1242	return (ifp);
1243}
1244
1245/*
1246 * Hardware specific interface ioctls.
1247 */
1248static int
1249ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1250{
1251	struct ifreq *ifr;
1252	struct ifstat *ifs;
1253	int error = 0;
1254	int new_flags;
1255	size_t namelen, onamelen;
1256	char new_name[IFNAMSIZ];
1257	struct ifaddr *ifa;
1258	struct sockaddr_dl *sdl;
1259
1260	ifr = (struct ifreq *)data;
1261	switch (cmd) {
1262	case SIOCGIFINDEX:
1263		ifr->ifr_index = ifp->if_index;
1264		break;
1265
1266	case SIOCGIFFLAGS:
1267		ifr->ifr_flags = ifp->if_flags & 0xffff;
1268		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1269		break;
1270
1271	case SIOCGIFCAP:
1272		ifr->ifr_reqcap = ifp->if_capabilities;
1273		ifr->ifr_curcap = ifp->if_capenable;
1274		break;
1275
1276#ifdef MAC
1277	case SIOCGIFMAC:
1278		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1279		break;
1280#endif
1281
1282	case SIOCGIFMETRIC:
1283		ifr->ifr_metric = ifp->if_metric;
1284		break;
1285
1286	case SIOCGIFMTU:
1287		ifr->ifr_mtu = ifp->if_mtu;
1288		break;
1289
1290	case SIOCGIFPHYS:
1291		ifr->ifr_phys = ifp->if_physical;
1292		break;
1293
1294	case SIOCSIFFLAGS:
1295		error = suser(td);
1296		if (error)
1297			return (error);
1298		new_flags = (ifr->ifr_flags & 0xffff) |
1299		    (ifr->ifr_flagshigh << 16);
1300		if (ifp->if_flags & IFF_SMART) {
1301			/* Smart drivers twiddle their own routes */
1302		} else if (ifp->if_flags & IFF_UP &&
1303		    (new_flags & IFF_UP) == 0) {
1304			int s = splimp();
1305			if_down(ifp);
1306			splx(s);
1307		} else if (new_flags & IFF_UP &&
1308		    (ifp->if_flags & IFF_UP) == 0) {
1309			int s = splimp();
1310			if_up(ifp);
1311			splx(s);
1312		}
1313		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1314			(new_flags &~ IFF_CANTCHANGE);
1315		if (new_flags & IFF_PPROMISC) {
1316			/* Permanently promiscuous mode requested */
1317			ifp->if_flags |= IFF_PROMISC;
1318		} else if (ifp->if_pcount == 0) {
1319			ifp->if_flags &= ~IFF_PROMISC;
1320		}
1321		if (ifp->if_ioctl)
1322			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1323		getmicrotime(&ifp->if_lastchange);
1324		break;
1325
1326	case SIOCSIFCAP:
1327		error = suser(td);
1328		if (error)
1329			return (error);
1330		if (ifp->if_ioctl == NULL)
1331			return (EOPNOTSUPP);
1332		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1333			return (EINVAL);
1334		error = (*ifp->if_ioctl)(ifp, cmd, data);
1335		if (error == 0)
1336			getmicrotime(&ifp->if_lastchange);
1337		break;
1338
1339#ifdef MAC
1340	case SIOCSIFMAC:
1341		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1342		break;
1343#endif
1344
1345	case SIOCSIFNAME:
1346		error = suser(td);
1347		if (error != 0)
1348			return (error);
1349		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1350		if (error != 0)
1351			return (error);
1352		if (new_name[0] == '\0')
1353			return (EINVAL);
1354		if (ifunit(new_name) != NULL)
1355			return (EEXIST);
1356
1357		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1358		/* Announce the departure of the interface. */
1359		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1360
1361		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1362		ifa = ifaddr_byindex(ifp->if_index);
1363		IFA_LOCK(ifa);
1364		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1365		namelen = strlen(new_name);
1366		onamelen = sdl->sdl_nlen;
1367		/*
1368		 * Move the address if needed.  This is safe because we
1369		 * allocate space for a name of length IFNAMSIZ when we
1370		 * create this in if_attach().
1371		 */
1372		if (namelen != onamelen) {
1373			bcopy(sdl->sdl_data + onamelen,
1374			    sdl->sdl_data + namelen, sdl->sdl_alen);
1375		}
1376		bcopy(new_name, sdl->sdl_data, namelen);
1377		sdl->sdl_nlen = namelen;
1378		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1379		bzero(sdl->sdl_data, onamelen);
1380		while (namelen != 0)
1381			sdl->sdl_data[--namelen] = 0xff;
1382		IFA_UNLOCK(ifa);
1383
1384		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1385		/* Announce the return of the interface. */
1386		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1387		break;
1388
1389	case SIOCSIFMETRIC:
1390		error = suser(td);
1391		if (error)
1392			return (error);
1393		ifp->if_metric = ifr->ifr_metric;
1394		getmicrotime(&ifp->if_lastchange);
1395		break;
1396
1397	case SIOCSIFPHYS:
1398		error = suser(td);
1399		if (error)
1400			return (error);
1401		if (ifp->if_ioctl == NULL)
1402			return (EOPNOTSUPP);
1403		error = (*ifp->if_ioctl)(ifp, cmd, data);
1404		if (error == 0)
1405			getmicrotime(&ifp->if_lastchange);
1406		break;
1407
1408	case SIOCSIFMTU:
1409	{
1410		u_long oldmtu = ifp->if_mtu;
1411
1412		error = suser(td);
1413		if (error)
1414			return (error);
1415		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1416			return (EINVAL);
1417		if (ifp->if_ioctl == NULL)
1418			return (EOPNOTSUPP);
1419		error = (*ifp->if_ioctl)(ifp, cmd, data);
1420		if (error == 0) {
1421			getmicrotime(&ifp->if_lastchange);
1422			rt_ifmsg(ifp);
1423		}
1424		/*
1425		 * If the link MTU changed, do network layer specific procedure.
1426		 */
1427		if (ifp->if_mtu != oldmtu) {
1428#ifdef INET6
1429			nd6_setmtu(ifp);
1430#endif
1431		}
1432		break;
1433	}
1434
1435	case SIOCADDMULTI:
1436	case SIOCDELMULTI:
1437		error = suser(td);
1438		if (error)
1439			return (error);
1440
1441		/* Don't allow group membership on non-multicast interfaces. */
1442		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1443			return (EOPNOTSUPP);
1444
1445		/* Don't let users screw up protocols' entries. */
1446		if (ifr->ifr_addr.sa_family != AF_LINK)
1447			return (EINVAL);
1448
1449		if (cmd == SIOCADDMULTI) {
1450			struct ifmultiaddr *ifma;
1451			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1452		} else {
1453			error = if_delmulti(ifp, &ifr->ifr_addr);
1454		}
1455		if (error == 0)
1456			getmicrotime(&ifp->if_lastchange);
1457		break;
1458
1459	case SIOCSIFPHYADDR:
1460	case SIOCDIFPHYADDR:
1461#ifdef INET6
1462	case SIOCSIFPHYADDR_IN6:
1463#endif
1464	case SIOCSLIFPHYADDR:
1465	case SIOCSIFMEDIA:
1466	case SIOCSIFGENERIC:
1467		error = suser(td);
1468		if (error)
1469			return (error);
1470		if (ifp->if_ioctl == NULL)
1471			return (EOPNOTSUPP);
1472		error = (*ifp->if_ioctl)(ifp, cmd, data);
1473		if (error == 0)
1474			getmicrotime(&ifp->if_lastchange);
1475		break;
1476
1477	case SIOCGIFSTATUS:
1478		ifs = (struct ifstat *)data;
1479		ifs->ascii[0] = '\0';
1480
1481	case SIOCGIFPSRCADDR:
1482	case SIOCGIFPDSTADDR:
1483	case SIOCGLIFPHYADDR:
1484	case SIOCGIFMEDIA:
1485	case SIOCGIFGENERIC:
1486		if (ifp->if_ioctl == NULL)
1487			return (EOPNOTSUPP);
1488		error = (*ifp->if_ioctl)(ifp, cmd, data);
1489		break;
1490
1491	case SIOCSIFLLADDR:
1492		error = suser(td);
1493		if (error)
1494			return (error);
1495		error = if_setlladdr(ifp,
1496		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1497		break;
1498
1499	default:
1500		error = ENOIOCTL;
1501		break;
1502	}
1503	return (error);
1504}
1505
1506/*
1507 * Interface ioctls.
1508 */
1509int
1510ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1511{
1512	struct ifnet *ifp;
1513	struct ifreq *ifr;
1514	int error;
1515	int oif_flags;
1516
1517	switch (cmd) {
1518	case SIOCGIFCONF:
1519	case OSIOCGIFCONF:
1520		return (ifconf(cmd, data));
1521	}
1522	ifr = (struct ifreq *)data;
1523
1524	switch (cmd) {
1525	case SIOCIFCREATE:
1526	case SIOCIFDESTROY:
1527		if ((error = suser(td)) != 0)
1528			return (error);
1529		return ((cmd == SIOCIFCREATE) ?
1530			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1531			if_clone_destroy(ifr->ifr_name));
1532
1533	case SIOCIFGCLONERS:
1534		return (if_clone_list((struct if_clonereq *)data));
1535	}
1536
1537	ifp = ifunit(ifr->ifr_name);
1538	if (ifp == 0)
1539		return (ENXIO);
1540
1541	error = ifhwioctl(cmd, ifp, data, td);
1542	if (error != ENOIOCTL)
1543		return (error);
1544
1545	oif_flags = ifp->if_flags;
1546	if (so->so_proto == 0)
1547		return (EOPNOTSUPP);
1548#ifndef COMPAT_43
1549	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1550								 data,
1551								 ifp, td));
1552#else
1553	{
1554		int ocmd = cmd;
1555
1556		switch (cmd) {
1557
1558		case SIOCSIFDSTADDR:
1559		case SIOCSIFADDR:
1560		case SIOCSIFBRDADDR:
1561		case SIOCSIFNETMASK:
1562#if BYTE_ORDER != BIG_ENDIAN
1563			if (ifr->ifr_addr.sa_family == 0 &&
1564			    ifr->ifr_addr.sa_len < 16) {
1565				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1566				ifr->ifr_addr.sa_len = 16;
1567			}
1568#else
1569			if (ifr->ifr_addr.sa_len == 0)
1570				ifr->ifr_addr.sa_len = 16;
1571#endif
1572			break;
1573
1574		case OSIOCGIFADDR:
1575			cmd = SIOCGIFADDR;
1576			break;
1577
1578		case OSIOCGIFDSTADDR:
1579			cmd = SIOCGIFDSTADDR;
1580			break;
1581
1582		case OSIOCGIFBRDADDR:
1583			cmd = SIOCGIFBRDADDR;
1584			break;
1585
1586		case OSIOCGIFNETMASK:
1587			cmd = SIOCGIFNETMASK;
1588		}
1589		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1590								   cmd,
1591								   data,
1592								   ifp, td));
1593		switch (ocmd) {
1594
1595		case OSIOCGIFADDR:
1596		case OSIOCGIFDSTADDR:
1597		case OSIOCGIFBRDADDR:
1598		case OSIOCGIFNETMASK:
1599			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1600
1601		}
1602	}
1603#endif /* COMPAT_43 */
1604
1605	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1606#ifdef INET6
1607		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1608		if (ifp->if_flags & IFF_UP) {
1609			int s = splimp();
1610			in6_if_up(ifp);
1611			splx(s);
1612		}
1613#endif
1614	}
1615	return (error);
1616}
1617
1618/*
1619 * Set/clear promiscuous mode on interface ifp based on the truth value
1620 * of pswitch.  The calls are reference counted so that only the first
1621 * "on" request actually has an effect, as does the final "off" request.
1622 * Results are undefined if the "off" and "on" requests are not matched.
1623 */
1624int
1625ifpromisc(struct ifnet *ifp, int pswitch)
1626{
1627	struct ifreq ifr;
1628	int error;
1629	int oldflags, oldpcount;
1630
1631	oldpcount = ifp->if_pcount;
1632	oldflags = ifp->if_flags;
1633	if (ifp->if_flags & IFF_PPROMISC) {
1634		/* Do nothing if device is in permanently promiscuous mode */
1635		ifp->if_pcount += pswitch ? 1 : -1;
1636		return (0);
1637	}
1638	if (pswitch) {
1639		/*
1640		 * If the device is not configured up, we cannot put it in
1641		 * promiscuous mode.
1642		 */
1643		if ((ifp->if_flags & IFF_UP) == 0)
1644			return (ENETDOWN);
1645		if (ifp->if_pcount++ != 0)
1646			return (0);
1647		ifp->if_flags |= IFF_PROMISC;
1648	} else {
1649		if (--ifp->if_pcount > 0)
1650			return (0);
1651		ifp->if_flags &= ~IFF_PROMISC;
1652	}
1653	ifr.ifr_flags = ifp->if_flags & 0xffff;
1654	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1655	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1656	if (error == 0) {
1657		log(LOG_INFO, "%s: promiscuous mode %s\n",
1658		    ifp->if_xname,
1659		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1660		rt_ifmsg(ifp);
1661	} else {
1662		ifp->if_pcount = oldpcount;
1663		ifp->if_flags = oldflags;
1664	}
1665	return error;
1666}
1667
1668/*
1669 * Return interface configuration
1670 * of system.  List may be used
1671 * in later ioctl's (above) to get
1672 * other information.
1673 */
1674/*ARGSUSED*/
1675static int
1676ifconf(u_long cmd, caddr_t data)
1677{
1678	struct ifconf *ifc = (struct ifconf *)data;
1679	struct ifnet *ifp;
1680	struct ifaddr *ifa;
1681	struct ifreq ifr, *ifrp;
1682	int space = ifc->ifc_len, error = 0;
1683
1684	ifrp = ifc->ifc_req;
1685	IFNET_RLOCK();		/* could sleep XXX */
1686	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1687		int addrs;
1688
1689		if (space < sizeof(ifr))
1690			break;
1691		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1692		    >= sizeof(ifr.ifr_name)) {
1693			error = ENAMETOOLONG;
1694			break;
1695		}
1696
1697		addrs = 0;
1698		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1699			struct sockaddr *sa = ifa->ifa_addr;
1700
1701			if (space < sizeof(ifr))
1702				break;
1703			if (jailed(curthread->td_ucred) &&
1704			    prison_if(curthread->td_ucred, sa))
1705				continue;
1706			addrs++;
1707#ifdef COMPAT_43
1708			if (cmd == OSIOCGIFCONF) {
1709				struct osockaddr *osa =
1710					 (struct osockaddr *)&ifr.ifr_addr;
1711				ifr.ifr_addr = *sa;
1712				osa->sa_family = sa->sa_family;
1713				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1714						sizeof (ifr));
1715				ifrp++;
1716			} else
1717#endif
1718			if (sa->sa_len <= sizeof(*sa)) {
1719				ifr.ifr_addr = *sa;
1720				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1721						sizeof (ifr));
1722				ifrp++;
1723			} else {
1724				if (space < sizeof (ifr) + sa->sa_len -
1725					    sizeof(*sa))
1726					break;
1727				space -= sa->sa_len - sizeof(*sa);
1728				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1729						sizeof (ifr.ifr_name));
1730				if (error == 0)
1731				    error = copyout((caddr_t)sa,
1732				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1733				ifrp = (struct ifreq *)
1734					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1735			}
1736			if (error)
1737				break;
1738			space -= sizeof (ifr);
1739		}
1740		if (error)
1741			break;
1742		if (!addrs) {
1743			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1744			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1745			    sizeof (ifr));
1746			if (error)
1747				break;
1748			space -= sizeof (ifr);
1749			ifrp++;
1750		}
1751	}
1752	IFNET_RUNLOCK();
1753	ifc->ifc_len -= space;
1754	return (error);
1755}
1756
1757/*
1758 * Just like if_promisc(), but for all-multicast-reception mode.
1759 */
1760int
1761if_allmulti(struct ifnet *ifp, int onswitch)
1762{
1763	int error = 0;
1764	int s = splimp();
1765	struct ifreq ifr;
1766
1767	if (onswitch) {
1768		if (ifp->if_amcount++ == 0) {
1769			ifp->if_flags |= IFF_ALLMULTI;
1770			ifr.ifr_flags = ifp->if_flags & 0xffff;
1771			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1772			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1773		}
1774	} else {
1775		if (ifp->if_amcount > 1) {
1776			ifp->if_amcount--;
1777		} else {
1778			ifp->if_amcount = 0;
1779			ifp->if_flags &= ~IFF_ALLMULTI;
1780			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1781			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1782			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1783		}
1784	}
1785	splx(s);
1786
1787	if (error == 0)
1788		rt_ifmsg(ifp);
1789	return error;
1790}
1791
1792/*
1793 * Add a multicast listenership to the interface in question.
1794 * The link layer provides a routine which converts
1795 */
1796int
1797if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1798{
1799	struct sockaddr *llsa, *dupsa;
1800	int error, s;
1801	struct ifmultiaddr *ifma;
1802
1803	/*
1804	 * If the matching multicast address already exists
1805	 * then don't add a new one, just add a reference
1806	 */
1807	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1808		if (equal(sa, ifma->ifma_addr)) {
1809			ifma->ifma_refcount++;
1810			if (retifma)
1811				*retifma = ifma;
1812			return 0;
1813		}
1814	}
1815
1816	/*
1817	 * Give the link layer a chance to accept/reject it, and also
1818	 * find out which AF_LINK address this maps to, if it isn't one
1819	 * already.
1820	 */
1821	if (ifp->if_resolvemulti) {
1822		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1823		if (error) return error;
1824	} else {
1825		llsa = 0;
1826	}
1827
1828	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1829	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1830	bcopy(sa, dupsa, sa->sa_len);
1831
1832	ifma->ifma_addr = dupsa;
1833	ifma->ifma_lladdr = llsa;
1834	ifma->ifma_ifp = ifp;
1835	ifma->ifma_refcount = 1;
1836	ifma->ifma_protospec = 0;
1837	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1838
1839	/*
1840	 * Some network interfaces can scan the address list at
1841	 * interrupt time; lock them out.
1842	 */
1843	s = splimp();
1844	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1845	splx(s);
1846	if (retifma != NULL)
1847		*retifma = ifma;
1848
1849	if (llsa != 0) {
1850		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1851			if (equal(ifma->ifma_addr, llsa))
1852				break;
1853		}
1854		if (ifma) {
1855			ifma->ifma_refcount++;
1856		} else {
1857			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1858			       M_IFMADDR, M_WAITOK);
1859			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1860			       M_IFMADDR, M_WAITOK);
1861			bcopy(llsa, dupsa, llsa->sa_len);
1862			ifma->ifma_addr = dupsa;
1863			ifma->ifma_lladdr = NULL;
1864			ifma->ifma_ifp = ifp;
1865			ifma->ifma_refcount = 1;
1866			ifma->ifma_protospec = 0;
1867			s = splimp();
1868			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1869			splx(s);
1870		}
1871	}
1872	/*
1873	 * We are certain we have added something, so call down to the
1874	 * interface to let them know about it.
1875	 */
1876	s = splimp();
1877	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1878	splx(s);
1879
1880	return 0;
1881}
1882
1883/*
1884 * Remove a reference to a multicast address on this interface.  Yell
1885 * if the request does not match an existing membership.
1886 */
1887int
1888if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1889{
1890	struct ifmultiaddr *ifma;
1891	int s;
1892
1893	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1894		if (equal(sa, ifma->ifma_addr))
1895			break;
1896	if (ifma == 0)
1897		return ENOENT;
1898
1899	if (ifma->ifma_refcount > 1) {
1900		ifma->ifma_refcount--;
1901		return 0;
1902	}
1903
1904	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1905	sa = ifma->ifma_lladdr;
1906	s = splimp();
1907	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1908	/*
1909	 * Make sure the interface driver is notified
1910	 * in the case of a link layer mcast group being left.
1911	 */
1912	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1913		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1914	splx(s);
1915	free(ifma->ifma_addr, M_IFMADDR);
1916	free(ifma, M_IFMADDR);
1917	if (sa == 0)
1918		return 0;
1919
1920	/*
1921	 * Now look for the link-layer address which corresponds to
1922	 * this network address.  It had been squirreled away in
1923	 * ifma->ifma_lladdr for this purpose (so we don't have
1924	 * to call ifp->if_resolvemulti() again), and we saved that
1925	 * value in sa above.  If some nasty deleted the
1926	 * link-layer address out from underneath us, we can deal because
1927	 * the address we stored was is not the same as the one which was
1928	 * in the record for the link-layer address.  (So we don't complain
1929	 * in that case.)
1930	 */
1931	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1932		if (equal(sa, ifma->ifma_addr))
1933			break;
1934	if (ifma == 0)
1935		return 0;
1936
1937	if (ifma->ifma_refcount > 1) {
1938		ifma->ifma_refcount--;
1939		return 0;
1940	}
1941
1942	s = splimp();
1943	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1944	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1945	splx(s);
1946	free(ifma->ifma_addr, M_IFMADDR);
1947	free(sa, M_IFMADDR);
1948	free(ifma, M_IFMADDR);
1949
1950	return 0;
1951}
1952
1953/*
1954 * Set the link layer address on an interface.
1955 *
1956 * At this time we only support certain types of interfaces,
1957 * and we don't allow the length of the address to change.
1958 */
1959int
1960if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1961{
1962	struct sockaddr_dl *sdl;
1963	struct ifaddr *ifa;
1964	struct ifreq ifr;
1965
1966	ifa = ifaddr_byindex(ifp->if_index);
1967	if (ifa == NULL)
1968		return (EINVAL);
1969	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1970	if (sdl == NULL)
1971		return (EINVAL);
1972	if (len != sdl->sdl_alen)	/* don't allow length to change */
1973		return (EINVAL);
1974	switch (ifp->if_type) {
1975	case IFT_ETHER:			/* these types use struct arpcom */
1976	case IFT_FDDI:
1977	case IFT_XETHER:
1978	case IFT_ISO88025:
1979	case IFT_L2VLAN:
1980		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1981		/*
1982		 * XXX We also need to store the lladdr in LLADDR(sdl),
1983		 * which is done below. This is a pain because we must
1984		 * remember to keep the info in sync.
1985		 */
1986		/* FALLTHROUGH */
1987	case IFT_ARCNET:
1988		bcopy(lladdr, LLADDR(sdl), len);
1989		break;
1990	default:
1991		return (ENODEV);
1992	}
1993	/*
1994	 * If the interface is already up, we need
1995	 * to re-init it in order to reprogram its
1996	 * address filter.
1997	 */
1998	if ((ifp->if_flags & IFF_UP) != 0) {
1999		ifp->if_flags &= ~IFF_UP;
2000		ifr.ifr_flags = ifp->if_flags & 0xffff;
2001		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2002		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2003		ifp->if_flags |= IFF_UP;
2004		ifr.ifr_flags = ifp->if_flags & 0xffff;
2005		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2006		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
2007#ifdef INET
2008		/*
2009		 * Also send gratuitous ARPs to notify other nodes about
2010		 * the address change.
2011		 */
2012		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2013			if (ifa->ifa_addr != NULL &&
2014			    ifa->ifa_addr->sa_family == AF_INET)
2015				arp_ifinit(ifp, ifa);
2016		}
2017#endif
2018	}
2019	return (0);
2020}
2021
2022struct ifmultiaddr *
2023ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2024{
2025	struct ifmultiaddr *ifma;
2026
2027	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2028		if (equal(ifma->ifma_addr, sa))
2029			break;
2030
2031	return ifma;
2032}
2033
2034/*
2035 * The name argument must be a pointer to storage which will last as
2036 * long as the interface does.  For physical devices, the result of
2037 * device_get_name(dev) is a good choice and for pseudo-devices a
2038 * static string works well.
2039 */
2040void
2041if_initname(struct ifnet *ifp, const char *name, int unit)
2042{
2043	ifp->if_dname = name;
2044	ifp->if_dunit = unit;
2045	if (unit != IF_DUNIT_NONE)
2046		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2047	else
2048		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2049}
2050
2051int
2052if_printf(struct ifnet *ifp, const char * fmt, ...)
2053{
2054	va_list ap;
2055	int retval;
2056
2057	retval = printf("%s: ", ifp->if_xname);
2058	va_start(ap, fmt);
2059	retval += vprintf(fmt, ap);
2060	va_end(ap);
2061	return (retval);
2062}
2063
2064SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
2065SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
2066