if.c revision 106955
1/*
2 * Copyright (c) 1980, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)if.c	8.5 (Berkeley) 1/9/95
34 * $FreeBSD: head/sys/net/if.c 106955 2002-11-15 18:35:41Z sam $
35 */
36
37#include "opt_compat.h"
38#include "opt_inet6.h"
39#include "opt_inet.h"
40#include "opt_mac.h"
41
42#include <sys/param.h>
43#include <sys/conf.h>
44#include <sys/mac.h>
45#include <sys/malloc.h>
46#include <sys/bus.h>
47#include <sys/mbuf.h>
48#include <sys/systm.h>
49#include <sys/proc.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/protosw.h>
53#include <sys/kernel.h>
54#include <sys/sockio.h>
55#include <sys/syslog.h>
56#include <sys/sysctl.h>
57#include <sys/jail.h>
58#include <machine/stdarg.h>
59
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_types.h>
64#include <net/if_var.h>
65#include <net/radix.h>
66#include <net/route.h>
67
68#if defined(INET) || defined(INET6)
69/*XXX*/
70#include <netinet/in.h>
71#include <netinet/in_var.h>
72#ifdef INET6
73#include <netinet6/in6_var.h>
74#include <netinet6/in6_ifattach.h>
75#endif
76#endif
77#ifdef INET
78#include <netinet/if_ether.h>
79#endif
80
81static int	ifconf(u_long, caddr_t);
82static void	if_grow(void);
83static void	if_init(void *);
84static void	if_check(void *);
85static int	if_findindex(struct ifnet *);
86static void	if_qflush(struct ifqueue *);
87static void	if_slowtimo(void *);
88static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
89static int	if_rtdel(struct radix_node *, void *);
90static struct	if_clone *if_clone_lookup(const char *, int *);
91static int	if_clone_list(struct if_clonereq *);
92static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
93#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related header files.
 * Should this be more generalized?
 */
98extern void	nd6_setmtu(struct ifnet *);
99#endif
100
/* Highest interface index in use; raised by if_attach(), trimmed by if_detach(). */
int	if_index = 0;
/* Per-index lookup table; allocated and enlarged by if_grow(). */
struct	ifindex_entry *ifindex_table = NULL;
/* Queue length if_check() applies when a driver left if_snd.ifq_maxlen at 0. */
int	ifqmaxlen = IFQ_MAXLEN;
struct	ifnethead ifnet;	/* depend on static init XXX */
/* Number of entries on if_cloners; kept by if_clone_attach()/if_clone_detach(). */
int	if_cloners_count;
/* Registered interface cloners. */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);

/* Entry count of ifindex_table; doubled by if_grow() before each allocation. */
static int	if_indexlim = 8;
/* Knotes registered through the minor-0 device (see netkqfilter()). */
static struct	klist ifklist;
110
111static void	filt_netdetach(struct knote *kn);
112static int	filt_netdev(struct knote *kn, long hint);
113
114static struct filterops netdev_filtops =
115    { 1, NULL, filt_netdetach, filt_netdev };
116
117/*
118 * System initialization
119 */
120SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
121SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
122
123MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
124MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
125MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
126
127#define CDEV_MAJOR	165
128
129static d_open_t		netopen;
130static d_close_t	netclose;
131static d_ioctl_t	netioctl;
132static d_kqfilter_t	netkqfilter;
133
/*
 * Character device switch for the "net" devices.  Only open, close,
 * ioctl and kqfilter have real handlers; the remaining entry points
 * are the no* placeholders.  if_init() creates minor 0 from this switch
 * and if_attach() creates one device per interface, with the interface
 * index as the minor number.
 */
static struct cdevsw net_cdevsw = {
	/* open */	netopen,
	/* close */	netclose,
	/* read */	noread,
	/* write */	nowrite,
	/* ioctl */	netioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"net",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	D_KQFILTER,
	/* kqfilter */	netkqfilter,
};
150
151static int
152netopen(dev_t dev, int flag, int mode, struct thread *td)
153{
154	return (0);
155}
156
157static int
158netclose(dev_t dev, int flags, int fmt, struct thread *td)
159{
160	return (0);
161}
162
163static int
164netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
165{
166	struct ifnet *ifp;
167	int error, idx;
168
169	/* only support interface specific ioctls */
170	if (IOCGROUP(cmd) != 'i')
171		return (EOPNOTSUPP);
172	idx = minor(dev);
173	if (idx == 0) {
174		/*
175		 * special network device, not interface.
176		 */
177		if (cmd == SIOCGIFCONF)
178			return (ifconf(cmd, data));	/* XXX remove cmd */
179		return (EOPNOTSUPP);
180	}
181
182	ifp = ifnet_byindex(idx);
183	if (ifp == NULL)
184		return (ENXIO);
185
186	error = ifhwioctl(cmd, ifp, data, td);
187	if (error == ENOIOCTL)
188		error = EOPNOTSUPP;
189	return (error);
190}
191
192static int
193netkqfilter(dev_t dev, struct knote *kn)
194{
195	struct klist *klist;
196	struct ifnet *ifp;
197	int idx;
198
199	idx = minor(dev);
200	if (idx == 0) {
201		klist = &ifklist;
202	} else {
203		ifp = ifnet_byindex(idx);
204		if (ifp == NULL)
205			return (1);
206		klist = &ifp->if_klist;
207	}
208
209	switch (kn->kn_filter) {
210	case EVFILT_NETDEV:
211		kn->kn_fop = &netdev_filtops;
212		break;
213	default:
214		return (1);
215	}
216
217	kn->kn_hook = (caddr_t)klist;
218
219	/* XXX locking? */
220	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
221
222	return (0);
223}
224
225static void
226filt_netdetach(struct knote *kn)
227{
228	struct klist *klist = (struct klist *)kn->kn_hook;
229
230	if (kn->kn_status & KN_DETACHED)
231		return;
232	SLIST_REMOVE(klist, kn, knote, kn_selnext);
233}
234
235static int
236filt_netdev(struct knote *kn, long hint)
237{
238
239	/*
240	 * Currently NOTE_EXIT is abused to indicate device detach.
241	 */
242	if (hint == NOTE_EXIT) {
243		kn->kn_data = NOTE_LINKINV;
244                kn->kn_status |= KN_DETACHED;
245                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
246                return (1);
247        }
248	kn->kn_data = hint;			/* current status */
249	if (kn->kn_sfflags & hint)
250		kn->kn_fflags |= hint;
251	return (kn->kn_fflags != 0);
252}
253
254/*
255 * Network interface utility routines.
256 *
257 * Routines with ifa_ifwith* names take sockaddr *'s as
258 * parameters.
259 */
260/* ARGSUSED*/
261static void
262if_init(dummy)
263	void *dummy;
264{
265
266	TAILQ_INIT(&ifnet);
267	SLIST_INIT(&ifklist);
268	if_grow();				/* create initial table */
269	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
270	    UID_ROOT, GID_WHEEL, 0600, "network");
271}
272
273static void
274if_grow(void)
275{
276	u_int n;
277	struct ifindex_entry *e;
278
279	if_indexlim <<= 1;
280	n = if_indexlim * sizeof(*e);
281	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
282	if (ifindex_table != NULL) {
283		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
284		free((caddr_t)ifindex_table, M_IFADDR);
285	}
286	ifindex_table = e;
287}
288
289/* ARGSUSED*/
290static void
291if_check(dummy)
292	void *dummy;
293{
294	struct ifnet *ifp;
295	int s;
296
297	s = splimp();
298	TAILQ_FOREACH(ifp, &ifnet, if_link) {
299		if (ifp->if_snd.ifq_maxlen == 0) {
300			printf("%s%d XXX: driver didn't set ifq_maxlen\n",
301			    ifp->if_name, ifp->if_unit);
302			ifp->if_snd.ifq_maxlen = ifqmaxlen;
303		}
304		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
305			printf("%s%d XXX: driver didn't initialize queue mtx\n",
306			    ifp->if_name, ifp->if_unit);
307			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
308			    MTX_NETWORK_LOCK, MTX_DEF);
309		}
310	}
311	splx(s);
312	if_slowtimo(0);
313}
314
315static int
316if_findindex(struct ifnet *ifp)
317{
318	int i, unit;
319	char eaddr[18], devname[32];
320	const char *name, *p;
321
322	switch (ifp->if_type) {
323	case IFT_ETHER:			/* these types use struct arpcom */
324	case IFT_FDDI:
325	case IFT_XETHER:
326	case IFT_ISO88025:
327	case IFT_L2VLAN:
328		snprintf(eaddr, 18, "%6D",
329		    ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":");
330		break;
331	default:
332		eaddr[0] = '\0';
333		break;
334	}
335	snprintf(devname, 32, "%s%d", ifp->if_name, ifp->if_unit);
336	name = net_cdevsw.d_name;
337	i = 0;
338	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
339		if (resource_string_value(name, unit, "ether", &p) == 0)
340			if (strcmp(p, eaddr) == 0)
341				goto found;
342		if (resource_string_value(name, unit, "dev", &p) == 0)
343			if (strcmp(p, devname) == 0)
344				goto found;
345	}
346	unit = 0;
347found:
348	if (unit != 0) {
349		if (ifaddr_byindex(unit) == NULL)
350			return (unit);
351		printf("%s%d in use, cannot hardwire it to %s.\n",
352		    name, unit, devname);
353	}
354	for (unit = 1; ; unit++) {
355		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
356			continue;
357		if (resource_string_value(name, unit, "ether", &p) == 0 ||
358		    resource_string_value(name, unit, "dev", &p) == 0)
359			continue;
360		break;
361	}
362	return (unit);
363}
364
365/*
366 * Attach an interface to the
367 * list of "active" interfaces.
368 */
369void
370if_attach(ifp)
371	struct ifnet *ifp;
372{
373	unsigned socksize, ifasize;
374	int namelen, masklen;
375	char workbuf[64];
376	register struct sockaddr_dl *sdl;
377	register struct ifaddr *ifa;
378
379	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
380	/*
381	 * XXX -
382	 * The old code would work if the interface passed a pre-existing
383	 * chain of ifaddrs to this code.  We don't trust our callers to
384	 * properly initialize the tailq, however, so we no longer allow
385	 * this unlikely case.
386	 */
387	TAILQ_INIT(&ifp->if_addrhead);
388	TAILQ_INIT(&ifp->if_prefixhead);
389	TAILQ_INIT(&ifp->if_multiaddrs);
390	SLIST_INIT(&ifp->if_klist);
391	getmicrotime(&ifp->if_lastchange);
392
393#ifdef MAC
394	mac_init_ifnet(ifp);
395	mac_create_ifnet(ifp);
396#endif
397
398	ifp->if_index = if_findindex(ifp);
399	if (ifp->if_index > if_index)
400		if_index = ifp->if_index;
401	if (if_index >= if_indexlim)
402		if_grow();
403
404	ifnet_byindex(ifp->if_index) = ifp;
405	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw, ifp->if_index,
406	    UID_ROOT, GID_WHEEL, 0600, "%s/%s%d",
407	    net_cdevsw.d_name, ifp->if_name, ifp->if_unit);
408	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
409	    net_cdevsw.d_name, ifp->if_index);
410
411	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_name, "if send queue", MTX_DEF);
412
413	/*
414	 * create a Link Level name for this device
415	 */
416	namelen = snprintf(workbuf, sizeof(workbuf),
417	    "%s%d", ifp->if_name, ifp->if_unit);
418#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
419	masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
420	socksize = masklen + ifp->if_addrlen;
421#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
422	if (socksize < sizeof(*sdl))
423		socksize = sizeof(*sdl);
424	socksize = ROUNDUP(socksize);
425	ifasize = sizeof(*ifa) + 2 * socksize;
426	ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_NOWAIT | M_ZERO);
427	if (ifa) {
428		sdl = (struct sockaddr_dl *)(ifa + 1);
429		sdl->sdl_len = socksize;
430		sdl->sdl_family = AF_LINK;
431		bcopy(workbuf, sdl->sdl_data, namelen);
432		sdl->sdl_nlen = namelen;
433		sdl->sdl_index = ifp->if_index;
434		sdl->sdl_type = ifp->if_type;
435		ifaddr_byindex(ifp->if_index) = ifa;
436		ifa->ifa_ifp = ifp;
437		ifa->ifa_rtrequest = link_rtrequest;
438		ifa->ifa_addr = (struct sockaddr *)sdl;
439		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
440		ifa->ifa_netmask = (struct sockaddr *)sdl;
441		sdl->sdl_len = masklen;
442		while (namelen != 0)
443			sdl->sdl_data[--namelen] = 0xff;
444		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
445	}
446	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
447
448	/* Announce the interface. */
449	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
450}
451
452/*
453 * Detach an interface, removing it from the
454 * list of "active" interfaces.
455 */
456void
457if_detach(ifp)
458	struct ifnet *ifp;
459{
460	struct ifaddr *ifa;
461	struct radix_node_head	*rnh;
462	int s;
463	int i;
464
465	/*
466	 * Remove routes and flush queues.
467	 */
468	s = splnet();
469	if_down(ifp);
470
471	/*
472	 * Remove address from ifindex_table[] and maybe decrement if_index.
473	 * Clean up all addresses.
474	 */
475	ifaddr_byindex(ifp->if_index) = NULL;
476	revoke_and_destroy_dev(ifdev_byindex(ifp->if_index));
477	ifdev_byindex(ifp->if_index) = NULL;
478
479	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
480		if_index--;
481
482	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
483	     ifa = TAILQ_FIRST(&ifp->if_addrhead)) {
484#ifdef INET
485		/* XXX: Ugly!! ad hoc just for INET */
486		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
487			struct ifaliasreq ifr;
488
489			bzero(&ifr, sizeof(ifr));
490			ifr.ifra_addr = *ifa->ifa_addr;
491			if (ifa->ifa_dstaddr)
492				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
493			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
494			    NULL) == 0)
495				continue;
496		}
497#endif /* INET */
498#ifdef INET6
499		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
500			in6_purgeaddr(ifa);
501			/* ifp_addrhead is already updated */
502			continue;
503		}
504#endif /* INET6 */
505		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
506		IFAFREE(ifa);
507	}
508
509#ifdef INET6
510	/*
511	 * Remove all IPv6 kernel structs related to ifp.  This should be done
512	 * before removing routing entries below, since IPv6 interface direct
513	 * routes are expected to be removed by the IPv6-specific kernel API.
514	 * Otherwise, the kernel will detect some inconsistency and bark it.
515	 */
516	in6_ifdetach(ifp);
517#endif
518
519	/*
520	 * Delete all remaining routes using this interface
521	 * Unfortuneatly the only way to do this is to slog through
522	 * the entire routing table looking for routes which point
523	 * to this interface...oh well...
524	 */
525	for (i = 1; i <= AF_MAX; i++) {
526		if ((rnh = rt_tables[i]) == NULL)
527			continue;
528		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
529	}
530
531	/* Announce that the interface is gone. */
532	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
533
534#ifdef MAC
535	mac_destroy_ifnet(ifp);
536#endif /* MAC */
537	KNOTE(&ifp->if_klist, NOTE_EXIT);
538	TAILQ_REMOVE(&ifnet, ifp, if_link);
539	mtx_destroy(&ifp->if_snd.ifq_mtx);
540	splx(s);
541}
542
543/*
544 * Delete Routes for a Network Interface
545 *
546 * Called for each routing entry via the rnh->rnh_walktree() call above
547 * to delete all route entries referencing a detaching network interface.
548 *
549 * Arguments:
550 *	rn	pointer to node in the routing table
551 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
552 *
553 * Returns:
554 *	0	successful
555 *	errno	failed - reason indicated
556 *
557 */
558static int
559if_rtdel(rn, arg)
560	struct radix_node	*rn;
561	void			*arg;
562{
563	struct rtentry	*rt = (struct rtentry *)rn;
564	struct ifnet	*ifp = arg;
565	int		err;
566
567	if (rt->rt_ifp == ifp) {
568
569		/*
570		 * Protect (sorta) against walktree recursion problems
571		 * with cloned routes
572		 */
573		if ((rt->rt_flags & RTF_UP) == 0)
574			return (0);
575
576		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
577				rt_mask(rt), rt->rt_flags,
578				(struct rtentry **) NULL);
579		if (err) {
580			log(LOG_WARNING, "if_rtdel: error %d\n", err);
581		}
582	}
583
584	return (0);
585}
586
587/*
588 * Create a clone network interface.
589 */
590int
591if_clone_create(name, len)
592	char *name;
593	int len;
594{
595	struct if_clone *ifc;
596	char *dp;
597	int wildcard, bytoff, bitoff;
598	int unit;
599	int err;
600
601	ifc = if_clone_lookup(name, &unit);
602	if (ifc == NULL)
603		return (EINVAL);
604
605	if (ifunit(name) != NULL)
606		return (EEXIST);
607
608	bytoff = bitoff = 0;
609	wildcard = (unit < 0);
610	/*
611	 * Find a free unit if none was given.
612	 */
613	if (wildcard) {
614		while ((bytoff < ifc->ifc_bmlen)
615		    && (ifc->ifc_units[bytoff] == 0xff))
616			bytoff++;
617		if (bytoff >= ifc->ifc_bmlen)
618			return (ENOSPC);
619		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
620			bitoff++;
621		unit = (bytoff << 3) + bitoff;
622	}
623
624	if (unit > ifc->ifc_maxunit)
625		return (ENXIO);
626
627	err = (*ifc->ifc_create)(ifc, unit);
628	if (err != 0)
629		return (err);
630
631	if (!wildcard) {
632		bytoff = unit >> 3;
633		bitoff = unit - (bytoff << 3);
634	}
635
636	/*
637	 * Allocate the unit in the bitmap.
638	 */
639	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
640	    ("%s: bit is already set", __func__));
641	ifc->ifc_units[bytoff] |= (1 << bitoff);
642
643	/* In the wildcard case, we need to update the name. */
644	if (wildcard) {
645		for (dp = name; *dp != '\0'; dp++);
646		if (snprintf(dp, len - (dp-name), "%d", unit) >
647		    len - (dp-name) - 1) {
648			/*
649			 * This can only be a programmer error and
650			 * there's no straightforward way to recover if
651			 * it happens.
652			 */
653			panic("if_clone_create(): interface name too long");
654		}
655
656	}
657
658	return (0);
659}
660
661/*
662 * Destroy a clone network interface.
663 */
664int
665if_clone_destroy(name)
666	const char *name;
667{
668	struct if_clone *ifc;
669	struct ifnet *ifp;
670	int bytoff, bitoff;
671	int unit;
672
673	ifc = if_clone_lookup(name, &unit);
674	if (ifc == NULL)
675		return (EINVAL);
676
677	if (unit < ifc->ifc_minifs)
678		return (EINVAL);
679
680	ifp = ifunit(name);
681	if (ifp == NULL)
682		return (ENXIO);
683
684	if (ifc->ifc_destroy == NULL)
685		return (EOPNOTSUPP);
686
687	(*ifc->ifc_destroy)(ifp);
688
689	/*
690	 * Compute offset in the bitmap and deallocate the unit.
691	 */
692	bytoff = unit >> 3;
693	bitoff = unit - (bytoff << 3);
694	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
695	    ("%s: bit is already cleared", __func__));
696	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
697	return (0);
698}
699
700/*
701 * Look up a network interface cloner.
702 */
703static struct if_clone *
704if_clone_lookup(name, unitp)
705	const char *name;
706	int *unitp;
707{
708	struct if_clone *ifc;
709	const char *cp;
710	int i;
711
712	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) {
713		for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) {
714			if (ifc->ifc_name[i] != *cp)
715				goto next_ifc;
716		}
717		goto found_name;
718 next_ifc:
719		ifc = LIST_NEXT(ifc, ifc_list);
720	}
721
722	/* No match. */
723	return ((struct if_clone *)NULL);
724
725 found_name:
726	if (*cp == '\0') {
727		i = -1;
728	} else {
729		for (i = 0; *cp != '\0'; cp++) {
730			if (*cp < '0' || *cp > '9') {
731				/* Bogus unit number. */
732				return (NULL);
733			}
734			i = (i * 10) + (*cp - '0');
735		}
736	}
737
738	if (unitp != NULL)
739		*unitp = i;
740	return (ifc);
741}
742
743/*
744 * Register a network interface cloner.
745 */
746void
747if_clone_attach(ifc)
748	struct if_clone *ifc;
749{
750	int bytoff, bitoff;
751	int err;
752	int len, maxclone;
753	int unit;
754
755	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
756	    ("%s: %s requested more units then allowed (%d > %d)",
757	    __func__, ifc->ifc_name, ifc->ifc_minifs,
758	    ifc->ifc_maxunit + 1));
759	/*
760	 * Compute bitmap size and allocate it.
761	 */
762	maxclone = ifc->ifc_maxunit + 1;
763	len = maxclone >> 3;
764	if ((len << 3) < maxclone)
765		len++;
766	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
767	ifc->ifc_bmlen = len;
768
769	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
770	if_cloners_count++;
771
772	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
773		err = (*ifc->ifc_create)(ifc, unit);
774		KASSERT(err == 0,
775		    ("%s: failed to create required interface %s%d",
776		    __func__, ifc->ifc_name, unit));
777
778		/* Allocate the unit in the bitmap. */
779		bytoff = unit >> 3;
780		bitoff = unit - (bytoff << 3);
781		ifc->ifc_units[bytoff] |= (1 << bitoff);
782	}
783}
784
785/*
786 * Unregister a network interface cloner.
787 */
788void
789if_clone_detach(ifc)
790	struct if_clone *ifc;
791{
792
793	LIST_REMOVE(ifc, ifc_list);
794	free(ifc->ifc_units, M_CLONE);
795	if_cloners_count--;
796}
797
798/*
799 * Provide list of interface cloners to userspace.
800 */
801static int
802if_clone_list(ifcr)
803	struct if_clonereq *ifcr;
804{
805	char outbuf[IFNAMSIZ], *dst;
806	struct if_clone *ifc;
807	int count, error = 0;
808
809	ifcr->ifcr_total = if_cloners_count;
810	if ((dst = ifcr->ifcr_buffer) == NULL) {
811		/* Just asking how many there are. */
812		return (0);
813	}
814
815	if (ifcr->ifcr_count < 0)
816		return (EINVAL);
817
818	count = (if_cloners_count < ifcr->ifcr_count) ?
819	    if_cloners_count : ifcr->ifcr_count;
820
821	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
822	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
823		strncpy(outbuf, ifc->ifc_name, IFNAMSIZ);
824		outbuf[IFNAMSIZ - 1] = '\0';	/* sanity */
825		error = copyout(outbuf, dst, IFNAMSIZ);
826		if (error)
827			break;
828	}
829
830	return (error);
831}
832
833/*
834 * Locate an interface based on a complete address.
835 */
836/*ARGSUSED*/
837struct ifaddr *
838ifa_ifwithaddr(addr)
839	struct sockaddr *addr;
840{
841	struct ifnet *ifp;
842	struct ifaddr *ifa;
843
844#define	equal(a1, a2) \
845  (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
846	TAILQ_FOREACH(ifp, &ifnet, if_link)
847		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
848			if (ifa->ifa_addr->sa_family != addr->sa_family)
849				continue;
850			if (equal(addr, ifa->ifa_addr))
851				goto done;
852			/* IP6 doesn't have broadcast */
853			if ((ifp->if_flags & IFF_BROADCAST) &&
854			    ifa->ifa_broadaddr &&
855			    ifa->ifa_broadaddr->sa_len != 0 &&
856			    equal(ifa->ifa_broadaddr, addr))
857				goto done;
858		}
859	ifa = NULL;
860done:
861	return (ifa);
862}
863
864/*
865 * Locate the point to point interface with a given destination address.
866 */
867/*ARGSUSED*/
868struct ifaddr *
869ifa_ifwithdstaddr(addr)
870	struct sockaddr *addr;
871{
872	struct ifnet *ifp;
873	struct ifaddr *ifa;
874
875	TAILQ_FOREACH(ifp, &ifnet, if_link) {
876		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
877			continue;
878		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
879			if (ifa->ifa_addr->sa_family != addr->sa_family)
880				continue;
881			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
882				goto done;
883		}
884	}
885	ifa = NULL;
886done:
887	return (ifa);
888}
889
/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 *
 * An AF_LINK address carrying a valid interface index is answered
 * straight from the index table.  Otherwise every address in addr's
 * family is examined: on AF_INET point-to-point interfaces only an
 * exact destination-address match counts (and is returned at once);
 * elsewhere the address's own ifa_claim_addr handler decides, or, if it
 * has none, addr is compared with the interface address under the
 * netmask and the candidate with the most specific netmask is kept.
 *
 * Returns the chosen ifaddr, or NULL if nothing matched.
 */
struct ifaddr *
ifa_ifwithnet(addr)
	struct sockaddr *addr;
{
	register struct ifnet *ifp;
	register struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;	/* best match so far */
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
	    register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
		return (ifaddr_byindex(sdl->sdl_index));
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			register char *cp, *cp2, *cp3;

			/*
			 * The "next" label doubles as the landing point for
			 * the byte-compare loop below: jumping here moves
			 * on to the next address.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != 0
				    && equal(addr, ifa->ifa_dstaddr))
					goto done;
			} else {
				/*
				 * if we have a special address handler,
				 * then use it instead of the generic one.
				 */
	          		if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr))
						goto done;
					continue;
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				/* The comparison runs to the end of the netmask. */
				cplim = ifa->ifa_netmask->sa_len
					+ (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search
				 * for an even better one.
				 */
				if (ifa_maybe == 0 ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask))
					ifa_maybe = ifa;
			}
		}
	}
	ifa = ifa_maybe;
done:
	return (ifa);
}
982
983/*
984 * Find an interface address specific to an interface best matching
985 * a given address.
986 */
987struct ifaddr *
988ifaof_ifpforaddr(addr, ifp)
989	struct sockaddr *addr;
990	register struct ifnet *ifp;
991{
992	register struct ifaddr *ifa;
993	register char *cp, *cp2, *cp3;
994	register char *cplim;
995	struct ifaddr *ifa_maybe = 0;
996	u_int af = addr->sa_family;
997
998	if (af >= AF_MAX)
999		return (0);
1000	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1001		if (ifa->ifa_addr->sa_family != af)
1002			continue;
1003		if (ifa_maybe == 0)
1004			ifa_maybe = ifa;
1005		if (ifa->ifa_netmask == 0) {
1006			if (equal(addr, ifa->ifa_addr) ||
1007			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1008				goto done;
1009			continue;
1010		}
1011		if (ifp->if_flags & IFF_POINTOPOINT) {
1012			if (equal(addr, ifa->ifa_dstaddr))
1013				goto done;
1014		} else {
1015			cp = addr->sa_data;
1016			cp2 = ifa->ifa_addr->sa_data;
1017			cp3 = ifa->ifa_netmask->sa_data;
1018			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1019			for (; cp3 < cplim; cp3++)
1020				if ((*cp++ ^ *cp2++) & *cp3)
1021					break;
1022			if (cp3 == cplim)
1023				goto done;
1024		}
1025	}
1026	ifa = ifa_maybe;
1027done:
1028	return (ifa);
1029}
1030
1031#include <net/route.h>
1032
1033/*
1034 * Default action when installing a route with a Link Level gateway.
1035 * Lookup an appropriate real ifa to point to.
1036 * This should be moved to /sys/net/link.c eventually.
1037 */
1038static void
1039link_rtrequest(cmd, rt, info)
1040	int cmd;
1041	register struct rtentry *rt;
1042	struct rt_addrinfo *info;
1043{
1044	register struct ifaddr *ifa;
1045	struct sockaddr *dst;
1046	struct ifnet *ifp;
1047
1048	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
1049	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
1050		return;
1051	ifa = ifaof_ifpforaddr(dst, ifp);
1052	if (ifa) {
1053		IFAFREE(rt->rt_ifa);
1054		rt->rt_ifa = ifa;
1055		ifa->ifa_refcnt++;
1056		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1057			ifa->ifa_rtrequest(cmd, rt, info);
1058	}
1059}
1060
1061/*
1062 * Mark an interface down and notify protocols of
1063 * the transition.
1064 * NOTE: must be called at splnet or eqivalent.
1065 */
1066void
1067if_unroute(ifp, flag, fam)
1068	register struct ifnet *ifp;
1069	int flag, fam;
1070{
1071	register struct ifaddr *ifa;
1072
1073	ifp->if_flags &= ~flag;
1074	getmicrotime(&ifp->if_lastchange);
1075	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1076		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1077			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1078	if_qflush(&ifp->if_snd);
1079	rt_ifmsg(ifp);
1080}
1081
1082/*
1083 * Mark an interface up and notify protocols of
1084 * the transition.
1085 * NOTE: must be called at splnet or eqivalent.
1086 */
1087void
1088if_route(ifp, flag, fam)
1089	register struct ifnet *ifp;
1090	int flag, fam;
1091{
1092	register struct ifaddr *ifa;
1093
1094	ifp->if_flags |= flag;
1095	getmicrotime(&ifp->if_lastchange);
1096	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1097		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1098			pfctlinput(PRC_IFUP, ifa->ifa_addr);
1099	rt_ifmsg(ifp);
1100#ifdef INET6
1101	in6_if_up(ifp);
1102#endif
1103}
1104
1105/*
1106 * Mark an interface down and notify protocols of
1107 * the transition.
1108 * NOTE: must be called at splnet or eqivalent.
1109 */
1110void
1111if_down(ifp)
1112	register struct ifnet *ifp;
1113{
1114
1115	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1116}
1117
1118/*
1119 * Mark an interface up and notify protocols of
1120 * the transition.
1121 * NOTE: must be called at splnet or eqivalent.
1122 */
1123void
1124if_up(ifp)
1125	register struct ifnet *ifp;
1126{
1127
1128	if_route(ifp, IFF_UP, AF_UNSPEC);
1129}
1130
1131/*
1132 * Flush an interface queue.
1133 */
1134static void
1135if_qflush(ifq)
1136	register struct ifqueue *ifq;
1137{
1138	register struct mbuf *m, *n;
1139
1140	n = ifq->ifq_head;
1141	while ((m = n) != 0) {
1142		n = m->m_act;
1143		m_freem(m);
1144	}
1145	ifq->ifq_head = 0;
1146	ifq->ifq_tail = 0;
1147	ifq->ifq_len = 0;
1148}
1149
1150/*
1151 * Handle interface watchdog timer routines.  Called
1152 * from softclock, we decrement timers (if set) and
1153 * call the appropriate interface routine on expiration.
1154 */
1155static void
1156if_slowtimo(arg)
1157	void *arg;
1158{
1159	register struct ifnet *ifp;
1160	int s = splimp();
1161
1162	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1163		if (ifp->if_timer == 0 || --ifp->if_timer)
1164			continue;
1165		if (ifp->if_watchdog)
1166			(*ifp->if_watchdog)(ifp);
1167	}
1168	splx(s);
1169	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1170}
1171
1172/*
1173 * Map interface name to
1174 * interface structure pointer.
1175 */
1176struct ifnet *
1177ifunit(const char *name)
1178{
1179	char namebuf[IFNAMSIZ + 1];
1180	struct ifnet *ifp;
1181	dev_t dev;
1182
1183	/*
1184	 * Now search all the interfaces for this name/number
1185	 */
1186
1187	/*
1188	 * XXX
1189	 * Devices should really be known as /dev/fooN, not /dev/net/fooN.
1190	 */
1191	snprintf(namebuf, IFNAMSIZ, "%s/%s", net_cdevsw.d_name, name);
1192	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1193		dev = ifdev_byindex(ifp->if_index);
1194		if (strcmp(devtoname(dev), namebuf) == 0)
1195			break;
1196		if (dev_named(dev, name))
1197			break;
1198	}
1199	return (ifp);
1200}
1201
1202/*
1203 * Map interface name in a sockaddr_dl to
1204 * interface structure pointer.
1205 */
1206struct ifnet *
1207if_withname(sa)
1208	struct sockaddr *sa;
1209{
1210	char ifname[IFNAMSIZ+1];
1211	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1212
1213	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1214	     (sdl->sdl_nlen > IFNAMSIZ) )
1215		return NULL;
1216
1217	/*
1218	 * ifunit wants a NUL-terminated string.  It may not be NUL-terminated
1219	 * in the sockaddr, and we don't want to change the caller's sockaddr
1220	 * (there might not be room to add the trailing NUL anyway), so we make
1221	 * a local copy that we know we can NUL-terminate safely.
1222	 */
1223
1224	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1225	ifname[sdl->sdl_nlen] = '\0';
1226	return ifunit(ifname);
1227}
1228
1229/*
1230 * Hardware specific interface ioctls.
1231 */
1232static int
1233ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1234{
1235	struct ifreq *ifr;
1236	struct ifstat *ifs;
1237	int error = 0;
1238	int new_flags;
1239
1240	ifr = (struct ifreq *)data;
1241	switch (cmd) {
1242	case SIOCGIFINDEX:
1243		ifr->ifr_index = ifp->if_index;
1244		break;
1245
1246	case SIOCGIFFLAGS:
1247		ifr->ifr_flags = ifp->if_flags & 0xffff;
1248		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1249		break;
1250
1251	case SIOCGIFCAP:
1252		ifr->ifr_reqcap = ifp->if_capabilities;
1253		ifr->ifr_curcap = ifp->if_capenable;
1254		break;
1255
1256#ifdef MAC
1257	case SIOCGIFMAC:
1258		error = mac_ioctl_ifnet_get(td->td_proc->p_ucred, ifr, ifp);
1259		break;
1260#endif
1261
1262	case SIOCGIFMETRIC:
1263		ifr->ifr_metric = ifp->if_metric;
1264		break;
1265
1266	case SIOCGIFMTU:
1267		ifr->ifr_mtu = ifp->if_mtu;
1268		break;
1269
1270	case SIOCGIFPHYS:
1271		ifr->ifr_phys = ifp->if_physical;
1272		break;
1273
1274	case SIOCSIFFLAGS:
1275		error = suser(td);
1276		if (error)
1277			return (error);
1278		new_flags = (ifr->ifr_flags & 0xffff) |
1279		    (ifr->ifr_flagshigh << 16);
1280		if (ifp->if_flags & IFF_SMART) {
1281			/* Smart drivers twiddle their own routes */
1282		} else if (ifp->if_flags & IFF_UP &&
1283		    (new_flags & IFF_UP) == 0) {
1284			int s = splimp();
1285			if_down(ifp);
1286			splx(s);
1287		} else if (new_flags & IFF_UP &&
1288		    (ifp->if_flags & IFF_UP) == 0) {
1289			int s = splimp();
1290			if_up(ifp);
1291			splx(s);
1292		}
1293		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1294			(new_flags &~ IFF_CANTCHANGE);
1295		if (new_flags & IFF_PPROMISC) {
1296			/* Permanently promiscuous mode requested */
1297			ifp->if_flags |= IFF_PROMISC;
1298		} else if (ifp->if_pcount == 0) {
1299			ifp->if_flags &= ~IFF_PROMISC;
1300		}
1301		if (ifp->if_ioctl)
1302			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1303		getmicrotime(&ifp->if_lastchange);
1304		break;
1305
1306	case SIOCSIFCAP:
1307		error = suser(td);
1308		if (error)
1309			return (error);
1310		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1311			return (EINVAL);
1312		(void) (*ifp->if_ioctl)(ifp, cmd, data);
1313		break;
1314
1315#ifdef MAC
1316	case SIOCSIFMAC:
1317		error = mac_ioctl_ifnet_set(td->td_proc->p_ucred, ifr, ifp);
1318		break;
1319#endif
1320
1321	case SIOCSIFMETRIC:
1322		error = suser(td);
1323		if (error)
1324			return (error);
1325		ifp->if_metric = ifr->ifr_metric;
1326		getmicrotime(&ifp->if_lastchange);
1327		break;
1328
1329	case SIOCSIFPHYS:
1330		error = suser(td);
1331		if (error)
1332			return error;
1333		if (!ifp->if_ioctl)
1334		        return EOPNOTSUPP;
1335		error = (*ifp->if_ioctl)(ifp, cmd, data);
1336		if (error == 0)
1337			getmicrotime(&ifp->if_lastchange);
1338		return(error);
1339
1340	case SIOCSIFMTU:
1341	{
1342		u_long oldmtu = ifp->if_mtu;
1343
1344		error = suser(td);
1345		if (error)
1346			return (error);
1347		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1348			return (EINVAL);
1349		if (ifp->if_ioctl == NULL)
1350			return (EOPNOTSUPP);
1351		error = (*ifp->if_ioctl)(ifp, cmd, data);
1352		if (error == 0) {
1353			getmicrotime(&ifp->if_lastchange);
1354			rt_ifmsg(ifp);
1355		}
1356		/*
1357		 * If the link MTU changed, do network layer specific procedure.
1358		 */
1359		if (ifp->if_mtu != oldmtu) {
1360#ifdef INET6
1361			nd6_setmtu(ifp);
1362#endif
1363		}
1364		break;
1365	}
1366
1367	case SIOCADDMULTI:
1368	case SIOCDELMULTI:
1369		error = suser(td);
1370		if (error)
1371			return (error);
1372
1373		/* Don't allow group membership on non-multicast interfaces. */
1374		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1375			return (EOPNOTSUPP);
1376
1377		/* Don't let users screw up protocols' entries. */
1378		if (ifr->ifr_addr.sa_family != AF_LINK)
1379			return (EINVAL);
1380
1381		if (cmd == SIOCADDMULTI) {
1382			struct ifmultiaddr *ifma;
1383			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1384		} else {
1385			error = if_delmulti(ifp, &ifr->ifr_addr);
1386		}
1387		if (error == 0)
1388			getmicrotime(&ifp->if_lastchange);
1389		break;
1390
1391	case SIOCSIFPHYADDR:
1392	case SIOCDIFPHYADDR:
1393#ifdef INET6
1394	case SIOCSIFPHYADDR_IN6:
1395#endif
1396	case SIOCSLIFPHYADDR:
1397        case SIOCSIFMEDIA:
1398	case SIOCSIFGENERIC:
1399		error = suser(td);
1400		if (error)
1401			return (error);
1402		if (ifp->if_ioctl == NULL)
1403			return (EOPNOTSUPP);
1404		error = (*ifp->if_ioctl)(ifp, cmd, data);
1405		if (error == 0)
1406			getmicrotime(&ifp->if_lastchange);
1407		break;
1408
1409	case SIOCGIFSTATUS:
1410		ifs = (struct ifstat *)data;
1411		ifs->ascii[0] = '\0';
1412
1413	case SIOCGIFPSRCADDR:
1414	case SIOCGIFPDSTADDR:
1415	case SIOCGLIFPHYADDR:
1416	case SIOCGIFMEDIA:
1417	case SIOCGIFGENERIC:
1418		if (ifp->if_ioctl == 0)
1419			return (EOPNOTSUPP);
1420		error = (*ifp->if_ioctl)(ifp, cmd, data);
1421		break;
1422
1423	case SIOCSIFLLADDR:
1424		error = suser(td);
1425		if (error)
1426			return (error);
1427		error = if_setlladdr(ifp,
1428		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1429		break;
1430
1431	default:
1432		error = ENOIOCTL;
1433		break;
1434	}
1435	return (error);
1436}
1437
1438/*
1439 * Interface ioctls.
1440 */
1441int
1442ifioctl(so, cmd, data, td)
1443	struct socket *so;
1444	u_long cmd;
1445	caddr_t data;
1446	struct thread *td;
1447{
1448	struct ifnet *ifp;
1449	struct ifreq *ifr;
1450	int error;
1451	int oif_flags;
1452
1453	switch (cmd) {
1454	case SIOCGIFCONF:
1455	case OSIOCGIFCONF:
1456		return (ifconf(cmd, data));
1457	}
1458	ifr = (struct ifreq *)data;
1459
1460	switch (cmd) {
1461	case SIOCIFCREATE:
1462	case SIOCIFDESTROY:
1463		if ((error = suser(td)) != 0)
1464			return (error);
1465		return ((cmd == SIOCIFCREATE) ?
1466			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1467			if_clone_destroy(ifr->ifr_name));
1468
1469	case SIOCIFGCLONERS:
1470		return (if_clone_list((struct if_clonereq *)data));
1471	}
1472
1473	ifp = ifunit(ifr->ifr_name);
1474	if (ifp == 0)
1475		return (ENXIO);
1476
1477	error = ifhwioctl(cmd, ifp, data, td);
1478	if (error != ENOIOCTL)
1479		return (error);
1480
1481	oif_flags = ifp->if_flags;
1482	if (so->so_proto == 0)
1483		return (EOPNOTSUPP);
1484#ifndef COMPAT_43
1485	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1486								 data,
1487								 ifp, td));
1488#else
1489	{
1490		int ocmd = cmd;
1491
1492		switch (cmd) {
1493
1494		case SIOCSIFDSTADDR:
1495		case SIOCSIFADDR:
1496		case SIOCSIFBRDADDR:
1497		case SIOCSIFNETMASK:
1498#if BYTE_ORDER != BIG_ENDIAN
1499			if (ifr->ifr_addr.sa_family == 0 &&
1500			    ifr->ifr_addr.sa_len < 16) {
1501				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1502				ifr->ifr_addr.sa_len = 16;
1503			}
1504#else
1505			if (ifr->ifr_addr.sa_len == 0)
1506				ifr->ifr_addr.sa_len = 16;
1507#endif
1508			break;
1509
1510		case OSIOCGIFADDR:
1511			cmd = SIOCGIFADDR;
1512			break;
1513
1514		case OSIOCGIFDSTADDR:
1515			cmd = SIOCGIFDSTADDR;
1516			break;
1517
1518		case OSIOCGIFBRDADDR:
1519			cmd = SIOCGIFBRDADDR;
1520			break;
1521
1522		case OSIOCGIFNETMASK:
1523			cmd = SIOCGIFNETMASK;
1524		}
1525		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1526								   cmd,
1527								   data,
1528								   ifp, td));
1529		switch (ocmd) {
1530
1531		case OSIOCGIFADDR:
1532		case OSIOCGIFDSTADDR:
1533		case OSIOCGIFBRDADDR:
1534		case OSIOCGIFNETMASK:
1535			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1536
1537		}
1538	}
1539#endif /* COMPAT_43 */
1540
1541	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1542#ifdef INET6
1543		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1544		if (ifp->if_flags & IFF_UP) {
1545			int s = splimp();
1546			in6_if_up(ifp);
1547			splx(s);
1548		}
1549#endif
1550	}
1551	return (error);
1552}
1553
1554/*
1555 * Set/clear promiscuous mode on interface ifp based on the truth value
1556 * of pswitch.  The calls are reference counted so that only the first
1557 * "on" request actually has an effect, as does the final "off" request.
1558 * Results are undefined if the "off" and "on" requests are not matched.
1559 */
1560int
1561ifpromisc(ifp, pswitch)
1562	struct ifnet *ifp;
1563	int pswitch;
1564{
1565	struct ifreq ifr;
1566	int error;
1567	int oldflags, oldpcount;
1568
1569	oldpcount = ifp->if_pcount;
1570	oldflags = ifp->if_flags;
1571	if (ifp->if_flags & IFF_PPROMISC) {
1572		/* Do nothing if device is in permanently promiscuous mode */
1573		ifp->if_pcount += pswitch ? 1 : -1;
1574		return (0);
1575	}
1576	if (pswitch) {
1577		/*
1578		 * If the device is not configured up, we cannot put it in
1579		 * promiscuous mode.
1580		 */
1581		if ((ifp->if_flags & IFF_UP) == 0)
1582			return (ENETDOWN);
1583		if (ifp->if_pcount++ != 0)
1584			return (0);
1585		ifp->if_flags |= IFF_PROMISC;
1586	} else {
1587		if (--ifp->if_pcount > 0)
1588			return (0);
1589		ifp->if_flags &= ~IFF_PROMISC;
1590	}
1591	ifr.ifr_flags = ifp->if_flags & 0xffff;
1592	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1593	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1594	if (error == 0) {
1595		log(LOG_INFO, "%s%d: promiscuous mode %s\n",
1596		    ifp->if_name, ifp->if_unit,
1597		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1598		rt_ifmsg(ifp);
1599	} else {
1600		ifp->if_pcount = oldpcount;
1601		ifp->if_flags = oldflags;
1602	}
1603	return error;
1604}
1605
1606/*
1607 * Return interface configuration
1608 * of system.  List may be used
1609 * in later ioctl's (above) to get
1610 * other information.
1611 */
1612/*ARGSUSED*/
1613static int
1614ifconf(cmd, data)
1615	u_long cmd;
1616	caddr_t data;
1617{
1618	struct ifconf *ifc = (struct ifconf *)data;
1619	struct ifnet *ifp;
1620	struct ifaddr *ifa;
1621	struct ifreq ifr, *ifrp;
1622	int space = ifc->ifc_len, error = 0;
1623
1624	ifrp = ifc->ifc_req;
1625	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1626		char workbuf[64];
1627		int ifnlen, addrs;
1628
1629		if (space < sizeof(ifr))
1630			break;
1631		ifnlen = snprintf(workbuf, sizeof(workbuf),
1632		    "%s%d", ifp->if_name, ifp->if_unit);
1633		if(ifnlen + 1 > sizeof ifr.ifr_name) {
1634			error = ENAMETOOLONG;
1635			break;
1636		} else {
1637			strcpy(ifr.ifr_name, workbuf);
1638		}
1639
1640		addrs = 0;
1641		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1642			struct sockaddr *sa = ifa->ifa_addr;
1643
1644			if (space < sizeof(ifr))
1645				break;
1646			if (jailed(curthread->td_ucred) &&
1647			    prison_if(curthread->td_ucred, sa))
1648				continue;
1649			addrs++;
1650#ifdef COMPAT_43
1651			if (cmd == OSIOCGIFCONF) {
1652				struct osockaddr *osa =
1653					 (struct osockaddr *)&ifr.ifr_addr;
1654				ifr.ifr_addr = *sa;
1655				osa->sa_family = sa->sa_family;
1656				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1657						sizeof (ifr));
1658				ifrp++;
1659			} else
1660#endif
1661			if (sa->sa_len <= sizeof(*sa)) {
1662				ifr.ifr_addr = *sa;
1663				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1664						sizeof (ifr));
1665				ifrp++;
1666			} else {
1667				if (space < sizeof (ifr) + sa->sa_len -
1668					    sizeof(*sa))
1669					break;
1670				space -= sa->sa_len - sizeof(*sa);
1671				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1672						sizeof (ifr.ifr_name));
1673				if (error == 0)
1674				    error = copyout((caddr_t)sa,
1675				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1676				ifrp = (struct ifreq *)
1677					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1678			}
1679			if (error)
1680				break;
1681			space -= sizeof (ifr);
1682		}
1683		if (error)
1684			break;
1685		if (!addrs) {
1686			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1687			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1688			    sizeof (ifr));
1689			if (error)
1690				break;
1691			space -= sizeof (ifr);
1692			ifrp++;
1693		}
1694	}
1695	ifc->ifc_len -= space;
1696	return (error);
1697}
1698
1699/*
1700 * Just like if_promisc(), but for all-multicast-reception mode.
1701 */
1702int
1703if_allmulti(ifp, onswitch)
1704	struct ifnet *ifp;
1705	int onswitch;
1706{
1707	int error = 0;
1708	int s = splimp();
1709	struct ifreq ifr;
1710
1711	if (onswitch) {
1712		if (ifp->if_amcount++ == 0) {
1713			ifp->if_flags |= IFF_ALLMULTI;
1714			ifr.ifr_flags = ifp->if_flags & 0xffff;
1715			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1716			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1717		}
1718	} else {
1719		if (ifp->if_amcount > 1) {
1720			ifp->if_amcount--;
1721		} else {
1722			ifp->if_amcount = 0;
1723			ifp->if_flags &= ~IFF_ALLMULTI;
1724			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1725			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1726			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1727		}
1728	}
1729	splx(s);
1730
1731	if (error == 0)
1732		rt_ifmsg(ifp);
1733	return error;
1734}
1735
1736/*
1737 * Add a multicast listenership to the interface in question.
1738 * The link layer provides a routine which converts
1739 */
1740int
1741if_addmulti(ifp, sa, retifma)
1742	struct ifnet *ifp;	/* interface to manipulate */
1743	struct sockaddr *sa;	/* address to add */
1744	struct ifmultiaddr **retifma;
1745{
1746	struct sockaddr *llsa, *dupsa;
1747	int error, s;
1748	struct ifmultiaddr *ifma;
1749
1750	/*
1751	 * If the matching multicast address already exists
1752	 * then don't add a new one, just add a reference
1753	 */
1754	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1755		if (equal(sa, ifma->ifma_addr)) {
1756			ifma->ifma_refcount++;
1757			if (retifma)
1758				*retifma = ifma;
1759			return 0;
1760		}
1761	}
1762
1763	/*
1764	 * Give the link layer a chance to accept/reject it, and also
1765	 * find out which AF_LINK address this maps to, if it isn't one
1766	 * already.
1767	 */
1768	if (ifp->if_resolvemulti) {
1769		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1770		if (error) return error;
1771	} else {
1772		llsa = 0;
1773	}
1774
1775	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1776	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1777	bcopy(sa, dupsa, sa->sa_len);
1778
1779	ifma->ifma_addr = dupsa;
1780	ifma->ifma_lladdr = llsa;
1781	ifma->ifma_ifp = ifp;
1782	ifma->ifma_refcount = 1;
1783	ifma->ifma_protospec = 0;
1784	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1785
1786	/*
1787	 * Some network interfaces can scan the address list at
1788	 * interrupt time; lock them out.
1789	 */
1790	s = splimp();
1791	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1792	splx(s);
1793	if (retifma != NULL)
1794		*retifma = ifma;
1795
1796	if (llsa != 0) {
1797		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1798			if (equal(ifma->ifma_addr, llsa))
1799				break;
1800		}
1801		if (ifma) {
1802			ifma->ifma_refcount++;
1803		} else {
1804			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1805			       M_IFMADDR, M_WAITOK);
1806			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1807			       M_IFMADDR, M_WAITOK);
1808			bcopy(llsa, dupsa, llsa->sa_len);
1809			ifma->ifma_addr = dupsa;
1810			ifma->ifma_ifp = ifp;
1811			ifma->ifma_refcount = 1;
1812			s = splimp();
1813			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1814			splx(s);
1815		}
1816	}
1817	/*
1818	 * We are certain we have added something, so call down to the
1819	 * interface to let them know about it.
1820	 */
1821	s = splimp();
1822	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1823	splx(s);
1824
1825	return 0;
1826}
1827
1828/*
1829 * Remove a reference to a multicast address on this interface.  Yell
1830 * if the request does not match an existing membership.
1831 */
1832int
1833if_delmulti(ifp, sa)
1834	struct ifnet *ifp;
1835	struct sockaddr *sa;
1836{
1837	struct ifmultiaddr *ifma;
1838	int s;
1839
1840	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1841		if (equal(sa, ifma->ifma_addr))
1842			break;
1843	if (ifma == 0)
1844		return ENOENT;
1845
1846	if (ifma->ifma_refcount > 1) {
1847		ifma->ifma_refcount--;
1848		return 0;
1849	}
1850
1851	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1852	sa = ifma->ifma_lladdr;
1853	s = splimp();
1854	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1855	/*
1856	 * Make sure the interface driver is notified
1857	 * in the case of a link layer mcast group being left.
1858	 */
1859	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1860		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1861	splx(s);
1862	free(ifma->ifma_addr, M_IFMADDR);
1863	free(ifma, M_IFMADDR);
1864	if (sa == 0)
1865		return 0;
1866
1867	/*
1868	 * Now look for the link-layer address which corresponds to
1869	 * this network address.  It had been squirreled away in
1870	 * ifma->ifma_lladdr for this purpose (so we don't have
1871	 * to call ifp->if_resolvemulti() again), and we saved that
1872	 * value in sa above.  If some nasty deleted the
1873	 * link-layer address out from underneath us, we can deal because
1874	 * the address we stored was is not the same as the one which was
1875	 * in the record for the link-layer address.  (So we don't complain
1876	 * in that case.)
1877	 */
1878	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1879		if (equal(sa, ifma->ifma_addr))
1880			break;
1881	if (ifma == 0)
1882		return 0;
1883
1884	if (ifma->ifma_refcount > 1) {
1885		ifma->ifma_refcount--;
1886		return 0;
1887	}
1888
1889	s = splimp();
1890	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1891	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1892	splx(s);
1893	free(ifma->ifma_addr, M_IFMADDR);
1894	free(sa, M_IFMADDR);
1895	free(ifma, M_IFMADDR);
1896
1897	return 0;
1898}
1899
1900/*
1901 * Set the link layer address on an interface.
1902 *
1903 * At this time we only support certain types of interfaces,
1904 * and we don't allow the length of the address to change.
1905 */
1906int
1907if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1908{
1909	struct sockaddr_dl *sdl;
1910	struct ifaddr *ifa;
1911	struct ifreq ifr;
1912
1913	ifa = ifaddr_byindex(ifp->if_index);
1914	if (ifa == NULL)
1915		return (EINVAL);
1916	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1917	if (sdl == NULL)
1918		return (EINVAL);
1919	if (len != sdl->sdl_alen)	/* don't allow length to change */
1920		return (EINVAL);
1921	switch (ifp->if_type) {
1922	case IFT_ETHER:			/* these types use struct arpcom */
1923	case IFT_FDDI:
1924	case IFT_XETHER:
1925	case IFT_ISO88025:
1926	case IFT_L2VLAN:
1927		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1928		bcopy(lladdr, LLADDR(sdl), len);
1929		break;
1930	default:
1931		return (ENODEV);
1932	}
1933	/*
1934	 * If the interface is already up, we need
1935	 * to re-init it in order to reprogram its
1936	 * address filter.
1937	 */
1938	if ((ifp->if_flags & IFF_UP) != 0) {
1939		ifp->if_flags &= ~IFF_UP;
1940		ifr.ifr_flags = ifp->if_flags & 0xffff;
1941		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1942		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1943		ifp->if_flags |= IFF_UP;
1944		ifr.ifr_flags = ifp->if_flags & 0xffff;
1945		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1946		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1947#ifdef INET
1948		/*
1949		 * Also send gratuitous ARPs to notify other nodes about
1950		 * the address change.
1951		 */
1952		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1953			if (ifa->ifa_addr != NULL &&
1954			    ifa->ifa_addr->sa_family == AF_INET)
1955				arp_ifinit(ifp, ifa);
1956		}
1957#endif
1958	}
1959	return (0);
1960}
1961
1962struct ifmultiaddr *
1963ifmaof_ifpforaddr(sa, ifp)
1964	struct sockaddr *sa;
1965	struct ifnet *ifp;
1966{
1967	struct ifmultiaddr *ifma;
1968
1969	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1970		if (equal(ifma->ifma_addr, sa))
1971			break;
1972
1973	return ifma;
1974}
1975
1976int
1977if_printf(struct ifnet *ifp, const char * fmt, ...)
1978{
1979	va_list ap;
1980	int retval;
1981
1982	retval = printf("%s%d: ", ifp->if_name, ifp->if_unit);
1983	va_start(ap, fmt);
1984	retval += vprintf(fmt, ap);
1985	va_end(ap);
1986	return (retval);
1987}
1988
/*
 * Sysctl nodes: "link" is declared under _net (numbered PF_LINK), and
 * "generic" is declared beneath it under _net_link.
 */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1991