if_vlan.c revision 317082
1/*-
2 * Copyright 1998 Massachusetts Institute of Technology
3 *
4 * Permission to use, copy, modify, and distribute this software and
5 * its documentation for any purpose and without fee is hereby
6 * granted, provided that both the above copyright notice and this
7 * permission notice appear in all copies, that both the above
8 * copyright notice and this permission notice appear in all
9 * supporting documentation, and that the name of M.I.T. not be used
10 * in advertising or publicity pertaining to distribution of the
11 * software without specific, written prior permission.  M.I.T. makes
12 * no representations about the suitability of this software for any
13 * purpose.  It is provided "as is" without express or implied
14 * warranty.
15 *
16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30/*
31 * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
32 * Might be extended some day to also handle IEEE 802.1p priority
33 * tagging.  This is sort of sneaky in the implementation, since
34 * we need to pretend to be enough of an Ethernet implementation
35 * to make arp work.  The way we do this is by telling everyone
36 * that we are an Ethernet, and then catch the packets that
37 * ether_output() sends to us via if_transmit(), rewrite them for
38 * use by the real outgoing interface, and ask it to send them.
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: stable/10/sys/net/if_vlan.c 317082 2017-04-18 09:22:06Z ae $");
43
44#include "opt_inet.h"
45#include "opt_vlan.h"
46
47#include <sys/param.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rwlock.h>
54#include <sys/queue.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/systm.h>
59#include <sys/sx.h>
60
61#include <net/bpf.h>
62#include <net/ethernet.h>
63#include <net/if.h>
64#include <net/if_clone.h>
65#include <net/if_dl.h>
66#include <net/if_types.h>
67#include <net/if_vlan_var.h>
68#include <net/vnet.h>
69
70#ifdef INET
71#include <netinet/in.h>
72#include <netinet/if_ether.h>
73#endif
74
/* Hash width (log2 of the bucket count) a new trunk's hash starts with. */
#define	VLAN_DEF_HWIDTH	4
/* Interface flags assigned to every vlan(4) interface at creation. */
#define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)

/* True when the interface is marked IFF_UP and its driver is running. */
#define	UP_AND_RUNNING(ifp)						\
    ((((ifp)->if_flags & IFF_UP) != 0) &&				\
    (((ifp)->if_drv_flags & IFF_DRV_RUNNING) != 0))
80
81LIST_HEAD(ifvlanhead, ifvlan);
82
83struct ifvlantrunk {
84	struct	ifnet   *parent;	/* parent interface of this trunk */
85	struct	rwlock	rw;
86#ifdef VLAN_ARRAY
87#define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
88	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
89#else
90	struct	ifvlanhead *hash;	/* dynamic hash-list table */
91	uint16_t	hmask;
92	uint16_t	hwidth;
93#endif
94	int		refcnt;
95};
96
97struct vlan_mc_entry {
98	struct sockaddr_dl		mc_addr;
99	SLIST_ENTRY(vlan_mc_entry)	mc_entries;
100};
101
102struct	ifvlan {
103	struct	ifvlantrunk *ifv_trunk;
104	struct	ifnet *ifv_ifp;
105	void	*ifv_cookie;
106#define	TRUNK(ifv)	((ifv)->ifv_trunk)
107#define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
108	int	ifv_pflags;	/* special flags we have set on parent */
109	struct	ifv_linkmib {
110		int	ifvm_encaplen;	/* encapsulation length */
111		int	ifvm_mtufudge;	/* MTU fudged by this much */
112		int	ifvm_mintu;	/* min transmission unit */
113		uint16_t ifvm_proto;	/* encapsulation ethertype */
114		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
115	}	ifv_mib;
116	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
117#ifndef VLAN_ARRAY
118	LIST_ENTRY(ifvlan) ifv_list;
119#endif
120};
121#define	ifv_proto	ifv_mib.ifvm_proto
122#define	ifv_vid		ifv_mib.ifvm_tag
123#define	ifv_encaplen	ifv_mib.ifvm_encaplen
124#define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
125#define	ifv_mintu	ifv_mib.ifvm_mintu
126
127/* Special flags we should propagate to parent. */
128static struct {
129	int flag;
130	int (*func)(struct ifnet *, int);
131} vlan_pflags[] = {
132	{IFF_PROMISC, ifpromisc},
133	{IFF_ALLMULTI, if_allmulti},
134	{0, NULL}
135};
136
137SYSCTL_DECL(_net_link);
138static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
139    "IEEE 802.1Q VLAN");
140static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
141    "for consistency");
142
143static int soft_pad = 0;
144SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
145	   "pad short frames before tagging");
146
147static const char vlanname[] = "vlan";
148static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
149
150static eventhandler_tag ifdetach_tag;
151static eventhandler_tag iflladdr_tag;
152
153/*
154 * We have a global mutex, that is used to serialize configuration
155 * changes and isn't used in normal packet delivery.
156 *
157 * We also have a per-trunk rwlock, that is locked shared on packet
158 * processing and exclusive when configuration is changed.
159 *
160 * The VLAN_ARRAY substitutes the dynamic hash with a static array
161 * with 4096 entries. In theory this can give a boost in processing,
162 * however on practice it does not. Probably this is because array
163 * is too big to fit into CPU cache.
164 */
165static struct sx ifv_lock;
166#define	VLAN_LOCK_INIT()	sx_init(&ifv_lock, "vlan_global")
167#define	VLAN_LOCK_DESTROY()	sx_destroy(&ifv_lock)
168#define	VLAN_LOCK_ASSERT()	sx_assert(&ifv_lock, SA_LOCKED)
169#define	VLAN_LOCK()		sx_xlock(&ifv_lock)
170#define	VLAN_UNLOCK()		sx_xunlock(&ifv_lock)
171#define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, vlanname)
172#define	TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
173#define	TRUNK_LOCK(trunk)	rw_wlock(&(trunk)->rw)
174#define	TRUNK_UNLOCK(trunk)	rw_wunlock(&(trunk)->rw)
175#define	TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
176#define	TRUNK_RLOCK(trunk)	rw_rlock(&(trunk)->rw)
177#define	TRUNK_RUNLOCK(trunk)	rw_runlock(&(trunk)->rw)
178#define	TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
179
180#ifndef VLAN_ARRAY
181static	void vlan_inithash(struct ifvlantrunk *trunk);
182static	void vlan_freehash(struct ifvlantrunk *trunk);
183static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
184static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
185static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
186static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
187	uint16_t vid);
188#endif
189static	void trunk_destroy(struct ifvlantrunk *trunk);
190
191static	void vlan_init(void *foo);
192static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
193static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
194static	void vlan_qflush(struct ifnet *ifp);
195static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
196    int (*func)(struct ifnet *, int));
197static	int vlan_setflags(struct ifnet *ifp, int status);
198static	int vlan_setmulti(struct ifnet *ifp);
199static	int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
200static	void vlan_unconfig(struct ifnet *ifp);
201static	void vlan_unconfig_locked(struct ifnet *ifp, int departing);
202static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
203static	void vlan_link_state(struct ifnet *ifp);
204static	void vlan_capabilities(struct ifvlan *ifv);
205static	void vlan_trunk_capabilities(struct ifnet *ifp);
206
207static	struct ifnet *vlan_clone_match_ethervid(struct if_clone *,
208    const char *, int *);
209static	int vlan_clone_match(struct if_clone *, const char *);
210static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
211static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);
212
213static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
214static  void vlan_iflladdr(void *arg, struct ifnet *ifp);
215
/* Cloner handle returned by if_clone_advanced(). */
static struct if_clone *vlan_cloner;

#ifdef VIMAGE
/* Per-vnet copy of the cloner handle, reached through V_vlan_cloner. */
static VNET_DEFINE(struct if_clone *, vlan_cloner);
#define	V_vlan_cloner	VNET(vlan_cloner)
#endif
222
223#ifndef VLAN_ARRAY
/* Fold the VID 'n' onto itself and mask it down to a bucket index. */
#define HASH(n, m)	(((n) ^ ((n) >> 4) ^ ((n) >> 8)) & (m))
225
226static void
227vlan_inithash(struct ifvlantrunk *trunk)
228{
229	int i, n;
230
231	/*
232	 * The trunk must not be locked here since we call malloc(M_WAITOK).
233	 * It is OK in case this function is called before the trunk struct
234	 * gets hooked up and becomes visible from other threads.
235	 */
236
237	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
238	    ("%s: hash already initialized", __func__));
239
240	trunk->hwidth = VLAN_DEF_HWIDTH;
241	n = 1 << trunk->hwidth;
242	trunk->hmask = n - 1;
243	trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
244	for (i = 0; i < n; i++)
245		LIST_INIT(&trunk->hash[i]);
246}
247
248static void
249vlan_freehash(struct ifvlantrunk *trunk)
250{
251#ifdef INVARIANTS
252	int i;
253
254	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
255	for (i = 0; i < (1 << trunk->hwidth); i++)
256		KASSERT(LIST_EMPTY(&trunk->hash[i]),
257		    ("%s: hash table not empty", __func__));
258#endif
259	free(trunk->hash, M_VLAN);
260	trunk->hash = NULL;
261	trunk->hwidth = trunk->hmask = 0;
262}
263
264static int
265vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
266{
267	int i, b;
268	struct ifvlan *ifv2;
269
270	TRUNK_LOCK_ASSERT(trunk);
271	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
272
273	b = 1 << trunk->hwidth;
274	i = HASH(ifv->ifv_vid, trunk->hmask);
275	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
276		if (ifv->ifv_vid == ifv2->ifv_vid)
277			return (EEXIST);
278
279	/*
280	 * Grow the hash when the number of vlans exceeds half of the number of
281	 * hash buckets squared. This will make the average linked-list length
282	 * buckets/2.
283	 */
284	if (trunk->refcnt > (b * b) / 2) {
285		vlan_growhash(trunk, 1);
286		i = HASH(ifv->ifv_vid, trunk->hmask);
287	}
288	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
289	trunk->refcnt++;
290
291	return (0);
292}
293
294static int
295vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
296{
297	int i, b;
298	struct ifvlan *ifv2;
299
300	TRUNK_LOCK_ASSERT(trunk);
301	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
302
303	b = 1 << trunk->hwidth;
304	i = HASH(ifv->ifv_vid, trunk->hmask);
305	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
306		if (ifv2 == ifv) {
307			trunk->refcnt--;
308			LIST_REMOVE(ifv2, ifv_list);
309			if (trunk->refcnt < (b * b) / 2)
310				vlan_growhash(trunk, -1);
311			return (0);
312		}
313
314	panic("%s: vlan not found\n", __func__);
315	return (ENOENT); /*NOTREACHED*/
316}
317
318/*
319 * Grow the hash larger or smaller if memory permits.
320 */
321static void
322vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
323{
324	struct ifvlan *ifv;
325	struct ifvlanhead *hash2;
326	int hwidth2, i, j, n, n2;
327
328	TRUNK_LOCK_ASSERT(trunk);
329	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
330
331	if (howmuch == 0) {
332		/* Harmless yet obvious coding error */
333		printf("%s: howmuch is 0\n", __func__);
334		return;
335	}
336
337	hwidth2 = trunk->hwidth + howmuch;
338	n = 1 << trunk->hwidth;
339	n2 = 1 << hwidth2;
340	/* Do not shrink the table below the default */
341	if (hwidth2 < VLAN_DEF_HWIDTH)
342		return;
343
344	/* M_NOWAIT because we're called with trunk mutex held */
345	hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
346	if (hash2 == NULL) {
347		printf("%s: out of memory -- hash size not changed\n",
348		    __func__);
349		return;		/* We can live with the old hash table */
350	}
351	for (j = 0; j < n2; j++)
352		LIST_INIT(&hash2[j]);
353	for (i = 0; i < n; i++)
354		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
355			LIST_REMOVE(ifv, ifv_list);
356			j = HASH(ifv->ifv_vid, n2 - 1);
357			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
358		}
359	free(trunk->hash, M_VLAN);
360	trunk->hash = hash2;
361	trunk->hwidth = hwidth2;
362	trunk->hmask = n2 - 1;
363
364	if (bootverbose)
365		if_printf(trunk->parent,
366		    "VLAN hash table resized from %d to %d buckets\n", n, n2);
367}
368
369static __inline struct ifvlan *
370vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
371{
372	struct ifvlan *ifv;
373
374	TRUNK_LOCK_RASSERT(trunk);
375
376	LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
377		if (ifv->ifv_vid == vid)
378			return (ifv);
379	return (NULL);
380}
381
#if 0
/* Debugging aid: print every bucket with the names of the vlans in it. */
static void
vlan_dumphash(struct ifvlantrunk *trunk)
{
	struct ifvlan *cur;
	int bucket, nbuckets;

	nbuckets = 1 << trunk->hwidth;
	for (bucket = 0; bucket < nbuckets; bucket++) {
		printf("%d: ", bucket);
		LIST_FOREACH(cur, &trunk->hash[bucket], ifv_list) {
			printf("%s ", cur->ifv_ifp->if_xname);
		}
		printf("\n");
	}
}
#endif /* 0 */
398#else
399
400static __inline struct ifvlan *
401vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
402{
403
404	return trunk->vlans[vid];
405}
406
407static __inline int
408vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
409{
410
411	if (trunk->vlans[ifv->ifv_vid] != NULL)
412		return EEXIST;
413	trunk->vlans[ifv->ifv_vid] = ifv;
414	trunk->refcnt++;
415
416	return (0);
417}
418
419static __inline int
420vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
421{
422
423	trunk->vlans[ifv->ifv_vid] = NULL;
424	trunk->refcnt--;
425
426	return (0);
427}
428
429static __inline void
430vlan_freehash(struct ifvlantrunk *trunk)
431{
432}
433
434static __inline void
435vlan_inithash(struct ifvlantrunk *trunk)
436{
437}
438
439#endif /* !VLAN_ARRAY */
440
441static void
442trunk_destroy(struct ifvlantrunk *trunk)
443{
444	VLAN_LOCK_ASSERT();
445
446	TRUNK_LOCK(trunk);
447	vlan_freehash(trunk);
448	trunk->parent->if_vlantrunk = NULL;
449	TRUNK_UNLOCK(trunk);
450	TRUNK_LOCK_DESTROY(trunk);
451	free(trunk, M_VLAN);
452}
453
454/*
455 * Program our multicast filter. What we're actually doing is
456 * programming the multicast filter of the parent. This has the
457 * side effect of causing the parent interface to receive multicast
458 * traffic that it doesn't really want, which ends up being discarded
459 * later by the upper protocol layers. Unfortunately, there's no way
460 * to avoid this: there really is only one physical interface.
461 */
462static int
463vlan_setmulti(struct ifnet *ifp)
464{
465	struct ifnet		*ifp_p;
466	struct ifmultiaddr	*ifma;
467	struct ifvlan		*sc;
468	struct vlan_mc_entry	*mc;
469	int			error;
470
471	/* Find the parent. */
472	sc = ifp->if_softc;
473	TRUNK_LOCK_ASSERT(TRUNK(sc));
474	ifp_p = PARENT(sc);
475
476	CURVNET_SET_QUIET(ifp_p->if_vnet);
477
478	/* First, remove any existing filter entries. */
479	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
480		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
481		(void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
482		free(mc, M_VLAN);
483	}
484
485	/* Now program new ones. */
486	IF_ADDR_WLOCK(ifp);
487	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
488		if (ifma->ifma_addr->sa_family != AF_LINK)
489			continue;
490		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
491		if (mc == NULL) {
492			IF_ADDR_WUNLOCK(ifp);
493			return (ENOMEM);
494		}
495		bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
496		mc->mc_addr.sdl_index = ifp_p->if_index;
497		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
498	}
499	IF_ADDR_WUNLOCK(ifp);
500	SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
501		error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
502		    NULL);
503		if (error)
504			return (error);
505	}
506
507	CURVNET_RESTORE();
508	return (0);
509}
510
511/*
512 * A handler for parent interface link layer address changes.
513 * If the parent interface link layer address is changed we
514 * should also change it on all children vlans.
515 */
516static void
517vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
518{
519	struct ifvlan *ifv;
520#ifndef VLAN_ARRAY
521	struct ifvlan *next;
522#endif
523	int i;
524
525	/*
526	 * Check if it's a trunk interface first of all
527	 * to avoid needless locking.
528	 */
529	if (ifp->if_vlantrunk == NULL)
530		return;
531
532	VLAN_LOCK();
533	/*
534	 * OK, it's a trunk.  Loop over and change all vlan's lladdrs on it.
535	 */
536#ifdef VLAN_ARRAY
537	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
538		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
539#else /* VLAN_ARRAY */
540	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
541		LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
542#endif /* VLAN_ARRAY */
543			VLAN_UNLOCK();
544			if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp),
545			    ifp->if_addrlen);
546			VLAN_LOCK();
547		}
548	VLAN_UNLOCK();
549
550}
551
552/*
553 * A handler for network interface departure events.
554 * Track departure of trunks here so that we don't access invalid
555 * pointers or whatever if a trunk is ripped from under us, e.g.,
556 * by ejecting its hot-plug card.  However, if an ifnet is simply
557 * being renamed, then there's no need to tear down the state.
558 */
559static void
560vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
561{
562	struct ifvlan *ifv;
563	int i;
564
565	/*
566	 * Check if it's a trunk interface first of all
567	 * to avoid needless locking.
568	 */
569	if (ifp->if_vlantrunk == NULL)
570		return;
571
572	/* If the ifnet is just being renamed, don't do anything. */
573	if (ifp->if_flags & IFF_RENAMING)
574		return;
575
576	VLAN_LOCK();
577	/*
578	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
579	 * Check trunk pointer after each vlan_unconfig() as it will
580	 * free it and set to NULL after the last vlan was detached.
581	 */
582#ifdef VLAN_ARRAY
583	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
584		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
585			vlan_unconfig_locked(ifv->ifv_ifp, 1);
586			if (ifp->if_vlantrunk == NULL)
587				break;
588		}
589#else /* VLAN_ARRAY */
590restart:
591	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
592		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
593			vlan_unconfig_locked(ifv->ifv_ifp, 1);
594			if (ifp->if_vlantrunk)
595				goto restart;	/* trunk->hwidth can change */
596			else
597				break;
598		}
599#endif /* VLAN_ARRAY */
600	/* Trunk should have been destroyed in vlan_unconfig(). */
601	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
602	VLAN_UNLOCK();
603}
604
605/*
606 * Return the trunk device for a virtual interface.
607 */
608static struct ifnet  *
609vlan_trunkdev(struct ifnet *ifp)
610{
611	struct ifvlan *ifv;
612
613	if (ifp->if_type != IFT_L2VLAN)
614		return (NULL);
615	ifv = ifp->if_softc;
616	ifp = NULL;
617	VLAN_LOCK();
618	if (ifv->ifv_trunk)
619		ifp = PARENT(ifv);
620	VLAN_UNLOCK();
621	return (ifp);
622}
623
624/*
625 * Return the 12-bit VLAN VID for this interface, for use by external
626 * components such as Infiniband.
627 *
628 * XXXRW: Note that the function name here is historical; it should be named
629 * vlan_vid().
630 */
631static int
632vlan_tag(struct ifnet *ifp, uint16_t *vidp)
633{
634	struct ifvlan *ifv;
635
636	if (ifp->if_type != IFT_L2VLAN)
637		return (EINVAL);
638	ifv = ifp->if_softc;
639	*vidp = ifv->ifv_vid;
640	return (0);
641}
642
643/*
644 * Return a driver specific cookie for this interface.  Synchronization
645 * with setcookie must be provided by the driver.
646 */
647static void *
648vlan_cookie(struct ifnet *ifp)
649{
650	struct ifvlan *ifv;
651
652	if (ifp->if_type != IFT_L2VLAN)
653		return (NULL);
654	ifv = ifp->if_softc;
655	return (ifv->ifv_cookie);
656}
657
658/*
659 * Store a cookie in our softc that drivers can use to store driver
660 * private per-instance data in.
661 */
662static int
663vlan_setcookie(struct ifnet *ifp, void *cookie)
664{
665	struct ifvlan *ifv;
666
667	if (ifp->if_type != IFT_L2VLAN)
668		return (EINVAL);
669	ifv = ifp->if_softc;
670	ifv->ifv_cookie = cookie;
671	return (0);
672}
673
674/*
675 * Return the vlan device present at the specific VID.
676 */
677static struct ifnet *
678vlan_devat(struct ifnet *ifp, uint16_t vid)
679{
680	struct ifvlantrunk *trunk;
681	struct ifvlan *ifv;
682
683	trunk = ifp->if_vlantrunk;
684	if (trunk == NULL)
685		return (NULL);
686	ifp = NULL;
687	TRUNK_RLOCK(trunk);
688	ifv = vlan_gethash(trunk, vid);
689	if (ifv)
690		ifp = ifv->ifv_ifp;
691	TRUNK_RUNLOCK(trunk);
692	return (ifp);
693}
694
/*
 * VLAN support can be loaded as a module.  The only place in the
 * system that is intimately aware of this is ether_input.  We hook
 * into that code through vlan_input_p, which is defined there and
 * set here.  No one else in the system should be aware of this, so
 * an explicit reference is used here.
 */
extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);

/* For if_link_state_change() eyes only... */
extern	void (*vlan_link_state_p)(struct ifnet *);
706
707static int
708vlan_modevent(module_t mod, int type, void *data)
709{
710
711	switch (type) {
712	case MOD_LOAD:
713		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
714		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
715		if (ifdetach_tag == NULL)
716			return (ENOMEM);
717		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
718		    vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
719		if (iflladdr_tag == NULL)
720			return (ENOMEM);
721		VLAN_LOCK_INIT();
722		vlan_input_p = vlan_input;
723		vlan_link_state_p = vlan_link_state;
724		vlan_trunk_cap_p = vlan_trunk_capabilities;
725		vlan_trunkdev_p = vlan_trunkdev;
726		vlan_cookie_p = vlan_cookie;
727		vlan_setcookie_p = vlan_setcookie;
728		vlan_tag_p = vlan_tag;
729		vlan_devat_p = vlan_devat;
730#ifndef VIMAGE
731		vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
732		    vlan_clone_create, vlan_clone_destroy);
733#endif
734		if (bootverbose)
735			printf("vlan: initialized, using "
736#ifdef VLAN_ARRAY
737			       "full-size arrays"
738#else
739			       "hash tables with chaining"
740#endif
741
742			       "\n");
743		break;
744	case MOD_UNLOAD:
745#ifndef VIMAGE
746		if_clone_detach(vlan_cloner);
747#endif
748		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
749		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
750		vlan_input_p = NULL;
751		vlan_link_state_p = NULL;
752		vlan_trunk_cap_p = NULL;
753		vlan_trunkdev_p = NULL;
754		vlan_tag_p = NULL;
755		vlan_cookie_p = NULL;
756		vlan_setcookie_p = NULL;
757		vlan_devat_p = NULL;
758		VLAN_LOCK_DESTROY();
759		if (bootverbose)
760			printf("vlan: unloaded\n");
761		break;
762	default:
763		return (EOPNOTSUPP);
764	}
765	return (0);
766}
767
768static moduledata_t vlan_mod = {
769	"if_vlan",
770	vlan_modevent,
771	0
772};
773
774DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
775MODULE_VERSION(if_vlan, 3);
776
#ifdef VIMAGE
/*
 * Per-vnet setup: attach a vlan cloner and remember it both in the
 * global handle and in the vnet's own copy.
 */
static void
vnet_vlan_init(const void *unused __unused)
{

	vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
	    vlan_clone_create, vlan_clone_destroy);
	V_vlan_cloner = vlan_cloner;
}
VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
    vnet_vlan_init, NULL);

/* Per-vnet teardown: detach the cloner recorded for this vnet. */
static void
vnet_vlan_uninit(const void *unused __unused)
{

	if_clone_detach(V_vlan_cloner);
}
VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
    vnet_vlan_uninit, NULL);
#endif
798
799static struct ifnet *
800vlan_clone_match_ethervid(struct if_clone *ifc, const char *name, int *vidp)
801{
802	const char *cp;
803	struct ifnet *ifp;
804	int vid;
805
806	/* Check for <etherif>.<vlan> style interface names. */
807	IFNET_RLOCK_NOSLEEP();
808	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
809		/*
810		 * We can handle non-ethernet hardware types as long as
811		 * they handle the tagging and headers themselves.
812		 */
813		if (ifp->if_type != IFT_ETHER &&
814		    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
815			continue;
816		if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
817			continue;
818		cp = name + strlen(ifp->if_xname);
819		if (*cp++ != '.')
820			continue;
821		if (*cp == '\0')
822			continue;
823		vid = 0;
824		for(; *cp >= '0' && *cp <= '9'; cp++)
825			vid = (vid * 10) + (*cp - '0');
826		if (*cp != '\0')
827			continue;
828		if (vidp != NULL)
829			*vidp = vid;
830		break;
831	}
832	IFNET_RUNLOCK_NOSLEEP();
833
834	return (ifp);
835}
836
837static int
838vlan_clone_match(struct if_clone *ifc, const char *name)
839{
840	const char *cp;
841
842	if (vlan_clone_match_ethervid(ifc, name, NULL) != NULL)
843		return (1);
844
845	if (strncmp(vlanname, name, strlen(vlanname)) != 0)
846		return (0);
847	for (cp = name + 4; *cp != '\0'; cp++) {
848		if (*cp < '0' || *cp > '9')
849			return (0);
850	}
851
852	return (1);
853}
854
855static int
856vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
857{
858	char *dp;
859	int wildcard;
860	int unit;
861	int error;
862	int vid;
863	int ethertag;
864	struct ifvlan *ifv;
865	struct ifnet *ifp;
866	struct ifnet *p;
867	struct ifaddr *ifa;
868	struct sockaddr_dl *sdl;
869	struct vlanreq vlr;
870	static const u_char eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
871
872	/*
873	 * There are 3 (ugh) ways to specify the cloned device:
874	 * o pass a parameter block with the clone request.
875	 * o specify parameters in the text of the clone device name
876	 * o specify no parameters and get an unattached device that
877	 *   must be configured separately.
878	 * The first technique is preferred; the latter two are
879	 * supported for backwards compatibilty.
880	 *
881	 * XXXRW: Note historic use of the word "tag" here.  New ioctls may be
882	 * called for.
883	 */
884	if (params) {
885		error = copyin(params, &vlr, sizeof(vlr));
886		if (error)
887			return error;
888		p = ifunit(vlr.vlr_parent);
889		if (p == NULL)
890			return ENXIO;
891		/*
892		 * Don't let the caller set up a VLAN VID with
893		 * anything except VLID bits.
894		 */
895		if (vlr.vlr_tag & ~EVL_VLID_MASK)
896			return (EINVAL);
897		error = ifc_name2unit(name, &unit);
898		if (error != 0)
899			return (error);
900
901		ethertag = 1;
902		vid = vlr.vlr_tag;
903		wildcard = (unit < 0);
904	} else if ((p = vlan_clone_match_ethervid(ifc, name, &vid)) != NULL) {
905		ethertag = 1;
906		unit = -1;
907		wildcard = 0;
908
909		/*
910		 * Don't let the caller set up a VLAN VID with
911		 * anything except VLID bits.
912		 */
913		if (vid & ~EVL_VLID_MASK)
914			return (EINVAL);
915	} else {
916		ethertag = 0;
917
918		error = ifc_name2unit(name, &unit);
919		if (error != 0)
920			return (error);
921
922		wildcard = (unit < 0);
923	}
924
925	error = ifc_alloc_unit(ifc, &unit);
926	if (error != 0)
927		return (error);
928
929	/* In the wildcard case, we need to update the name. */
930	if (wildcard) {
931		for (dp = name; *dp != '\0'; dp++);
932		if (snprintf(dp, len - (dp-name), "%d", unit) >
933		    len - (dp-name) - 1) {
934			panic("%s: interface name too long", __func__);
935		}
936	}
937
938	ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
939	ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
940	if (ifp == NULL) {
941		ifc_free_unit(ifc, unit);
942		free(ifv, M_VLAN);
943		return (ENOSPC);
944	}
945	SLIST_INIT(&ifv->vlan_mc_listhead);
946
947	ifp->if_softc = ifv;
948	/*
949	 * Set the name manually rather than using if_initname because
950	 * we don't conform to the default naming convention for interfaces.
951	 */
952	strlcpy(ifp->if_xname, name, IFNAMSIZ);
953	ifp->if_dname = vlanname;
954	ifp->if_dunit = unit;
955	/* NB: flags are not set here */
956	ifp->if_linkmib = &ifv->ifv_mib;
957	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
958	/* NB: mtu is not set here */
959
960	ifp->if_init = vlan_init;
961	ifp->if_transmit = vlan_transmit;
962	ifp->if_qflush = vlan_qflush;
963	ifp->if_ioctl = vlan_ioctl;
964	ifp->if_flags = VLAN_IFFLAGS;
965	ether_ifattach(ifp, eaddr);
966	/* Now undo some of the damage... */
967	ifp->if_baudrate = 0;
968	ifp->if_type = IFT_L2VLAN;
969	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
970	ifa = ifp->if_addr;
971	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
972	sdl->sdl_type = IFT_L2VLAN;
973
974	if (ethertag) {
975		error = vlan_config(ifv, p, vid);
976		if (error != 0) {
977			/*
978			 * Since we've partially failed, we need to back
979			 * out all the way, otherwise userland could get
980			 * confused.  Thus, we destroy the interface.
981			 */
982			ether_ifdetach(ifp);
983			vlan_unconfig(ifp);
984			if_free(ifp);
985			ifc_free_unit(ifc, unit);
986			free(ifv, M_VLAN);
987
988			return (error);
989		}
990
991		/* Update flags on the parent, if necessary. */
992		vlan_setflags(ifp, 1);
993	}
994
995	return (0);
996}
997
998static int
999vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
1000{
1001	struct ifvlan *ifv = ifp->if_softc;
1002	int unit = ifp->if_dunit;
1003
1004	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
1005	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
1006	if_free(ifp);
1007	free(ifv, M_VLAN);
1008	ifc_free_unit(ifc, unit);
1009
1010	return (0);
1011}
1012
1013/*
1014 * The ifp->if_init entry point for vlan(4) is a no-op.
1015 */
1016static void
1017vlan_init(void *foo __unused)
1018{
1019}
1020
1021/*
1022 * The if_transmit method for vlan(4) interface.
1023 */
1024static int
1025vlan_transmit(struct ifnet *ifp, struct mbuf *m)
1026{
1027	struct ifvlan *ifv;
1028	struct ifnet *p;
1029	int error, len, mcast;
1030
1031	ifv = ifp->if_softc;
1032	p = PARENT(ifv);
1033	len = m->m_pkthdr.len;
1034	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
1035
1036	BPF_MTAP(ifp, m);
1037
1038	/*
1039	 * Do not run parent's if_transmit() if the parent is not up,
1040	 * or parent's driver will cause a system crash.
1041	 */
1042	if (!UP_AND_RUNNING(p)) {
1043		m_freem(m);
1044		ifp->if_oerrors++;
1045		return (ENETDOWN);
1046	}
1047
1048	/*
1049	 * Pad the frame to the minimum size allowed if told to.
1050	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
1051	 * paragraph C.4.4.3.b.  It can help to work around buggy
1052	 * bridges that violate paragraph C.4.4.3.a from the same
1053	 * document, i.e., fail to pad short frames after untagging.
1054	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
1055	 * untagging it will produce a 62-byte frame, which is a runt
1056	 * and requires padding.  There are VLAN-enabled network
1057	 * devices that just discard such runts instead or mishandle
1058	 * them somehow.
1059	 */
1060	if (soft_pad && p->if_type == IFT_ETHER) {
1061		static char pad[8];	/* just zeros */
1062		int n;
1063
1064		for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
1065		     n > 0; n -= sizeof(pad))
1066			if (!m_append(m, min(n, sizeof(pad)), pad))
1067				break;
1068
1069		if (n > 0) {
1070			if_printf(ifp, "cannot pad short frame\n");
1071			ifp->if_oerrors++;
1072			m_freem(m);
1073			return (0);
1074		}
1075	}
1076
1077	/*
1078	 * If underlying interface can do VLAN tag insertion itself,
1079	 * just pass the packet along. However, we need some way to
1080	 * tell the interface where the packet came from so that it
1081	 * knows how to find the VLAN tag to use, so we attach a
1082	 * packet tag that holds it.
1083	 */
1084	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1085		m->m_pkthdr.ether_vtag = ifv->ifv_vid;
1086		m->m_flags |= M_VLANTAG;
1087	} else {
1088		m = ether_vlanencap(m, ifv->ifv_vid);
1089		if (m == NULL) {
1090			if_printf(ifp, "unable to prepend VLAN header\n");
1091			ifp->if_oerrors++;
1092			return (0);
1093		}
1094	}
1095
1096	/*
1097	 * Send it, precisely as ether_output() would have.
1098	 */
1099	error = (p->if_transmit)(p, m);
1100	if (!error) {
1101		ifp->if_opackets++;
1102		ifp->if_omcasts += mcast;
1103		ifp->if_obytes += len;
1104	} else
1105		ifp->if_oerrors++;
1106	return (error);
1107}
1108
1109/*
1110 * The ifp->if_qflush entry point for vlan(4) is a no-op.
1111 */
1112static void
1113vlan_qflush(struct ifnet *ifp __unused)
1114{
1115}
1116
1117static void
1118vlan_input(struct ifnet *ifp, struct mbuf *m)
1119{
1120	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1121	struct ifvlan *ifv;
1122	uint16_t vid;
1123
1124	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
1125
1126	if (m->m_flags & M_VLANTAG) {
1127		/*
1128		 * Packet is tagged, but m contains a normal
1129		 * Ethernet frame; the tag is stored out-of-band.
1130		 */
1131		vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
1132		m->m_flags &= ~M_VLANTAG;
1133	} else {
1134		struct ether_vlan_header *evl;
1135
1136		/*
1137		 * Packet is tagged in-band as specified by 802.1q.
1138		 */
1139		switch (ifp->if_type) {
1140		case IFT_ETHER:
1141			if (m->m_len < sizeof(*evl) &&
1142			    (m = m_pullup(m, sizeof(*evl))) == NULL) {
1143				if_printf(ifp, "cannot pullup VLAN header\n");
1144				return;
1145			}
1146			evl = mtod(m, struct ether_vlan_header *);
1147			vid = EVL_VLANOFTAG(ntohs(evl->evl_tag));
1148
1149			/*
1150			 * Remove the 802.1q header by copying the Ethernet
1151			 * addresses over it and adjusting the beginning of
1152			 * the data in the mbuf.  The encapsulated Ethernet
1153			 * type field is already in place.
1154			 */
1155			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
1156			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
1157			m_adj(m, ETHER_VLAN_ENCAP_LEN);
1158			break;
1159
1160		default:
1161#ifdef INVARIANTS
1162			panic("%s: %s has unsupported if_type %u",
1163			      __func__, ifp->if_xname, ifp->if_type);
1164#endif
1165			m_freem(m);
1166			ifp->if_noproto++;
1167			return;
1168		}
1169	}
1170
1171	TRUNK_RLOCK(trunk);
1172	ifv = vlan_gethash(trunk, vid);
1173	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
1174		TRUNK_RUNLOCK(trunk);
1175		m_freem(m);
1176		ifp->if_noproto++;
1177		return;
1178	}
1179	TRUNK_RUNLOCK(trunk);
1180
1181	m->m_pkthdr.rcvif = ifv->ifv_ifp;
1182	ifv->ifv_ifp->if_ipackets++;
1183
1184	/* Pass it back through the parent's input routine. */
1185	(*ifp->if_input)(ifv->ifv_ifp, m);
1186}
1187
1188static int
1189vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
1190{
1191	struct ifvlantrunk *trunk;
1192	struct ifnet *ifp;
1193	int error = 0;
1194
1195	/* VID numbers 0x0 and 0xFFF are reserved */
1196	if (vid == 0 || vid == 0xFFF)
1197		return (EINVAL);
1198	if (p->if_type != IFT_ETHER &&
1199	    (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
1200		return (EPROTONOSUPPORT);
1201	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
1202		return (EPROTONOSUPPORT);
1203	if (ifv->ifv_trunk)
1204		return (EBUSY);
1205
1206	if (p->if_vlantrunk == NULL) {
1207		trunk = malloc(sizeof(struct ifvlantrunk),
1208		    M_VLAN, M_WAITOK | M_ZERO);
1209		vlan_inithash(trunk);
1210		VLAN_LOCK();
1211		if (p->if_vlantrunk != NULL) {
1212			/* A race that that is very unlikely to be hit. */
1213			vlan_freehash(trunk);
1214			free(trunk, M_VLAN);
1215			goto exists;
1216		}
1217		TRUNK_LOCK_INIT(trunk);
1218		TRUNK_LOCK(trunk);
1219		p->if_vlantrunk = trunk;
1220		trunk->parent = p;
1221	} else {
1222		VLAN_LOCK();
1223exists:
1224		trunk = p->if_vlantrunk;
1225		TRUNK_LOCK(trunk);
1226	}
1227
1228	ifv->ifv_vid = vid;	/* must set this before vlan_inshash() */
1229	error = vlan_inshash(trunk, ifv);
1230	if (error)
1231		goto done;
1232	ifv->ifv_proto = ETHERTYPE_VLAN;
1233	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
1234	ifv->ifv_mintu = ETHERMIN;
1235	ifv->ifv_pflags = 0;
1236
1237	/*
1238	 * If the parent supports the VLAN_MTU capability,
1239	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
1240	 * use it.
1241	 */
1242	if (p->if_capenable & IFCAP_VLAN_MTU) {
1243		/*
1244		 * No need to fudge the MTU since the parent can
1245		 * handle extended frames.
1246		 */
1247		ifv->ifv_mtufudge = 0;
1248	} else {
1249		/*
1250		 * Fudge the MTU by the encapsulation size.  This
1251		 * makes us incompatible with strictly compliant
1252		 * 802.1Q implementations, but allows us to use
1253		 * the feature with other NetBSD implementations,
1254		 * which might still be useful.
1255		 */
1256		ifv->ifv_mtufudge = ifv->ifv_encaplen;
1257	}
1258
1259	ifv->ifv_trunk = trunk;
1260	ifp = ifv->ifv_ifp;
1261	/*
1262	 * Initialize fields from our parent.  This duplicates some
1263	 * work with ether_ifattach() but allows for non-ethernet
1264	 * interfaces to also work.
1265	 */
1266	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
1267	ifp->if_baudrate = p->if_baudrate;
1268	ifp->if_output = p->if_output;
1269	ifp->if_input = p->if_input;
1270	ifp->if_resolvemulti = p->if_resolvemulti;
1271	ifp->if_addrlen = p->if_addrlen;
1272	ifp->if_broadcastaddr = p->if_broadcastaddr;
1273
1274	/*
1275	 * Copy only a selected subset of flags from the parent.
1276	 * Other flags are none of our business.
1277	 */
1278#define VLAN_COPY_FLAGS (IFF_SIMPLEX)
1279	ifp->if_flags &= ~VLAN_COPY_FLAGS;
1280	ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
1281#undef VLAN_COPY_FLAGS
1282
1283	ifp->if_link_state = p->if_link_state;
1284
1285	vlan_capabilities(ifv);
1286
1287	/*
1288	 * Set up our interface address to reflect the underlying
1289	 * physical interface's.
1290	 */
1291	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
1292	((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
1293	    p->if_addrlen;
1294
1295	/*
1296	 * Configure multicast addresses that may already be
1297	 * joined on the vlan device.
1298	 */
1299	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
1300
1301	/* We are ready for operation now. */
1302	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1303done:
1304	TRUNK_UNLOCK(trunk);
1305	if (error == 0)
1306		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
1307	VLAN_UNLOCK();
1308
1309	return (error);
1310}
1311
/*
 * Unconfigure a vlan interface: run vlan_unconfig_locked() under the
 * global vlan lock, with "departing" set to 0.
 */
static void
vlan_unconfig(struct ifnet *ifp)
{
	const int departing = 0;

	VLAN_LOCK();
	vlan_unconfig_locked(ifp, departing);
	VLAN_UNLOCK();
}
1320
1321static void
1322vlan_unconfig_locked(struct ifnet *ifp, int departing)
1323{
1324	struct ifvlantrunk *trunk;
1325	struct vlan_mc_entry *mc;
1326	struct ifvlan *ifv;
1327	struct ifnet  *parent;
1328	int error;
1329
1330	VLAN_LOCK_ASSERT();
1331
1332	ifv = ifp->if_softc;
1333	trunk = ifv->ifv_trunk;
1334	parent = NULL;
1335
1336	if (trunk != NULL) {
1337
1338		TRUNK_LOCK(trunk);
1339		parent = trunk->parent;
1340
1341		/*
1342		 * Since the interface is being unconfigured, we need to
1343		 * empty the list of multicast groups that we may have joined
1344		 * while we were alive from the parent's list.
1345		 */
1346		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
1347			/*
1348			 * If the parent interface is being detached,
1349			 * all its multicast addresses have already
1350			 * been removed.  Warn about errors if
1351			 * if_delmulti() does fail, but don't abort as
1352			 * all callers expect vlan destruction to
1353			 * succeed.
1354			 */
1355			if (!departing) {
1356				error = if_delmulti(parent,
1357				    (struct sockaddr *)&mc->mc_addr);
1358				if (error)
1359					if_printf(ifp,
1360		    "Failed to delete multicast address from parent: %d\n",
1361					    error);
1362			}
1363			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
1364			free(mc, M_VLAN);
1365		}
1366
1367		vlan_setflags(ifp, 0); /* clear special flags on parent */
1368		vlan_remhash(trunk, ifv);
1369		ifv->ifv_trunk = NULL;
1370
1371		/*
1372		 * Check if we were the last.
1373		 */
1374		if (trunk->refcnt == 0) {
1375			parent->if_vlantrunk = NULL;
1376			/*
1377			 * XXXGL: If some ithread has already entered
1378			 * vlan_input() and is now blocked on the trunk
1379			 * lock, then it should preempt us right after
1380			 * unlock and finish its work. Then we will acquire
1381			 * lock again in trunk_destroy().
1382			 */
1383			TRUNK_UNLOCK(trunk);
1384			trunk_destroy(trunk);
1385		} else
1386			TRUNK_UNLOCK(trunk);
1387	}
1388
1389	/* Disconnect from parent. */
1390	if (ifv->ifv_pflags)
1391		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
1392	ifp->if_mtu = ETHERMTU;
1393	ifp->if_link_state = LINK_STATE_UNKNOWN;
1394	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1395
1396	/*
1397	 * Only dispatch an event if vlan was
1398	 * attached, otherwise there is nothing
1399	 * to cleanup anyway.
1400	 */
1401	if (parent != NULL)
1402		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
1403}
1404
1405/* Handle a reference counted flag that should be set on the parent as well */
1406static int
1407vlan_setflag(struct ifnet *ifp, int flag, int status,
1408	     int (*func)(struct ifnet *, int))
1409{
1410	struct ifvlan *ifv;
1411	int error;
1412
1413	/* XXX VLAN_LOCK_ASSERT(); */
1414
1415	ifv = ifp->if_softc;
1416	status = status ? (ifp->if_flags & flag) : 0;
1417	/* Now "status" contains the flag value or 0 */
1418
1419	/*
1420	 * See if recorded parent's status is different from what
1421	 * we want it to be.  If it is, flip it.  We record parent's
1422	 * status in ifv_pflags so that we won't clear parent's flag
1423	 * we haven't set.  In fact, we don't clear or set parent's
1424	 * flags directly, but get or release references to them.
1425	 * That's why we can be sure that recorded flags still are
1426	 * in accord with actual parent's flags.
1427	 */
1428	if (status != (ifv->ifv_pflags & flag)) {
1429		error = (*func)(PARENT(ifv), status);
1430		if (error)
1431			return (error);
1432		ifv->ifv_pflags &= ~flag;
1433		ifv->ifv_pflags |= status;
1434	}
1435	return (0);
1436}
1437
1438/*
1439 * Handle IFF_* flags that require certain changes on the parent:
1440 * if "status" is true, update parent's flags respective to our if_flags;
1441 * if "status" is false, forcedly clear the flags set on parent.
1442 */
1443static int
1444vlan_setflags(struct ifnet *ifp, int status)
1445{
1446	int error, i;
1447
1448	for (i = 0; vlan_pflags[i].flag; i++) {
1449		error = vlan_setflag(ifp, vlan_pflags[i].flag,
1450				     status, vlan_pflags[i].func);
1451		if (error)
1452			return (error);
1453	}
1454	return (0);
1455}
1456
1457/* Inform all vlans that their parent has changed link state */
1458static void
1459vlan_link_state(struct ifnet *ifp)
1460{
1461	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1462	struct ifvlan *ifv;
1463	int i;
1464
1465	TRUNK_LOCK(trunk);
1466#ifdef VLAN_ARRAY
1467	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1468		if (trunk->vlans[i] != NULL) {
1469			ifv = trunk->vlans[i];
1470#else
1471	for (i = 0; i < (1 << trunk->hwidth); i++)
1472		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
1473#endif
1474			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
1475			if_link_state_change(ifv->ifv_ifp,
1476			    trunk->parent->if_link_state);
1477		}
1478	TRUNK_UNLOCK(trunk);
1479}
1480
1481static void
1482vlan_capabilities(struct ifvlan *ifv)
1483{
1484	struct ifnet *p = PARENT(ifv);
1485	struct ifnet *ifp = ifv->ifv_ifp;
1486	struct ifnet_hw_tsomax hw_tsomax;
1487
1488	TRUNK_LOCK_ASSERT(TRUNK(ifv));
1489
1490	/*
1491	 * If the parent interface can do checksum offloading
1492	 * on VLANs, then propagate its hardware-assisted
1493	 * checksumming flags. Also assert that checksum
1494	 * offloading requires hardware VLAN tagging.
1495	 */
1496	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
1497		ifp->if_capabilities =
1498		    p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
1499
1500	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
1501	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1502		ifp->if_capenable =
1503		    p->if_capenable & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
1504		ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
1505		    CSUM_UDP | CSUM_SCTP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
1506		    CSUM_SCTP_IPV6);
1507	} else {
1508		ifp->if_capenable = 0;
1509		ifp->if_hwassist = 0;
1510	}
1511	/*
1512	 * If the parent interface can do TSO on VLANs then
1513	 * propagate the hardware-assisted flag. TSO on VLANs
1514	 * does not necessarily require hardware VLAN tagging.
1515	 */
1516	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
1517	if_hw_tsomax_common(p, &hw_tsomax);
1518	if_hw_tsomax_update(ifp, &hw_tsomax);
1519	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
1520		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
1521	if (p->if_capenable & IFCAP_VLAN_HWTSO) {
1522		ifp->if_capenable |= p->if_capenable & IFCAP_TSO;
1523		ifp->if_hwassist |= p->if_hwassist & CSUM_TSO;
1524	} else {
1525		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
1526		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
1527	}
1528
1529	/*
1530	 * If the parent interface can offload TCP connections over VLANs then
1531	 * propagate its TOE capability to the VLAN interface.
1532	 *
1533	 * All TOE drivers in the tree today can deal with VLANs.  If this
1534	 * changes then IFCAP_VLAN_TOE should be promoted to a full capability
1535	 * with its own bit.
1536	 */
1537#define	IFCAP_VLAN_TOE IFCAP_TOE
1538	if (p->if_capabilities & IFCAP_VLAN_TOE)
1539		ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
1540	if (p->if_capenable & IFCAP_VLAN_TOE) {
1541		TOEDEV(ifp) = TOEDEV(p);
1542		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
1543	}
1544}
1545
1546static void
1547vlan_trunk_capabilities(struct ifnet *ifp)
1548{
1549	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1550	struct ifvlan *ifv;
1551	int i;
1552
1553	TRUNK_LOCK(trunk);
1554#ifdef VLAN_ARRAY
1555	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1556		if (trunk->vlans[i] != NULL) {
1557			ifv = trunk->vlans[i];
1558#else
1559	for (i = 0; i < (1 << trunk->hwidth); i++) {
1560		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
1561#endif
1562			vlan_capabilities(ifv);
1563	}
1564	TRUNK_UNLOCK(trunk);
1565}
1566
1567static int
1568vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1569{
1570	struct ifnet *p;
1571	struct ifreq *ifr;
1572	struct ifaddr *ifa;
1573	struct ifvlan *ifv;
1574	struct ifvlantrunk *trunk;
1575	struct vlanreq vlr;
1576	int error = 0;
1577
1578	ifr = (struct ifreq *)data;
1579	ifa = (struct ifaddr *) data;
1580	ifv = ifp->if_softc;
1581
1582	switch (cmd) {
1583	case SIOCSIFADDR:
1584		ifp->if_flags |= IFF_UP;
1585#ifdef INET
1586		if (ifa->ifa_addr->sa_family == AF_INET)
1587			arp_ifinit(ifp, ifa);
1588#endif
1589		break;
1590	case SIOCGIFADDR:
1591                {
1592			struct sockaddr *sa;
1593
1594			sa = (struct sockaddr *)&ifr->ifr_data;
1595			bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
1596                }
1597		break;
1598	case SIOCGIFMEDIA:
1599		VLAN_LOCK();
1600		if (TRUNK(ifv) != NULL) {
1601			p = PARENT(ifv);
1602			VLAN_UNLOCK();
1603			error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data);
1604			/* Limit the result to the parent's current config. */
1605			if (error == 0) {
1606				struct ifmediareq *ifmr;
1607
1608				ifmr = (struct ifmediareq *)data;
1609				if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
1610					ifmr->ifm_count = 1;
1611					error = copyout(&ifmr->ifm_current,
1612						ifmr->ifm_ulist,
1613						sizeof(int));
1614				}
1615			}
1616		} else {
1617			VLAN_UNLOCK();
1618			error = EINVAL;
1619		}
1620		break;
1621
1622	case SIOCSIFMEDIA:
1623		error = EINVAL;
1624		break;
1625
1626	case SIOCSIFMTU:
1627		/*
1628		 * Set the interface MTU.
1629		 */
1630		VLAN_LOCK();
1631		if (TRUNK(ifv) != NULL) {
1632			if (ifr->ifr_mtu >
1633			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
1634			    ifr->ifr_mtu <
1635			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
1636				error = EINVAL;
1637			else
1638				ifp->if_mtu = ifr->ifr_mtu;
1639		} else
1640			error = EINVAL;
1641		VLAN_UNLOCK();
1642		break;
1643
1644	case SIOCSETVLAN:
1645#ifdef VIMAGE
1646		/*
1647		 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
1648		 * interface to be delegated to a jail without allowing the
1649		 * jail to change what underlying interface/VID it is
1650		 * associated with.  We are not entirely convinced that this
1651		 * is the right way to accomplish that policy goal.
1652		 */
1653		if (ifp->if_vnet != ifp->if_home_vnet) {
1654			error = EPERM;
1655			break;
1656		}
1657#endif
1658		error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
1659		if (error)
1660			break;
1661		if (vlr.vlr_parent[0] == '\0') {
1662			vlan_unconfig(ifp);
1663			break;
1664		}
1665		p = ifunit(vlr.vlr_parent);
1666		if (p == NULL) {
1667			error = ENOENT;
1668			break;
1669		}
1670		/*
1671		 * Don't let the caller set up a VLAN VID with
1672		 * anything except VLID bits.
1673		 */
1674		if (vlr.vlr_tag & ~EVL_VLID_MASK) {
1675			error = EINVAL;
1676			break;
1677		}
1678		error = vlan_config(ifv, p, vlr.vlr_tag);
1679		if (error)
1680			break;
1681
1682		/* Update flags on the parent, if necessary. */
1683		vlan_setflags(ifp, 1);
1684		break;
1685
1686	case SIOCGETVLAN:
1687#ifdef VIMAGE
1688		if (ifp->if_vnet != ifp->if_home_vnet) {
1689			error = EPERM;
1690			break;
1691		}
1692#endif
1693		bzero(&vlr, sizeof(vlr));
1694		VLAN_LOCK();
1695		if (TRUNK(ifv) != NULL) {
1696			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
1697			    sizeof(vlr.vlr_parent));
1698			vlr.vlr_tag = ifv->ifv_vid;
1699		}
1700		VLAN_UNLOCK();
1701		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
1702		break;
1703
1704	case SIOCSIFFLAGS:
1705		/*
1706		 * We should propagate selected flags to the parent,
1707		 * e.g., promiscuous mode.
1708		 */
1709		if (TRUNK(ifv) != NULL)
1710			error = vlan_setflags(ifp, 1);
1711		break;
1712
1713	case SIOCADDMULTI:
1714	case SIOCDELMULTI:
1715		/*
1716		 * If we don't have a parent, just remember the membership for
1717		 * when we do.
1718		 */
1719		trunk = TRUNK(ifv);
1720		if (trunk != NULL) {
1721			TRUNK_LOCK(trunk);
1722			error = vlan_setmulti(ifp);
1723			TRUNK_UNLOCK(trunk);
1724		}
1725		break;
1726
1727	default:
1728		error = EINVAL;
1729		break;
1730	}
1731
1732	return (error);
1733}
1734