if_vlan.c revision 167483
1193323Sed/*-
2193323Sed * Copyright 1998 Massachusetts Institute of Technology
3193323Sed *
4193323Sed * Permission to use, copy, modify, and distribute this software and
5193323Sed * its documentation for any purpose and without fee is hereby
6193323Sed * granted, provided that both the above copyright notice and this
7193323Sed * permission notice appear in all copies, that both the above
8193323Sed * copyright notice and this permission notice appear in all
9193323Sed * supporting documentation, and that the name of M.I.T. not be used
10193323Sed * in advertising or publicity pertaining to distribution of the
11193323Sed * software without specific, written prior permission.  M.I.T. makes
12193323Sed * no representations about the suitability of this software for any
13193323Sed * purpose.  It is provided "as is" without express or implied
14193323Sed * warranty.
15193323Sed *
16193323Sed * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17193323Sed * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18193323Sed * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19193323Sed * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20193323Sed * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21193323Sed * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22193323Sed * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23193323Sed * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24193323Sed * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25193323Sed * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26193323Sed * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27193323Sed * SUCH DAMAGE.
28193323Sed *
29193323Sed * $FreeBSD: head/sys/net/if_vlan.c 167483 2007-03-12 12:27:30Z yar $
30193323Sed */
31193323Sed
32193323Sed/*
33193323Sed * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
34193323Sed * Might be extended some day to also handle IEEE 802.1p priority
35193323Sed * tagging.  This is sort of sneaky in the implementation, since
36193323Sed * we need to pretend to be enough of an Ethernet implementation
37193323Sed * to make arp work.  The way we do this is by telling everyone
38193323Sed * that we are an Ethernet, and then catch the packets that
39193323Sed * ether_output() left on our output queue when it calls
40193323Sed * if_start(), rewrite them for use by the real outgoing interface,
41193323Sed * and ask it to send them.
42193323Sed */
43193323Sed
44193323Sed#include "opt_inet.h"
45193323Sed#include "opt_vlan.h"
46193323Sed
47193323Sed#include <sys/param.h>
48193323Sed#include <sys/kernel.h>
49193323Sed#include <sys/lock.h>
50193323Sed#include <sys/malloc.h>
51193323Sed#include <sys/mbuf.h>
52193323Sed#include <sys/module.h>
53193323Sed#include <sys/rwlock.h>
54193323Sed#include <sys/queue.h>
55193323Sed#include <sys/socket.h>
56193323Sed#include <sys/sockio.h>
57193323Sed#include <sys/sysctl.h>
58193323Sed#include <sys/systm.h>
59193323Sed
60193323Sed#include <net/bpf.h>
61193323Sed#include <net/ethernet.h>
62193323Sed#include <net/if.h>
63193323Sed#include <net/if_clone.h>
64193323Sed#include <net/if_arp.h>
65193323Sed#include <net/if_dl.h>
66193323Sed#include <net/if_types.h>
67193323Sed#include <net/if_vlan_var.h>
68193323Sed
69193323Sed#ifdef INET
70193323Sed#include <netinet/in.h>
71193323Sed#include <netinet/if_ether.h>
72193323Sed#endif
73193323Sed
/*
 * Name of the cloner; also used as the short name of the M_VLAN malloc
 * type and as the name given to each trunk's rwlock.
 */
#define VLANNAME	"vlan"
/* Initial hash width (log2 of bucket count); the hash never shrinks below it. */
#define	VLAN_DEF_HWIDTH	4
/* Interface flags assigned to a newly created vlan(4) interface. */
#define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)

/* Non-zero when ifp has both IFF_UP and IFF_DRV_RUNNING set. */
#define	UP_AND_RUNNING(ifp) \
    ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)
80193323Sed
/* Head type for one hash bucket: a list of struct ifvlan. */
LIST_HEAD(ifvlanhead, ifvlan);

/*
 * State attached to a parent interface (via its if_vlantrunk pointer)
 * that carries vlans.  The vlans are found either through a static
 * array (VLAN_ARRAY) or through a resizable hash of lists.
 */
struct ifvlantrunk {
	struct	ifnet   *parent;	/* parent interface of this trunk */
	struct	rwlock	rw;		/* taken via the TRUNK_*LOCK() macros */
#ifdef VLAN_ARRAY
#define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
#else
	struct	ifvlanhead *hash;	/* dynamic hash-list table */
	uint16_t	hmask;		/* bucket mask: (1 << hwidth) - 1 */
	uint16_t	hwidth;		/* log2 of the number of buckets */
#endif
	int		refcnt;		/* vlans on this trunk; see vlan_inshash() */
};
96193323Sed
/*
 * One link-layer multicast address recorded by vlan_setmulti() so that
 * it can later be removed from the parent again.
 */
struct vlan_mc_entry {
	struct ether_addr		mc_addr;
	SLIST_ENTRY(vlan_mc_entry)	mc_entries;	/* link in vlan_mc_listhead */
};
101193323Sed
/*
 * Software context (if_softc) of one vlan(4) interface.
 */
struct	ifvlan {
	struct	ifvlantrunk *ifv_trunk;	/* trunk we hang off, see TRUNK() */
	struct	ifnet *ifv_ifp;		/* our own ifnet */
#define	TRUNK(ifv)	((ifv)->ifv_trunk)
#define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
	int	ifv_pflags;	/* special flags we have set on parent */
	/* Exported through ifp->if_linkmib by vlan_clone_create(). */
	struct	ifv_linkmib {
		int	ifvm_encaplen;	/* encapsulation length */
		int	ifvm_mtufudge;	/* MTU fudged by this much */
		int	ifvm_mintu;	/* min transmission unit */
		uint16_t ifvm_proto;	/* encapsulation ethertype */
		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
	}	ifv_mib;
	/* Multicast addresses recorded by vlan_setmulti(). */
	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
	LIST_ENTRY(ifvlan) ifv_list;	/* link in the trunk's hash bucket */
#endif
};
/* Shorthands for the ifv_mib members. */
#define	ifv_proto	ifv_mib.ifvm_proto
#define	ifv_tag		ifv_mib.ifvm_tag
#define	ifv_encaplen	ifv_mib.ifvm_encaplen
#define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
#define	ifv_mintu	ifv_mib.ifvm_mintu
125193323Sed
/*
 * Special flags we should propagate to parent.
 * Each entry pairs an interface flag with the function used to apply
 * it to an interface; the table ends with a {0, NULL} sentinel.
 */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} vlan_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};
135193323Sed
/* sysctl tree: net.link.vlan and net.link.vlan.link. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN");
SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency");

/*
 * net.link.vlan.soft_pad: when non-zero, vlan_start() pads short frames
 * before tagging them.  Off by default.
 */
static int soft_pad = 0;
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
	   "pad short frames before tagging");

/* malloc(9) type for every allocation made by this driver. */
static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");

/* Handle of the ifnet_departure_event handler registered at module load. */
static eventhandler_tag ifdetach_tag;
147193323Sed
/*
 * We have a global mutex that is used to serialize configuration
 * changes and isn't used in normal packet delivery.
 *
 * We also have a per-trunk rwlock that is locked shared on packet
 * processing and exclusive when configuration is changed.
 *
 * VLAN_ARRAY substitutes the dynamic hash with a static array
 * with 4096 entries. In theory this can give a boost in processing,
 * however in practice it does not. Probably this is because the array
 * is too big to fit into the CPU cache.
 */
static struct mtx ifv_mtx;
/* Global configuration mutex. */
#define	VLAN_LOCK_INIT()	mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF)
#define	VLAN_LOCK_DESTROY()	mtx_destroy(&ifv_mtx)
#define	VLAN_LOCK_ASSERT()	mtx_assert(&ifv_mtx, MA_OWNED)
#define	VLAN_LOCK()		mtx_lock(&ifv_mtx)
#define	VLAN_UNLOCK()		mtx_unlock(&ifv_mtx)
/* Per-trunk rwlock: TRUNK_LOCK/UNLOCK are exclusive (write) operations. */
#define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, VLANNAME)
#define	TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
#define	TRUNK_LOCK(trunk)	rw_wlock(&(trunk)->rw)
#define	TRUNK_UNLOCK(trunk)	rw_wunlock(&(trunk)->rw)
#define	TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
/* Per-trunk rwlock: shared (read) operations. */
#define	TRUNK_RLOCK(trunk)	rw_rlock(&(trunk)->rw)
#define	TRUNK_RUNLOCK(trunk)	rw_runlock(&(trunk)->rw)
#define	TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
174193323Sed
/* Hash table management; only built when the static array is not used. */
#ifndef VLAN_ARRAY
static	void vlan_inithash(struct ifvlantrunk *trunk);
static	void vlan_freehash(struct ifvlantrunk *trunk);
static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
	uint16_t tag);
#endif
static	void trunk_destroy(struct ifvlantrunk *trunk);

/* ifnet methods, hooks and configuration helpers. */
static	void vlan_start(struct ifnet *ifp);
static	void vlan_init(void *foo);
static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
    int (*func)(struct ifnet *, int));
static	int vlan_setflags(struct ifnet *ifp, int status);
static	int vlan_setmulti(struct ifnet *ifp);
static	int vlan_unconfig(struct ifnet *ifp);
static	int vlan_unconfig_locked(struct ifnet *ifp);
static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
static	void vlan_link_state(struct ifnet *ifp, int link);
static	void vlan_capabilities(struct ifvlan *ifv);
static	void vlan_trunk_capabilities(struct ifnet *ifp);

/* Interface cloning callbacks. */
static	struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
    const char *, int *);
static	int vlan_clone_match(struct if_clone *, const char *);
static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);

/* ifnet_departure_event handler. */
static	void vlan_ifdetach(void *arg, struct ifnet *ifp);

/* The "vlan" cloner, wired to the match/create/destroy callbacks above. */
static	struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
    IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
211193323Sed
212193323Sed#ifndef VLAN_ARRAY
/*
 * Map a vlan tag `n' to a bucket index: XOR the tag with copies of
 * itself shifted right by 4 and by 8 bits, then keep only the bits
 * selected by the bucket mask `m'.
 */
#define	HASH(n, m)	(((n) ^ ((n) >> 4) ^ ((n) >> 8)) & (m))
214193323Sed
215193323Sedstatic void
216193323Sedvlan_inithash(struct ifvlantrunk *trunk)
217193323Sed{
218193323Sed	int i, n;
219193323Sed
220193323Sed	/*
221193323Sed	 * The trunk must not be locked here since we call malloc(M_WAITOK).
222193323Sed	 * It is OK in case this function is called before the trunk struct
223193323Sed	 * gets hooked up and becomes visible from other threads.
224193323Sed	 */
225193323Sed
226193323Sed	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
227193323Sed	    ("%s: hash already initialized", __func__));
228193323Sed
229193323Sed	trunk->hwidth = VLAN_DEF_HWIDTH;
230193323Sed	n = 1 << trunk->hwidth;
231193323Sed	trunk->hmask = n - 1;
232193323Sed	trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
233193323Sed	for (i = 0; i < n; i++)
234193323Sed		LIST_INIT(&trunk->hash[i]);
235193323Sed}
236193323Sed
237193323Sedstatic void
238193323Sedvlan_freehash(struct ifvlantrunk *trunk)
239193323Sed{
240193323Sed#ifdef INVARIANTS
241193323Sed	int i;
242193323Sed
243193323Sed	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
244193323Sed	for (i = 0; i < (1 << trunk->hwidth); i++)
245193323Sed		KASSERT(LIST_EMPTY(&trunk->hash[i]),
246193323Sed		    ("%s: hash table not empty", __func__));
247193323Sed#endif
248193323Sed	free(trunk->hash, M_VLAN);
249193323Sed	trunk->hash = NULL;
250193323Sed	trunk->hwidth = trunk->hmask = 0;
251193323Sed}
252193323Sed
253193323Sedstatic int
254193323Sedvlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
255193323Sed{
256193323Sed	int i, b;
257193323Sed	struct ifvlan *ifv2;
258193323Sed
259193323Sed	TRUNK_LOCK_ASSERT(trunk);
260193323Sed	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
261193323Sed
262193323Sed	b = 1 << trunk->hwidth;
263193323Sed	i = HASH(ifv->ifv_tag, trunk->hmask);
264193323Sed	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
265193323Sed		if (ifv->ifv_tag == ifv2->ifv_tag)
266193323Sed			return (EEXIST);
267193323Sed
268193323Sed	/*
269193323Sed	 * Grow the hash when the number of vlans exceeds half of the number of
270193323Sed	 * hash buckets squared. This will make the average linked-list length
271193323Sed	 * buckets/2.
272193323Sed	 */
273193323Sed	if (trunk->refcnt > (b * b) / 2) {
274193323Sed		vlan_growhash(trunk, 1);
275193323Sed		i = HASH(ifv->ifv_tag, trunk->hmask);
276193323Sed	}
277193323Sed	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
278193323Sed	trunk->refcnt++;
279193323Sed
280193323Sed	return (0);
281193323Sed}
282193323Sed
283193323Sedstatic int
284193323Sedvlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
285193323Sed{
286193323Sed	int i, b;
287193323Sed	struct ifvlan *ifv2;
288193323Sed
289193323Sed	TRUNK_LOCK_ASSERT(trunk);
290193323Sed	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
291193323Sed
292193323Sed	b = 1 << trunk->hwidth;
293193323Sed	i = HASH(ifv->ifv_tag, trunk->hmask);
294193323Sed	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
295193323Sed		if (ifv2 == ifv) {
296193323Sed			trunk->refcnt--;
297193323Sed			LIST_REMOVE(ifv2, ifv_list);
298193323Sed			if (trunk->refcnt < (b * b) / 2)
299193323Sed				vlan_growhash(trunk, -1);
300193323Sed			return (0);
301193323Sed		}
302193323Sed
303193323Sed	panic("%s: vlan not found\n", __func__);
304193323Sed	return (ENOENT); /*NOTREACHED*/
305193323Sed}
306193323Sed
307193323Sed/*
308193323Sed * Grow the hash larger or smaller if memory permits.
309193323Sed */
310193323Sedstatic void
311193323Sedvlan_growhash(struct ifvlantrunk *trunk, int howmuch)
312193323Sed{
313193323Sed
314193323Sed	struct ifvlan *ifv;
315193323Sed	struct ifvlanhead *hash2;
316193323Sed	int hwidth2, i, j, n, n2;
317193323Sed
318193323Sed	TRUNK_LOCK_ASSERT(trunk);
319193323Sed	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
320193323Sed
321193323Sed	if (howmuch == 0) {
322193323Sed		/* Harmless yet obvious coding error */
323193323Sed		printf("%s: howmuch is 0\n", __func__);
324193323Sed		return;
325193323Sed	}
326193323Sed
327193323Sed	hwidth2 = trunk->hwidth + howmuch;
328193323Sed	n = 1 << trunk->hwidth;
329193323Sed	n2 = 1 << hwidth2;
330193323Sed	/* Do not shrink the table below the default */
331193323Sed	if (hwidth2 < VLAN_DEF_HWIDTH)
332193323Sed		return;
333193323Sed
334193323Sed	/* M_NOWAIT because we're called with trunk mutex held */
335193323Sed	hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
336193323Sed	if (hash2 == NULL) {
337193323Sed		printf("%s: out of memory -- hash size not changed\n",
338193323Sed		    __func__);
339193323Sed		return;		/* We can live with the old hash table */
340193323Sed	}
341193323Sed	for (j = 0; j < n2; j++)
342193323Sed		LIST_INIT(&hash2[j]);
343193323Sed	for (i = 0; i < n; i++)
344193323Sed		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
345193323Sed			LIST_REMOVE(ifv, ifv_list);
346193323Sed			j = HASH(ifv->ifv_tag, n2 - 1);
347193323Sed			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
348193323Sed		}
349193323Sed	free(trunk->hash, M_VLAN);
350193323Sed	trunk->hash = hash2;
351193323Sed	trunk->hwidth = hwidth2;
352193323Sed	trunk->hmask = n2 - 1;
353193323Sed}
354193323Sed
355193323Sedstatic __inline struct ifvlan *
356193323Sedvlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
357193323Sed{
358193323Sed	struct ifvlan *ifv;
359193323Sed
360193323Sed	TRUNK_LOCK_RASSERT(trunk);
361193323Sed
362193323Sed	LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list)
363193323Sed		if (ifv->ifv_tag == tag)
364193323Sed			return (ifv);
365193323Sed	return (NULL);
366193323Sed}
367193323Sed
#if 0
/*
 * Debugging code to view the hashtables.
 * Prints one line per bucket: the bucket index followed by the names
 * of the vlan interfaces chained in it.  Compiled out by default.
 */
static void
vlan_dumphash(struct ifvlantrunk *trunk)
{
	int i;
	struct ifvlan *ifv;

	for (i = 0; i < (1 << trunk->hwidth); i++) {
		printf("%d: ", i);
		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
			printf("%s ", ifv->ifv_ifp->if_xname);
		printf("\n");
	}
}
#endif /* 0 */
384193323Sed#endif /* !VLAN_ARRAY */
385193323Sed
/*
 * Tear down a trunk and detach it from its parent interface.
 * The caller must hold the global vlan mutex (asserted below).
 */
static void
trunk_destroy(struct ifvlantrunk *trunk)
{
	VLAN_LOCK_ASSERT();

	/*
	 * Under the trunk write lock: free the hash (hash build only)
	 * and clear the parent's back pointer to this trunk.
	 */
	TRUNK_LOCK(trunk);
#ifndef VLAN_ARRAY
	vlan_freehash(trunk);
#endif
	trunk->parent->if_vlantrunk = NULL;
	TRUNK_UNLOCK(trunk);
	/* The lock must be released before it is destroyed. */
	TRUNK_LOCK_DESTROY(trunk);
	free(trunk, M_VLAN);
}
400193323Sed
401193323Sed/*
402193323Sed * Program our multicast filter. What we're actually doing is
403193323Sed * programming the multicast filter of the parent. This has the
404193323Sed * side effect of causing the parent interface to receive multicast
405193323Sed * traffic that it doesn't really want, which ends up being discarded
406193323Sed * later by the upper protocol layers. Unfortunately, there's no way
407193323Sed * to avoid this: there really is only one physical interface.
408193323Sed *
409193323Sed * XXX: There is a possible race here if more than one thread is
410193323Sed *      modifying the multicast state of the vlan interface at the same time.
411193323Sed */
412193323Sedstatic int
413193323Sedvlan_setmulti(struct ifnet *ifp)
414193323Sed{
415193323Sed	struct ifnet		*ifp_p;
416193323Sed	struct ifmultiaddr	*ifma, *rifma = NULL;
417193323Sed	struct ifvlan		*sc;
418193323Sed	struct vlan_mc_entry	*mc;
419193323Sed	struct sockaddr_dl	sdl;
420193323Sed	int			error;
421193323Sed
422193323Sed	/*VLAN_LOCK_ASSERT();*/
423193323Sed
424193323Sed	/* Find the parent. */
425193323Sed	sc = ifp->if_softc;
426193323Sed	ifp_p = PARENT(sc);
427193323Sed
428193323Sed	bzero((char *)&sdl, sizeof(sdl));
429193323Sed	sdl.sdl_len = sizeof(sdl);
430193323Sed	sdl.sdl_family = AF_LINK;
431193323Sed	sdl.sdl_index = ifp_p->if_index;
432193323Sed	sdl.sdl_type = IFT_ETHER;
433193323Sed	sdl.sdl_alen = ETHER_ADDR_LEN;
434193323Sed
435193323Sed	/* First, remove any existing filter entries. */
436193323Sed	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
437193323Sed		bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
438193323Sed		error = if_delmulti(ifp_p, (struct sockaddr *)&sdl);
439193323Sed		if (error)
440193323Sed			return (error);
441193323Sed		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
442193323Sed		free(mc, M_VLAN);
443193323Sed	}
444193323Sed
445193323Sed	/* Now program new ones. */
446193323Sed	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
447193323Sed		if (ifma->ifma_addr->sa_family != AF_LINK)
448193323Sed			continue;
449193323Sed		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
450193323Sed		if (mc == NULL)
451193323Sed			return (ENOMEM);
452193323Sed		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
453193323Sed		    (char *)&mc->mc_addr, ETHER_ADDR_LEN);
454193323Sed		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
455193323Sed		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
456193323Sed		    LLADDR(&sdl), ETHER_ADDR_LEN);
457193323Sed		error = if_addmulti(ifp_p, (struct sockaddr *)&sdl, &rifma);
458193323Sed		if (error)
459193323Sed			return (error);
460193323Sed	}
461193323Sed
462193323Sed	return (0);
463193323Sed}
464193323Sed
/*
 * A handler for network interface departure events.
 * Track departure of trunks here so that we don't access invalid
 * pointers or whatever if a trunk is ripped from under us, e.g.,
 * by ejecting its hot-plug card.
 *
 * arg is unused; ifp is the departing interface.  If ifp is a trunk,
 * every vlan configured on it is unconfigured under the global vlan
 * mutex.
 */
static void
vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
	int i;

	/*
	 * Check if it's a trunk interface first of all
	 * to avoid needless locking.
	 */
	if (ifp->if_vlantrunk == NULL)
		return;

	VLAN_LOCK();
	/*
	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
	 * Check trunk pointer after each vlan_unconfig() as it will
	 * free it and set to NULL after the last vlan was detached.
	 */
#ifdef VLAN_ARRAY
	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
			vlan_unconfig_locked(ifv->ifv_ifp);
			if (ifp->if_vlantrunk == NULL)
				break;
		}
#else /* VLAN_ARRAY */
	/*
	 * Removing a vlan may resize the hash, so the scan starts over
	 * from bucket 0 after every removal.
	 */
restart:
	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
			vlan_unconfig_locked(ifv->ifv_ifp);
			if (ifp->if_vlantrunk)
				goto restart;	/* trunk->hwidth can change */
			else
				break;
		}
#endif /* VLAN_ARRAY */
	/* Trunk should have been destroyed in vlan_unconfig(). */
	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
	VLAN_UNLOCK();
}
512193323Sed
/*
 * VLAN support can be loaded as a module.  The only place in the
 * system that's intimately aware of this is ether_input.  We hook
 * into this code through vlan_input_p which is defined there and
 * set here.  No one else in the system should be aware of this, so
 * we use an explicit reference here.
 */
extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);

/* For if_link_state_change() eyes only... */
extern	void (*vlan_link_state_p)(struct ifnet *, int);
524193323Sed
525193323Sedstatic int
526193323Sedvlan_modevent(module_t mod, int type, void *data)
527193323Sed{
528193323Sed
529193323Sed	switch (type) {
530193323Sed	case MOD_LOAD:
531193323Sed		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
532193323Sed		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
533193323Sed		if (ifdetach_tag == NULL)
534193323Sed			return (ENOMEM);
535193323Sed		VLAN_LOCK_INIT();
536193323Sed		vlan_input_p = vlan_input;
537193323Sed		vlan_link_state_p = vlan_link_state;
538193323Sed		vlan_trunk_cap_p = vlan_trunk_capabilities;
539193323Sed		if_clone_attach(&vlan_cloner);
540193323Sed		break;
541193323Sed	case MOD_UNLOAD:
542193323Sed		if_clone_detach(&vlan_cloner);
543193323Sed		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
544193323Sed		vlan_input_p = NULL;
545193323Sed		vlan_link_state_p = NULL;
546193323Sed		vlan_trunk_cap_p = NULL;
547193323Sed		VLAN_LOCK_DESTROY();
548193323Sed		break;
549193323Sed	default:
550193323Sed		return (EOPNOTSUPP);
551193323Sed	}
552193323Sed	return (0);
553193323Sed}
554193323Sed
/* Module glue: name, event handler, and no private data. */
static moduledata_t vlan_mod = {
	"if_vlan",
	vlan_modevent,
	0
};

DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vlan, 3);
/* NOTE(review): why miibus is a dependency is not visible in this file. */
MODULE_DEPEND(if_vlan, miibus, 1, 1, 1);
564193323Sed
565193323Sedstatic struct ifnet *
566193323Sedvlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
567193323Sed{
568193323Sed	const char *cp;
569193323Sed	struct ifnet *ifp;
570193323Sed	int t = 0;
571193323Sed
572193323Sed	/* Check for <etherif>.<vlan> style interface names. */
573193323Sed	IFNET_RLOCK();
574193323Sed	TAILQ_FOREACH(ifp, &ifnet, if_link) {
575193323Sed		if (ifp->if_type != IFT_ETHER)
576193323Sed			continue;
577193323Sed		if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
578193323Sed			continue;
579193323Sed		cp = name + strlen(ifp->if_xname);
580193323Sed		if (*cp != '.')
581193323Sed			continue;
582193323Sed		for(; *cp != '\0'; cp++) {
583193323Sed			if (*cp < '0' || *cp > '9')
584193323Sed				continue;
585193323Sed			t = (t * 10) + (*cp - '0');
586193323Sed		}
587193323Sed		if (tag != NULL)
588193323Sed			*tag = t;
589193323Sed		break;
590193323Sed	}
591193323Sed	IFNET_RUNLOCK();
592193323Sed
593193323Sed	return (ifp);
594193323Sed}
595193323Sed
596193323Sedstatic int
597193323Sedvlan_clone_match(struct if_clone *ifc, const char *name)
598193323Sed{
599193323Sed	const char *cp;
600193323Sed
601193323Sed	if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL)
602193323Sed		return (1);
603193323Sed
604193323Sed	if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0)
605193323Sed		return (0);
606193323Sed	for (cp = name + 4; *cp != '\0'; cp++) {
607193323Sed		if (*cp < '0' || *cp > '9')
608193323Sed			return (0);
609193323Sed	}
610193323Sed
611193323Sed	return (1);
612193323Sed}
613193323Sed
614193323Sedstatic int
615193323Sedvlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
616193323Sed{
617193323Sed	char *dp;
618193323Sed	int wildcard;
619193323Sed	int unit;
620193323Sed	int error;
621193323Sed	int tag;
622193323Sed	int ethertag;
623193323Sed	struct ifvlan *ifv;
624193323Sed	struct ifnet *ifp;
625193323Sed	struct ifnet *p;
626193323Sed	struct vlanreq vlr;
627193323Sed	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
628193323Sed
629193323Sed	/*
630193323Sed	 * There are 3 (ugh) ways to specify the cloned device:
631193323Sed	 * o pass a parameter block with the clone request.
632193323Sed	 * o specify parameters in the text of the clone device name
633193323Sed	 * o specify no parameters and get an unattached device that
634193323Sed	 *   must be configured separately.
635193323Sed	 * The first technique is preferred; the latter two are
636193323Sed	 * supported for backwards compatibilty.
637193323Sed	 */
638193323Sed	if (params) {
639193323Sed		error = copyin(params, &vlr, sizeof(vlr));
640193323Sed		if (error)
641193323Sed			return error;
642193323Sed		p = ifunit(vlr.vlr_parent);
643193323Sed		if (p == NULL)
644193323Sed			return ENXIO;
645193323Sed		/*
646193323Sed		 * Don't let the caller set up a VLAN tag with
647193323Sed		 * anything except VLID bits.
648193323Sed		 */
649193323Sed		if (vlr.vlr_tag & ~EVL_VLID_MASK)
650193323Sed			return (EINVAL);
651193323Sed		error = ifc_name2unit(name, &unit);
652193323Sed		if (error != 0)
653193323Sed			return (error);
654193323Sed
655193323Sed		ethertag = 1;
656193323Sed		tag = vlr.vlr_tag;
657193323Sed		wildcard = (unit < 0);
658193323Sed	} else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
659193323Sed		ethertag = 1;
660193323Sed		unit = -1;
661193323Sed		wildcard = 0;
662193323Sed
663193323Sed		/*
664193323Sed		 * Don't let the caller set up a VLAN tag with
665193323Sed		 * anything except VLID bits.
666193323Sed		 */
667193323Sed		if (tag & ~EVL_VLID_MASK)
668193323Sed			return (EINVAL);
669193323Sed	} else {
670193323Sed		ethertag = 0;
671193323Sed
672193323Sed		error = ifc_name2unit(name, &unit);
673193323Sed		if (error != 0)
674193323Sed			return (error);
675193323Sed
676193323Sed		wildcard = (unit < 0);
677193323Sed	}
678193323Sed
679193323Sed	error = ifc_alloc_unit(ifc, &unit);
680193323Sed	if (error != 0)
681193323Sed		return (error);
682193323Sed
683193323Sed	/* In the wildcard case, we need to update the name. */
684193323Sed	if (wildcard) {
685193323Sed		for (dp = name; *dp != '\0'; dp++);
686193323Sed		if (snprintf(dp, len - (dp-name), "%d", unit) >
687193323Sed		    len - (dp-name) - 1) {
688193323Sed			panic("%s: interface name too long", __func__);
689193323Sed		}
690193323Sed	}
691193323Sed
692193323Sed	ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
693193323Sed	ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
694193323Sed	if (ifp == NULL) {
695		ifc_free_unit(ifc, unit);
696		free(ifv, M_VLAN);
697		return (ENOSPC);
698	}
699	SLIST_INIT(&ifv->vlan_mc_listhead);
700
701	ifp->if_softc = ifv;
702	/*
703	 * Set the name manually rather than using if_initname because
704	 * we don't conform to the default naming convention for interfaces.
705	 */
706	strlcpy(ifp->if_xname, name, IFNAMSIZ);
707	ifp->if_dname = ifc->ifc_name;
708	ifp->if_dunit = unit;
709	/* NB: flags are not set here */
710	ifp->if_linkmib = &ifv->ifv_mib;
711	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
712	/* NB: mtu is not set here */
713
714	ifp->if_init = vlan_init;
715	ifp->if_start = vlan_start;
716	ifp->if_ioctl = vlan_ioctl;
717	ifp->if_snd.ifq_maxlen = ifqmaxlen;
718	ifp->if_flags = VLAN_IFFLAGS;
719	ether_ifattach(ifp, eaddr);
720	/* Now undo some of the damage... */
721	ifp->if_baudrate = 0;
722	ifp->if_type = IFT_L2VLAN;
723	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
724
725	if (ethertag) {
726		error = vlan_config(ifv, p, tag);
727		if (error != 0) {
728			/*
729			 * Since we've partialy failed, we need to back
730			 * out all the way, otherwise userland could get
731			 * confused.  Thus, we destroy the interface.
732			 */
733			ether_ifdetach(ifp);
734			vlan_unconfig(ifp);
735			if_free_type(ifp, IFT_ETHER);
736			free(ifv, M_VLAN);
737
738			return (error);
739		}
740
741		/* Update flags on the parent, if necessary. */
742		vlan_setflags(ifp, 1);
743	}
744
745	return (0);
746}
747
/*
 * Cloner destroy callback: detach, unconfigure and free a vlan(4)
 * interface, then give its unit number back to the cloner.
 * Always returns 0.
 */
static int
vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
{
	/* Saved up front: ifp is freed before these are last used. */
	struct ifvlan *ifv = ifp->if_softc;
	int unit = ifp->if_dunit;

	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
	if_free_type(ifp, IFT_ETHER);
	free(ifv, M_VLAN);
	ifc_free_unit(ifc, unit);

	return (0);
}
762
/*
 * The ifp->if_init entry point for vlan(4) is a no-op.
 * The argument is ignored.
 */
static void
vlan_init(void *foo __unused)
{
}
770
/*
 * The if_start method for a vlan(4) interface.  It doesn't
 * raise the IFF_DRV_OACTIVE flag, since it is called
 * only from the IFQ_HANDOFF() macro in ether_output_frame().
 * If the interface queue were full and vlan_start() were
 * not called, the queue would never get emptied and the
 * interface would stall forever.
 *
 * Drains the vlan's send queue.  Each packet is tapped to BPF,
 * optionally padded, then either marked for hardware tagging or given
 * an 802.1Q header in software, and finally handed to the parent
 * interface's queue.
 */
static void
vlan_start(struct ifnet *ifp)
{
	struct ifvlan *ifv;
	struct ifnet *p;
	struct mbuf *m;
	int error;

	ifv = ifp->if_softc;
	p = PARENT(ifv);

	for (;;) {
		IF_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		BPF_MTAP(ifp, m);

		/*
		 * Do not run parent's if_start() if the parent is not up,
		 * or parent's driver will cause a system crash.
		 */
		if (!UP_AND_RUNNING(p)) {
			m_freem(m);
			/* Such drops are accounted as collisions. */
			ifp->if_collisions++;
			continue;
		}

		/*
		 * Pad the frame to the minimum size allowed if told to.
		 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
		 * paragraph C.4.4.3.b.  It can help to work around buggy
		 * bridges that violate paragraph C.4.4.3.a from the same
		 * document, i.e., fail to pad short frames after untagging.
		 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
		 * untagging it will produce a 62-byte frame, which is a runt
		 * and requires padding.  There are VLAN-enabled network
		 * devices that just discard such runts instead or mishandle
		 * them somehow.
		 */
		if (soft_pad) {
			static char pad[8];	/* just zeros */
			int n;

			/*
			 * n is the number of bytes still missing; append
			 * zeros in chunks of at most sizeof(pad) and stop
			 * early if m_append() fails.
			 */
			for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
			     n > 0; n -= sizeof(pad))
				if (!m_append(m, min(n, sizeof(pad)), pad))
					break;

			/* n is still positive only if an append failed. */
			if (n > 0) {
				if_printf(ifp, "cannot pad short frame\n");
				ifp->if_oerrors++;
				m_freem(m);
				continue;
			}
		}

		/*
		 * If underlying interface can do VLAN tag insertion itself,
		 * just pass the packet along. However, we need some way to
		 * tell the interface where the packet came from so that it
		 * knows how to find the VLAN tag to use, so we attach a
		 * packet tag that holds it.
		 */
		if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
			m->m_pkthdr.ether_vtag = ifv->ifv_tag;
			m->m_flags |= M_VLANTAG;
		} else {
			struct ether_vlan_header *evl;

			/* Make room in front for the encapsulation bytes. */
			M_PREPEND(m, ifv->ifv_encaplen, M_DONTWAIT);
			if (m == NULL) {
				if_printf(ifp,
				    "unable to prepend VLAN header\n");
				ifp->if_oerrors++;
				continue;
			}
			/* M_PREPEND takes care of m_len, m_pkthdr.len for us */

			/* The whole VLAN header must be in the first mbuf. */
			if (m->m_len < sizeof(*evl)) {
				m = m_pullup(m, sizeof(*evl));
				if (m == NULL) {
					if_printf(ifp,
					    "cannot pullup VLAN header\n");
					ifp->if_oerrors++;
					continue;
				}
			}

			/*
			 * Transform the Ethernet header into an Ethernet header
			 * with 802.1Q encapsulation.
			 */
			evl = mtod(m, struct ether_vlan_header *);
			/*
			 * Slide the original header, minus its type field,
			 * to the new front of the packet.
			 */
			bcopy((char *)evl + ifv->ifv_encaplen,
			      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
			evl->evl_encap_proto = htons(ifv->ifv_proto);
			evl->evl_tag = htons(ifv->ifv_tag);
#ifdef DEBUG
			printf("%s: %*D\n", __func__, (int)sizeof(*evl),
			    (unsigned char *)evl, ":");
#endif
		}

		/*
		 * Send it, precisely as ether_output() would have.
		 * We are already running at splimp.
		 */
		IFQ_HANDOFF(p, m, error);
		if (!error)
			ifp->if_opackets++;
		else
			ifp->if_oerrors++;
	}
}
893
894static void
895vlan_input(struct ifnet *ifp, struct mbuf *m)
896{
897	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
898	struct ifvlan *ifv;
899	uint16_t tag;
900
901	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
902
903	if (m->m_flags & M_VLANTAG) {
904		/*
905		 * Packet is tagged, but m contains a normal
906		 * Ethernet frame; the tag is stored out-of-band.
907		 */
908		tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
909		m->m_flags &= ~M_VLANTAG;
910	} else {
911		struct ether_vlan_header *evl;
912
913		/*
914		 * Packet is tagged in-band as specified by 802.1q.
915		 */
916		switch (ifp->if_type) {
917		case IFT_ETHER:
918			if (m->m_len < sizeof(*evl) &&
919			    (m = m_pullup(m, sizeof(*evl))) == NULL) {
920				if_printf(ifp, "cannot pullup VLAN header\n");
921				return;
922			}
923			evl = mtod(m, struct ether_vlan_header *);
924			tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
925
926			/*
927			 * Remove the 802.1q header by copying the Ethernet
928			 * addresses over it and adjusting the beginning of
929			 * the data in the mbuf.  The encapsulated Ethernet
930			 * type field is already in place.
931			 */
932			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
933			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
934			m_adj(m, ETHER_VLAN_ENCAP_LEN);
935			break;
936
937		default:
938#ifdef INVARIANTS
939			panic("%s: %s has unsupported if_type %u",
940			      __func__, ifp->if_xname, ifp->if_type);
941#endif
942			m_freem(m);
943			ifp->if_noproto++;
944			return;
945		}
946	}
947
948	TRUNK_RLOCK(trunk);
949#ifdef VLAN_ARRAY
950	ifv = trunk->vlans[tag];
951#else
952	ifv = vlan_gethash(trunk, tag);
953#endif
954	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
955		TRUNK_RUNLOCK(trunk);
956		m_freem(m);
957		ifp->if_noproto++;
958		return;
959	}
960	TRUNK_RUNLOCK(trunk);
961
962	m->m_pkthdr.rcvif = ifv->ifv_ifp;
963	ifv->ifv_ifp->if_ipackets++;
964
965	/* Pass it back through the parent's input routine. */
966	(*ifp->if_input)(ifv->ifv_ifp, m);
967}
968
969static int
970vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
971{
972	struct ifvlantrunk *trunk;
973	struct ifnet *ifp;
974	int error = 0;
975
976	/* VID numbers 0x0 and 0xFFF are reserved */
977	if (tag == 0 || tag == 0xFFF)
978		return (EINVAL);
979	if (p->if_type != IFT_ETHER)
980		return (EPROTONOSUPPORT);
981	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
982		return (EPROTONOSUPPORT);
983	if (ifv->ifv_trunk)
984		return (EBUSY);
985
986	if (p->if_vlantrunk == NULL) {
987		trunk = malloc(sizeof(struct ifvlantrunk),
988		    M_VLAN, M_WAITOK | M_ZERO);
989#ifndef VLAN_ARRAY
990		vlan_inithash(trunk);
991#endif
992		VLAN_LOCK();
993		if (p->if_vlantrunk != NULL) {
994			/* A race that that is very unlikely to be hit. */
995#ifndef VLAN_ARRAY
996			vlan_freehash(trunk);
997#endif
998			free(trunk, M_VLAN);
999			goto exists;
1000		}
1001		TRUNK_LOCK_INIT(trunk);
1002		TRUNK_LOCK(trunk);
1003		p->if_vlantrunk = trunk;
1004		trunk->parent = p;
1005	} else {
1006		VLAN_LOCK();
1007exists:
1008		trunk = p->if_vlantrunk;
1009		TRUNK_LOCK(trunk);
1010	}
1011
1012	ifv->ifv_tag = tag;	/* must set this before vlan_inshash() */
1013#ifdef VLAN_ARRAY
1014	if (trunk->vlans[tag] != NULL) {
1015		error = EEXIST;
1016		goto done;
1017	}
1018	trunk->vlans[tag] = ifv;
1019	trunk->refcnt++;
1020#else
1021	error = vlan_inshash(trunk, ifv);
1022	if (error)
1023		goto done;
1024#endif
1025	ifv->ifv_proto = ETHERTYPE_VLAN;
1026	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
1027	ifv->ifv_mintu = ETHERMIN;
1028	ifv->ifv_pflags = 0;
1029
1030	/*
1031	 * If the parent supports the VLAN_MTU capability,
1032	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
1033	 * use it.
1034	 */
1035	if (p->if_capenable & IFCAP_VLAN_MTU) {
1036		/*
1037		 * No need to fudge the MTU since the parent can
1038		 * handle extended frames.
1039		 */
1040		ifv->ifv_mtufudge = 0;
1041	} else {
1042		/*
1043		 * Fudge the MTU by the encapsulation size.  This
1044		 * makes us incompatible with strictly compliant
1045		 * 802.1Q implementations, but allows us to use
1046		 * the feature with other NetBSD implementations,
1047		 * which might still be useful.
1048		 */
1049		ifv->ifv_mtufudge = ifv->ifv_encaplen;
1050	}
1051
1052	ifv->ifv_trunk = trunk;
1053	ifp = ifv->ifv_ifp;
1054	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
1055	ifp->if_baudrate = p->if_baudrate;
1056	/*
1057	 * Copy only a selected subset of flags from the parent.
1058	 * Other flags are none of our business.
1059	 */
1060#define VLAN_COPY_FLAGS (IFF_SIMPLEX)
1061	ifp->if_flags &= ~VLAN_COPY_FLAGS;
1062	ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
1063#undef VLAN_COPY_FLAGS
1064
1065	ifp->if_link_state = p->if_link_state;
1066
1067	vlan_capabilities(ifv);
1068
1069	/*
1070	 * Set up our ``Ethernet address'' to reflect the underlying
1071	 * physical interface's.
1072	 */
1073	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), ETHER_ADDR_LEN);
1074
1075	/*
1076	 * Configure multicast addresses that may already be
1077	 * joined on the vlan device.
1078	 */
1079	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
1080
1081	/* We are ready for operation now. */
1082	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1083done:
1084	TRUNK_UNLOCK(trunk);
1085	VLAN_UNLOCK();
1086
1087	return (error);
1088}
1089
/*
 * Unconfigure a vlan interface: take the VLAN lock around
 * vlan_unconfig_locked() and pass its result through.
 */
static int
vlan_unconfig(struct ifnet *ifp)
{
	int error;

	VLAN_LOCK();
	error = vlan_unconfig_locked(ifp);
	VLAN_UNLOCK();

	return (error);
}
1100
1101static int
1102vlan_unconfig_locked(struct ifnet *ifp)
1103{
1104	struct ifvlantrunk *trunk;
1105	struct vlan_mc_entry *mc;
1106	struct ifvlan *ifv;
1107	int error;
1108
1109	VLAN_LOCK_ASSERT();
1110
1111	ifv = ifp->if_softc;
1112	trunk = ifv->ifv_trunk;
1113
1114	if (trunk) {
1115		struct sockaddr_dl sdl;
1116		struct ifnet *p = trunk->parent;
1117
1118		TRUNK_LOCK(trunk);
1119
1120		/*
1121		 * Since the interface is being unconfigured, we need to
1122		 * empty the list of multicast groups that we may have joined
1123		 * while we were alive from the parent's list.
1124		 */
1125		bzero((char *)&sdl, sizeof(sdl));
1126		sdl.sdl_len = sizeof(sdl);
1127		sdl.sdl_family = AF_LINK;
1128		sdl.sdl_index = p->if_index;
1129		sdl.sdl_type = IFT_ETHER;
1130		sdl.sdl_alen = ETHER_ADDR_LEN;
1131
1132		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
1133			bcopy((char *)&mc->mc_addr, LLADDR(&sdl),
1134			    ETHER_ADDR_LEN);
1135			error = if_delmulti(p, (struct sockaddr *)&sdl);
1136			if (error)
1137				return (error);
1138			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
1139			free(mc, M_VLAN);
1140		}
1141
1142		vlan_setflags(ifp, 0); /* clear special flags on parent */
1143#ifdef VLAN_ARRAY
1144		trunk->vlans[ifv->ifv_tag] = NULL;
1145		trunk->refcnt--;
1146#else
1147		vlan_remhash(trunk, ifv);
1148#endif
1149		ifv->ifv_trunk = NULL;
1150
1151		/*
1152		 * Check if we were the last.
1153		 */
1154		if (trunk->refcnt == 0) {
1155			trunk->parent->if_vlantrunk = NULL;
1156			/*
1157			 * XXXGL: If some ithread has already entered
1158			 * vlan_input() and is now blocked on the trunk
1159			 * lock, then it should preempt us right after
1160			 * unlock and finish its work. Then we will acquire
1161			 * lock again in trunk_destroy().
1162			 */
1163			TRUNK_UNLOCK(trunk);
1164			trunk_destroy(trunk);
1165		} else
1166			TRUNK_UNLOCK(trunk);
1167	}
1168
1169	/* Disconnect from parent. */
1170	if (ifv->ifv_pflags)
1171		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
1172	ifp->if_mtu = ETHERMTU;
1173	ifp->if_link_state = LINK_STATE_UNKNOWN;
1174	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1175
1176	return (0);
1177}
1178
1179/* Handle a reference counted flag that should be set on the parent as well */
1180static int
1181vlan_setflag(struct ifnet *ifp, int flag, int status,
1182	     int (*func)(struct ifnet *, int))
1183{
1184	struct ifvlan *ifv;
1185	int error;
1186
1187	/* XXX VLAN_LOCK_ASSERT(); */
1188
1189	ifv = ifp->if_softc;
1190	status = status ? (ifp->if_flags & flag) : 0;
1191	/* Now "status" contains the flag value or 0 */
1192
1193	/*
1194	 * See if recorded parent's status is different from what
1195	 * we want it to be.  If it is, flip it.  We record parent's
1196	 * status in ifv_pflags so that we won't clear parent's flag
1197	 * we haven't set.  In fact, we don't clear or set parent's
1198	 * flags directly, but get or release references to them.
1199	 * That's why we can be sure that recorded flags still are
1200	 * in accord with actual parent's flags.
1201	 */
1202	if (status != (ifv->ifv_pflags & flag)) {
1203		error = (*func)(PARENT(ifv), status);
1204		if (error)
1205			return (error);
1206		ifv->ifv_pflags &= ~flag;
1207		ifv->ifv_pflags |= status;
1208	}
1209	return (0);
1210}
1211
/*
 * Handle IFF_* flags that require certain changes on the parent:
 * if "status" is true, update the parent's flags according to our if_flags;
 * if "status" is false, forcibly clear the flags set on the parent.
 */
1217static int
1218vlan_setflags(struct ifnet *ifp, int status)
1219{
1220	int error, i;
1221
1222	for (i = 0; vlan_pflags[i].flag; i++) {
1223		error = vlan_setflag(ifp, vlan_pflags[i].flag,
1224				     status, vlan_pflags[i].func);
1225		if (error)
1226			return (error);
1227	}
1228	return (0);
1229}
1230
1231/* Inform all vlans that their parent has changed link state */
1232static void
1233vlan_link_state(struct ifnet *ifp, int link)
1234{
1235	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1236	struct ifvlan *ifv;
1237	int i;
1238
1239	TRUNK_LOCK(trunk);
1240#ifdef VLAN_ARRAY
1241	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1242		if (trunk->vlans[i] != NULL) {
1243			ifv = trunk->vlans[i];
1244#else
1245	for (i = 0; i < (1 << trunk->hwidth); i++)
1246		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
1247#endif
1248			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
1249			if_link_state_change(ifv->ifv_ifp,
1250			    trunk->parent->if_link_state);
1251		}
1252	TRUNK_UNLOCK(trunk);
1253}
1254
1255static void
1256vlan_capabilities(struct ifvlan *ifv)
1257{
1258	struct ifnet *p = PARENT(ifv);
1259	struct ifnet *ifp = ifv->ifv_ifp;
1260
1261	TRUNK_LOCK_ASSERT(TRUNK(ifv));
1262
1263	/*
1264	 * If the parent interface can do checksum offloading
1265	 * on VLANs, then propagate its hardware-assisted
1266	 * checksumming flags. Also assert that checksum
1267	 * offloading requires hardware VLAN tagging.
1268	 */
1269	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
1270		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
1271
1272	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
1273	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1274		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
1275		ifp->if_hwassist = p->if_hwassist;
1276	} else {
1277		ifp->if_capenable = 0;
1278		ifp->if_hwassist = 0;
1279	}
1280}
1281
1282static void
1283vlan_trunk_capabilities(struct ifnet *ifp)
1284{
1285	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1286	struct ifvlan *ifv;
1287	int i;
1288
1289	TRUNK_LOCK(trunk);
1290#ifdef VLAN_ARRAY
1291	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1292		if (trunk->vlans[i] != NULL) {
1293			ifv = trunk->vlans[i];
1294#else
1295	for (i = 0; i < (1 << trunk->hwidth); i++) {
1296		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
1297#endif
1298			vlan_capabilities(ifv);
1299	}
1300	TRUNK_UNLOCK(trunk);
1301}
1302
1303static int
1304vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1305{
1306	struct ifaddr *ifa;
1307	struct ifnet *p;
1308	struct ifreq *ifr;
1309	struct ifvlan *ifv;
1310	struct vlanreq vlr;
1311	int error = 0;
1312
1313	ifr = (struct ifreq *)data;
1314	ifa = (struct ifaddr *)data;
1315	ifv = ifp->if_softc;
1316
1317	switch (cmd) {
1318	case SIOCSIFADDR:
1319		ifp->if_flags |= IFF_UP;
1320
1321		switch (ifa->ifa_addr->sa_family) {
1322#ifdef INET
1323		case AF_INET:
1324			arp_ifinit(ifv->ifv_ifp, ifa);
1325			break;
1326#endif
1327		default:
1328			break;
1329		}
1330		break;
1331
1332	case SIOCGIFADDR:
1333		{
1334			struct sockaddr *sa;
1335
1336			sa = (struct sockaddr *) &ifr->ifr_data;
1337			bcopy(IF_LLADDR(ifp), (caddr_t)sa->sa_data,
1338			    ETHER_ADDR_LEN);
1339		}
1340		break;
1341
1342	case SIOCGIFMEDIA:
1343		VLAN_LOCK();
1344		if (TRUNK(ifv) != NULL) {
1345			error = (*PARENT(ifv)->if_ioctl)(PARENT(ifv),
1346					SIOCGIFMEDIA, data);
1347			VLAN_UNLOCK();
1348			/* Limit the result to the parent's current config. */
1349			if (error == 0) {
1350				struct ifmediareq *ifmr;
1351
1352				ifmr = (struct ifmediareq *)data;
1353				if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
1354					ifmr->ifm_count = 1;
1355					error = copyout(&ifmr->ifm_current,
1356						ifmr->ifm_ulist,
1357						sizeof(int));
1358				}
1359			}
1360		} else {
1361			VLAN_UNLOCK();
1362			error = EINVAL;
1363		}
1364		break;
1365
1366	case SIOCSIFMEDIA:
1367		error = EINVAL;
1368		break;
1369
1370	case SIOCSIFMTU:
1371		/*
1372		 * Set the interface MTU.
1373		 */
1374		VLAN_LOCK();
1375		if (TRUNK(ifv) != NULL) {
1376			if (ifr->ifr_mtu >
1377			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
1378			    ifr->ifr_mtu <
1379			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
1380				error = EINVAL;
1381			else
1382				ifp->if_mtu = ifr->ifr_mtu;
1383		} else
1384			error = EINVAL;
1385		VLAN_UNLOCK();
1386		break;
1387
1388	case SIOCSETVLAN:
1389		error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
1390		if (error)
1391			break;
1392		if (vlr.vlr_parent[0] == '\0') {
1393			vlan_unconfig(ifp);
1394			break;
1395		}
1396		p = ifunit(vlr.vlr_parent);
1397		if (p == 0) {
1398			error = ENOENT;
1399			break;
1400		}
1401		/*
1402		 * Don't let the caller set up a VLAN tag with
1403		 * anything except VLID bits.
1404		 */
1405		if (vlr.vlr_tag & ~EVL_VLID_MASK) {
1406			error = EINVAL;
1407			break;
1408		}
1409		error = vlan_config(ifv, p, vlr.vlr_tag);
1410		if (error)
1411			break;
1412
1413		/* Update flags on the parent, if necessary. */
1414		vlan_setflags(ifp, 1);
1415		break;
1416
1417	case SIOCGETVLAN:
1418		bzero(&vlr, sizeof(vlr));
1419		VLAN_LOCK();
1420		if (TRUNK(ifv) != NULL) {
1421			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
1422			    sizeof(vlr.vlr_parent));
1423			vlr.vlr_tag = ifv->ifv_tag;
1424		}
1425		VLAN_UNLOCK();
1426		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
1427		break;
1428
1429	case SIOCSIFFLAGS:
1430		/*
1431		 * We should propagate selected flags to the parent,
1432		 * e.g., promiscuous mode.
1433		 */
1434		if (TRUNK(ifv) != NULL)
1435			error = vlan_setflags(ifp, 1);
1436		break;
1437
1438	case SIOCADDMULTI:
1439	case SIOCDELMULTI:
1440		/*
1441		 * If we don't have a parent, just remember the membership for
1442		 * when we do.
1443		 */
1444		if (TRUNK(ifv) != NULL)
1445			error = vlan_setmulti(ifp);
1446		break;
1447
1448	default:
1449		error = EINVAL;
1450	}
1451
1452	return (error);
1453}
1454