if_bridge.c revision 1.139
1/*	$NetBSD: if_bridge.c,v 1.139 2017/11/15 06:17:40 ozaki-r Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by Jason L. Wright
53 * 4. The name of the author may not be used to endorse or promote products
54 *    derived from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 *
68 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69 */
70
71/*
72 * Network interface bridge support.
73 *
74 * TODO:
75 *
76 *	- Currently only supports Ethernet-like interfaces (Ethernet,
77 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
 *	  consider heterogeneous bridges).
80 */
81
82#include <sys/cdefs.h>
83__KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.139 2017/11/15 06:17:40 ozaki-r Exp $");
84
85#ifdef _KERNEL_OPT
86#include "opt_bridge_ipf.h"
87#include "opt_inet.h"
88#include "opt_net_mpsafe.h"
89#endif /* _KERNEL_OPT */
90
91#include <sys/param.h>
92#include <sys/kernel.h>
93#include <sys/mbuf.h>
94#include <sys/queue.h>
95#include <sys/socket.h>
96#include <sys/socketvar.h> /* for softnet_lock */
97#include <sys/sockio.h>
98#include <sys/systm.h>
99#include <sys/proc.h>
100#include <sys/pool.h>
101#include <sys/kauth.h>
102#include <sys/cpu.h>
103#include <sys/cprng.h>
104#include <sys/mutex.h>
105#include <sys/kmem.h>
106
107#include <net/bpf.h>
108#include <net/if.h>
109#include <net/if_dl.h>
110#include <net/if_types.h>
111#include <net/if_llc.h>
112
113#include <net/if_ether.h>
114#include <net/if_bridgevar.h>
115
116#if defined(BRIDGE_IPF)
117/* Used for bridge_ip[6]_checkbasic */
118#include <netinet/in.h>
119#include <netinet/in_systm.h>
120#include <netinet/ip.h>
121#include <netinet/ip_var.h>
122#include <netinet/ip_private.h>		/* XXX */
123
124#include <netinet/ip6.h>
125#include <netinet6/in6_var.h>
126#include <netinet6/ip6_var.h>
127#include <netinet6/ip6_private.h>	/* XXX */
128#endif /* BRIDGE_IPF */
129
/*
 * Size of the route hash table.  Must be a power of two.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define	BRIDGE_RTHASH_SIZE		1024
#endif

/*
 * Bit mask derived from the table size; it only selects a valid bucket
 * index when BRIDGE_RTHASH_SIZE is a power of two.
 */
#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
138
139#include "carp.h"
140#if NCARP > 0
141#include <netinet/in.h>
142#include <netinet/in_var.h>
143#include <netinet/ip_carp.h>
144#endif
145
146#include "ioconf.h"
147
/*
 * struct ifbifconf and struct ifbaconf must have the same size and the same
 * offsets for their length and buffer members: bridge_ioctl() stores the
 * user length/buffer through the ifbifconf view of its argument union for
 * both BRDGGIFS and BRDGRTS, and reads them back the same way.
 */
__CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
__CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
__CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
151
/*
 * Maximum number of addresses to cache.
 * bridge_clone_create() uses this as the initial sc_brtmax.
 */
#ifndef BRIDGE_RTABLE_MAX
#define	BRIDGE_RTABLE_MAX		100
#endif

/*
 * Spanning tree defaults.
 * NOTE(review): the "* 256" scaling presumably expresses the timer values
 * in units of 1/256 second -- confirm against the BSTP code.
 */
#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
#define	BSTP_DEFAULT_PATH_COST		55

/*
 * Timeout (in seconds) for entries learned dynamically.
 * bridge_clone_create() uses this as the initial sc_brttimeout.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 * This is the initial value of bridge_rtable_prune_period.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
#endif
183
/*
 * Locking helpers for the routing-table list.
 *
 * BRIDGE_RT_LOCK/BRIDGE_RT_UNLOCK take and drop sc_rtlist_lock; when that
 * pointer is NULL they do nothing, and BRIDGE_RT_LOCKED() then evaluates
 * to true.  BRIDGE_RT_PSZ_PERFORM runs pserialize_perform() on
 * sc_rtlist_psz when one is set.  BRIDGE_RT_RENTER/BRIDGE_RT_REXIT bracket
 * a pserialize read section, keeping the value returned by
 * pserialize_read_enter() in the variable supplied by the caller.
 *
 * Every statement-like macro is wrapped in do { } while (0) so that it
 * expands to exactly one statement.  A bare "if" would capture the "else"
 * of an enclosing if/else, and a ';' inside the macro body would break
 * "if (x) MACRO(sc); else ...".  Callers must supply the terminating ';'.
 */
#define BRIDGE_RT_LOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_enter((_sc)->sc_rtlist_lock); \
				} while (0)
#define BRIDGE_RT_UNLOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_exit((_sc)->sc_rtlist_lock); \
				} while (0)
#define BRIDGE_RT_LOCKED(_sc)	(!(_sc)->sc_rtlist_lock || \
				 mutex_owned((_sc)->sc_rtlist_lock))

#define BRIDGE_RT_PSZ_PERFORM(_sc) \
				do {					\
					if ((_sc)->sc_rtlist_psz != NULL) \
						pserialize_perform((_sc)->sc_rtlist_psz); \
				} while (0)

#define BRIDGE_RT_RENTER(__s)	do { __s = pserialize_read_enter(); } while (0)
#define BRIDGE_RT_REXIT(__s)	do { pserialize_read_exit(__s); } while (0)

198
/*
 * Helpers for taking the global network locks.
 *
 * With NET_MPSAFE defined all three expand to nothing.  Otherwise
 * ACQUIRE_GLOBAL_LOCKS() takes the kernel lock and softnet_lock and then
 * calls splsoftnet(), and RELEASE_GLOBAL_LOCKS() undoes those steps in
 * reverse order.  The non-MPSAFE variants keep the saved SPL in "__s", so
 * DECLARE_LOCK_VARIABLE must appear in the enclosing scope before either
 * macro is used.
 */
#ifdef NET_MPSAFE
#define DECLARE_LOCK_VARIABLE
#define ACQUIRE_GLOBAL_LOCKS()	do { } while (0)
#define RELEASE_GLOBAL_LOCKS()	do { } while (0)
#else
#define DECLARE_LOCK_VARIABLE	int __s
#define ACQUIRE_GLOBAL_LOCKS()	do {					\
					KERNEL_LOCK(1, NULL);		\
					mutex_enter(softnet_lock);	\
					__s = splsoftnet();		\
				} while (0)
#define RELEASE_GLOBAL_LOCKS()	do {					\
					splx(__s);			\
					mutex_exit(softnet_lock);	\
					KERNEL_UNLOCK_ONE(NULL);	\
				} while (0)
#endif
216
/* psref class shared by all bridge members; created in bridgeattach(). */
struct psref_class *bridge_psref_class __read_mostly;

/* Defaults to BRIDGE_RTABLE_PRUNE_PERIOD (seconds between route list walks). */
int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* Pool of struct bridge_rtnode objects; initialized in bridgeattach(). */
static struct pool bridge_rtnode_pool;
222
/* Handlers registered with the interface cloner (see bridge_cloner). */
static int	bridge_clone_create(struct if_clone *, int);
static int	bridge_clone_destroy(struct ifnet *);

/* ifnet entry points installed by bridge_clone_create(). */
static int	bridge_ioctl(struct ifnet *, u_long, void *);
static int	bridge_init(struct ifnet *);
static void	bridge_stop(struct ifnet *, int);
static void	bridge_start(struct ifnet *);

/* bridge_input replaces a member's _if_input in bridge_ioctl_add(). */
static void	bridge_input(struct ifnet *, struct mbuf *);
static void	bridge_forward(struct bridge_softc *, struct mbuf *);

static void	bridge_timer(void *);

static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
				 struct mbuf *);

/* Routing (address) table helpers. */
static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
				struct ifnet *, int, uint8_t);
static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
static void	bridge_rttrim(struct bridge_softc *);
static void	bridge_rtage(struct bridge_softc *);
static void	bridge_rtage_work(struct work *, void *);
static void	bridge_rtflush(struct bridge_softc *, int);
static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);

static void	bridge_rtable_init(struct bridge_softc *);
static void	bridge_rtable_fini(struct bridge_softc *);

static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
						  const uint8_t *);
static int	bridge_rtnode_insert(struct bridge_softc *,
				     struct bridge_rtnode *);
static void	bridge_rtnode_remove(struct bridge_softc *,
				     struct bridge_rtnode *);
static void	bridge_rtnode_destroy(struct bridge_rtnode *);

/* Member list helpers; lookups hand back a psref-protected member. */
static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
						  const char *name,
						  struct psref *);
static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
						     struct ifnet *ifp,
						     struct psref *);
static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *,
                                      struct psref *);
static void	bridge_delete_member(struct bridge_softc *,
				     struct bridge_iflist *);
static void	bridge_acquire_member(struct bridge_softc *sc,
                                      struct bridge_iflist *,
                                      struct psref *);

/* SIOC[GS]DRVSPEC handlers dispatched through bridge_control_table. */
static int	bridge_ioctl_add(struct bridge_softc *, void *);
static int	bridge_ioctl_del(struct bridge_softc *, void *);
static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
static int	bridge_ioctl_scache(struct bridge_softc *, void *);
static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
static int	bridge_ioctl_rts(struct bridge_softc *, void *);
static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
static int	bridge_ioctl_sto(struct bridge_softc *, void *);
static int	bridge_ioctl_gto(struct bridge_softc *, void *);
static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
static int	bridge_ioctl_flush(struct bridge_softc *, void *);
static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
static int	bridge_ioctl_spri(struct bridge_softc *, void *);
static int	bridge_ioctl_ght(struct bridge_softc *, void *);
static int	bridge_ioctl_sht(struct bridge_softc *, void *);
static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
static int	bridge_ioctl_gma(struct bridge_softc *, void *);
static int	bridge_ioctl_sma(struct bridge_softc *, void *);
static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
/* The remaining declarations exist only when BRIDGE_IPF is configured. */
#if defined(BRIDGE_IPF)
static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
static int	bridge_ip_checkbasic(struct mbuf **mp);
# ifdef INET6
static int	bridge_ip6_checkbasic(struct mbuf **mp);
# endif /* INET6 */
#endif /* BRIDGE_IPF */
306
/*
 * One entry of the SIOC[GS]DRVSPEC dispatch table used by bridge_ioctl().
 *
 * bc_func is the handler, called with the softc and a pointer to the
 * kernel copy of the argument.  bc_argsize is the exact ifd_len that
 * bridge_ioctl() requires for commands without BC_F_XLATEIN/BC_F_XLATEOUT.
 * bc_flags is a mask of the BC_F_* values below.
 */
struct bridge_control {
	int	(*bc_func)(struct bridge_softc *, void *);
	int	bc_argsize;
	int	bc_flags;
};

/*
 * BC_F_COPYIN/BC_F_COPYOUT make bridge_ioctl() copyin()/copyout() the
 * whole argument structure.  BC_F_XLATEIN/BC_F_XLATEOUT instead pass
 * ifd_len and ifd_data through the argument's length/buffer members.
 * BC_F_SUSER requests a kauth check before the handler runs.
 */
#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
#define BC_F_XLATEIN		0x08	/* xlate arguments in */
#define BC_F_XLATEOUT		0x10	/* xlate arguments out */
318
/*
 * Dispatch table for SIOC[GS]DRVSPEC, indexed by ifd_cmd.
 *
 * The table uses designated initializers, so any index below the highest
 * one that is not listed here (for example the BRDGGFILT/BRDGSFILT slots
 * when BRIDGE_IPF is not defined) is zero-initialized: bc_func is NULL and
 * bc_argsize and bc_flags are 0.
 */
static const struct bridge_control bridge_control_table[] = {
[BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
[BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
[BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
[BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},

[OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
[OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},

[BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
[BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},

[BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},

[BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
#if defined(BRIDGE_IPF)
[BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
#endif /* BRIDGE_IPF */
/* The newer list commands pass the user buffer through untouched. */
[BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
[BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
};

/* Number of slots in the table, including any zero-initialized gaps. */
static const int bridge_control_table_size = __arraycount(bridge_control_table);
365
/* Cloner for "bridge" interfaces; attached in bridgeattach(). */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
368
369/*
370 * bridgeattach:
371 *
372 *	Pseudo-device attach routine.
373 */
374void
375bridgeattach(int n)
376{
377
378	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
379	    0, 0, 0, "brtpl", NULL, IPL_NET);
380
381	bridge_psref_class = psref_class_create("bridge", IPL_SOFTNET);
382
383	if_clone_attach(&bridge_cloner);
384}
385
/*
 * bridge_clone_create:
 *
 *	Create a new bridge instance.
 *
 *	Allocates a zeroed softc, applies the compile-time defaults, sets up
 *	the routing table, the route-aging workqueue, the two callouts and
 *	the member list, then initializes and registers the ifnet embedded
 *	in the softc.  Returns 0 on success or the error reported by
 *	if_initialize(), in which case everything set up here is torn down
 *	again.  A workqueue_create() failure panics.
 */
static int
bridge_clone_create(struct if_clone *ifc, int unit)
{
	struct bridge_softc *sc;
	struct ifnet *ifp;
	int error;

	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
	ifp = &sc->sc_if;

	/* Routing table and spanning tree defaults. */
	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
	sc->sc_filter_flags = 0;

	/* Initialize our routing table. */
	bridge_rtable_init(sc);

	/* Workqueue on which bridge_rtage_work() runs with sc as argument. */
	error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
	    bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create %d\n", __func__, error);

	callout_init(&sc->sc_brcallout, CALLOUT_MPSAFE);
	callout_init(&sc->sc_bstpcallout, CALLOUT_MPSAFE);

	/* Member list: lock, list head and pserialize instance. */
	mutex_init(&sc->sc_iflist_psref.bip_lock, MUTEX_DEFAULT, IPL_NONE);
	PSLIST_INIT(&sc->sc_iflist_psref.bip_iflist);
	sc->sc_iflist_psref.bip_psz = pserialize_create();

	/* Fill in the ifnet embedded in the softc. */
	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_softc = sc;
	ifp->if_extflags = IFEF_OUTPUT_MPSAFE;
	ifp->if_mtu = ETHERMTU;
	ifp->if_ioctl = bridge_ioctl;
	ifp->if_output = bridge_output;
	ifp->if_start = bridge_start;
	ifp->if_stop = bridge_stop;
	ifp->if_init = bridge_init;
	ifp->if_type = IFT_BRIDGE;
	ifp->if_addrlen = 0;
	ifp->if_dlt = DLT_EN10MB;
	ifp->if_hdrlen = ETHER_HDR_LEN;

	error = if_initialize(ifp);
	if (error != 0) {
		/* Undo everything set up above before reporting the error. */
		pserialize_destroy(sc->sc_iflist_psref.bip_psz);
		mutex_destroy(&sc->sc_iflist_psref.bip_lock);
		callout_destroy(&sc->sc_brcallout);
		callout_destroy(&sc->sc_bstpcallout);
		workqueue_destroy(sc->sc_rtage_wq);
		bridge_rtable_fini(sc);
		kmem_free(sc, sizeof(*sc));

		return error;
	}
	if_register(ifp);

	if_alloc_sadl(ifp);

	return 0;
}
457
/*
 * bridge_clone_destroy:
 *
 *	Destroy a bridge instance.
 *
 *	Stops the bridge, deletes every remaining member, detaches the
 *	interface and then releases the resources that
 *	bridge_clone_create() set up.  Always returns 0.
 *
 *	NOTE(review): unlike bridge_ioctl_del(), nothing visible here takes
 *	the members out of promiscuous mode or disables their VLAN MTU --
 *	confirm whether that is handled elsewhere.
 */
static int
bridge_clone_destroy(struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct bridge_iflist *bif;

	bridge_stop(ifp, 1);

	BRIDGE_LOCK(sc);
	/*
	 * Re-read the list head on every pass: bridge_delete_member()
	 * unlinks and frees the entry and drops the bridge lock while
	 * doing so.
	 */
	for (;;) {
		bif = PSLIST_WRITER_FIRST(&sc->sc_iflist_psref.bip_iflist, struct bridge_iflist,
		    bif_next);
		if (bif == NULL)
			break;
		bridge_delete_member(sc, bif);
	}
	PSLIST_DESTROY(&sc->sc_iflist_psref.bip_iflist);
	BRIDGE_UNLOCK(sc);

	if_detach(ifp);

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* Release what bridge_clone_create() allocated, then the softc. */
	pserialize_destroy(sc->sc_iflist_psref.bip_psz);
	mutex_destroy(&sc->sc_iflist_psref.bip_lock);
	callout_destroy(&sc->sc_brcallout);
	callout_destroy(&sc->sc_bstpcallout);
	workqueue_destroy(sc->sc_rtage_wq);
	kmem_free(sc, sizeof(*sc));

	return 0;
}
496
497/*
498 * bridge_ioctl:
499 *
500 *	Handle a control request from the operator.
501 */
502static int
503bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
504{
505	struct bridge_softc *sc = ifp->if_softc;
506	struct lwp *l = curlwp;	/* XXX */
507	union {
508		struct ifbreq ifbreq;
509		struct ifbifconf ifbifconf;
510		struct ifbareq ifbareq;
511		struct ifbaconf ifbaconf;
512		struct ifbrparam ifbrparam;
513	} args;
514	struct ifdrv *ifd = (struct ifdrv *) data;
515	const struct bridge_control *bc = NULL; /* XXXGCC */
516	int s, error = 0;
517
518	/* Authorize command before calling splsoftnet(). */
519	switch (cmd) {
520	case SIOCGDRVSPEC:
521	case SIOCSDRVSPEC:
522		if (ifd->ifd_cmd >= bridge_control_table_size
523		    || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
524			error = EINVAL;
525			return error;
526		}
527
528		/* We only care about BC_F_SUSER at this point. */
529		if ((bc->bc_flags & BC_F_SUSER) == 0)
530			break;
531
532		error = kauth_authorize_network(l->l_cred,
533		    KAUTH_NETWORK_INTERFACE_BRIDGE,
534		    cmd == SIOCGDRVSPEC ?
535		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
536		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
537		     ifd, NULL, NULL);
538		if (error)
539			return error;
540
541		break;
542	}
543
544	s = splsoftnet();
545
546	switch (cmd) {
547	case SIOCGDRVSPEC:
548	case SIOCSDRVSPEC:
549		KASSERT(bc != NULL);
550		if (cmd == SIOCGDRVSPEC &&
551		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
552			error = EINVAL;
553			break;
554		}
555		else if (cmd == SIOCSDRVSPEC &&
556		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
557			error = EINVAL;
558			break;
559		}
560
561		/* BC_F_SUSER is checked above, before splsoftnet(). */
562
563		if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
564		    && (ifd->ifd_len != bc->bc_argsize
565			|| ifd->ifd_len > sizeof(args))) {
566			error = EINVAL;
567			break;
568		}
569
570		memset(&args, 0, sizeof(args));
571		if (bc->bc_flags & BC_F_COPYIN) {
572			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
573			if (error)
574				break;
575		} else if (bc->bc_flags & BC_F_XLATEIN) {
576			args.ifbifconf.ifbic_len = ifd->ifd_len;
577			args.ifbifconf.ifbic_buf = ifd->ifd_data;
578		}
579
580		error = (*bc->bc_func)(sc, &args);
581		if (error)
582			break;
583
584		if (bc->bc_flags & BC_F_COPYOUT) {
585			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
586		} else if (bc->bc_flags & BC_F_XLATEOUT) {
587			ifd->ifd_len = args.ifbifconf.ifbic_len;
588			ifd->ifd_data = args.ifbifconf.ifbic_buf;
589		}
590		break;
591
592	case SIOCSIFFLAGS:
593		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
594			break;
595		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
596		case IFF_RUNNING:
597			/*
598			 * If interface is marked down and it is running,
599			 * then stop and disable it.
600			 */
601			(*ifp->if_stop)(ifp, 1);
602			break;
603		case IFF_UP:
604			/*
605			 * If interface is marked up and it is stopped, then
606			 * start it.
607			 */
608			error = (*ifp->if_init)(ifp);
609			break;
610		default:
611			break;
612		}
613		break;
614
615	case SIOCSIFMTU:
616		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
617			error = 0;
618		break;
619
620	default:
621		error = ifioctl_common(ifp, cmd, data);
622		break;
623	}
624
625	splx(s);
626
627	return error;
628}
629
630/*
631 * bridge_lookup_member:
632 *
633 *	Lookup a bridge member interface.
634 */
635static struct bridge_iflist *
636bridge_lookup_member(struct bridge_softc *sc, const char *name, struct psref *psref)
637{
638	struct bridge_iflist *bif;
639	struct ifnet *ifp;
640	int s;
641
642	BRIDGE_PSZ_RENTER(s);
643
644	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
645		ifp = bif->bif_ifp;
646		if (strcmp(ifp->if_xname, name) == 0)
647			break;
648	}
649	if (bif != NULL)
650		bridge_acquire_member(sc, bif, psref);
651
652	BRIDGE_PSZ_REXIT(s);
653
654	return bif;
655}
656
657/*
658 * bridge_lookup_member_if:
659 *
660 *	Lookup a bridge member interface by ifnet*.
661 */
662static struct bridge_iflist *
663bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp,
664    struct psref *psref)
665{
666	struct bridge_iflist *bif;
667	int s;
668
669	BRIDGE_PSZ_RENTER(s);
670
671	bif = member_ifp->if_bridgeif;
672	if (bif != NULL) {
673		psref_acquire(psref, &bif->bif_psref,
674		    bridge_psref_class);
675	}
676
677	BRIDGE_PSZ_REXIT(s);
678
679	return bif;
680}
681
682static void
683bridge_acquire_member(struct bridge_softc *sc, struct bridge_iflist *bif,
684    struct psref *psref)
685{
686
687	psref_acquire(psref, &bif->bif_psref, bridge_psref_class);
688}
689
690/*
691 * bridge_release_member:
692 *
693 *	Release the specified member interface.
694 */
695static void
696bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif,
697    struct psref *psref)
698{
699
700	psref_release(psref, &bif->bif_psref, bridge_psref_class);
701}
702
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	Must be called with the bridge lock held (asserted below).  The
 *	lock is dropped while the member is destroyed and freed, and is
 *	held again on return, so the member list may have changed by the
 *	time the caller continues.  "bif" is freed and must not be used
 *	afterwards.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
	struct ifnet *ifs = bif->bif_ifp;

	KASSERT(BRIDGE_LOCKED(sc));

	/* Restore the normal input routine and detach from the bridge. */
	ifs->_if_input = ether_input;
	ifs->if_bridge = NULL;
	ifs->if_bridgeif = NULL;

	/* Unlink the entry; BRIDGE_PSZ_PERFORM presumably waits for readers. */
	PSLIST_WRITER_REMOVE(bif, bif_next);
	BRIDGE_PSZ_PERFORM(sc);
	BRIDGE_UNLOCK(sc);

	/*
	 * NOTE(review): the lock is released around psref_target_destroy(),
	 * presumably because it may block until references are dropped --
	 * confirm against psref(9).
	 */
	psref_target_destroy(&bif->bif_psref, bridge_psref_class);

	PSLIST_ENTRY_DESTROY(bif, bif_next);
	kmem_free(bif, sizeof(*bif));

	BRIDGE_LOCK(sc);
}
730
/*
 * bridge_ioctl_add:
 *
 *	BRDGADD handler: add the interface named in the struct ifbreq
 *	argument as a member of bridge "sc".
 *
 *	Returns ENOENT if the interface does not exist, EEXIST if it is
 *	already a member of this bridge, EBUSY if it belongs to another
 *	bridge, and EINVAL if its input routine is not ether_input, it is
 *	not IFF_SIMPLEX, its type is neither IFT_ETHER nor IFT_L2TP, or (for
 *	IFT_ETHER) its MTU differs from the bridge's.  Errors from
 *	ether_enable_vlan_mtu() and ifpromisc() are passed through.
 */
static int
bridge_ioctl_add(struct bridge_softc *sc, void *arg)
{
	struct ifbreq *req = arg;
	struct bridge_iflist *bif = NULL;
	struct ifnet *ifs;
	int error = 0;
	struct psref psref;

	/* Hold a reference on the interface until the "out" label. */
	ifs = if_get(req->ifbr_ifsname, &psref);
	if (ifs == NULL)
		return ENOENT;

	if (ifs->if_bridge == sc) {
		error = EEXIST;
		goto out;
	}

	if (ifs->if_bridge != NULL) {
		error = EBUSY;
		goto out;
	}

	/* Only interfaces still using the stock Ethernet input are accepted. */
	if (ifs->_if_input != ether_input) {
		error = EINVAL;
		goto out;
	}

	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
	if ((ifs->if_flags & IFF_SIMPLEX) == 0) {
		error = EINVAL;
		goto out;
	}

	/* Freed at "out" if any of the checks below fails. */
	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);

	switch (ifs->if_type) {
	case IFT_ETHER:
		if (sc->sc_if.if_mtu != ifs->if_mtu) {
			error = EINVAL;
			goto out;
		}
		/* FALLTHROUGH */
	case IFT_L2TP:
		/* Only positive return values are treated as failures here. */
		if ((error = ether_enable_vlan_mtu(ifs)) > 0)
			goto out;
		/*
		 * Place the interface into promiscuous mode.
		 */
		error = ifpromisc(ifs, 1);
		if (error)
			goto out;
		break;
	default:
		error = EINVAL;
		goto out;
	}

	/*
	 * Only the fields below are set here; the rest of *bif is not
	 * zeroed because it comes from kmem_alloc().
	 */
	bif->bif_ifp = ifs;
	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
	PSLIST_ENTRY_INIT(bif, bif_next);
	psref_target_init(&bif->bif_psref, bridge_psref_class);

	BRIDGE_LOCK(sc);

	/* Publish the member, then redirect the interface's input to us. */
	ifs->if_bridge = sc;
	ifs->if_bridgeif = bif;
	PSLIST_WRITER_INSERT_HEAD(&sc->sc_iflist_psref.bip_iflist, bif, bif_next);
	ifs->_if_input = bridge_input;

	BRIDGE_UNLOCK(sc);

	if (sc->sc_if.if_flags & IFF_RUNNING)
		bstp_initialization(sc);
	else
		bstp_stop(sc);

 out:
	if_put(ifs, &psref);
	if (error) {
		if (bif != NULL)
			kmem_free(bif, sizeof(*bif));
	}
	return error;
}
818
819static int
820bridge_ioctl_del(struct bridge_softc *sc, void *arg)
821{
822	struct ifbreq *req = arg;
823	const char *name = req->ifbr_ifsname;
824	struct bridge_iflist *bif;
825	struct ifnet *ifs;
826
827	BRIDGE_LOCK(sc);
828
829	/*
830	 * Don't use bridge_lookup_member. We want to get a member
831	 * with bif_refs == 0.
832	 */
833	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
834		ifs = bif->bif_ifp;
835		if (strcmp(ifs->if_xname, name) == 0)
836			break;
837	}
838
839	if (bif == NULL) {
840		BRIDGE_UNLOCK(sc);
841		return ENOENT;
842	}
843
844	bridge_delete_member(sc, bif);
845
846	BRIDGE_UNLOCK(sc);
847
848	switch (ifs->if_type) {
849	case IFT_ETHER:
850	case IFT_L2TP:
851		/*
852		 * Take the interface out of promiscuous mode.
853		 * Don't call it with holding a spin lock.
854		 */
855		(void) ifpromisc(ifs, 0);
856		(void) ether_disable_vlan_mtu(ifs);
857		break;
858	default:
859#ifdef DIAGNOSTIC
860		panic("bridge_delete_member: impossible");
861#endif
862		break;
863	}
864
865	bridge_rtdelete(sc, ifs);
866
867	if (sc->sc_if.if_flags & IFF_RUNNING)
868		bstp_initialization(sc);
869
870	return 0;
871}
872
873static int
874bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
875{
876	struct ifbreq *req = arg;
877	struct bridge_iflist *bif;
878	struct psref psref;
879
880	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
881	if (bif == NULL)
882		return ENOENT;
883
884	req->ifbr_ifsflags = bif->bif_flags;
885	req->ifbr_state = bif->bif_state;
886	req->ifbr_priority = bif->bif_priority;
887	req->ifbr_path_cost = bif->bif_path_cost;
888	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
889
890	bridge_release_member(sc, bif, &psref);
891
892	return 0;
893}
894
895static int
896bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
897{
898	struct ifbreq *req = arg;
899	struct bridge_iflist *bif;
900	struct psref psref;
901
902	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
903	if (bif == NULL)
904		return ENOENT;
905
906	if (req->ifbr_ifsflags & IFBIF_STP) {
907		switch (bif->bif_ifp->if_type) {
908		case IFT_ETHER:
909		case IFT_L2TP:
910			/* These can do spanning tree. */
911			break;
912
913		default:
914			/* Nothing else can. */
915			bridge_release_member(sc, bif, &psref);
916			return EINVAL;
917		}
918	}
919
920	bif->bif_flags = req->ifbr_ifsflags;
921
922	bridge_release_member(sc, bif, &psref);
923
924	if (sc->sc_if.if_flags & IFF_RUNNING)
925		bstp_initialization(sc);
926
927	return 0;
928}
929
930static int
931bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
932{
933	struct ifbrparam *param = arg;
934
935	sc->sc_brtmax = param->ifbrp_csize;
936	bridge_rttrim(sc);
937
938	return 0;
939}
940
941static int
942bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
943{
944	struct ifbrparam *param = arg;
945
946	param->ifbrp_csize = sc->sc_brtmax;
947
948	return 0;
949}
950
951static int
952bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
953{
954	struct ifbifconf *bifc = arg;
955	struct bridge_iflist *bif;
956	struct ifbreq *breqs;
957	int i, count, error = 0;
958
959retry:
960	BRIDGE_LOCK(sc);
961	count = 0;
962	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
963		count++;
964	BRIDGE_UNLOCK(sc);
965
966	if (count == 0) {
967		bifc->ifbic_len = 0;
968		return 0;
969	}
970
971	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
972		/* Tell that a larger buffer is needed */
973		bifc->ifbic_len = sizeof(*breqs) * count;
974		return 0;
975	}
976
977	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
978
979	BRIDGE_LOCK(sc);
980
981	i = 0;
982	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
983		i++;
984	if (i > count) {
985		/*
986		 * The number of members has been increased.
987		 * We need more memory!
988		 */
989		BRIDGE_UNLOCK(sc);
990		kmem_free(breqs, sizeof(*breqs) * count);
991		goto retry;
992	}
993
994	i = 0;
995	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
996		struct ifbreq *breq = &breqs[i++];
997		memset(breq, 0, sizeof(*breq));
998
999		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1000		    sizeof(breq->ifbr_ifsname));
1001		breq->ifbr_ifsflags = bif->bif_flags;
1002		breq->ifbr_state = bif->bif_state;
1003		breq->ifbr_priority = bif->bif_priority;
1004		breq->ifbr_path_cost = bif->bif_path_cost;
1005		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1006	}
1007
1008	/* Don't call copyout with holding the mutex */
1009	BRIDGE_UNLOCK(sc);
1010
1011	for (i = 0; i < count; i++) {
1012		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1013		if (error)
1014			break;
1015	}
1016	bifc->ifbic_len = sizeof(*breqs) * i;
1017
1018	kmem_free(breqs, sizeof(*breqs) * count);
1019
1020	return error;
1021}
1022
1023static int
1024bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1025{
1026	struct ifbaconf *bac = arg;
1027	struct bridge_rtnode *brt;
1028	struct ifbareq bareq;
1029	int count = 0, error = 0, len;
1030
1031	if (bac->ifbac_len == 0)
1032		return 0;
1033
1034	BRIDGE_RT_LOCK(sc);
1035
1036	len = bac->ifbac_len;
1037	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1038		if (len < sizeof(bareq))
1039			goto out;
1040		memset(&bareq, 0, sizeof(bareq));
1041		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1042		    sizeof(bareq.ifba_ifsname));
1043		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1044		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1045			bareq.ifba_expire = brt->brt_expire - time_uptime;
1046		} else
1047			bareq.ifba_expire = 0;
1048		bareq.ifba_flags = brt->brt_flags;
1049
1050		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1051		if (error)
1052			goto out;
1053		count++;
1054		len -= sizeof(bareq);
1055	}
1056 out:
1057	BRIDGE_RT_UNLOCK(sc);
1058
1059	bac->ifbac_len = sizeof(bareq) * count;
1060	return error;
1061}
1062
1063static int
1064bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1065{
1066	struct ifbareq *req = arg;
1067	struct bridge_iflist *bif;
1068	int error;
1069	struct psref psref;
1070
1071	bif = bridge_lookup_member(sc, req->ifba_ifsname, &psref);
1072	if (bif == NULL)
1073		return ENOENT;
1074
1075	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1076	    req->ifba_flags);
1077
1078	bridge_release_member(sc, bif, &psref);
1079
1080	return error;
1081}
1082
1083static int
1084bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1085{
1086	struct ifbrparam *param = arg;
1087
1088	sc->sc_brttimeout = param->ifbrp_ctime;
1089
1090	return 0;
1091}
1092
1093static int
1094bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1095{
1096	struct ifbrparam *param = arg;
1097
1098	param->ifbrp_ctime = sc->sc_brttimeout;
1099
1100	return 0;
1101}
1102
1103static int
1104bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1105{
1106	struct ifbareq *req = arg;
1107
1108	return (bridge_rtdaddr(sc, req->ifba_dst));
1109}
1110
1111static int
1112bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1113{
1114	struct ifbreq *req = arg;
1115
1116	bridge_rtflush(sc, req->ifbr_ifsflags);
1117
1118	return 0;
1119}
1120
1121static int
1122bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1123{
1124	struct ifbrparam *param = arg;
1125
1126	param->ifbrp_prio = sc->sc_bridge_priority;
1127
1128	return 0;
1129}
1130
1131static int
1132bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1133{
1134	struct ifbrparam *param = arg;
1135
1136	sc->sc_bridge_priority = param->ifbrp_prio;
1137
1138	if (sc->sc_if.if_flags & IFF_RUNNING)
1139		bstp_initialization(sc);
1140
1141	return 0;
1142}
1143
1144static int
1145bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1146{
1147	struct ifbrparam *param = arg;
1148
1149	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1150
1151	return 0;
1152}
1153
1154static int
1155bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1156{
1157	struct ifbrparam *param = arg;
1158
1159	if (param->ifbrp_hellotime == 0)
1160		return EINVAL;
1161	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1162
1163	if (sc->sc_if.if_flags & IFF_RUNNING)
1164		bstp_initialization(sc);
1165
1166	return 0;
1167}
1168
1169static int
1170bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1171{
1172	struct ifbrparam *param = arg;
1173
1174	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1175
1176	return 0;
1177}
1178
1179static int
1180bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1181{
1182	struct ifbrparam *param = arg;
1183
1184	if (param->ifbrp_fwddelay == 0)
1185		return EINVAL;
1186	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1187
1188	if (sc->sc_if.if_flags & IFF_RUNNING)
1189		bstp_initialization(sc);
1190
1191	return 0;
1192}
1193
1194static int
1195bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1196{
1197	struct ifbrparam *param = arg;
1198
1199	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1200
1201	return 0;
1202}
1203
1204static int
1205bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1206{
1207	struct ifbrparam *param = arg;
1208
1209	if (param->ifbrp_maxage == 0)
1210		return EINVAL;
1211	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1212
1213	if (sc->sc_if.if_flags & IFF_RUNNING)
1214		bstp_initialization(sc);
1215
1216	return 0;
1217}
1218
1219static int
1220bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1221{
1222	struct ifbreq *req = arg;
1223	struct bridge_iflist *bif;
1224	struct psref psref;
1225
1226	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1227	if (bif == NULL)
1228		return ENOENT;
1229
1230	bif->bif_priority = req->ifbr_priority;
1231
1232	if (sc->sc_if.if_flags & IFF_RUNNING)
1233		bstp_initialization(sc);
1234
1235	bridge_release_member(sc, bif, &psref);
1236
1237	return 0;
1238}
1239
1240#if defined(BRIDGE_IPF)
1241static int
1242bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1243{
1244	struct ifbrparam *param = arg;
1245
1246	param->ifbrp_filter = sc->sc_filter_flags;
1247
1248	return 0;
1249}
1250
1251static int
1252bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1253{
1254	struct ifbrparam *param = arg;
1255	uint32_t nflags, oflags;
1256
1257	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1258		return EINVAL;
1259
1260	nflags = param->ifbrp_filter;
1261	oflags = sc->sc_filter_flags;
1262
1263	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1264		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1265			sc->sc_if.if_pfil);
1266	}
1267	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1268		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1269			sc->sc_if.if_pfil);
1270	}
1271
1272	sc->sc_filter_flags = nflags;
1273
1274	return 0;
1275}
1276#endif /* BRIDGE_IPF */
1277
1278static int
1279bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1280{
1281	struct ifbreq *req = arg;
1282	struct bridge_iflist *bif;
1283	struct psref psref;
1284
1285	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1286	if (bif == NULL)
1287		return ENOENT;
1288
1289	bif->bif_path_cost = req->ifbr_path_cost;
1290
1291	if (sc->sc_if.if_flags & IFF_RUNNING)
1292		bstp_initialization(sc);
1293
1294	bridge_release_member(sc, bif, &psref);
1295
1296	return 0;
1297}
1298
1299/*
1300 * bridge_ifdetach:
1301 *
1302 *	Detach an interface from a bridge.  Called when a member
1303 *	interface is detaching.
1304 */
1305void
1306bridge_ifdetach(struct ifnet *ifp)
1307{
1308	struct bridge_softc *sc = ifp->if_bridge;
1309	struct ifbreq breq;
1310
1311	/* ioctl_lock should prevent this from happening */
1312	KASSERT(sc != NULL);
1313
1314	memset(&breq, 0, sizeof(breq));
1315	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1316
1317	(void) bridge_ioctl_del(sc, &breq);
1318}
1319
1320/*
1321 * bridge_init:
1322 *
1323 *	Initialize a bridge interface.
1324 */
1325static int
1326bridge_init(struct ifnet *ifp)
1327{
1328	struct bridge_softc *sc = ifp->if_softc;
1329
1330	if (ifp->if_flags & IFF_RUNNING)
1331		return 0;
1332
1333	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1334	    bridge_timer, sc);
1335
1336	ifp->if_flags |= IFF_RUNNING;
1337	bstp_initialization(sc);
1338	return 0;
1339}
1340
1341/*
1342 * bridge_stop:
1343 *
1344 *	Stop the bridge interface.
1345 */
1346static void
1347bridge_stop(struct ifnet *ifp, int disable)
1348{
1349	struct bridge_softc *sc = ifp->if_softc;
1350
1351	if ((ifp->if_flags & IFF_RUNNING) == 0)
1352		return;
1353
1354	callout_stop(&sc->sc_brcallout);
1355	bstp_stop(sc);
1356
1357	bridge_rtflush(sc, IFBF_FLUSHDYN);
1358
1359	ifp->if_flags &= ~IFF_RUNNING;
1360}
1361
1362/*
1363 * bridge_enqueue:
1364 *
1365 *	Enqueue a packet on a bridge member interface.
1366 */
1367void
1368bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1369    int runfilt)
1370{
1371	int len, error;
1372	short mflags;
1373
1374	/*
1375	 * Clear any in-bound checksum flags for this packet.
1376	 */
1377	m->m_pkthdr.csum_flags = 0;
1378
1379	if (runfilt) {
1380		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1381		    dst_ifp, PFIL_OUT) != 0) {
1382			if (m != NULL)
1383				m_freem(m);
1384			return;
1385		}
1386		if (m == NULL)
1387			return;
1388	}
1389
1390#ifdef ALTQ
1391	KERNEL_LOCK(1, NULL);
1392	/*
1393	 * If ALTQ is enabled on the member interface, do
1394	 * classification; the queueing discipline might
1395	 * not require classification, but might require
1396	 * the address family/header pointer in the pktattr.
1397	 */
1398	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1399		/* XXX IFT_ETHER */
1400		altq_etherclassify(&dst_ifp->if_snd, m);
1401	}
1402	KERNEL_UNLOCK_ONE(NULL);
1403#endif /* ALTQ */
1404
1405	len = m->m_pkthdr.len;
1406	mflags = m->m_flags;
1407
1408	error = if_transmit_lock(dst_ifp, m);
1409	if (error) {
1410		/* mbuf is already freed */
1411		sc->sc_if.if_oerrors++;
1412		return;
1413	}
1414
1415	sc->sc_if.if_opackets++;
1416	sc->sc_if.if_obytes += len;
1417	if (mflags & M_MCAST)
1418		sc->sc_if.if_omcasts++;
1419}
1420
/*
 * bridge_output:
 *
 *	Send output from a bridge member interface.  This
 *	performs the bridging function for locally originated
 *	packets.
 *
 *	The mbuf has the Ethernet header already attached.  We must
 *	enqueue or free the mbuf before returning.
 *
 *	ifp	member interface the packet was originally sent on
 *	m	packet to send
 *	sa, rt	not referenced by this function
 *
 *	Every return statement in this function returns 0.
 */
int
bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
    const struct rtentry *rt)
{
	struct ether_header *eh;
	struct ifnet *dst_if;
	struct bridge_softc *sc;
	int s;

	/*
	 * bridge_output() is called from ether_output(), furthermore
	 * ifp argument doesn't point to bridge(4). So, don't assert
	 * IFEF_OUTPUT_MPSAFE here.
	 */

	/* Make sure the whole Ethernet header is in the first mbuf. */
	if (m->m_len < ETHER_HDR_LEN) {
		m = m_pullup(m, ETHER_HDR_LEN);
		if (m == NULL)
			return 0;
	}

	eh = mtod(m, struct ether_header *);
	sc = ifp->if_bridge;

	/* Tag the mbuf as broadcast or multicast based on the destination. */
	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		if (memcmp(etherbroadcastaddr,
		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
	}

	/*
	 * If bridge is down, but the original output interface is up,
	 * go ahead and send out that interface.  Otherwise, the packet
	 * is dropped below.
	 *
	 * NOTE(review): when sc is NULL this path still passes sc to
	 * bridge_enqueue(), which updates counters through sc -- confirm
	 * that sc == NULL cannot actually happen here.
	 */
	if (__predict_false(sc == NULL) ||
	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
		dst_if = ifp;
		goto sendunicast;
	}

	/*
	 * If the packet is a multicast, or we don't know a better way to
	 * get there, send to all interfaces.
	 */
	if ((m->m_flags & (M_MCAST | M_BCAST)) != 0)
		dst_if = NULL;
	else
		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
	if (dst_if == NULL) {
		/* XXX Should call bridge_broadcast, but there are locking
		 * issues which need resolving first. */
		struct bridge_iflist *bif;
		struct mbuf *mc;
		/*
		 * "used" records that the original mbuf (rather than a
		 * copy) has been handed off, so it must not be freed at
		 * the end and the loop must stop.
		 */
		bool used = false;

		BRIDGE_PSZ_RENTER(s);
		BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
			struct psref psref;

			/*
			 * Take a reference on the member, then leave the
			 * read section for the duration of the per-member
			 * work; it is re-entered at "next" below.
			 */
			bridge_acquire_member(sc, bif, &psref);
			BRIDGE_PSZ_REXIT(s);

			dst_if = bif->bif_ifp;
			if ((dst_if->if_flags & IFF_RUNNING) == 0)
				goto next;

			/*
			 * If this is not the original output interface,
			 * and the interface is participating in spanning
			 * tree, make sure the port is in a state that
			 * allows forwarding.
			 */
			if (dst_if != ifp &&
			    (bif->bif_flags & IFBIF_STP) != 0) {
				switch (bif->bif_state) {
				case BSTP_IFSTATE_BLOCKING:
				case BSTP_IFSTATE_LISTENING:
				case BSTP_IFSTATE_DISABLED:
					goto next;
				}
			}

			/*
			 * On the last member the original mbuf can be sent
			 * as-is, unless it is still needed for the local
			 * input copy made further down (multicast/broadcast
			 * on a member other than ifp).  Otherwise send a copy.
			 */
			if (PSLIST_READER_NEXT(bif, struct bridge_iflist,
			    bif_next) == NULL &&
			    ((m->m_flags & (M_MCAST | M_BCAST)) == 0 ||
			    dst_if == ifp))
			{
				used = true;
				mc = m;
			} else {
				mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
				if (mc == NULL) {
					sc->sc_if.if_oerrors++;
					goto next;
				}
			}

			bridge_enqueue(sc, dst_if, mc, 0);

			/*
			 * Multicast/broadcast frames are also fed to the
			 * input path of every other member interface.
			 */
			if ((m->m_flags & (M_MCAST | M_BCAST)) != 0 &&
			    dst_if != ifp)
			{
				if (PSLIST_READER_NEXT(bif,
				    struct bridge_iflist, bif_next) == NULL)
				{
					used = true;
					mc = m;
				} else {
					mc = m_copym(m, 0, M_COPYALL,
					    M_DONTWAIT);
					if (mc == NULL) {
						sc->sc_if.if_oerrors++;
						goto next;
					}
				}

				m_set_rcvif(mc, dst_if);
				mc->m_flags &= ~M_PROMISC;

#ifndef NET_MPSAFE
				s = splsoftnet();
#endif
				ether_input(dst_if, mc);
#ifndef NET_MPSAFE
				splx(s);
#endif
			}

next:
			/* Re-enter the read section before dropping the member. */
			BRIDGE_PSZ_RENTER(s);
			bridge_release_member(sc, bif, &psref);

			/* Guarantee we don't re-enter the loop as we already
			 * decided we're at the end. */
			if (used)
				break;
		}
		BRIDGE_PSZ_REXIT(s);

		/* The original was only ever copied; release it now. */
		if (!used)
			m_freem(m);
		return 0;
	}

 sendunicast:
	/*
	 * XXX Spanning tree consideration here?
	 */

	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
		m_freem(m);
		return 0;
	}

	bridge_enqueue(sc, dst_if, m, 0);

	return 0;
}
1592
1593/*
1594 * bridge_start:
1595 *
1596 *	Start output on a bridge.
1597 *
1598 *	NOTE: This routine should never be called in this implementation.
1599 */
1600static void
1601bridge_start(struct ifnet *ifp)
1602{
1603
1604	printf("%s: bridge_start() called\n", ifp->if_xname);
1605}
1606
1607/*
1608 * bridge_forward:
1609 *
1610 *	The forwarding function of the bridge.
1611 */
1612static void
1613bridge_forward(struct bridge_softc *sc, struct mbuf *m)
1614{
1615	struct bridge_iflist *bif;
1616	struct ifnet *src_if, *dst_if;
1617	struct ether_header *eh;
1618	struct psref psref;
1619	struct psref psref_src;
1620	DECLARE_LOCK_VARIABLE;
1621
1622	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0)
1623		return;
1624
1625	src_if = m_get_rcvif_psref(m, &psref_src);
1626	if (src_if == NULL) {
1627		/* Interface is being destroyed? */
1628		m_freem(m);
1629		goto out;
1630	}
1631
1632	sc->sc_if.if_ipackets++;
1633	sc->sc_if.if_ibytes += m->m_pkthdr.len;
1634
1635	/*
1636	 * Look up the bridge_iflist.
1637	 */
1638	bif = bridge_lookup_member_if(sc, src_if, &psref);
1639	if (bif == NULL) {
1640		/* Interface is not a bridge member (anymore?) */
1641		m_freem(m);
1642		goto out;
1643	}
1644
1645	if (bif->bif_flags & IFBIF_STP) {
1646		switch (bif->bif_state) {
1647		case BSTP_IFSTATE_BLOCKING:
1648		case BSTP_IFSTATE_LISTENING:
1649		case BSTP_IFSTATE_DISABLED:
1650			m_freem(m);
1651			bridge_release_member(sc, bif, &psref);
1652			goto out;
1653		}
1654	}
1655
1656	eh = mtod(m, struct ether_header *);
1657
1658	/*
1659	 * If the interface is learning, and the source
1660	 * address is valid and not multicast, record
1661	 * the address.
1662	 */
1663	if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1664	    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1665	    (eh->ether_shost[0] == 0 &&
1666	     eh->ether_shost[1] == 0 &&
1667	     eh->ether_shost[2] == 0 &&
1668	     eh->ether_shost[3] == 0 &&
1669	     eh->ether_shost[4] == 0 &&
1670	     eh->ether_shost[5] == 0) == 0) {
1671		(void) bridge_rtupdate(sc, eh->ether_shost,
1672		    src_if, 0, IFBAF_DYNAMIC);
1673	}
1674
1675	if ((bif->bif_flags & IFBIF_STP) != 0 &&
1676	    bif->bif_state == BSTP_IFSTATE_LEARNING) {
1677		m_freem(m);
1678		bridge_release_member(sc, bif, &psref);
1679		goto out;
1680	}
1681
1682	bridge_release_member(sc, bif, &psref);
1683
1684	/*
1685	 * At this point, the port either doesn't participate
1686	 * in spanning tree or it is in the forwarding state.
1687	 */
1688
1689	/*
1690	 * If the packet is unicast, destined for someone on
1691	 * "this" side of the bridge, drop it.
1692	 */
1693	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1694		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1695		if (src_if == dst_if) {
1696			m_freem(m);
1697			goto out;
1698		}
1699	} else {
1700		/* ...forward it to all interfaces. */
1701		sc->sc_if.if_imcasts++;
1702		dst_if = NULL;
1703	}
1704
1705	if (pfil_run_hooks(sc->sc_if.if_pfil, &m, src_if, PFIL_IN) != 0) {
1706		if (m != NULL)
1707			m_freem(m);
1708		goto out;
1709	}
1710	if (m == NULL)
1711		goto out;
1712
1713	if (dst_if == NULL) {
1714		bridge_broadcast(sc, src_if, m);
1715		goto out;
1716	}
1717
1718	m_put_rcvif_psref(src_if, &psref_src);
1719	src_if = NULL;
1720
1721	/*
1722	 * At this point, we're dealing with a unicast frame
1723	 * going to a different interface.
1724	 */
1725	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1726		m_freem(m);
1727		goto out;
1728	}
1729
1730	bif = bridge_lookup_member_if(sc, dst_if, &psref);
1731	if (bif == NULL) {
1732		/* Not a member of the bridge (anymore?) */
1733		m_freem(m);
1734		goto out;
1735	}
1736
1737	if (bif->bif_flags & IFBIF_STP) {
1738		switch (bif->bif_state) {
1739		case BSTP_IFSTATE_DISABLED:
1740		case BSTP_IFSTATE_BLOCKING:
1741			m_freem(m);
1742			bridge_release_member(sc, bif, &psref);
1743			goto out;
1744		}
1745	}
1746
1747	bridge_release_member(sc, bif, &psref);
1748
1749	ACQUIRE_GLOBAL_LOCKS();
1750	bridge_enqueue(sc, dst_if, m, 1);
1751	RELEASE_GLOBAL_LOCKS();
1752out:
1753	if (src_if != NULL)
1754		m_put_rcvif_psref(src_if, &psref_src);
1755	return;
1756}
1757
1758static bool
1759bstp_state_before_learning(struct bridge_iflist *bif)
1760{
1761	if (bif->bif_flags & IFBIF_STP) {
1762		switch (bif->bif_state) {
1763		case BSTP_IFSTATE_BLOCKING:
1764		case BSTP_IFSTATE_LISTENING:
1765		case BSTP_IFSTATE_DISABLED:
1766			return true;
1767		}
1768	}
1769	return false;
1770}
1771
1772static bool
1773bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1774{
1775	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1776
1777	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1778#if NCARP > 0
1779	    || (bif->bif_ifp->if_carp &&
1780	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1781#endif /* NCARP > 0 */
1782	    )
1783		return true;
1784
1785	return false;
1786}
1787
/*
 * bridge_input:
 *
 *	Receive input from a member interface.  Queue the packet for
 *	bridging if it is not for us.
 *
 *	ifp	interface the packet arrived on
 *	m	the packet; on every path it is passed to ether_input(),
 *		bstp_input() or bridge_forward(), or freed
 */
static void
bridge_input(struct ifnet *ifp, struct mbuf *m)
{
	struct bridge_softc *sc = ifp->if_bridge;
	struct bridge_iflist *bif;
	struct ether_header *eh;
	struct psref psref;
	int bound;
	DECLARE_LOCK_VARIABLE;

	KASSERT(!cpu_intr_p());

	/* No bridge, or bridge not running: plain Ethernet input. */
	if (__predict_false(sc == NULL) ||
	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
		ACQUIRE_GLOBAL_LOCKS();
		ether_input(ifp, m);
		RELEASE_GLOBAL_LOCKS();
		return;
	}

	/* Bind the LWP while the member reference (psref) is held. */
	bound = curlwp_bind();
	bif = bridge_lookup_member_if(sc, ifp, &psref);
	if (bif == NULL) {
		/* Not a member of this bridge: plain Ethernet input. */
		curlwp_bindx(bound);
		ACQUIRE_GLOBAL_LOCKS();
		ether_input(ifp, m);
		RELEASE_GLOBAL_LOCKS();
		return;
	}

	eh = mtod(m, struct ether_header *);

	/* Tag the mbuf as broadcast or multicast based on the destination. */
	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		if (memcmp(etherbroadcastaddr,
		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
	}

	/*
	 * A 'fast' path for packets addressed to interfaces that are
	 * part of this bridge.
	 */
	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
	    !bstp_state_before_learning(bif)) {
		struct bridge_iflist *_bif;
		struct ifnet *_ifp = NULL;
		int s;
		struct psref _psref;

		/*
		 * Walk the member list.  The loop is left in one of three
		 * ways:
		 *  - "goto out" with _ifp set: destination is a member's
		 *    own address, deliver locally on that member;
		 *  - "break" with _ifp still NULL: source is a member's own
		 *    address, the packet is dropped below;
		 *  - falling off the end with _bif == NULL: no match,
		 *    continue with normal processing.
		 */
		BRIDGE_PSZ_RENTER(s);
		BRIDGE_IFLIST_READER_FOREACH(_bif, sc) {
			/* It is destined for us. */
			if (bridge_ourether(_bif, eh, 0)) {
				bridge_acquire_member(sc, _bif, &_psref);
				BRIDGE_PSZ_REXIT(s);
				if (_bif->bif_flags & IFBIF_LEARNING)
					(void) bridge_rtupdate(sc,
					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
				m_set_rcvif(m, _bif->bif_ifp);
				_ifp = _bif->bif_ifp;
				bridge_release_member(sc, _bif, &_psref);
				goto out;
			}

			/* We just received a packet that we sent out. */
			if (bridge_ourether(_bif, eh, 1))
				break;
		}
		BRIDGE_PSZ_REXIT(s);
out:

		if (_bif != NULL) {
			/* Matched a member: finish here, one way or the other. */
			bridge_release_member(sc, bif, &psref);
			curlwp_bindx(bound);
			if (_ifp != NULL) {
				m->m_flags &= ~M_PROMISC;
				ACQUIRE_GLOBAL_LOCKS();
				ether_input(_ifp, m);
				RELEASE_GLOBAL_LOCKS();
			} else
				m_freem(m);
			return;
		}
	}

	/* Tap off 802.1D packets; they do not get forwarded. */
	if (bif->bif_flags & IFBIF_STP &&
	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
		bstp_input(sc, bif, m);
		bridge_release_member(sc, bif, &psref);
		curlwp_bindx(bound);
		return;
	}

	/*
	 * A normal switch would discard the packet here, but that's not what
	 * we've done historically. This also prevents some obnoxious behaviour.
	 */
	if (bstp_state_before_learning(bif)) {
		bridge_release_member(sc, bif, &psref);
		curlwp_bindx(bound);
		ACQUIRE_GLOBAL_LOCKS();
		ether_input(ifp, m);
		RELEASE_GLOBAL_LOCKS();
		return;
	}

	bridge_release_member(sc, bif, &psref);

	/* Everything else goes through the forwarding logic. */
	bridge_forward(sc, m);

	curlwp_bindx(bound);
}
1909
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	sc	bridge to send through
 *	src_if	interface the frame arrived on
 *	m	the frame; only copies are sent, the original is freed
 *		at the end
 *
 *	For multicast/broadcast frames a further copy is passed to
 *	ether_input() for each eligible member, including src_if.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
    struct mbuf *m)
{
	struct bridge_iflist *bif;
	struct mbuf *mc;
	struct ifnet *dst_if;
	bool bmcast;
	int s;
	DECLARE_LOCK_VARIABLE;

	/* True if the frame is flagged as broadcast or multicast. */
	bmcast = m->m_flags & (M_BCAST|M_MCAST);

	BRIDGE_PSZ_RENTER(s);
	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
		struct psref psref;

		/*
		 * Take a reference on the member, then leave the read
		 * section for the per-member work; it is re-entered at
		 * "next" below.
		 */
		bridge_acquire_member(sc, bif, &psref);
		BRIDGE_PSZ_REXIT(s);

		dst_if = bif->bif_ifp;

		/* Skip ports that spanning tree has blocked or disabled. */
		if (bif->bif_flags & IFBIF_STP) {
			switch (bif->bif_state) {
			case BSTP_IFSTATE_BLOCKING:
			case BSTP_IFSTATE_DISABLED:
				goto next;
			}
		}

		/*
		 * A unicast frame only goes to members that have
		 * IFBIF_DISCOVER set.
		 */
		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
			goto next;

		if ((dst_if->if_flags & IFF_RUNNING) == 0)
			goto next;

		/* Transmit a copy on every member except the source. */
		if (dst_if != src_if) {
			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
			if (mc == NULL) {
				sc->sc_if.if_oerrors++;
				goto next;
			}
			ACQUIRE_GLOBAL_LOCKS();
			bridge_enqueue(sc, dst_if, mc, 1);
			RELEASE_GLOBAL_LOCKS();
		}

		/* Also deliver multicast/broadcast locally on this member. */
		if (bmcast) {
			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
			if (mc == NULL) {
				sc->sc_if.if_oerrors++;
				goto next;
			}

			m_set_rcvif(mc, dst_if);
			mc->m_flags &= ~M_PROMISC;

			ACQUIRE_GLOBAL_LOCKS();
			ether_input(dst_if, mc);
			RELEASE_GLOBAL_LOCKS();
		}
next:
		/* Re-enter the read section before dropping the member. */
		BRIDGE_PSZ_RENTER(s);
		bridge_release_member(sc, bif, &psref);
	}
	BRIDGE_PSZ_REXIT(s);

	/* Only copies were handed out; release the original. */
	m_freem(m);
}
1986
1987static int
1988bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
1989    struct bridge_rtnode **brtp)
1990{
1991	struct bridge_rtnode *brt;
1992	int error;
1993
1994	if (sc->sc_brtcnt >= sc->sc_brtmax)
1995		return ENOSPC;
1996
1997	/*
1998	 * Allocate a new bridge forwarding node, and
1999	 * initialize the expiration time and Ethernet
2000	 * address.
2001	 */
2002	brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
2003	if (brt == NULL)
2004		return ENOMEM;
2005
2006	memset(brt, 0, sizeof(*brt));
2007	brt->brt_expire = time_uptime + sc->sc_brttimeout;
2008	brt->brt_flags = IFBAF_DYNAMIC;
2009	memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2010
2011	BRIDGE_RT_LOCK(sc);
2012	error = bridge_rtnode_insert(sc, brt);
2013	BRIDGE_RT_UNLOCK(sc);
2014
2015	if (error != 0) {
2016		pool_put(&bridge_rtnode_pool, brt);
2017		return error;
2018	}
2019
2020	*brtp = brt;
2021	return 0;
2022}
2023
2024/*
2025 * bridge_rtupdate:
2026 *
2027 *	Add a bridge routing entry.
2028 */
2029static int
2030bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2031    struct ifnet *dst_if, int setflags, uint8_t flags)
2032{
2033	struct bridge_rtnode *brt;
2034	int s;
2035
2036again:
2037	/*
2038	 * A route for this destination might already exist.  If so,
2039	 * update it, otherwise create a new one.
2040	 */
2041	BRIDGE_RT_RENTER(s);
2042	brt = bridge_rtnode_lookup(sc, dst);
2043
2044	if (brt != NULL) {
2045		brt->brt_ifp = dst_if;
2046		if (setflags) {
2047			brt->brt_flags = flags;
2048			if (flags & IFBAF_STATIC)
2049				brt->brt_expire = 0;
2050			else
2051				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2052		} else {
2053			if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2054				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2055		}
2056	}
2057	BRIDGE_RT_REXIT(s);
2058
2059	if (brt == NULL) {
2060		int r;
2061
2062		r = bridge_rtalloc(sc, dst, &brt);
2063		if (r != 0)
2064			return r;
2065		goto again;
2066	}
2067
2068	return 0;
2069}
2070
2071/*
2072 * bridge_rtlookup:
2073 *
2074 *	Lookup the destination interface for an address.
2075 */
2076static struct ifnet *
2077bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2078{
2079	struct bridge_rtnode *brt;
2080	struct ifnet *ifs = NULL;
2081	int s;
2082
2083	BRIDGE_RT_RENTER(s);
2084	brt = bridge_rtnode_lookup(sc, addr);
2085	if (brt != NULL)
2086		ifs = brt->brt_ifp;
2087	BRIDGE_RT_REXIT(s);
2088
2089	return ifs;
2090}
2091
/*
 * Callback type for bridge_rtlist_iterate_remove().  It is invoked once
 * per address table node with the bridge, the node, a pointer to a
 * "stop iterating" flag and the caller's opaque argument.  Returning
 * true asks for the node to be removed; setting the flag to true ends
 * the iteration after the current node.
 */
typedef bool (*bridge_iterate_cb_t)
    (struct bridge_softc *, struct bridge_rtnode *, bool *, void *);
2094
2095/*
2096 * bridge_rtlist_iterate_remove:
2097 *
2098 *	It iterates on sc->sc_rtlist and removes rtnodes of it which func
2099 *	callback judges to remove. Removals of rtnodes are done in a manner
2100 *	of pserialize. To this end, all kmem_* operations are placed out of
2101 *	mutexes.
2102 */
2103static void
2104bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2105{
2106	struct bridge_rtnode *brt, *nbrt;
2107	struct bridge_rtnode **brt_list;
2108	int i, count;
2109
2110retry:
2111	count = sc->sc_brtcnt;
2112	if (count == 0)
2113		return;
2114	brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2115
2116	BRIDGE_RT_LOCK(sc);
2117	if (__predict_false(sc->sc_brtcnt > count)) {
2118		/* The rtnodes increased, we need more memory */
2119		BRIDGE_RT_UNLOCK(sc);
2120		kmem_free(brt_list, sizeof(*brt_list) * count);
2121		goto retry;
2122	}
2123
2124	i = 0;
2125	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2126		bool need_break = false;
2127		if (func(sc, brt, &need_break, arg)) {
2128			bridge_rtnode_remove(sc, brt);
2129			brt_list[i++] = brt;
2130		}
2131		if (need_break)
2132			break;
2133	}
2134
2135	if (i > 0)
2136		BRIDGE_RT_PSZ_PERFORM(sc);
2137	BRIDGE_RT_UNLOCK(sc);
2138
2139	while (--i >= 0)
2140		bridge_rtnode_destroy(brt_list[i]);
2141
2142	kmem_free(brt_list, sizeof(*brt_list) * count);
2143}
2144
2145static bool
2146bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2147    bool *need_break, void *arg)
2148{
2149	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2150		/* Take into account of the subsequent removal */
2151		if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2152			*need_break = true;
2153		return true;
2154	} else
2155		return false;
2156}
2157
2158static void
2159bridge_rttrim0(struct bridge_softc *sc)
2160{
2161	bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2162}
2163
2164/*
2165 * bridge_rttrim:
2166 *
2167 *	Trim the routine table so that we have a number
2168 *	of routing entries less than or equal to the
2169 *	maximum number.
2170 */
2171static void
2172bridge_rttrim(struct bridge_softc *sc)
2173{
2174
2175	/* Make sure we actually need to do this. */
2176	if (sc->sc_brtcnt <= sc->sc_brtmax)
2177		return;
2178
2179	/* Force an aging cycle; this might trim enough addresses. */
2180	bridge_rtage(sc);
2181	if (sc->sc_brtcnt <= sc->sc_brtmax)
2182		return;
2183
2184	bridge_rttrim0(sc);
2185
2186	return;
2187}
2188
2189/*
2190 * bridge_timer:
2191 *
2192 *	Aging timer for the bridge.
2193 */
2194static void
2195bridge_timer(void *arg)
2196{
2197	struct bridge_softc *sc = arg;
2198
2199	workqueue_enqueue(sc->sc_rtage_wq, &sc->sc_rtage_wk, NULL);
2200}
2201
2202static void
2203bridge_rtage_work(struct work *wk, void *arg)
2204{
2205	struct bridge_softc *sc = arg;
2206
2207	KASSERT(wk == &sc->sc_rtage_wk);
2208
2209	bridge_rtage(sc);
2210
2211	if (sc->sc_if.if_flags & IFF_RUNNING)
2212		callout_reset(&sc->sc_brcallout,
2213		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2214}
2215
2216static bool
2217bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2218    bool *need_break, void *arg)
2219{
2220	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2221	    time_uptime >= brt->brt_expire)
2222		return true;
2223	else
2224		return false;
2225}
2226
2227/*
2228 * bridge_rtage:
2229 *
2230 *	Perform an aging cycle.
2231 */
2232static void
2233bridge_rtage(struct bridge_softc *sc)
2234{
2235	bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2236}
2237
2238
2239static bool
2240bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2241    bool *need_break, void *arg)
2242{
2243	int full = *(int*)arg;
2244
2245	if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2246		return true;
2247	else
2248		return false;
2249}
2250
2251/*
2252 * bridge_rtflush:
2253 *
2254 *	Remove all dynamic addresses from the bridge.
2255 */
2256static void
2257bridge_rtflush(struct bridge_softc *sc, int full)
2258{
2259	bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2260}
2261
2262/*
2263 * bridge_rtdaddr:
2264 *
2265 *	Remove an address from the table.
2266 */
2267static int
2268bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2269{
2270	struct bridge_rtnode *brt;
2271
2272	BRIDGE_RT_LOCK(sc);
2273	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL) {
2274		BRIDGE_RT_UNLOCK(sc);
2275		return ENOENT;
2276	}
2277	bridge_rtnode_remove(sc, brt);
2278	BRIDGE_RT_PSZ_PERFORM(sc);
2279	BRIDGE_RT_UNLOCK(sc);
2280
2281	bridge_rtnode_destroy(brt);
2282
2283	return 0;
2284}
2285
2286/*
2287 * bridge_rtdelete:
2288 *
2289 *	Delete routes to a speicifc member interface.
2290 */
2291static void
2292bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2293{
2294	struct bridge_rtnode *brt;
2295
2296	BRIDGE_RT_LOCK(sc);
2297	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
2298		if (brt->brt_ifp == ifp)
2299			break;
2300	}
2301	if (brt == NULL) {
2302		BRIDGE_RT_UNLOCK(sc);
2303		return;
2304	}
2305	bridge_rtnode_remove(sc, brt);
2306	BRIDGE_RT_PSZ_PERFORM(sc);
2307	BRIDGE_RT_UNLOCK(sc);
2308
2309	bridge_rtnode_destroy(brt);
2310}
2311
2312/*
2313 * bridge_rtable_init:
2314 *
2315 *	Initialize the route table for this bridge.
2316 */
2317static void
2318bridge_rtable_init(struct bridge_softc *sc)
2319{
2320	int i;
2321
2322	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2323	    KM_SLEEP);
2324
2325	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2326		LIST_INIT(&sc->sc_rthash[i]);
2327
2328	sc->sc_rthash_key = cprng_fast32();
2329
2330	LIST_INIT(&sc->sc_rtlist);
2331
2332	sc->sc_rtlist_psz = pserialize_create();
2333	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2334}
2335
2336/*
2337 * bridge_rtable_fini:
2338 *
2339 *	Deconstruct the route table for this bridge.
2340 */
2341static void
2342bridge_rtable_fini(struct bridge_softc *sc)
2343{
2344
2345	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2346	if (sc->sc_rtlist_lock)
2347		mutex_obj_free(sc->sc_rtlist_lock);
2348	if (sc->sc_rtlist_psz)
2349		pserialize_destroy(sc->sc_rtlist_psz);
2350}
2351
2352/*
2353 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2354 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2355 */
2356#define	mix(a, b, c)							\
2357do {									\
2358	a -= b; a -= c; a ^= (c >> 13);					\
2359	b -= c; b -= a; b ^= (a << 8);					\
2360	c -= a; c -= b; c ^= (b >> 13);					\
2361	a -= b; a -= c; a ^= (c >> 12);					\
2362	b -= c; b -= a; b ^= (a << 16);					\
2363	c -= a; c -= b; c ^= (b >> 5);					\
2364	a -= b; a -= c; a ^= (c >> 3);					\
2365	b -= c; b -= a; b ^= (a << 10);					\
2366	c -= a; c -= b; c ^= (b >> 15);					\
2367} while (/*CONSTCOND*/0)
2368
2369static inline uint32_t
2370bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2371{
2372	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2373
2374	b += addr[5] << 8;
2375	b += addr[4];
2376	a += addr[3] << 24;
2377	a += addr[2] << 16;
2378	a += addr[1] << 8;
2379	a += addr[0];
2380
2381	mix(a, b, c);
2382
2383	return (c & BRIDGE_RTHASH_MASK);
2384}
2385
2386#undef mix
2387
2388/*
2389 * bridge_rtnode_lookup:
2390 *
2391 *	Look up a bridge route node for the specified destination.
2392 */
2393static struct bridge_rtnode *
2394bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2395{
2396	struct bridge_rtnode *brt;
2397	uint32_t hash;
2398	int dir;
2399
2400	hash = bridge_rthash(sc, addr);
2401	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
2402		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2403		if (dir == 0)
2404			return brt;
2405		if (dir > 0)
2406			return NULL;
2407	}
2408
2409	return NULL;
2410}
2411
2412/*
2413 * bridge_rtnode_insert:
2414 *
2415 *	Insert the specified bridge node into the route table.  We
2416 *	assume the entry is not already in the table.
2417 */
2418static int
2419bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2420{
2421	struct bridge_rtnode *lbrt;
2422	uint32_t hash;
2423	int dir;
2424
2425	KASSERT(BRIDGE_RT_LOCKED(sc));
2426
2427	hash = bridge_rthash(sc, brt->brt_addr);
2428
2429	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
2430	if (lbrt == NULL) {
2431		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
2432		goto out;
2433	}
2434
2435	do {
2436		dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2437		if (dir == 0)
2438			return EEXIST;
2439		if (dir > 0) {
2440			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
2441			goto out;
2442		}
2443		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
2444			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
2445			goto out;
2446		}
2447		lbrt = LIST_NEXT(lbrt, brt_hash);
2448	} while (lbrt != NULL);
2449
2450#ifdef DIAGNOSTIC
2451	panic("bridge_rtnode_insert: impossible");
2452#endif
2453
2454 out:
2455	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
2456	sc->sc_brtcnt++;
2457
2458	return 0;
2459}
2460
2461/*
2462 * bridge_rtnode_remove:
2463 *
2464 *	Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2465 */
2466static void
2467bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2468{
2469
2470	KASSERT(BRIDGE_RT_LOCKED(sc));
2471
2472	LIST_REMOVE(brt, brt_hash);
2473	LIST_REMOVE(brt, brt_list);
2474	sc->sc_brtcnt--;
2475}
2476
2477/*
2478 * bridge_rtnode_destroy:
2479 *
2480 *	Destroy a bridge rtnode.
2481 */
2482static void
2483bridge_rtnode_destroy(struct bridge_rtnode *brt)
2484{
2485
2486	pool_put(&bridge_rtnode_pool, brt);
2487}
2488
2489#if defined(BRIDGE_IPF)
2490extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2491extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2492
2493/*
2494 * Send bridge packets through IPF if they are one of the types IPF can deal
2495 * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2496 * question.)
2497 */
2498static int
2499bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2500{
2501	int snap, error;
2502	struct ether_header *eh1, eh2;
2503	struct llc llc1;
2504	uint16_t ether_type;
2505
2506	snap = 0;
2507	error = -1;	/* Default error if not error == 0 */
2508	eh1 = mtod(*mp, struct ether_header *);
2509	ether_type = ntohs(eh1->ether_type);
2510
2511	/*
2512	 * Check for SNAP/LLC.
2513	 */
2514	if (ether_type < ETHERMTU) {
2515		struct llc *llc2 = (struct llc *)(eh1 + 1);
2516
2517		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2518		    llc2->llc_dsap == LLC_SNAP_LSAP &&
2519		    llc2->llc_ssap == LLC_SNAP_LSAP &&
2520		    llc2->llc_control == LLC_UI) {
2521			ether_type = htons(llc2->llc_un.type_snap.ether_type);
2522			snap = 1;
2523		}
2524	}
2525
2526	/*
2527	 * If we're trying to filter bridge traffic, don't look at anything
2528	 * other than IP and ARP traffic.  If the filter doesn't understand
2529	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2530	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2531	 * but of course we don't have an AppleTalk filter to begin with.
2532	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2533	 * ARP traffic.)
2534	 */
2535	switch (ether_type) {
2536		case ETHERTYPE_ARP:
2537		case ETHERTYPE_REVARP:
2538			return 0; /* Automatically pass */
2539		case ETHERTYPE_IP:
2540# ifdef INET6
2541		case ETHERTYPE_IPV6:
2542# endif /* INET6 */
2543			break;
2544		default:
2545			goto bad;
2546	}
2547
2548	/* Strip off the Ethernet header and keep a copy. */
2549	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2550	m_adj(*mp, ETHER_HDR_LEN);
2551
2552	/* Strip off snap header, if present */
2553	if (snap) {
2554		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2555		m_adj(*mp, sizeof(struct llc));
2556	}
2557
2558	/*
2559	 * Check basic packet sanity and run IPF through pfil.
2560	 */
2561	KASSERT(!cpu_intr_p());
2562	switch (ether_type)
2563	{
2564	case ETHERTYPE_IP :
2565		error = bridge_ip_checkbasic(mp);
2566		if (error == 0)
2567			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2568		break;
2569# ifdef INET6
2570	case ETHERTYPE_IPV6 :
2571		error = bridge_ip6_checkbasic(mp);
2572		if (error == 0)
2573			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2574		break;
2575# endif
2576	default :
2577		error = 0;
2578		break;
2579	}
2580
2581	if (*mp == NULL)
2582		return error;
2583	if (error != 0)
2584		goto bad;
2585
2586	error = -1;
2587
2588	/*
2589	 * Finally, put everything back the way it was and return
2590	 */
2591	if (snap) {
2592		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2593		if (*mp == NULL)
2594			return error;
2595		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2596	}
2597
2598	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2599	if (*mp == NULL)
2600		return error;
2601	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2602
2603	return 0;
2604
2605    bad:
2606	m_freem(*mp);
2607	*mp = NULL;
2608	return error;
2609}
2610
2611/*
2612 * Perform basic checks on header size since
2613 * IPF assumes ip_input has already processed
2614 * it for it.  Cut-and-pasted from ip_input.c.
2615 * Given how simple the IPv6 version is,
2616 * does the IPv4 version really need to be
2617 * this complicated?
2618 *
2619 * XXX Should we update ipstat here, or not?
2620 * XXX Right now we update ipstat but not
2621 * XXX csum_counter.
2622 */
2623static int
2624bridge_ip_checkbasic(struct mbuf **mp)
2625{
2626	struct mbuf *m = *mp;
2627	struct ip *ip;
2628	int len, hlen;
2629
2630	if (*mp == NULL)
2631		return -1;
2632
2633	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2634		if ((m = m_copyup(m, sizeof(struct ip),
2635			(max_linkhdr + 3) & ~3)) == NULL) {
2636			/* XXXJRT new stat, please */
2637			ip_statinc(IP_STAT_TOOSMALL);
2638			goto bad;
2639		}
2640	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2641		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2642			ip_statinc(IP_STAT_TOOSMALL);
2643			goto bad;
2644		}
2645	}
2646	ip = mtod(m, struct ip *);
2647	if (ip == NULL) goto bad;
2648
2649	if (ip->ip_v != IPVERSION) {
2650		ip_statinc(IP_STAT_BADVERS);
2651		goto bad;
2652	}
2653	hlen = ip->ip_hl << 2;
2654	if (hlen < sizeof(struct ip)) { /* minimum header length */
2655		ip_statinc(IP_STAT_BADHLEN);
2656		goto bad;
2657	}
2658	if (hlen > m->m_len) {
2659		if ((m = m_pullup(m, hlen)) == 0) {
2660			ip_statinc(IP_STAT_BADHLEN);
2661			goto bad;
2662		}
2663		ip = mtod(m, struct ip *);
2664		if (ip == NULL) goto bad;
2665	}
2666
2667	switch (m->m_pkthdr.csum_flags &
2668	        ((m_get_rcvif_NOMPSAFE(m)->if_csum_flags_rx & M_CSUM_IPv4) |
2669	         M_CSUM_IPv4_BAD)) {
2670	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2671		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2672		goto bad;
2673
2674	case M_CSUM_IPv4:
2675		/* Checksum was okay. */
2676		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2677		break;
2678
2679	default:
2680		/* Must compute it ourselves. */
2681		/* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2682		if (in_cksum(m, hlen) != 0)
2683			goto bad;
2684		break;
2685	}
2686
2687	/* Retrieve the packet length. */
2688	len = ntohs(ip->ip_len);
2689
2690	/*
2691	 * Check for additional length bogosity
2692	 */
2693	if (len < hlen) {
2694		ip_statinc(IP_STAT_BADLEN);
2695		goto bad;
2696	}
2697
2698	/*
2699	 * Check that the amount of data in the buffers
2700	 * is as at least much as the IP header would have us expect.
2701	 * Drop packet if shorter than we expect.
2702	 */
2703	if (m->m_pkthdr.len < len) {
2704		ip_statinc(IP_STAT_TOOSHORT);
2705		goto bad;
2706	}
2707
2708	/* Checks out, proceed */
2709	*mp = m;
2710	return 0;
2711
2712    bad:
2713	*mp = m;
2714	return -1;
2715}
2716
2717# ifdef INET6
2718/*
2719 * Same as above, but for IPv6.
2720 * Cut-and-pasted from ip6_input.c.
2721 * XXX Should we update ip6stat, or not?
2722 */
2723static int
2724bridge_ip6_checkbasic(struct mbuf **mp)
2725{
2726	struct mbuf *m = *mp;
2727	struct ip6_hdr *ip6;
2728
2729	/*
2730	 * If the IPv6 header is not aligned, slurp it up into a new
2731	 * mbuf with space for link headers, in the event we forward
2732	 * it.  Otherwise, if it is aligned, make sure the entire base
2733	 * IPv6 header is in the first mbuf of the chain.
2734	 */
2735	if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2736		struct ifnet *inifp = m_get_rcvif_NOMPSAFE(m);
2737		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2738		                  (max_linkhdr + 3) & ~3)) == NULL) {
2739			/* XXXJRT new stat, please */
2740			ip6_statinc(IP6_STAT_TOOSMALL);
2741			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2742			goto bad;
2743		}
2744	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
2745		struct ifnet *inifp = m_get_rcvif_NOMPSAFE(m);
2746		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
2747			ip6_statinc(IP6_STAT_TOOSMALL);
2748			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2749			goto bad;
2750		}
2751	}
2752
2753	ip6 = mtod(m, struct ip6_hdr *);
2754
2755	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2756		ip6_statinc(IP6_STAT_BADVERS);
2757		in6_ifstat_inc(m_get_rcvif_NOMPSAFE(m), ifs6_in_hdrerr);
2758		goto bad;
2759	}
2760
2761	/* Checks out, proceed */
2762	*mp = m;
2763	return 0;
2764
2765    bad:
2766	*mp = m;
2767	return -1;
2768}
2769# endif /* INET6 */
2770#endif /* BRIDGE_IPF */
2771