if_bridge.c revision 1.114
1/*	$NetBSD: if_bridge.c,v 1.114 2016/04/19 07:03:12 ozaki-r Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by Jason L. Wright
53 * 4. The name of the author may not be used to endorse or promote products
54 *    derived from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 *
68 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69 */
70
71/*
72 * Network interface bridge support.
73 *
74 * TODO:
75 *
76 *	- Currently only supports Ethernet-like interfaces (Ethernet,
77 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
79 *	  consider heterogenous bridges).
80 */
81
82#include <sys/cdefs.h>
83__KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.114 2016/04/19 07:03:12 ozaki-r Exp $");
84
85#ifdef _KERNEL_OPT
86#include "opt_bridge_ipf.h"
87#include "opt_inet.h"
88#include "opt_net_mpsafe.h"
89#endif /* _KERNEL_OPT */
90
91#include <sys/param.h>
92#include <sys/kernel.h>
93#include <sys/mbuf.h>
94#include <sys/queue.h>
95#include <sys/socket.h>
96#include <sys/socketvar.h> /* for softnet_lock */
97#include <sys/sockio.h>
98#include <sys/systm.h>
99#include <sys/proc.h>
100#include <sys/pool.h>
101#include <sys/kauth.h>
102#include <sys/cpu.h>
103#include <sys/cprng.h>
104#include <sys/mutex.h>
105#include <sys/kmem.h>
106
107#include <net/bpf.h>
108#include <net/if.h>
109#include <net/if_dl.h>
110#include <net/if_types.h>
111#include <net/if_llc.h>
112
113#include <net/if_ether.h>
114#include <net/if_bridgevar.h>
115
116#if defined(BRIDGE_IPF)
117/* Used for bridge_ip[6]_checkbasic */
118#include <netinet/in.h>
119#include <netinet/in_systm.h>
120#include <netinet/ip.h>
121#include <netinet/ip_var.h>
122#include <netinet/ip_private.h>		/* XXX */
123
124#include <netinet/ip6.h>
125#include <netinet6/in6_var.h>
126#include <netinet6/ip6_var.h>
127#include <netinet6/ip6_private.h>	/* XXX */
128#endif /* BRIDGE_IPF */
129
130/*
131 * Size of the route hash table.  Must be a power of two.
132 */
133#ifndef BRIDGE_RTHASH_SIZE
134#define	BRIDGE_RTHASH_SIZE		1024
135#endif
136
137#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
138
139#include "carp.h"
140#if NCARP > 0
141#include <netinet/in.h>
142#include <netinet/in_var.h>
143#include <netinet/ip_carp.h>
144#endif
145
146#include "ioconf.h"
147
148__CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
149__CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
150__CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
151
152/*
153 * Maximum number of addresses to cache.
154 */
155#ifndef BRIDGE_RTABLE_MAX
156#define	BRIDGE_RTABLE_MAX		100
157#endif
158
159/*
160 * Spanning tree defaults.
161 */
162#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
163#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
164#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
165#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
166#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
167#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
168#define	BSTP_DEFAULT_PATH_COST		55
169
170/*
171 * Timeout (in seconds) for entries learned dynamically.
172 */
173#ifndef BRIDGE_RTABLE_TIMEOUT
174#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
175#endif
176
177/*
178 * Number of seconds between walks of the route list.
179 */
180#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
181#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
182#endif
183
/*
 * Route-list locking helpers.
 *
 * sc_rtlist_lock and sc_rtlist_psz are only used when they are non-NULL.
 * Every statement-like helper is wrapped in do { } while (0) so that it
 * expands to exactly one statement: it can then be used as the body of an
 * unbraced if/else without capturing the caller's "else", and the caller
 * always supplies the terminating semicolon.
 */
#define BRIDGE_RT_LOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_enter((_sc)->sc_rtlist_lock); \
				} while (0)
#define BRIDGE_RT_UNLOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_exit((_sc)->sc_rtlist_lock); \
				} while (0)
/* True when there is no route-list lock at all, or when we own it. */
#define BRIDGE_RT_LOCKED(_sc)	(!(_sc)->sc_rtlist_lock || \
				 mutex_owned((_sc)->sc_rtlist_lock))

#define BRIDGE_RT_PSZ_PERFORM(_sc)					\
				do {					\
					if ((_sc)->sc_rtlist_psz != NULL) \
						pserialize_perform((_sc)->sc_rtlist_psz); \
				} while (0)

/* Enter/leave a pserialize read section; __s holds the saved state. */
#define BRIDGE_RT_RENTER(__s)	do { __s = pserialize_read_enter(); } while (0)
#define BRIDGE_RT_REXIT(__s)	do { pserialize_read_exit(__s); } while (0)
197
198
/*
 * Global-lock shims.
 *
 * Without NET_MPSAFE, ACQUIRE_GLOBAL_LOCKS() takes the kernel lock, then
 * softnet_lock, then raises the SPL with splnet(); RELEASE_GLOBAL_LOCKS()
 * undoes that in the reverse order.  The saved SPL lives in the local
 * declared by DECLARE_LOCK_VARIABLE, which must therefore appear in any
 * function using the pair.  With NET_MPSAFE all three expand to nothing.
 */
#ifndef NET_MPSAFE
#define DECLARE_LOCK_VARIABLE	int __s
#define ACQUIRE_GLOBAL_LOCKS()						\
	do {								\
		KERNEL_LOCK(1, NULL);					\
		mutex_enter(softnet_lock);				\
		__s = splnet();						\
	} while (0)
#define RELEASE_GLOBAL_LOCKS()						\
	do {								\
		splx(__s);						\
		mutex_exit(softnet_lock);				\
		KERNEL_UNLOCK_ONE(NULL);				\
	} while (0)
#else /* NET_MPSAFE */
#define DECLARE_LOCK_VARIABLE
#define ACQUIRE_GLOBAL_LOCKS()	do { } while (0)
#define RELEASE_GLOBAL_LOCKS()	do { } while (0)
#endif /* !NET_MPSAFE */
216
/*
 * Route-list prune period; initialised from BRIDGE_RTABLE_PRUNE_PERIOD.
 * Not static, so it is visible outside this file.
 */
int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* Pool for struct bridge_rtnode; initialised in bridgeattach(). */
static struct pool bridge_rtnode_pool;
220
221static int	bridge_clone_create(struct if_clone *, int);
222static int	bridge_clone_destroy(struct ifnet *);
223
224static int	bridge_ioctl(struct ifnet *, u_long, void *);
225static int	bridge_init(struct ifnet *);
226static void	bridge_stop(struct ifnet *, int);
227static void	bridge_start(struct ifnet *);
228
229static void	bridge_input(struct ifnet *, struct mbuf *);
230static void	bridge_forward(struct bridge_softc *, struct mbuf *);
231
232static void	bridge_timer(void *);
233
234static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
235				 struct mbuf *);
236
237static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
238				struct ifnet *, int, uint8_t);
239static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
240static void	bridge_rttrim(struct bridge_softc *);
241static void	bridge_rtage(struct bridge_softc *);
242static void	bridge_rtage_work(struct work *, void *);
243static void	bridge_rtflush(struct bridge_softc *, int);
244static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
245static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
246
247static void	bridge_rtable_init(struct bridge_softc *);
248static void	bridge_rtable_fini(struct bridge_softc *);
249
250static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
251						  const uint8_t *);
252static int	bridge_rtnode_insert(struct bridge_softc *,
253				     struct bridge_rtnode *);
254static void	bridge_rtnode_remove(struct bridge_softc *,
255				     struct bridge_rtnode *);
256static void	bridge_rtnode_destroy(struct bridge_rtnode *);
257
258static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
259						  const char *name);
260static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
261						     struct ifnet *ifp);
262static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *);
263static void	bridge_delete_member(struct bridge_softc *,
264				     struct bridge_iflist *);
265static struct bridge_iflist *bridge_try_hold_bif(struct bridge_iflist *);
266
267static int	bridge_ioctl_add(struct bridge_softc *, void *);
268static int	bridge_ioctl_del(struct bridge_softc *, void *);
269static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
270static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
271static int	bridge_ioctl_scache(struct bridge_softc *, void *);
272static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
273static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
274static int	bridge_ioctl_rts(struct bridge_softc *, void *);
275static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
276static int	bridge_ioctl_sto(struct bridge_softc *, void *);
277static int	bridge_ioctl_gto(struct bridge_softc *, void *);
278static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
279static int	bridge_ioctl_flush(struct bridge_softc *, void *);
280static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
281static int	bridge_ioctl_spri(struct bridge_softc *, void *);
282static int	bridge_ioctl_ght(struct bridge_softc *, void *);
283static int	bridge_ioctl_sht(struct bridge_softc *, void *);
284static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
285static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
286static int	bridge_ioctl_gma(struct bridge_softc *, void *);
287static int	bridge_ioctl_sma(struct bridge_softc *, void *);
288static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
289static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
290#if defined(BRIDGE_IPF)
291static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
292static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
293static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
294static int	bridge_ip_checkbasic(struct mbuf **mp);
295# ifdef INET6
296static int	bridge_ip6_checkbasic(struct mbuf **mp);
297# endif /* INET6 */
298#endif /* BRIDGE_IPF */
299
/*
 * One entry of the SIOC[GS]DRVSPEC dispatch table.
 *
 * NOTE: bridge_control_table initialises these fields positionally, so
 * the member order below must not change.
 */
struct bridge_control {
	/* Handler; receives the softc and a pointer to the argument buffer. */
	int	(*bc_func)(struct bridge_softc *, void *);
	/* Size the caller's ifd_len must match (non-XLATE commands only). */
	int	bc_argsize;
	/* BC_F_* bits describing argument transfer and privilege. */
	int	bc_flags;
};

#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
#define BC_F_XLATEIN		0x08	/* xlate arguments in */
#define BC_F_XLATEOUT		0x10	/* xlate arguments out */
311
312static const struct bridge_control bridge_control_table[] = {
313[BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
314[BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
315
316[BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
317[BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
318
319[BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
320[BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},
321
322[OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
323[OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},
324
325[BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
326
327[BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
328[BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},
329
330[BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
331
332[BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
333
334[BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
335[BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
336
337[BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
338[BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
339
340[BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
341[BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
342
343[BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
344[BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
345
346[BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
347
348[BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
349#if defined(BRIDGE_IPF)
350[BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
351[BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
352#endif /* BRIDGE_IPF */
353[BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
354[BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
355};
356
357static const int bridge_control_table_size = __arraycount(bridge_control_table);
358
/* Cloner for "bridge" interfaces; attached in bridgeattach(). */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
361
362/*
363 * bridgeattach:
364 *
365 *	Pseudo-device attach routine.
366 */
367void
368bridgeattach(int n)
369{
370
371	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
372	    0, 0, 0, "brtpl", NULL, IPL_NET);
373
374	if_clone_attach(&bridge_cloner);
375}
376
377/*
378 * bridge_clone_create:
379 *
380 *	Create a new bridge instance.
381 */
382static int
383bridge_clone_create(struct if_clone *ifc, int unit)
384{
385	struct bridge_softc *sc;
386	struct ifnet *ifp;
387	int error;
388
389	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
390	ifp = &sc->sc_if;
391
392	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
393	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
394	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
395	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
396	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
397	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
398	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
399	sc->sc_filter_flags = 0;
400
401	/* Initialize our routing table. */
402	bridge_rtable_init(sc);
403
404	error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
405	    bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
406	if (error)
407		panic("%s: workqueue_create %d\n", __func__, error);
408
409	callout_init(&sc->sc_brcallout, 0);
410	callout_init(&sc->sc_bstpcallout, 0);
411
412	PSLIST_INIT(&sc->sc_iflist);
413	sc->sc_iflist_psz = pserialize_create();
414	sc->sc_iflist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
415	cv_init(&sc->sc_iflist_cv, "if_bridge_cv");
416
417	if_initname(ifp, ifc->ifc_name, unit);
418	ifp->if_softc = sc;
419	ifp->if_mtu = ETHERMTU;
420	ifp->if_ioctl = bridge_ioctl;
421	ifp->if_output = bridge_output;
422	ifp->if_start = bridge_start;
423	ifp->if_stop = bridge_stop;
424	ifp->if_init = bridge_init;
425	ifp->if_type = IFT_BRIDGE;
426	ifp->if_addrlen = 0;
427	ifp->if_dlt = DLT_EN10MB;
428	ifp->if_hdrlen = ETHER_HDR_LEN;
429
430	if_initialize(ifp);
431	if_register(ifp);
432
433	if_alloc_sadl(ifp);
434
435	return (0);
436}
437
438/*
439 * bridge_clone_destroy:
440 *
441 *	Destroy a bridge instance.
442 */
443static int
444bridge_clone_destroy(struct ifnet *ifp)
445{
446	struct bridge_softc *sc = ifp->if_softc;
447	struct bridge_iflist *bif;
448	int s;
449
450	s = splnet();
451
452	bridge_stop(ifp, 1);
453
454	BRIDGE_LOCK(sc);
455	for (;;) {
456		bif = PSLIST_WRITER_FIRST(&sc->sc_iflist, struct bridge_iflist,
457		    bif_next);
458		if (bif == NULL)
459			break;
460		bridge_delete_member(sc, bif);
461	}
462	PSLIST_DESTROY(&sc->sc_iflist);
463	BRIDGE_UNLOCK(sc);
464
465	splx(s);
466
467	if_detach(ifp);
468
469	/* Tear down the routing table. */
470	bridge_rtable_fini(sc);
471
472	cv_destroy(&sc->sc_iflist_cv);
473
474	if (sc->sc_iflist_psz)
475		pserialize_destroy(sc->sc_iflist_psz);
476	if (sc->sc_iflist_lock)
477		mutex_obj_free(sc->sc_iflist_lock);
478
479	workqueue_destroy(sc->sc_rtage_wq);
480
481	kmem_free(sc, sizeof(*sc));
482
483	return (0);
484}
485
486/*
487 * bridge_ioctl:
488 *
489 *	Handle a control request from the operator.
490 */
491static int
492bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
493{
494	struct bridge_softc *sc = ifp->if_softc;
495	struct lwp *l = curlwp;	/* XXX */
496	union {
497		struct ifbreq ifbreq;
498		struct ifbifconf ifbifconf;
499		struct ifbareq ifbareq;
500		struct ifbaconf ifbaconf;
501		struct ifbrparam ifbrparam;
502	} args;
503	struct ifdrv *ifd = (struct ifdrv *) data;
504	const struct bridge_control *bc = NULL; /* XXXGCC */
505	int s, error = 0;
506
507	/* Authorize command before calling splnet(). */
508	switch (cmd) {
509	case SIOCGDRVSPEC:
510	case SIOCSDRVSPEC:
511		if (ifd->ifd_cmd >= bridge_control_table_size
512		    || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
513			error = EINVAL;
514			return error;
515		}
516
517		/* We only care about BC_F_SUSER at this point. */
518		if ((bc->bc_flags & BC_F_SUSER) == 0)
519			break;
520
521		error = kauth_authorize_network(l->l_cred,
522		    KAUTH_NETWORK_INTERFACE_BRIDGE,
523		    cmd == SIOCGDRVSPEC ?
524		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
525		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
526		     ifd, NULL, NULL);
527		if (error)
528			return (error);
529
530		break;
531	}
532
533	s = splnet();
534
535	switch (cmd) {
536	case SIOCGDRVSPEC:
537	case SIOCSDRVSPEC:
538		KASSERT(bc != NULL);
539		if (cmd == SIOCGDRVSPEC &&
540		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
541			error = EINVAL;
542			break;
543		}
544		else if (cmd == SIOCSDRVSPEC &&
545		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
546			error = EINVAL;
547			break;
548		}
549
550		/* BC_F_SUSER is checked above, before splnet(). */
551
552		if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
553		    && (ifd->ifd_len != bc->bc_argsize
554			|| ifd->ifd_len > sizeof(args))) {
555			error = EINVAL;
556			break;
557		}
558
559		memset(&args, 0, sizeof(args));
560		if (bc->bc_flags & BC_F_COPYIN) {
561			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
562			if (error)
563				break;
564		} else if (bc->bc_flags & BC_F_XLATEIN) {
565			args.ifbifconf.ifbic_len = ifd->ifd_len;
566			args.ifbifconf.ifbic_buf = ifd->ifd_data;
567		}
568
569		error = (*bc->bc_func)(sc, &args);
570		if (error)
571			break;
572
573		if (bc->bc_flags & BC_F_COPYOUT) {
574			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
575		} else if (bc->bc_flags & BC_F_XLATEOUT) {
576			ifd->ifd_len = args.ifbifconf.ifbic_len;
577			ifd->ifd_data = args.ifbifconf.ifbic_buf;
578		}
579		break;
580
581	case SIOCSIFFLAGS:
582		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
583			break;
584		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
585		case IFF_RUNNING:
586			/*
587			 * If interface is marked down and it is running,
588			 * then stop and disable it.
589			 */
590			(*ifp->if_stop)(ifp, 1);
591			break;
592		case IFF_UP:
593			/*
594			 * If interface is marked up and it is stopped, then
595			 * start it.
596			 */
597			error = (*ifp->if_init)(ifp);
598			break;
599		default:
600			break;
601		}
602		break;
603
604	case SIOCSIFMTU:
605		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
606			error = 0;
607		break;
608
609	default:
610		error = ifioctl_common(ifp, cmd, data);
611		break;
612	}
613
614	splx(s);
615
616	return (error);
617}
618
619/*
620 * bridge_lookup_member:
621 *
622 *	Lookup a bridge member interface.
623 */
624static struct bridge_iflist *
625bridge_lookup_member(struct bridge_softc *sc, const char *name)
626{
627	struct bridge_iflist *bif;
628	struct ifnet *ifp;
629	int s;
630
631	BRIDGE_PSZ_RENTER(s);
632
633	PSLIST_READER_FOREACH(bif, &sc->sc_iflist, struct bridge_iflist,
634	    bif_next) {
635		ifp = bif->bif_ifp;
636		if (strcmp(ifp->if_xname, name) == 0)
637			break;
638	}
639	bif = bridge_try_hold_bif(bif);
640
641	BRIDGE_PSZ_REXIT(s);
642
643	return bif;
644}
645
646/*
647 * bridge_lookup_member_if:
648 *
649 *	Lookup a bridge member interface by ifnet*.
650 */
651static struct bridge_iflist *
652bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
653{
654	struct bridge_iflist *bif;
655	int s;
656
657	BRIDGE_PSZ_RENTER(s);
658
659	bif = member_ifp->if_bridgeif;
660	bif = bridge_try_hold_bif(bif);
661
662	BRIDGE_PSZ_REXIT(s);
663
664	return bif;
665}
666
667static struct bridge_iflist *
668bridge_try_hold_bif(struct bridge_iflist *bif)
669{
670
671	if (bif != NULL) {
672		if (bif->bif_waiting)
673			bif = NULL;
674		else
675			atomic_inc_32(&bif->bif_refs);
676	}
677	return bif;
678}
679
680/*
681 * bridge_release_member:
682 *
683 *	Release the specified member interface.
684 */
685static void
686bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif)
687{
688	uint32_t refs;
689
690	refs = atomic_dec_uint_nv(&bif->bif_refs);
691	if (__predict_false(refs == 0 && bif->bif_waiting)) {
692		BRIDGE_LOCK(sc);
693		cv_broadcast(&sc->sc_iflist_cv);
694		BRIDGE_UNLOCK(sc);
695	}
696}
697
698/*
699 * bridge_delete_member:
700 *
701 *	Delete the specified member interface.
702 */
703static void
704bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
705{
706	struct ifnet *ifs = bif->bif_ifp;
707
708	KASSERT(BRIDGE_LOCKED(sc));
709
710	ifs->_if_input = ether_input;
711	ifs->if_bridge = NULL;
712	ifs->if_bridgeif = NULL;
713
714	PSLIST_WRITER_REMOVE(bif, bif_next);
715	BRIDGE_PSZ_PERFORM(sc);
716
717	bif->bif_waiting = true;
718	membar_sync();
719	while (bif->bif_refs > 0) {
720		aprint_debug("%s: cv_wait on iflist\n", __func__);
721		cv_wait(&sc->sc_iflist_cv, sc->sc_iflist_lock);
722	}
723	bif->bif_waiting = false;
724	BRIDGE_UNLOCK(sc);
725
726	PSLIST_ENTRY_DESTROY(bif, bif_next);
727	kmem_free(bif, sizeof(*bif));
728
729	BRIDGE_LOCK(sc);
730}
731
732static int
733bridge_ioctl_add(struct bridge_softc *sc, void *arg)
734{
735	struct ifbreq *req = arg;
736	struct bridge_iflist *bif = NULL;
737	struct ifnet *ifs;
738	int error = 0;
739
740	ifs = ifunit(req->ifbr_ifsname);
741	if (ifs == NULL)
742		return (ENOENT);
743
744	if (sc->sc_if.if_mtu != ifs->if_mtu)
745		return (EINVAL);
746
747	if (ifs->if_bridge == sc)
748		return (EEXIST);
749
750	if (ifs->if_bridge != NULL)
751		return (EBUSY);
752
753	if (ifs->_if_input != ether_input)
754		return EINVAL;
755
756	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
757	if ((ifs->if_flags & IFF_SIMPLEX) == 0)
758		return EINVAL;
759
760	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
761
762	switch (ifs->if_type) {
763	case IFT_ETHER:
764		if ((error = ether_enable_vlan_mtu(ifs)) > 0)
765			goto out;
766		/*
767		 * Place the interface into promiscuous mode.
768		 */
769		error = ifpromisc(ifs, 1);
770		if (error)
771			goto out;
772		break;
773	default:
774		error = EINVAL;
775		goto out;
776	}
777
778	bif->bif_ifp = ifs;
779	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
780	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
781	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
782	bif->bif_refs = 0;
783	bif->bif_waiting = false;
784	PSLIST_ENTRY_INIT(bif, bif_next);
785
786	BRIDGE_LOCK(sc);
787
788	ifs->if_bridge = sc;
789	ifs->if_bridgeif = bif;
790	PSLIST_WRITER_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
791	ifs->_if_input = bridge_input;
792
793	BRIDGE_UNLOCK(sc);
794
795	if (sc->sc_if.if_flags & IFF_RUNNING)
796		bstp_initialization(sc);
797	else
798		bstp_stop(sc);
799
800 out:
801	if (error) {
802		if (bif != NULL)
803			kmem_free(bif, sizeof(*bif));
804	}
805	return (error);
806}
807
808static int
809bridge_ioctl_del(struct bridge_softc *sc, void *arg)
810{
811	struct ifbreq *req = arg;
812	const char *name = req->ifbr_ifsname;
813	struct bridge_iflist *bif;
814	struct ifnet *ifs;
815
816	BRIDGE_LOCK(sc);
817
818	/*
819	 * Don't use bridge_lookup_member. We want to get a member
820	 * with bif_refs == 0.
821	 */
822	PSLIST_WRITER_FOREACH(bif, &sc->sc_iflist, struct bridge_iflist,
823	    bif_next) {
824		ifs = bif->bif_ifp;
825		if (strcmp(ifs->if_xname, name) == 0)
826			break;
827	}
828
829	if (bif == NULL) {
830		BRIDGE_UNLOCK(sc);
831		return ENOENT;
832	}
833
834	bridge_delete_member(sc, bif);
835
836	BRIDGE_UNLOCK(sc);
837
838	switch (ifs->if_type) {
839	case IFT_ETHER:
840		/*
841		 * Take the interface out of promiscuous mode.
842		 * Don't call it with holding a spin lock.
843		 */
844		(void) ifpromisc(ifs, 0);
845		(void) ether_disable_vlan_mtu(ifs);
846		break;
847	default:
848#ifdef DIAGNOSTIC
849		panic("bridge_delete_member: impossible");
850#endif
851		break;
852	}
853
854	bridge_rtdelete(sc, ifs);
855
856	if (sc->sc_if.if_flags & IFF_RUNNING)
857		bstp_initialization(sc);
858
859	return 0;
860}
861
862static int
863bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
864{
865	struct ifbreq *req = arg;
866	struct bridge_iflist *bif;
867
868	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
869	if (bif == NULL)
870		return (ENOENT);
871
872	req->ifbr_ifsflags = bif->bif_flags;
873	req->ifbr_state = bif->bif_state;
874	req->ifbr_priority = bif->bif_priority;
875	req->ifbr_path_cost = bif->bif_path_cost;
876	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
877
878	bridge_release_member(sc, bif);
879
880	return (0);
881}
882
883static int
884bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
885{
886	struct ifbreq *req = arg;
887	struct bridge_iflist *bif;
888
889	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
890	if (bif == NULL)
891		return (ENOENT);
892
893	if (req->ifbr_ifsflags & IFBIF_STP) {
894		switch (bif->bif_ifp->if_type) {
895		case IFT_ETHER:
896			/* These can do spanning tree. */
897			break;
898
899		default:
900			/* Nothing else can. */
901			bridge_release_member(sc, bif);
902			return (EINVAL);
903		}
904	}
905
906	bif->bif_flags = req->ifbr_ifsflags;
907
908	bridge_release_member(sc, bif);
909
910	if (sc->sc_if.if_flags & IFF_RUNNING)
911		bstp_initialization(sc);
912
913	return (0);
914}
915
916static int
917bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
918{
919	struct ifbrparam *param = arg;
920
921	sc->sc_brtmax = param->ifbrp_csize;
922	bridge_rttrim(sc);
923
924	return (0);
925}
926
927static int
928bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
929{
930	struct ifbrparam *param = arg;
931
932	param->ifbrp_csize = sc->sc_brtmax;
933
934	return (0);
935}
936
937static int
938bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
939{
940	struct ifbifconf *bifc = arg;
941	struct bridge_iflist *bif;
942	struct ifbreq *breqs;
943	int i, count, error = 0;
944
945retry:
946	BRIDGE_LOCK(sc);
947	count = 0;
948	PSLIST_WRITER_FOREACH(bif, &sc->sc_iflist, struct bridge_iflist,
949	    bif_next)
950		count++;
951	BRIDGE_UNLOCK(sc);
952
953	if (count == 0) {
954		bifc->ifbic_len = 0;
955		return 0;
956	}
957
958	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
959		/* Tell that a larger buffer is needed */
960		bifc->ifbic_len = sizeof(*breqs) * count;
961		return 0;
962	}
963
964	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
965
966	BRIDGE_LOCK(sc);
967
968	i = 0;
969	PSLIST_WRITER_FOREACH(bif, &sc->sc_iflist, struct bridge_iflist,
970	    bif_next)
971		i++;
972	if (i > count) {
973		/*
974		 * The number of members has been increased.
975		 * We need more memory!
976		 */
977		BRIDGE_UNLOCK(sc);
978		kmem_free(breqs, sizeof(*breqs) * count);
979		goto retry;
980	}
981
982	i = 0;
983	PSLIST_WRITER_FOREACH(bif, &sc->sc_iflist, struct bridge_iflist,
984	    bif_next) {
985		struct ifbreq *breq = &breqs[i++];
986		memset(breq, 0, sizeof(*breq));
987
988		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
989		    sizeof(breq->ifbr_ifsname));
990		breq->ifbr_ifsflags = bif->bif_flags;
991		breq->ifbr_state = bif->bif_state;
992		breq->ifbr_priority = bif->bif_priority;
993		breq->ifbr_path_cost = bif->bif_path_cost;
994		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
995	}
996
997	/* Don't call copyout with holding the mutex */
998	BRIDGE_UNLOCK(sc);
999
1000	for (i = 0; i < count; i++) {
1001		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1002		if (error)
1003			break;
1004	}
1005	bifc->ifbic_len = sizeof(*breqs) * i;
1006
1007	kmem_free(breqs, sizeof(*breqs) * count);
1008
1009	return error;
1010}
1011
1012static int
1013bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1014{
1015	struct ifbaconf *bac = arg;
1016	struct bridge_rtnode *brt;
1017	struct ifbareq bareq;
1018	int count = 0, error = 0, len;
1019
1020	if (bac->ifbac_len == 0)
1021		return (0);
1022
1023	BRIDGE_RT_LOCK(sc);
1024
1025	len = bac->ifbac_len;
1026	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1027		if (len < sizeof(bareq))
1028			goto out;
1029		memset(&bareq, 0, sizeof(bareq));
1030		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1031		    sizeof(bareq.ifba_ifsname));
1032		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1033		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1034			bareq.ifba_expire = brt->brt_expire - time_uptime;
1035		} else
1036			bareq.ifba_expire = 0;
1037		bareq.ifba_flags = brt->brt_flags;
1038
1039		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1040		if (error)
1041			goto out;
1042		count++;
1043		len -= sizeof(bareq);
1044	}
1045 out:
1046	BRIDGE_RT_UNLOCK(sc);
1047
1048	bac->ifbac_len = sizeof(bareq) * count;
1049	return (error);
1050}
1051
1052static int
1053bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1054{
1055	struct ifbareq *req = arg;
1056	struct bridge_iflist *bif;
1057	int error;
1058
1059	bif = bridge_lookup_member(sc, req->ifba_ifsname);
1060	if (bif == NULL)
1061		return (ENOENT);
1062
1063	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1064	    req->ifba_flags);
1065
1066	bridge_release_member(sc, bif);
1067
1068	return (error);
1069}
1070
1071static int
1072bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1073{
1074	struct ifbrparam *param = arg;
1075
1076	sc->sc_brttimeout = param->ifbrp_ctime;
1077
1078	return (0);
1079}
1080
1081static int
1082bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1083{
1084	struct ifbrparam *param = arg;
1085
1086	param->ifbrp_ctime = sc->sc_brttimeout;
1087
1088	return (0);
1089}
1090
1091static int
1092bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1093{
1094	struct ifbareq *req = arg;
1095
1096	return (bridge_rtdaddr(sc, req->ifba_dst));
1097}
1098
1099static int
1100bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1101{
1102	struct ifbreq *req = arg;
1103
1104	bridge_rtflush(sc, req->ifbr_ifsflags);
1105
1106	return (0);
1107}
1108
1109static int
1110bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1111{
1112	struct ifbrparam *param = arg;
1113
1114	param->ifbrp_prio = sc->sc_bridge_priority;
1115
1116	return (0);
1117}
1118
1119static int
1120bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1121{
1122	struct ifbrparam *param = arg;
1123
1124	sc->sc_bridge_priority = param->ifbrp_prio;
1125
1126	if (sc->sc_if.if_flags & IFF_RUNNING)
1127		bstp_initialization(sc);
1128
1129	return (0);
1130}
1131
1132static int
1133bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1134{
1135	struct ifbrparam *param = arg;
1136
1137	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1138
1139	return (0);
1140}
1141
1142static int
1143bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1144{
1145	struct ifbrparam *param = arg;
1146
1147	if (param->ifbrp_hellotime == 0)
1148		return (EINVAL);
1149	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1150
1151	if (sc->sc_if.if_flags & IFF_RUNNING)
1152		bstp_initialization(sc);
1153
1154	return (0);
1155}
1156
1157static int
1158bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1159{
1160	struct ifbrparam *param = arg;
1161
1162	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1163
1164	return (0);
1165}
1166
1167static int
1168bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1169{
1170	struct ifbrparam *param = arg;
1171
1172	if (param->ifbrp_fwddelay == 0)
1173		return (EINVAL);
1174	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1175
1176	if (sc->sc_if.if_flags & IFF_RUNNING)
1177		bstp_initialization(sc);
1178
1179	return (0);
1180}
1181
1182static int
1183bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1184{
1185	struct ifbrparam *param = arg;
1186
1187	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1188
1189	return (0);
1190}
1191
1192static int
1193bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1194{
1195	struct ifbrparam *param = arg;
1196
1197	if (param->ifbrp_maxage == 0)
1198		return (EINVAL);
1199	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1200
1201	if (sc->sc_if.if_flags & IFF_RUNNING)
1202		bstp_initialization(sc);
1203
1204	return (0);
1205}
1206
1207static int
1208bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1209{
1210	struct ifbreq *req = arg;
1211	struct bridge_iflist *bif;
1212
1213	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1214	if (bif == NULL)
1215		return (ENOENT);
1216
1217	bif->bif_priority = req->ifbr_priority;
1218
1219	if (sc->sc_if.if_flags & IFF_RUNNING)
1220		bstp_initialization(sc);
1221
1222	bridge_release_member(sc, bif);
1223
1224	return (0);
1225}
1226
1227#if defined(BRIDGE_IPF)
1228static int
1229bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1230{
1231	struct ifbrparam *param = arg;
1232
1233	param->ifbrp_filter = sc->sc_filter_flags;
1234
1235	return (0);
1236}
1237
1238static int
1239bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1240{
1241	struct ifbrparam *param = arg;
1242	uint32_t nflags, oflags;
1243
1244	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1245		return (EINVAL);
1246
1247	nflags = param->ifbrp_filter;
1248	oflags = sc->sc_filter_flags;
1249
1250	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1251		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1252			sc->sc_if.if_pfil);
1253	}
1254	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1255		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1256			sc->sc_if.if_pfil);
1257	}
1258
1259	sc->sc_filter_flags = nflags;
1260
1261	return (0);
1262}
1263#endif /* BRIDGE_IPF */
1264
1265static int
1266bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1267{
1268	struct ifbreq *req = arg;
1269	struct bridge_iflist *bif;
1270
1271	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1272	if (bif == NULL)
1273		return (ENOENT);
1274
1275	bif->bif_path_cost = req->ifbr_path_cost;
1276
1277	if (sc->sc_if.if_flags & IFF_RUNNING)
1278		bstp_initialization(sc);
1279
1280	bridge_release_member(sc, bif);
1281
1282	return (0);
1283}
1284
1285/*
1286 * bridge_ifdetach:
1287 *
1288 *	Detach an interface from a bridge.  Called when a member
1289 *	interface is detaching.
1290 */
1291void
1292bridge_ifdetach(struct ifnet *ifp)
1293{
1294	struct bridge_softc *sc = ifp->if_bridge;
1295	struct ifbreq breq;
1296
1297	/* ioctl_lock should prevent this from happening */
1298	KASSERT(sc != NULL);
1299
1300	memset(&breq, 0, sizeof(breq));
1301	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1302
1303	(void) bridge_ioctl_del(sc, &breq);
1304}
1305
1306/*
1307 * bridge_init:
1308 *
1309 *	Initialize a bridge interface.
1310 */
1311static int
1312bridge_init(struct ifnet *ifp)
1313{
1314	struct bridge_softc *sc = ifp->if_softc;
1315
1316	if (ifp->if_flags & IFF_RUNNING)
1317		return (0);
1318
1319	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1320	    bridge_timer, sc);
1321
1322	ifp->if_flags |= IFF_RUNNING;
1323	bstp_initialization(sc);
1324	return (0);
1325}
1326
1327/*
1328 * bridge_stop:
1329 *
1330 *	Stop the bridge interface.
1331 */
1332static void
1333bridge_stop(struct ifnet *ifp, int disable)
1334{
1335	struct bridge_softc *sc = ifp->if_softc;
1336
1337	if ((ifp->if_flags & IFF_RUNNING) == 0)
1338		return;
1339
1340	callout_stop(&sc->sc_brcallout);
1341	bstp_stop(sc);
1342
1343	bridge_rtflush(sc, IFBF_FLUSHDYN);
1344
1345	ifp->if_flags &= ~IFF_RUNNING;
1346}
1347
1348/*
1349 * bridge_enqueue:
1350 *
1351 *	Enqueue a packet on a bridge member interface.
1352 */
1353void
1354bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1355    int runfilt)
1356{
1357	ALTQ_DECL(struct altq_pktattr pktattr;)
1358	int len, error;
1359	short mflags;
1360
1361	/*
1362	 * Clear any in-bound checksum flags for this packet.
1363	 */
1364	m->m_pkthdr.csum_flags = 0;
1365
1366	if (runfilt) {
1367		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1368		    dst_ifp, PFIL_OUT) != 0) {
1369			if (m != NULL)
1370				m_freem(m);
1371			return;
1372		}
1373		if (m == NULL)
1374			return;
1375	}
1376
1377#ifdef ALTQ
1378	/*
1379	 * If ALTQ is enabled on the member interface, do
1380	 * classification; the queueing discipline might
1381	 * not require classification, but might require
1382	 * the address family/header pointer in the pktattr.
1383	 */
1384	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1385		/* XXX IFT_ETHER */
1386		altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
1387	}
1388#endif /* ALTQ */
1389
1390	len = m->m_pkthdr.len;
1391	mflags = m->m_flags;
1392
1393	IFQ_ENQUEUE(&dst_ifp->if_snd, m, &pktattr, error);
1394
1395	if (error) {
1396		/* mbuf is already freed */
1397		sc->sc_if.if_oerrors++;
1398		return;
1399	}
1400
1401	sc->sc_if.if_opackets++;
1402	sc->sc_if.if_obytes += len;
1403
1404	dst_ifp->if_obytes += len;
1405
1406	if (mflags & M_MCAST) {
1407		sc->sc_if.if_omcasts++;
1408		dst_ifp->if_omcasts++;
1409	}
1410
1411	if ((dst_ifp->if_flags & IFF_OACTIVE) == 0)
1412		(*dst_ifp->if_start)(dst_ifp);
1413}
1414
1415/*
1416 * bridge_output:
1417 *
1418 *	Send output from a bridge member interface.  This
1419 *	performs the bridging function for locally originated
1420 *	packets.
1421 *
1422 *	The mbuf has the Ethernet header already attached.  We must
1423 *	enqueue or free the mbuf before returning.
1424 */
1425int
1426bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
1427    struct rtentry *rt)
1428{
1429	struct ether_header *eh;
1430	struct ifnet *dst_if;
1431	struct bridge_softc *sc;
1432	int s;
1433
1434	if (m->m_len < ETHER_HDR_LEN) {
1435		m = m_pullup(m, ETHER_HDR_LEN);
1436		if (m == NULL)
1437			return (0);
1438	}
1439
1440	eh = mtod(m, struct ether_header *);
1441	sc = ifp->if_bridge;
1442
1443	/*
1444	 * If bridge is down, but the original output interface is up,
1445	 * go ahead and send out that interface.  Otherwise, the packet
1446	 * is dropped below.
1447	 */
1448	if (__predict_false(sc == NULL) ||
1449	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1450		dst_if = ifp;
1451		goto sendunicast;
1452	}
1453
1454	/*
1455	 * If the packet is a multicast, or we don't know a better way to
1456	 * get there, send to all interfaces.
1457	 */
1458	if (ETHER_IS_MULTICAST(eh->ether_dhost))
1459		dst_if = NULL;
1460	else
1461		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1462	if (dst_if == NULL) {
1463		struct bridge_iflist *bif;
1464		struct mbuf *mc;
1465		int used = 0;
1466
1467		BRIDGE_PSZ_RENTER(s);
1468		PSLIST_READER_FOREACH(bif, &sc->sc_iflist,
1469		    struct bridge_iflist, bif_next) {
1470			bif = bridge_try_hold_bif(bif);
1471			if (bif == NULL)
1472				continue;
1473			BRIDGE_PSZ_REXIT(s);
1474
1475			dst_if = bif->bif_ifp;
1476			if ((dst_if->if_flags & IFF_RUNNING) == 0)
1477				goto next;
1478
1479			/*
1480			 * If this is not the original output interface,
1481			 * and the interface is participating in spanning
1482			 * tree, make sure the port is in a state that
1483			 * allows forwarding.
1484			 */
1485			if (dst_if != ifp &&
1486			    (bif->bif_flags & IFBIF_STP) != 0) {
1487				switch (bif->bif_state) {
1488				case BSTP_IFSTATE_BLOCKING:
1489				case BSTP_IFSTATE_LISTENING:
1490				case BSTP_IFSTATE_DISABLED:
1491					goto next;
1492				}
1493			}
1494
1495			if (PSLIST_READER_NEXT(bif, struct bridge_iflist,
1496			    bif_next) == NULL) {
1497				used = 1;
1498				mc = m;
1499			} else {
1500				mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
1501				if (mc == NULL) {
1502					sc->sc_if.if_oerrors++;
1503					goto next;
1504				}
1505			}
1506
1507#ifndef NET_MPSAFE
1508			s = splnet();
1509#endif
1510			bridge_enqueue(sc, dst_if, mc, 0);
1511#ifndef NET_MPSAFE
1512			splx(s);
1513#endif
1514next:
1515			bridge_release_member(sc, bif);
1516			BRIDGE_PSZ_RENTER(s);
1517		}
1518		BRIDGE_PSZ_REXIT(s);
1519
1520		if (used == 0)
1521			m_freem(m);
1522		return (0);
1523	}
1524
1525 sendunicast:
1526	/*
1527	 * XXX Spanning tree consideration here?
1528	 */
1529
1530	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1531		m_freem(m);
1532		return (0);
1533	}
1534
1535#ifndef NET_MPSAFE
1536	s = splnet();
1537#endif
1538	bridge_enqueue(sc, dst_if, m, 0);
1539#ifndef NET_MPSAFE
1540	splx(s);
1541#endif
1542
1543	return (0);
1544}
1545
1546/*
1547 * bridge_start:
1548 *
1549 *	Start output on a bridge.
1550 *
1551 *	NOTE: This routine should never be called in this implementation.
1552 */
1553static void
1554bridge_start(struct ifnet *ifp)
1555{
1556
1557	printf("%s: bridge_start() called\n", ifp->if_xname);
1558}
1559
1560/*
1561 * bridge_forward:
1562 *
1563 *	The forwarding function of the bridge.
1564 */
1565static void
1566bridge_forward(struct bridge_softc *sc, struct mbuf *m)
1567{
1568	struct bridge_iflist *bif;
1569	struct ifnet *src_if, *dst_if;
1570	struct ether_header *eh;
1571	DECLARE_LOCK_VARIABLE;
1572
1573	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0)
1574		return;
1575
1576	src_if = m->m_pkthdr.rcvif;
1577
1578	sc->sc_if.if_ipackets++;
1579	sc->sc_if.if_ibytes += m->m_pkthdr.len;
1580
1581	/*
1582	 * Look up the bridge_iflist.
1583	 */
1584	bif = bridge_lookup_member_if(sc, src_if);
1585	if (bif == NULL) {
1586		/* Interface is not a bridge member (anymore?) */
1587		m_freem(m);
1588		goto out;
1589	}
1590
1591	if (bif->bif_flags & IFBIF_STP) {
1592		switch (bif->bif_state) {
1593		case BSTP_IFSTATE_BLOCKING:
1594		case BSTP_IFSTATE_LISTENING:
1595		case BSTP_IFSTATE_DISABLED:
1596			m_freem(m);
1597			bridge_release_member(sc, bif);
1598			goto out;
1599		}
1600	}
1601
1602	eh = mtod(m, struct ether_header *);
1603
1604	/*
1605	 * If the interface is learning, and the source
1606	 * address is valid and not multicast, record
1607	 * the address.
1608	 */
1609	if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1610	    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1611	    (eh->ether_shost[0] == 0 &&
1612	     eh->ether_shost[1] == 0 &&
1613	     eh->ether_shost[2] == 0 &&
1614	     eh->ether_shost[3] == 0 &&
1615	     eh->ether_shost[4] == 0 &&
1616	     eh->ether_shost[5] == 0) == 0) {
1617		(void) bridge_rtupdate(sc, eh->ether_shost,
1618		    src_if, 0, IFBAF_DYNAMIC);
1619	}
1620
1621	if ((bif->bif_flags & IFBIF_STP) != 0 &&
1622	    bif->bif_state == BSTP_IFSTATE_LEARNING) {
1623		m_freem(m);
1624		bridge_release_member(sc, bif);
1625		goto out;
1626	}
1627
1628	bridge_release_member(sc, bif);
1629
1630	/*
1631	 * At this point, the port either doesn't participate
1632	 * in spanning tree or it is in the forwarding state.
1633	 */
1634
1635	/*
1636	 * If the packet is unicast, destined for someone on
1637	 * "this" side of the bridge, drop it.
1638	 */
1639	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1640		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1641		if (src_if == dst_if) {
1642			m_freem(m);
1643			goto out;
1644		}
1645	} else {
1646		/* ...forward it to all interfaces. */
1647		sc->sc_if.if_imcasts++;
1648		dst_if = NULL;
1649	}
1650
1651	if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1652	    m->m_pkthdr.rcvif, PFIL_IN) != 0) {
1653		if (m != NULL)
1654			m_freem(m);
1655		goto out;
1656	}
1657	if (m == NULL)
1658		goto out;
1659
1660	if (dst_if == NULL) {
1661		bridge_broadcast(sc, src_if, m);
1662		goto out;
1663	}
1664
1665	/*
1666	 * At this point, we're dealing with a unicast frame
1667	 * going to a different interface.
1668	 */
1669	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1670		m_freem(m);
1671		goto out;
1672	}
1673
1674	bif = bridge_lookup_member_if(sc, dst_if);
1675	if (bif == NULL) {
1676		/* Not a member of the bridge (anymore?) */
1677		m_freem(m);
1678		goto out;
1679	}
1680
1681	if (bif->bif_flags & IFBIF_STP) {
1682		switch (bif->bif_state) {
1683		case BSTP_IFSTATE_DISABLED:
1684		case BSTP_IFSTATE_BLOCKING:
1685			m_freem(m);
1686			bridge_release_member(sc, bif);
1687			goto out;
1688		}
1689	}
1690
1691	bridge_release_member(sc, bif);
1692
1693	ACQUIRE_GLOBAL_LOCKS();
1694	bridge_enqueue(sc, dst_if, m, 1);
1695	RELEASE_GLOBAL_LOCKS();
1696out:
1697	/* XXX gcc */
1698	return;
1699}
1700
1701static bool
1702bstp_state_before_learning(struct bridge_iflist *bif)
1703{
1704	if (bif->bif_flags & IFBIF_STP) {
1705		switch (bif->bif_state) {
1706		case BSTP_IFSTATE_BLOCKING:
1707		case BSTP_IFSTATE_LISTENING:
1708		case BSTP_IFSTATE_DISABLED:
1709			return true;
1710		}
1711	}
1712	return false;
1713}
1714
1715static bool
1716bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1717{
1718	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1719
1720	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1721#if NCARP > 0
1722	    || (bif->bif_ifp->if_carp &&
1723	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1724#endif /* NCARP > 0 */
1725	    )
1726		return true;
1727
1728	return false;
1729}
1730
1731/*
1732 * bridge_input:
1733 *
1734 *	Receive input from a member interface.  Queue the packet for
1735 *	bridging if it is not for us.
1736 */
1737static void
1738bridge_input(struct ifnet *ifp, struct mbuf *m)
1739{
1740	struct bridge_softc *sc = ifp->if_bridge;
1741	struct bridge_iflist *bif;
1742	struct ether_header *eh;
1743	DECLARE_LOCK_VARIABLE;
1744
1745	KASSERT(!cpu_intr_p());
1746
1747	if (__predict_false(sc == NULL) ||
1748	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1749		ACQUIRE_GLOBAL_LOCKS();
1750		ether_input(ifp, m);
1751		RELEASE_GLOBAL_LOCKS();
1752		return;
1753	}
1754
1755	bif = bridge_lookup_member_if(sc, ifp);
1756	if (bif == NULL) {
1757		ACQUIRE_GLOBAL_LOCKS();
1758		ether_input(ifp, m);
1759		RELEASE_GLOBAL_LOCKS();
1760		return;
1761	}
1762
1763	eh = mtod(m, struct ether_header *);
1764
1765	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1766		if (memcmp(etherbroadcastaddr,
1767		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
1768			m->m_flags |= M_BCAST;
1769		else
1770			m->m_flags |= M_MCAST;
1771	}
1772
1773	/*
1774	 * A 'fast' path for packets addressed to interfaces that are
1775	 * part of this bridge.
1776	 */
1777	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
1778	    !bstp_state_before_learning(bif)) {
1779		struct bridge_iflist *_bif;
1780		struct ifnet *_ifp = NULL;
1781		int s;
1782
1783		BRIDGE_PSZ_RENTER(s);
1784		PSLIST_READER_FOREACH(_bif, &sc->sc_iflist,
1785		    struct bridge_iflist, bif_next) {
1786			/* It is destined for us. */
1787			if (bridge_ourether(_bif, eh, 0)) {
1788				_bif = bridge_try_hold_bif(_bif);
1789				BRIDGE_PSZ_REXIT(s);
1790				if (_bif == NULL)
1791					goto out;
1792				if (_bif->bif_flags & IFBIF_LEARNING)
1793					(void) bridge_rtupdate(sc,
1794					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
1795				_ifp = m->m_pkthdr.rcvif = _bif->bif_ifp;
1796				bridge_release_member(sc, _bif);
1797				goto out;
1798			}
1799
1800			/* We just received a packet that we sent out. */
1801			if (bridge_ourether(_bif, eh, 1))
1802				break;
1803		}
1804		BRIDGE_PSZ_REXIT(s);
1805out:
1806
1807		if (_bif != NULL) {
1808			bridge_release_member(sc, bif);
1809			if (_ifp != NULL) {
1810				m->m_flags &= ~M_PROMISC;
1811				ACQUIRE_GLOBAL_LOCKS();
1812				ether_input(_ifp, m);
1813				RELEASE_GLOBAL_LOCKS();
1814			} else
1815				m_freem(m);
1816			return;
1817		}
1818	}
1819
1820	/* Tap off 802.1D packets; they do not get forwarded. */
1821	if (bif->bif_flags & IFBIF_STP &&
1822	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
1823		bstp_input(sc, bif, m);
1824		bridge_release_member(sc, bif);
1825		return;
1826	}
1827
1828	/*
1829	 * A normal switch would discard the packet here, but that's not what
1830	 * we've done historically. This also prevents some obnoxious behaviour.
1831	 */
1832	if (bstp_state_before_learning(bif)) {
1833		bridge_release_member(sc, bif);
1834		ACQUIRE_GLOBAL_LOCKS();
1835		ether_input(ifp, m);
1836		RELEASE_GLOBAL_LOCKS();
1837		return;
1838	}
1839
1840	bridge_release_member(sc, bif);
1841
1842	bridge_forward(sc, m);
1843}
1844
1845/*
1846 * bridge_broadcast:
1847 *
1848 *	Send a frame to all interfaces that are members of
1849 *	the bridge, except for the one on which the packet
1850 *	arrived.
1851 */
1852static void
1853bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
1854    struct mbuf *m)
1855{
1856	struct bridge_iflist *bif;
1857	struct mbuf *mc;
1858	struct ifnet *dst_if;
1859	bool bmcast;
1860	int s;
1861	DECLARE_LOCK_VARIABLE;
1862
1863	bmcast = m->m_flags & (M_BCAST|M_MCAST);
1864
1865	BRIDGE_PSZ_RENTER(s);
1866	PSLIST_READER_FOREACH(bif, &sc->sc_iflist, struct bridge_iflist,
1867	    bif_next) {
1868		bif = bridge_try_hold_bif(bif);
1869		if (bif == NULL)
1870			continue;
1871		BRIDGE_PSZ_REXIT(s);
1872
1873		dst_if = bif->bif_ifp;
1874
1875		if (bif->bif_flags & IFBIF_STP) {
1876			switch (bif->bif_state) {
1877			case BSTP_IFSTATE_BLOCKING:
1878			case BSTP_IFSTATE_DISABLED:
1879				goto next;
1880			}
1881		}
1882
1883		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
1884			goto next;
1885
1886		if ((dst_if->if_flags & IFF_RUNNING) == 0)
1887			goto next;
1888
1889		if (dst_if != src_if) {
1890			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1891			if (mc == NULL) {
1892				sc->sc_if.if_oerrors++;
1893				goto next;
1894			}
1895			ACQUIRE_GLOBAL_LOCKS();
1896			bridge_enqueue(sc, dst_if, mc, 1);
1897			RELEASE_GLOBAL_LOCKS();
1898		}
1899
1900		if (bmcast) {
1901			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1902			if (mc == NULL) {
1903				sc->sc_if.if_oerrors++;
1904				goto next;
1905			}
1906
1907			mc->m_pkthdr.rcvif = dst_if;
1908			mc->m_flags &= ~M_PROMISC;
1909
1910			ACQUIRE_GLOBAL_LOCKS();
1911			ether_input(dst_if, mc);
1912			RELEASE_GLOBAL_LOCKS();
1913		}
1914next:
1915		bridge_release_member(sc, bif);
1916		BRIDGE_PSZ_RENTER(s);
1917	}
1918	BRIDGE_PSZ_REXIT(s);
1919
1920	m_freem(m);
1921}
1922
1923static int
1924bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
1925    struct bridge_rtnode **brtp)
1926{
1927	struct bridge_rtnode *brt;
1928	int error;
1929
1930	if (sc->sc_brtcnt >= sc->sc_brtmax)
1931		return ENOSPC;
1932
1933	/*
1934	 * Allocate a new bridge forwarding node, and
1935	 * initialize the expiration time and Ethernet
1936	 * address.
1937	 */
1938	brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
1939	if (brt == NULL)
1940		return ENOMEM;
1941
1942	memset(brt, 0, sizeof(*brt));
1943	brt->brt_expire = time_uptime + sc->sc_brttimeout;
1944	brt->brt_flags = IFBAF_DYNAMIC;
1945	memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
1946
1947	BRIDGE_RT_LOCK(sc);
1948	error = bridge_rtnode_insert(sc, brt);
1949	BRIDGE_RT_UNLOCK(sc);
1950
1951	if (error != 0) {
1952		pool_put(&bridge_rtnode_pool, brt);
1953		return error;
1954	}
1955
1956	*brtp = brt;
1957	return 0;
1958}
1959
1960/*
1961 * bridge_rtupdate:
1962 *
1963 *	Add a bridge routing entry.
1964 */
1965static int
1966bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
1967    struct ifnet *dst_if, int setflags, uint8_t flags)
1968{
1969	struct bridge_rtnode *brt;
1970	int s;
1971
1972again:
1973	/*
1974	 * A route for this destination might already exist.  If so,
1975	 * update it, otherwise create a new one.
1976	 */
1977	BRIDGE_RT_RENTER(s);
1978	brt = bridge_rtnode_lookup(sc, dst);
1979
1980	if (brt != NULL) {
1981		brt->brt_ifp = dst_if;
1982		if (setflags) {
1983			brt->brt_flags = flags;
1984			if (flags & IFBAF_STATIC)
1985				brt->brt_expire = 0;
1986			else
1987				brt->brt_expire = time_uptime + sc->sc_brttimeout;
1988		} else {
1989			if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
1990				brt->brt_expire = time_uptime + sc->sc_brttimeout;
1991		}
1992	}
1993	BRIDGE_RT_REXIT(s);
1994
1995	if (brt == NULL) {
1996		int r;
1997
1998		r = bridge_rtalloc(sc, dst, &brt);
1999		if (r != 0)
2000			return r;
2001		goto again;
2002	}
2003
2004	return 0;
2005}
2006
2007/*
2008 * bridge_rtlookup:
2009 *
2010 *	Lookup the destination interface for an address.
2011 */
2012static struct ifnet *
2013bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2014{
2015	struct bridge_rtnode *brt;
2016	struct ifnet *ifs = NULL;
2017	int s;
2018
2019	BRIDGE_RT_RENTER(s);
2020	brt = bridge_rtnode_lookup(sc, addr);
2021	if (brt != NULL)
2022		ifs = brt->brt_ifp;
2023	BRIDGE_RT_REXIT(s);
2024
2025	return ifs;
2026}
2027
2028typedef bool (*bridge_iterate_cb_t)
2029    (struct bridge_softc *, struct bridge_rtnode *, bool *, void *);
2030
2031/*
2032 * bridge_rtlist_iterate_remove:
2033 *
2034 *	It iterates on sc->sc_rtlist and removes rtnodes of it which func
2035 *	callback judges to remove. Removals of rtnodes are done in a manner
2036 *	of pserialize. To this end, all kmem_* operations are placed out of
2037 *	mutexes.
2038 */
2039static void
2040bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2041{
2042	struct bridge_rtnode *brt, *nbrt;
2043	struct bridge_rtnode **brt_list;
2044	int i, count;
2045
2046retry:
2047	count = sc->sc_brtcnt;
2048	if (count == 0)
2049		return;
2050	brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2051
2052	BRIDGE_RT_LOCK(sc);
2053	if (__predict_false(sc->sc_brtcnt > count)) {
2054		/* The rtnodes increased, we need more memory */
2055		BRIDGE_RT_UNLOCK(sc);
2056		kmem_free(brt_list, sizeof(*brt_list) * count);
2057		goto retry;
2058	}
2059
2060	i = 0;
2061	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2062		bool need_break = false;
2063		if (func(sc, brt, &need_break, arg)) {
2064			bridge_rtnode_remove(sc, brt);
2065			brt_list[i++] = brt;
2066		}
2067		if (need_break)
2068			break;
2069	}
2070
2071	if (i > 0)
2072		BRIDGE_RT_PSZ_PERFORM(sc);
2073	BRIDGE_RT_UNLOCK(sc);
2074
2075	while (--i >= 0)
2076		bridge_rtnode_destroy(brt_list[i]);
2077
2078	kmem_free(brt_list, sizeof(*brt_list) * count);
2079}
2080
2081static bool
2082bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2083    bool *need_break, void *arg)
2084{
2085	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2086		/* Take into account of the subsequent removal */
2087		if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2088			*need_break = true;
2089		return true;
2090	} else
2091		return false;
2092}
2093
2094static void
2095bridge_rttrim0(struct bridge_softc *sc)
2096{
2097	bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2098}
2099
2100/*
2101 * bridge_rttrim:
2102 *
2103 *	Trim the routine table so that we have a number
2104 *	of routing entries less than or equal to the
2105 *	maximum number.
2106 */
2107static void
2108bridge_rttrim(struct bridge_softc *sc)
2109{
2110
2111	/* Make sure we actually need to do this. */
2112	if (sc->sc_brtcnt <= sc->sc_brtmax)
2113		return;
2114
2115	/* Force an aging cycle; this might trim enough addresses. */
2116	bridge_rtage(sc);
2117	if (sc->sc_brtcnt <= sc->sc_brtmax)
2118		return;
2119
2120	bridge_rttrim0(sc);
2121
2122	return;
2123}
2124
2125/*
2126 * bridge_timer:
2127 *
2128 *	Aging timer for the bridge.
2129 */
2130static void
2131bridge_timer(void *arg)
2132{
2133	struct bridge_softc *sc = arg;
2134
2135	workqueue_enqueue(sc->sc_rtage_wq, &sc->sc_rtage_wk, NULL);
2136}
2137
2138static void
2139bridge_rtage_work(struct work *wk, void *arg)
2140{
2141	struct bridge_softc *sc = arg;
2142
2143	KASSERT(wk == &sc->sc_rtage_wk);
2144
2145	bridge_rtage(sc);
2146
2147	if (sc->sc_if.if_flags & IFF_RUNNING)
2148		callout_reset(&sc->sc_brcallout,
2149		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2150}
2151
2152static bool
2153bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2154    bool *need_break, void *arg)
2155{
2156	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2157	    time_uptime >= brt->brt_expire)
2158		return true;
2159	else
2160		return false;
2161}
2162
2163/*
2164 * bridge_rtage:
2165 *
2166 *	Perform an aging cycle.
2167 */
2168static void
2169bridge_rtage(struct bridge_softc *sc)
2170{
2171	bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2172}
2173
2174
2175static bool
2176bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2177    bool *need_break, void *arg)
2178{
2179	int full = *(int*)arg;
2180
2181	if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2182		return true;
2183	else
2184		return false;
2185}
2186
2187/*
2188 * bridge_rtflush:
2189 *
2190 *	Remove all dynamic addresses from the bridge.
2191 */
2192static void
2193bridge_rtflush(struct bridge_softc *sc, int full)
2194{
2195	bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2196}
2197
/*
 * bridge_rtdaddr:
 *
 *	Remove the entry for addr from the table.  The node is unlinked
 *	and BRIDGE_RT_PSZ_PERFORM() is run under the route lock; the node
 *	is destroyed after the lock is dropped.
 *
 *	Returns 0 on success or ENOENT if there is no such entry.
 */
static int
bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
{
	struct bridge_rtnode *node;

	BRIDGE_RT_LOCK(sc);
	node = bridge_rtnode_lookup(sc, addr);
	if (node != NULL) {
		bridge_rtnode_remove(sc, node);
		BRIDGE_RT_PSZ_PERFORM(sc);
	}
	BRIDGE_RT_UNLOCK(sc);

	if (node == NULL)
		return ENOENT;

	bridge_rtnode_destroy(node);
	return 0;
}
2221
2222/*
2223 * bridge_rtdelete:
2224 *
2225 *	Delete routes to a speicifc member interface.
2226 */
2227static void
2228bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2229{
2230	struct bridge_rtnode *brt;
2231
2232	BRIDGE_RT_LOCK(sc);
2233	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
2234		if (brt->brt_ifp == ifp)
2235			break;
2236	}
2237	if (brt == NULL) {
2238		BRIDGE_RT_UNLOCK(sc);
2239		return;
2240	}
2241	bridge_rtnode_remove(sc, brt);
2242	BRIDGE_RT_PSZ_PERFORM(sc);
2243	BRIDGE_RT_UNLOCK(sc);
2244
2245	bridge_rtnode_destroy(brt);
2246}
2247
2248/*
2249 * bridge_rtable_init:
2250 *
2251 *	Initialize the route table for this bridge.
2252 */
2253static void
2254bridge_rtable_init(struct bridge_softc *sc)
2255{
2256	int i;
2257
2258	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2259	    KM_SLEEP);
2260
2261	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2262		LIST_INIT(&sc->sc_rthash[i]);
2263
2264	sc->sc_rthash_key = cprng_fast32();
2265
2266	LIST_INIT(&sc->sc_rtlist);
2267
2268	sc->sc_rtlist_psz = pserialize_create();
2269	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2270}
2271
2272/*
2273 * bridge_rtable_fini:
2274 *
2275 *	Deconstruct the route table for this bridge.
2276 */
2277static void
2278bridge_rtable_fini(struct bridge_softc *sc)
2279{
2280
2281	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2282	if (sc->sc_rtlist_lock)
2283		mutex_obj_free(sc->sc_rtlist_lock);
2284	if (sc->sc_rtlist_psz)
2285		pserialize_destroy(sc->sc_rtlist_psz);
2286}
2287
2288/*
2289 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2290 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2291 */
2292#define	mix(a, b, c)							\
2293do {									\
2294	a -= b; a -= c; a ^= (c >> 13);					\
2295	b -= c; b -= a; b ^= (a << 8);					\
2296	c -= a; c -= b; c ^= (b >> 13);					\
2297	a -= b; a -= c; a ^= (c >> 12);					\
2298	b -= c; b -= a; b ^= (a << 16);					\
2299	c -= a; c -= b; c ^= (b >> 5);					\
2300	a -= b; a -= c; a ^= (c >> 3);					\
2301	b -= c; b -= a; b ^= (a << 10);					\
2302	c -= a; c -= b; c ^= (b >> 15);					\
2303} while (/*CONSTCOND*/0)
2304
2305static inline uint32_t
2306bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2307{
2308	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2309
2310	b += addr[5] << 8;
2311	b += addr[4];
2312	a += addr[3] << 24;
2313	a += addr[2] << 16;
2314	a += addr[1] << 8;
2315	a += addr[0];
2316
2317	mix(a, b, c);
2318
2319	return (c & BRIDGE_RTHASH_MASK);
2320}
2321
2322#undef mix
2323
2324/*
2325 * bridge_rtnode_lookup:
2326 *
2327 *	Look up a bridge route node for the specified destination.
2328 */
2329static struct bridge_rtnode *
2330bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2331{
2332	struct bridge_rtnode *brt;
2333	uint32_t hash;
2334	int dir;
2335
2336	hash = bridge_rthash(sc, addr);
2337	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
2338		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2339		if (dir == 0)
2340			return (brt);
2341		if (dir > 0)
2342			return (NULL);
2343	}
2344
2345	return (NULL);
2346}
2347
2348/*
2349 * bridge_rtnode_insert:
2350 *
2351 *	Insert the specified bridge node into the route table.  We
2352 *	assume the entry is not already in the table.
2353 */
2354static int
2355bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2356{
2357	struct bridge_rtnode *lbrt;
2358	uint32_t hash;
2359	int dir;
2360
2361	KASSERT(BRIDGE_RT_LOCKED(sc));
2362
2363	hash = bridge_rthash(sc, brt->brt_addr);
2364
2365	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
2366	if (lbrt == NULL) {
2367		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
2368		goto out;
2369	}
2370
2371	do {
2372		dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2373		if (dir == 0)
2374			return (EEXIST);
2375		if (dir > 0) {
2376			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
2377			goto out;
2378		}
2379		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
2380			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
2381			goto out;
2382		}
2383		lbrt = LIST_NEXT(lbrt, brt_hash);
2384	} while (lbrt != NULL);
2385
2386#ifdef DIAGNOSTIC
2387	panic("bridge_rtnode_insert: impossible");
2388#endif
2389
2390 out:
2391	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
2392	sc->sc_brtcnt++;
2393
2394	return (0);
2395}
2396
2397/*
2398 * bridge_rtnode_remove:
2399 *
2400 *	Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2401 */
2402static void
2403bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2404{
2405
2406	KASSERT(BRIDGE_RT_LOCKED(sc));
2407
2408	LIST_REMOVE(brt, brt_hash);
2409	LIST_REMOVE(brt, brt_list);
2410	sc->sc_brtcnt--;
2411}
2412
2413/*
2414 * bridge_rtnode_destroy:
2415 *
2416 *	Destroy a bridge rtnode.
2417 */
2418static void
2419bridge_rtnode_destroy(struct bridge_rtnode *brt)
2420{
2421
2422	pool_put(&bridge_rtnode_pool, brt);
2423}
2424
2425#if defined(BRIDGE_IPF)
2426extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2427extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2428
2429/*
2430 * Send bridge packets through IPF if they are one of the types IPF can deal
2431 * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2432 * question.)
2433 */
2434static int
2435bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2436{
2437	int snap, error;
2438	struct ether_header *eh1, eh2;
2439	struct llc llc1;
2440	uint16_t ether_type;
2441
2442	snap = 0;
2443	error = -1;	/* Default error if not error == 0 */
2444	eh1 = mtod(*mp, struct ether_header *);
2445	ether_type = ntohs(eh1->ether_type);
2446
2447	/*
2448	 * Check for SNAP/LLC.
2449	 */
2450        if (ether_type < ETHERMTU) {
2451                struct llc *llc2 = (struct llc *)(eh1 + 1);
2452
2453                if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2454                    llc2->llc_dsap == LLC_SNAP_LSAP &&
2455                    llc2->llc_ssap == LLC_SNAP_LSAP &&
2456                    llc2->llc_control == LLC_UI) {
2457                	ether_type = htons(llc2->llc_un.type_snap.ether_type);
2458			snap = 1;
2459                }
2460        }
2461
2462	/*
2463	 * If we're trying to filter bridge traffic, don't look at anything
2464	 * other than IP and ARP traffic.  If the filter doesn't understand
2465	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2466	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2467	 * but of course we don't have an AppleTalk filter to begin with.
2468	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2469	 * ARP traffic.)
2470	 */
2471	switch (ether_type) {
2472		case ETHERTYPE_ARP:
2473		case ETHERTYPE_REVARP:
2474			return 0; /* Automatically pass */
2475		case ETHERTYPE_IP:
2476# ifdef INET6
2477		case ETHERTYPE_IPV6:
2478# endif /* INET6 */
2479			break;
2480		default:
2481			goto bad;
2482	}
2483
2484	/* Strip off the Ethernet header and keep a copy. */
2485	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2486	m_adj(*mp, ETHER_HDR_LEN);
2487
2488	/* Strip off snap header, if present */
2489	if (snap) {
2490		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2491		m_adj(*mp, sizeof(struct llc));
2492	}
2493
2494	/*
2495	 * Check basic packet sanity and run IPF through pfil.
2496	 */
2497	KASSERT(!cpu_intr_p());
2498	switch (ether_type)
2499	{
2500	case ETHERTYPE_IP :
2501		error = (dir == PFIL_IN) ? bridge_ip_checkbasic(mp) : 0;
2502		if (error == 0)
2503			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2504		break;
2505# ifdef INET6
2506	case ETHERTYPE_IPV6 :
2507		error = (dir == PFIL_IN) ? bridge_ip6_checkbasic(mp) : 0;
2508		if (error == 0)
2509			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2510		break;
2511# endif
2512	default :
2513		error = 0;
2514		break;
2515	}
2516
2517	if (*mp == NULL)
2518		return error;
2519	if (error != 0)
2520		goto bad;
2521
2522	error = -1;
2523
2524	/*
2525	 * Finally, put everything back the way it was and return
2526	 */
2527	if (snap) {
2528		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2529		if (*mp == NULL)
2530			return error;
2531		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2532	}
2533
2534	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2535	if (*mp == NULL)
2536		return error;
2537	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2538
2539	return 0;
2540
2541    bad:
2542	m_freem(*mp);
2543	*mp = NULL;
2544	return error;
2545}
2546
2547/*
2548 * Perform basic checks on header size since
2549 * IPF assumes ip_input has already processed
2550 * it for it.  Cut-and-pasted from ip_input.c.
2551 * Given how simple the IPv6 version is,
2552 * does the IPv4 version really need to be
2553 * this complicated?
2554 *
2555 * XXX Should we update ipstat here, or not?
2556 * XXX Right now we update ipstat but not
2557 * XXX csum_counter.
2558 */
2559static int
2560bridge_ip_checkbasic(struct mbuf **mp)
2561{
2562	struct mbuf *m = *mp;
2563	struct ip *ip;
2564	int len, hlen;
2565
2566	if (*mp == NULL)
2567		return -1;
2568
2569	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2570		if ((m = m_copyup(m, sizeof(struct ip),
2571			(max_linkhdr + 3) & ~3)) == NULL) {
2572			/* XXXJRT new stat, please */
2573			ip_statinc(IP_STAT_TOOSMALL);
2574			goto bad;
2575		}
2576	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2577		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2578			ip_statinc(IP_STAT_TOOSMALL);
2579			goto bad;
2580		}
2581	}
2582	ip = mtod(m, struct ip *);
2583	if (ip == NULL) goto bad;
2584
2585	if (ip->ip_v != IPVERSION) {
2586		ip_statinc(IP_STAT_BADVERS);
2587		goto bad;
2588	}
2589	hlen = ip->ip_hl << 2;
2590	if (hlen < sizeof(struct ip)) { /* minimum header length */
2591		ip_statinc(IP_STAT_BADHLEN);
2592		goto bad;
2593	}
2594	if (hlen > m->m_len) {
2595		if ((m = m_pullup(m, hlen)) == 0) {
2596			ip_statinc(IP_STAT_BADHLEN);
2597			goto bad;
2598		}
2599		ip = mtod(m, struct ip *);
2600		if (ip == NULL) goto bad;
2601	}
2602
2603        switch (m->m_pkthdr.csum_flags &
2604                ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
2605                 M_CSUM_IPv4_BAD)) {
2606        case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2607                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2608                goto bad;
2609
2610        case M_CSUM_IPv4:
2611                /* Checksum was okay. */
2612                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2613                break;
2614
2615        default:
2616                /* Must compute it ourselves. */
2617                /* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2618                if (in_cksum(m, hlen) != 0)
2619                        goto bad;
2620                break;
2621        }
2622
2623        /* Retrieve the packet length. */
2624        len = ntohs(ip->ip_len);
2625
2626        /*
2627         * Check for additional length bogosity
2628         */
2629        if (len < hlen) {
2630		ip_statinc(IP_STAT_BADLEN);
2631                goto bad;
2632        }
2633
2634        /*
2635         * Check that the amount of data in the buffers
2636         * is as at least much as the IP header would have us expect.
2637         * Drop packet if shorter than we expect.
2638         */
2639        if (m->m_pkthdr.len < len) {
2640		ip_statinc(IP_STAT_TOOSHORT);
2641                goto bad;
2642        }
2643
2644	/* Checks out, proceed */
2645	*mp = m;
2646	return 0;
2647
2648    bad:
2649	*mp = m;
2650	return -1;
2651}
2652
2653# ifdef INET6
2654/*
2655 * Same as above, but for IPv6.
2656 * Cut-and-pasted from ip6_input.c.
2657 * XXX Should we update ip6stat, or not?
2658 */
2659static int
2660bridge_ip6_checkbasic(struct mbuf **mp)
2661{
2662	struct mbuf *m = *mp;
2663	struct ip6_hdr *ip6;
2664
2665        /*
2666         * If the IPv6 header is not aligned, slurp it up into a new
2667         * mbuf with space for link headers, in the event we forward
2668         * it.  Otherwise, if it is aligned, make sure the entire base
2669         * IPv6 header is in the first mbuf of the chain.
2670         */
2671        if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2672                struct ifnet *inifp = m->m_pkthdr.rcvif;
2673                if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2674                                  (max_linkhdr + 3) & ~3)) == NULL) {
2675                        /* XXXJRT new stat, please */
2676			ip6_statinc(IP6_STAT_TOOSMALL);
2677                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2678                        goto bad;
2679                }
2680        } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
2681                struct ifnet *inifp = m->m_pkthdr.rcvif;
2682                if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
2683			ip6_statinc(IP6_STAT_TOOSMALL);
2684                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2685                        goto bad;
2686                }
2687        }
2688
2689        ip6 = mtod(m, struct ip6_hdr *);
2690
2691        if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2692		ip6_statinc(IP6_STAT_BADVERS);
2693                in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
2694                goto bad;
2695        }
2696
2697	/* Checks out, proceed */
2698	*mp = m;
2699	return 0;
2700
2701    bad:
2702	*mp = m;
2703	return -1;
2704}
2705# endif /* INET6 */
2706#endif /* BRIDGE_IPF */
2707