if_bridge.c revision 1.108
1/*	$NetBSD: if_bridge.c,v 1.108 2016/02/15 01:11:41 ozaki-r Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by Jason L. Wright
53 * 4. The name of the author may not be used to endorse or promote products
54 *    derived from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 *
68 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69 */
70
71/*
72 * Network interface bridge support.
73 *
74 * TODO:
75 *
76 *	- Currently only supports Ethernet-like interfaces (Ethernet,
77 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
79 *	  consider heterogeneous bridges).
80 */
81
82#include <sys/cdefs.h>
83__KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.108 2016/02/15 01:11:41 ozaki-r Exp $");
84
85#ifdef _KERNEL_OPT
86#include "opt_bridge_ipf.h"
87#include "opt_inet.h"
88#endif /* _KERNEL_OPT */
89
90#include <sys/param.h>
91#include <sys/kernel.h>
92#include <sys/mbuf.h>
93#include <sys/queue.h>
94#include <sys/socket.h>
95#include <sys/socketvar.h> /* for softnet_lock */
96#include <sys/sockio.h>
97#include <sys/systm.h>
98#include <sys/proc.h>
99#include <sys/pool.h>
100#include <sys/kauth.h>
101#include <sys/cpu.h>
102#include <sys/cprng.h>
103#include <sys/mutex.h>
104#include <sys/kmem.h>
105
106#include <net/bpf.h>
107#include <net/if.h>
108#include <net/if_dl.h>
109#include <net/if_types.h>
110#include <net/if_llc.h>
111
112#include <net/if_ether.h>
113#include <net/if_bridgevar.h>
114
115#if defined(BRIDGE_IPF)
116/* Used for bridge_ip[6]_checkbasic */
117#include <netinet/in.h>
118#include <netinet/in_systm.h>
119#include <netinet/ip.h>
120#include <netinet/ip_var.h>
121#include <netinet/ip_private.h>		/* XXX */
122
123#include <netinet/ip6.h>
124#include <netinet6/in6_var.h>
125#include <netinet6/ip6_var.h>
126#include <netinet6/ip6_private.h>	/* XXX */
127#endif /* BRIDGE_IPF */
128
129/*
130 * Size of the route hash table.  Must be a power of two.
131 */
132#ifndef BRIDGE_RTHASH_SIZE
133#define	BRIDGE_RTHASH_SIZE		1024
134#endif
135
136#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
137
138#include "carp.h"
139#if NCARP > 0
140#include <netinet/in.h>
141#include <netinet/in_var.h>
142#include <netinet/ip_carp.h>
143#endif
144
145#include "ioconf.h"
146
147__CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
148__CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
149__CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
150
151/*
152 * Maximum number of addresses to cache.
153 */
154#ifndef BRIDGE_RTABLE_MAX
155#define	BRIDGE_RTABLE_MAX		100
156#endif
157
158/*
159 * Spanning tree defaults.
160 */
161#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
162#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
163#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
164#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
165#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
166#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
167#define	BSTP_DEFAULT_PATH_COST		55
168
169/*
170 * Timeout (in seconds) for entries learned dynamically.
171 */
172#ifndef BRIDGE_RTABLE_TIMEOUT
173#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
174#endif
175
176/*
177 * Number of seconds between walks of the route list.
178 */
179#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
180#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
181#endif
182
/*
 * Routing-table locking helpers.
 *
 * sc_rtlist_lock and sc_rtlist_psz may be NULL, in which case the
 * corresponding operation is skipped (and BRIDGE_RT_LOCKED() reports
 * true).
 *
 * The statement-like macros are wrapped in do { } while (0) so that each
 * one expands to exactly one statement: a bare "if" in a macro silently
 * captures a following "else" at the call site.  Every invocation must be
 * terminated with a semicolon by the caller.
 */
#define BRIDGE_RT_LOCK(_sc)						\
	do {								\
		if ((_sc)->sc_rtlist_lock)				\
			mutex_enter((_sc)->sc_rtlist_lock);		\
	} while (0)
#define BRIDGE_RT_UNLOCK(_sc)						\
	do {								\
		if ((_sc)->sc_rtlist_lock)				\
			mutex_exit((_sc)->sc_rtlist_lock);		\
	} while (0)
#define BRIDGE_RT_LOCKED(_sc)	(!(_sc)->sc_rtlist_lock || \
				 mutex_owned((_sc)->sc_rtlist_lock))

#define BRIDGE_RT_PSZ_PERFORM(_sc)					\
	do {								\
		if ((_sc)->sc_rtlist_psz != NULL)			\
			pserialize_perform((_sc)->sc_rtlist_psz);	\
	} while (0)

/*
 * Read-side section markers for the routing table.  With BRIDGE_MPSAFE
 * they map onto pserialize_read_enter()/pserialize_read_exit(); without
 * it they only assign/consume the cookie so callers compile unchanged.
 */
#ifdef BRIDGE_MPSAFE
#define BRIDGE_RT_RENTER(__s)	do { __s = pserialize_read_enter(); } while (0)
#define BRIDGE_RT_REXIT(__s)	do { pserialize_read_exit(__s); } while (0)
#else /* BRIDGE_MPSAFE */
#define BRIDGE_RT_RENTER(__s)	do { __s = 0; } while (0)
#define BRIDGE_RT_REXIT(__s)	do { (void)__s; } while (0)
#endif /* BRIDGE_MPSAFE */
201
202int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
203
204static struct pool bridge_rtnode_pool;
205
206static int	bridge_clone_create(struct if_clone *, int);
207static int	bridge_clone_destroy(struct ifnet *);
208
209static int	bridge_ioctl(struct ifnet *, u_long, void *);
210static int	bridge_init(struct ifnet *);
211static void	bridge_stop(struct ifnet *, int);
212static void	bridge_start(struct ifnet *);
213
214static void	bridge_input(struct ifnet *, struct mbuf *);
215static void	bridge_forward(struct bridge_softc *, struct mbuf *);
216
217static void	bridge_timer(void *);
218
219static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
220				 struct mbuf *);
221
222static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
223				struct ifnet *, int, uint8_t);
224static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
225static void	bridge_rttrim(struct bridge_softc *);
226static void	bridge_rtage(struct bridge_softc *);
227static void	bridge_rtage_work(struct work *, void *);
228static void	bridge_rtflush(struct bridge_softc *, int);
229static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
230static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
231
232static void	bridge_rtable_init(struct bridge_softc *);
233static void	bridge_rtable_fini(struct bridge_softc *);
234
235static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
236						  const uint8_t *);
237static int	bridge_rtnode_insert(struct bridge_softc *,
238				     struct bridge_rtnode *);
239static void	bridge_rtnode_remove(struct bridge_softc *,
240				     struct bridge_rtnode *);
241static void	bridge_rtnode_destroy(struct bridge_rtnode *);
242
243static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
244						  const char *name);
245static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
246						     struct ifnet *ifp);
247static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *);
248static void	bridge_delete_member(struct bridge_softc *,
249				     struct bridge_iflist *);
250static struct bridge_iflist *bridge_try_hold_bif(struct bridge_iflist *);
251
252static int	bridge_ioctl_add(struct bridge_softc *, void *);
253static int	bridge_ioctl_del(struct bridge_softc *, void *);
254static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
255static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
256static int	bridge_ioctl_scache(struct bridge_softc *, void *);
257static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
258static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
259static int	bridge_ioctl_rts(struct bridge_softc *, void *);
260static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
261static int	bridge_ioctl_sto(struct bridge_softc *, void *);
262static int	bridge_ioctl_gto(struct bridge_softc *, void *);
263static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
264static int	bridge_ioctl_flush(struct bridge_softc *, void *);
265static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
266static int	bridge_ioctl_spri(struct bridge_softc *, void *);
267static int	bridge_ioctl_ght(struct bridge_softc *, void *);
268static int	bridge_ioctl_sht(struct bridge_softc *, void *);
269static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
270static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
271static int	bridge_ioctl_gma(struct bridge_softc *, void *);
272static int	bridge_ioctl_sma(struct bridge_softc *, void *);
273static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
274static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
275#if defined(BRIDGE_IPF)
276static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
277static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
278static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
279static int	bridge_ip_checkbasic(struct mbuf **mp);
280# ifdef INET6
281static int	bridge_ip6_checkbasic(struct mbuf **mp);
282# endif /* INET6 */
283#endif /* BRIDGE_IPF */
284
/*
 * One entry of the SIOC[GS]DRVSPEC dispatch table used by bridge_ioctl().
 */
struct bridge_control {
	/* Handler; receives the softc and the kernel-side argument buffer. */
	int	(*bc_func)(struct bridge_softc *, void *);
	/* Exact ifd_len required for commands that are not translated. */
	int	bc_argsize;
	/* Mask of BC_F_* values below. */
	int	bc_flags;
};

/* How bridge_ioctl() marshals arguments and whether it authorizes first. */
#define	BC_F_COPYIN		0x01	/* copyin() ifd_data before the call */
#define	BC_F_COPYOUT		0x02	/* copyout() the result afterwards */
#define	BC_F_SUSER		0x04	/* kauth check before dispatch */
#define BC_F_XLATEIN		0x08	/* pass ifd_len/ifd_data through args */
#define BC_F_XLATEOUT		0x10	/* store len/buf from args into ifd */
296
297static const struct bridge_control bridge_control_table[] = {
298[BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
299[BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
300
301[BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
302[BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
303
304[BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
305[BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},
306
307[OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
308[OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},
309
310[BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
311
312[BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
313[BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},
314
315[BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
316
317[BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
318
319[BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
320[BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
321
322[BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
323[BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
324
325[BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
326[BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
327
328[BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
329[BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
330
331[BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
332
333[BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
334#if defined(BRIDGE_IPF)
335[BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
336[BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
337#endif /* BRIDGE_IPF */
338[BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
339[BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
340};
341
342static const int bridge_control_table_size = __arraycount(bridge_control_table);
343
344static LIST_HEAD(, bridge_softc) bridge_list;
345static kmutex_t bridge_list_lock;
346
347static struct if_clone bridge_cloner =
348    IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
349
350/*
351 * bridgeattach:
352 *
353 *	Pseudo-device attach routine.
354 */
355void
356bridgeattach(int n)
357{
358
359	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
360	    0, 0, 0, "brtpl", NULL, IPL_NET);
361
362	LIST_INIT(&bridge_list);
363	mutex_init(&bridge_list_lock, MUTEX_DEFAULT, IPL_NET);
364	if_clone_attach(&bridge_cloner);
365}
366
367/*
368 * bridge_clone_create:
369 *
370 *	Create a new bridge instance.
371 */
372static int
373bridge_clone_create(struct if_clone *ifc, int unit)
374{
375	struct bridge_softc *sc;
376	struct ifnet *ifp;
377	int error, flags;
378
379	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
380	ifp = &sc->sc_if;
381
382	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
383	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
384	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
385	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
386	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
387	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
388	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
389	sc->sc_filter_flags = 0;
390
391	/* Initialize our routing table. */
392	bridge_rtable_init(sc);
393
394#ifdef BRIDGE_MPSAFE
395	flags = WQ_MPSAFE;
396#else
397	flags = 0;
398#endif
399	error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
400	    bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, flags);
401	if (error)
402		panic("%s: workqueue_create %d\n", __func__, error);
403
404	callout_init(&sc->sc_brcallout, 0);
405	callout_init(&sc->sc_bstpcallout, 0);
406
407	LIST_INIT(&sc->sc_iflist);
408#ifdef BRIDGE_MPSAFE
409	sc->sc_iflist_psz = pserialize_create();
410	sc->sc_iflist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
411#else
412	sc->sc_iflist_psz = NULL;
413	sc->sc_iflist_lock = NULL;
414#endif
415	cv_init(&sc->sc_iflist_cv, "if_bridge_cv");
416
417	if_initname(ifp, ifc->ifc_name, unit);
418	ifp->if_softc = sc;
419	ifp->if_mtu = ETHERMTU;
420	ifp->if_ioctl = bridge_ioctl;
421	ifp->if_output = bridge_output;
422	ifp->if_start = bridge_start;
423	ifp->if_stop = bridge_stop;
424	ifp->if_init = bridge_init;
425	ifp->if_type = IFT_BRIDGE;
426	ifp->if_addrlen = 0;
427	ifp->if_dlt = DLT_EN10MB;
428	ifp->if_hdrlen = ETHER_HDR_LEN;
429
430	if_initialize(ifp);
431	if_register(ifp);
432
433	if_alloc_sadl(ifp);
434
435	mutex_enter(&bridge_list_lock);
436	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
437	mutex_exit(&bridge_list_lock);
438
439	return (0);
440}
441
442/*
443 * bridge_clone_destroy:
444 *
445 *	Destroy a bridge instance.
446 */
447static int
448bridge_clone_destroy(struct ifnet *ifp)
449{
450	struct bridge_softc *sc = ifp->if_softc;
451	struct bridge_iflist *bif;
452	int s;
453
454	s = splnet();
455
456	bridge_stop(ifp, 1);
457
458	BRIDGE_LOCK(sc);
459	while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL)
460		bridge_delete_member(sc, bif);
461	BRIDGE_UNLOCK(sc);
462
463	mutex_enter(&bridge_list_lock);
464	LIST_REMOVE(sc, sc_list);
465	mutex_exit(&bridge_list_lock);
466
467	splx(s);
468
469	if_detach(ifp);
470
471	/* Tear down the routing table. */
472	bridge_rtable_fini(sc);
473
474	cv_destroy(&sc->sc_iflist_cv);
475
476	if (sc->sc_iflist_psz)
477		pserialize_destroy(sc->sc_iflist_psz);
478	if (sc->sc_iflist_lock)
479		mutex_obj_free(sc->sc_iflist_lock);
480
481	workqueue_destroy(sc->sc_rtage_wq);
482
483	kmem_free(sc, sizeof(*sc));
484
485	return (0);
486}
487
488/*
489 * bridge_ioctl:
490 *
491 *	Handle a control request from the operator.
492 */
493static int
494bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
495{
496	struct bridge_softc *sc = ifp->if_softc;
497	struct lwp *l = curlwp;	/* XXX */
498	union {
499		struct ifbreq ifbreq;
500		struct ifbifconf ifbifconf;
501		struct ifbareq ifbareq;
502		struct ifbaconf ifbaconf;
503		struct ifbrparam ifbrparam;
504	} args;
505	struct ifdrv *ifd = (struct ifdrv *) data;
506	const struct bridge_control *bc = NULL; /* XXXGCC */
507	int s, error = 0;
508
509	/* Authorize command before calling splnet(). */
510	switch (cmd) {
511	case SIOCGDRVSPEC:
512	case SIOCSDRVSPEC:
513		if (ifd->ifd_cmd >= bridge_control_table_size
514		    || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
515			error = EINVAL;
516			return error;
517		}
518
519		/* We only care about BC_F_SUSER at this point. */
520		if ((bc->bc_flags & BC_F_SUSER) == 0)
521			break;
522
523		error = kauth_authorize_network(l->l_cred,
524		    KAUTH_NETWORK_INTERFACE_BRIDGE,
525		    cmd == SIOCGDRVSPEC ?
526		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
527		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
528		     ifd, NULL, NULL);
529		if (error)
530			return (error);
531
532		break;
533	}
534
535	s = splnet();
536
537	switch (cmd) {
538	case SIOCGDRVSPEC:
539	case SIOCSDRVSPEC:
540		KASSERT(bc != NULL);
541		if (cmd == SIOCGDRVSPEC &&
542		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
543			error = EINVAL;
544			break;
545		}
546		else if (cmd == SIOCSDRVSPEC &&
547		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
548			error = EINVAL;
549			break;
550		}
551
552		/* BC_F_SUSER is checked above, before splnet(). */
553
554		if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
555		    && (ifd->ifd_len != bc->bc_argsize
556			|| ifd->ifd_len > sizeof(args))) {
557			error = EINVAL;
558			break;
559		}
560
561		memset(&args, 0, sizeof(args));
562		if (bc->bc_flags & BC_F_COPYIN) {
563			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
564			if (error)
565				break;
566		} else if (bc->bc_flags & BC_F_XLATEIN) {
567			args.ifbifconf.ifbic_len = ifd->ifd_len;
568			args.ifbifconf.ifbic_buf = ifd->ifd_data;
569		}
570
571		error = (*bc->bc_func)(sc, &args);
572		if (error)
573			break;
574
575		if (bc->bc_flags & BC_F_COPYOUT) {
576			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
577		} else if (bc->bc_flags & BC_F_XLATEOUT) {
578			ifd->ifd_len = args.ifbifconf.ifbic_len;
579			ifd->ifd_data = args.ifbifconf.ifbic_buf;
580		}
581		break;
582
583	case SIOCSIFFLAGS:
584		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
585			break;
586		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
587		case IFF_RUNNING:
588			/*
589			 * If interface is marked down and it is running,
590			 * then stop and disable it.
591			 */
592			(*ifp->if_stop)(ifp, 1);
593			break;
594		case IFF_UP:
595			/*
596			 * If interface is marked up and it is stopped, then
597			 * start it.
598			 */
599			error = (*ifp->if_init)(ifp);
600			break;
601		default:
602			break;
603		}
604		break;
605
606	case SIOCSIFMTU:
607		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
608			error = 0;
609		break;
610
611	default:
612		error = ifioctl_common(ifp, cmd, data);
613		break;
614	}
615
616	splx(s);
617
618	return (error);
619}
620
621/*
622 * bridge_lookup_member:
623 *
624 *	Lookup a bridge member interface.
625 */
626static struct bridge_iflist *
627bridge_lookup_member(struct bridge_softc *sc, const char *name)
628{
629	struct bridge_iflist *bif;
630	struct ifnet *ifp;
631	int s;
632
633	BRIDGE_PSZ_RENTER(s);
634
635	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
636		ifp = bif->bif_ifp;
637		if (strcmp(ifp->if_xname, name) == 0)
638			break;
639	}
640	bif = bridge_try_hold_bif(bif);
641
642	BRIDGE_PSZ_REXIT(s);
643
644	return bif;
645}
646
647/*
648 * bridge_lookup_member_if:
649 *
650 *	Lookup a bridge member interface by ifnet*.
651 */
652static struct bridge_iflist *
653bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
654{
655	struct bridge_iflist *bif;
656	int s;
657
658	BRIDGE_PSZ_RENTER(s);
659
660	bif = member_ifp->if_bridgeif;
661	bif = bridge_try_hold_bif(bif);
662
663	BRIDGE_PSZ_REXIT(s);
664
665	return bif;
666}
667
/*
 * bridge_try_hold_bif:
 *
 *	With BRIDGE_MPSAFE: return NULL for a NULL member or one whose
 *	bif_waiting flag is set; otherwise bump bif_refs and return the
 *	member.  Without BRIDGE_MPSAFE the argument is returned as is.
 */
static struct bridge_iflist *
bridge_try_hold_bif(struct bridge_iflist *bif)
{
#ifdef BRIDGE_MPSAFE
	if (bif == NULL)
		return NULL;

	/* A deleter is draining references; do not add a new one. */
	if (bif->bif_waiting)
		return NULL;

	atomic_inc_32(&bif->bif_refs);
#endif
	return bif;
}
681
/*
 * bridge_release_member:
 *
 *	Release the specified member interface.
 *
 *	With BRIDGE_MPSAFE this drops one reference from bif_refs and, if
 *	that was the last one while bif_waiting is set, broadcasts on
 *	sc_iflist_cv (under the bridge lock) for the thread sleeping in
 *	bridge_delete_member().  Without BRIDGE_MPSAFE it does nothing.
 */
static void
bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
#ifdef BRIDGE_MPSAFE
	uint32_t refs;

	/*
	 * Use the 32-bit primitive so the decrement matches the
	 * atomic_inc_32() in bridge_try_hold_bif() and the uint32_t
	 * result type, instead of the "unsigned int" flavour.
	 */
	refs = atomic_dec_32_nv(&bif->bif_refs);
	if (__predict_false(refs == 0 && bif->bif_waiting)) {
		BRIDGE_LOCK(sc);
		cv_broadcast(&sc->sc_iflist_cv);
		BRIDGE_UNLOCK(sc);
	}
#else
	(void)sc;
	(void)bif;
#endif
}
704
705/*
706 * bridge_delete_member:
707 *
708 *	Delete the specified member interface.
709 */
710static void
711bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
712{
713	struct ifnet *ifs = bif->bif_ifp;
714
715	KASSERT(BRIDGE_LOCKED(sc));
716
717	ifs->_if_input = ether_input;
718	ifs->if_bridge = NULL;
719	ifs->if_bridgeif = NULL;
720
721	LIST_REMOVE(bif, bif_next);
722
723	BRIDGE_PSZ_PERFORM(sc);
724
725#ifdef BRIDGE_MPSAFE
726	bif->bif_waiting = true;
727	membar_sync();
728	while (bif->bif_refs > 0) {
729		aprint_debug("%s: cv_wait on iflist\n", __func__);
730		cv_wait(&sc->sc_iflist_cv, sc->sc_iflist_lock);
731	}
732	bif->bif_waiting = false;
733#endif
734	BRIDGE_UNLOCK(sc);
735
736	kmem_free(bif, sizeof(*bif));
737
738	BRIDGE_LOCK(sc);
739}
740
741static int
742bridge_ioctl_add(struct bridge_softc *sc, void *arg)
743{
744	struct ifbreq *req = arg;
745	struct bridge_iflist *bif = NULL;
746	struct ifnet *ifs;
747	int error = 0;
748
749	ifs = ifunit(req->ifbr_ifsname);
750	if (ifs == NULL)
751		return (ENOENT);
752
753	if (sc->sc_if.if_mtu != ifs->if_mtu)
754		return (EINVAL);
755
756	if (ifs->if_bridge == sc)
757		return (EEXIST);
758
759	if (ifs->if_bridge != NULL)
760		return (EBUSY);
761
762	if (ifs->_if_input != ether_input)
763		return EINVAL;
764
765	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
766	if ((ifs->if_flags & IFF_SIMPLEX) == 0)
767		return EINVAL;
768
769	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
770
771	switch (ifs->if_type) {
772	case IFT_ETHER:
773		if ((error = ether_enable_vlan_mtu(ifs)) > 0)
774			goto out;
775		/*
776		 * Place the interface into promiscuous mode.
777		 */
778		error = ifpromisc(ifs, 1);
779		if (error)
780			goto out;
781		break;
782	default:
783		error = EINVAL;
784		goto out;
785	}
786
787	bif->bif_ifp = ifs;
788	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
789	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
790	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
791	bif->bif_refs = 0;
792	bif->bif_waiting = false;
793
794	BRIDGE_LOCK(sc);
795
796	ifs->if_bridge = sc;
797	ifs->if_bridgeif = bif;
798	LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
799	ifs->_if_input = bridge_input;
800
801	BRIDGE_UNLOCK(sc);
802
803	if (sc->sc_if.if_flags & IFF_RUNNING)
804		bstp_initialization(sc);
805	else
806		bstp_stop(sc);
807
808 out:
809	if (error) {
810		if (bif != NULL)
811			kmem_free(bif, sizeof(*bif));
812	}
813	return (error);
814}
815
816static int
817bridge_ioctl_del(struct bridge_softc *sc, void *arg)
818{
819	struct ifbreq *req = arg;
820	const char *name = req->ifbr_ifsname;
821	struct bridge_iflist *bif;
822	struct ifnet *ifs;
823
824	BRIDGE_LOCK(sc);
825
826	/*
827	 * Don't use bridge_lookup_member. We want to get a member
828	 * with bif_refs == 0.
829	 */
830	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
831		ifs = bif->bif_ifp;
832		if (strcmp(ifs->if_xname, name) == 0)
833			break;
834	}
835
836	if (bif == NULL) {
837		BRIDGE_UNLOCK(sc);
838		return ENOENT;
839	}
840
841	bridge_delete_member(sc, bif);
842
843	BRIDGE_UNLOCK(sc);
844
845	switch (ifs->if_type) {
846	case IFT_ETHER:
847		/*
848		 * Take the interface out of promiscuous mode.
849		 * Don't call it with holding a spin lock.
850		 */
851		(void) ifpromisc(ifs, 0);
852		(void) ether_disable_vlan_mtu(ifs);
853		break;
854	default:
855#ifdef DIAGNOSTIC
856		panic("bridge_delete_member: impossible");
857#endif
858		break;
859	}
860
861	bridge_rtdelete(sc, ifs);
862
863	if (sc->sc_if.if_flags & IFF_RUNNING)
864		bstp_initialization(sc);
865
866	return 0;
867}
868
869static int
870bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
871{
872	struct ifbreq *req = arg;
873	struct bridge_iflist *bif;
874
875	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
876	if (bif == NULL)
877		return (ENOENT);
878
879	req->ifbr_ifsflags = bif->bif_flags;
880	req->ifbr_state = bif->bif_state;
881	req->ifbr_priority = bif->bif_priority;
882	req->ifbr_path_cost = bif->bif_path_cost;
883	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
884
885	bridge_release_member(sc, bif);
886
887	return (0);
888}
889
890static int
891bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
892{
893	struct ifbreq *req = arg;
894	struct bridge_iflist *bif;
895
896	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
897	if (bif == NULL)
898		return (ENOENT);
899
900	if (req->ifbr_ifsflags & IFBIF_STP) {
901		switch (bif->bif_ifp->if_type) {
902		case IFT_ETHER:
903			/* These can do spanning tree. */
904			break;
905
906		default:
907			/* Nothing else can. */
908			bridge_release_member(sc, bif);
909			return (EINVAL);
910		}
911	}
912
913	bif->bif_flags = req->ifbr_ifsflags;
914
915	bridge_release_member(sc, bif);
916
917	if (sc->sc_if.if_flags & IFF_RUNNING)
918		bstp_initialization(sc);
919
920	return (0);
921}
922
923static int
924bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
925{
926	struct ifbrparam *param = arg;
927
928	sc->sc_brtmax = param->ifbrp_csize;
929	bridge_rttrim(sc);
930
931	return (0);
932}
933
934static int
935bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
936{
937	struct ifbrparam *param = arg;
938
939	param->ifbrp_csize = sc->sc_brtmax;
940
941	return (0);
942}
943
944static int
945bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
946{
947	struct ifbifconf *bifc = arg;
948	struct bridge_iflist *bif;
949	struct ifbreq *breqs;
950	int i, count, error = 0;
951
952retry:
953	BRIDGE_LOCK(sc);
954	count = 0;
955	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
956		count++;
957	BRIDGE_UNLOCK(sc);
958
959	if (count == 0) {
960		bifc->ifbic_len = 0;
961		return 0;
962	}
963
964	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
965		/* Tell that a larger buffer is needed */
966		bifc->ifbic_len = sizeof(*breqs) * count;
967		return 0;
968	}
969
970	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
971
972	BRIDGE_LOCK(sc);
973
974	i = 0;
975	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
976		i++;
977	if (i > count) {
978		/*
979		 * The number of members has been increased.
980		 * We need more memory!
981		 */
982		BRIDGE_UNLOCK(sc);
983		kmem_free(breqs, sizeof(*breqs) * count);
984		goto retry;
985	}
986
987	i = 0;
988	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
989		struct ifbreq *breq = &breqs[i++];
990		memset(breq, 0, sizeof(*breq));
991
992		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
993		    sizeof(breq->ifbr_ifsname));
994		breq->ifbr_ifsflags = bif->bif_flags;
995		breq->ifbr_state = bif->bif_state;
996		breq->ifbr_priority = bif->bif_priority;
997		breq->ifbr_path_cost = bif->bif_path_cost;
998		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
999	}
1000
1001	/* Don't call copyout with holding the mutex */
1002	BRIDGE_UNLOCK(sc);
1003
1004	for (i = 0; i < count; i++) {
1005		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1006		if (error)
1007			break;
1008	}
1009	bifc->ifbic_len = sizeof(*breqs) * i;
1010
1011	kmem_free(breqs, sizeof(*breqs) * count);
1012
1013	return error;
1014}
1015
1016static int
1017bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1018{
1019	struct ifbaconf *bac = arg;
1020	struct bridge_rtnode *brt;
1021	struct ifbareq bareq;
1022	int count = 0, error = 0, len;
1023
1024	if (bac->ifbac_len == 0)
1025		return (0);
1026
1027	BRIDGE_RT_LOCK(sc);
1028
1029	len = bac->ifbac_len;
1030	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1031		if (len < sizeof(bareq))
1032			goto out;
1033		memset(&bareq, 0, sizeof(bareq));
1034		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1035		    sizeof(bareq.ifba_ifsname));
1036		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1037		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1038			bareq.ifba_expire = brt->brt_expire - time_uptime;
1039		} else
1040			bareq.ifba_expire = 0;
1041		bareq.ifba_flags = brt->brt_flags;
1042
1043		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1044		if (error)
1045			goto out;
1046		count++;
1047		len -= sizeof(bareq);
1048	}
1049 out:
1050	BRIDGE_RT_UNLOCK(sc);
1051
1052	bac->ifbac_len = sizeof(bareq) * count;
1053	return (error);
1054}
1055
1056static int
1057bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1058{
1059	struct ifbareq *req = arg;
1060	struct bridge_iflist *bif;
1061	int error;
1062
1063	bif = bridge_lookup_member(sc, req->ifba_ifsname);
1064	if (bif == NULL)
1065		return (ENOENT);
1066
1067	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1068	    req->ifba_flags);
1069
1070	bridge_release_member(sc, bif);
1071
1072	return (error);
1073}
1074
1075static int
1076bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1077{
1078	struct ifbrparam *param = arg;
1079
1080	sc->sc_brttimeout = param->ifbrp_ctime;
1081
1082	return (0);
1083}
1084
1085static int
1086bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1087{
1088	struct ifbrparam *param = arg;
1089
1090	param->ifbrp_ctime = sc->sc_brttimeout;
1091
1092	return (0);
1093}
1094
1095static int
1096bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1097{
1098	struct ifbareq *req = arg;
1099
1100	return (bridge_rtdaddr(sc, req->ifba_dst));
1101}
1102
1103static int
1104bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1105{
1106	struct ifbreq *req = arg;
1107
1108	bridge_rtflush(sc, req->ifbr_ifsflags);
1109
1110	return (0);
1111}
1112
1113static int
1114bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1115{
1116	struct ifbrparam *param = arg;
1117
1118	param->ifbrp_prio = sc->sc_bridge_priority;
1119
1120	return (0);
1121}
1122
1123static int
1124bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1125{
1126	struct ifbrparam *param = arg;
1127
1128	sc->sc_bridge_priority = param->ifbrp_prio;
1129
1130	if (sc->sc_if.if_flags & IFF_RUNNING)
1131		bstp_initialization(sc);
1132
1133	return (0);
1134}
1135
1136static int
1137bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1138{
1139	struct ifbrparam *param = arg;
1140
1141	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1142
1143	return (0);
1144}
1145
1146static int
1147bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1148{
1149	struct ifbrparam *param = arg;
1150
1151	if (param->ifbrp_hellotime == 0)
1152		return (EINVAL);
1153	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1154
1155	if (sc->sc_if.if_flags & IFF_RUNNING)
1156		bstp_initialization(sc);
1157
1158	return (0);
1159}
1160
1161static int
1162bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1163{
1164	struct ifbrparam *param = arg;
1165
1166	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1167
1168	return (0);
1169}
1170
1171static int
1172bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1173{
1174	struct ifbrparam *param = arg;
1175
1176	if (param->ifbrp_fwddelay == 0)
1177		return (EINVAL);
1178	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1179
1180	if (sc->sc_if.if_flags & IFF_RUNNING)
1181		bstp_initialization(sc);
1182
1183	return (0);
1184}
1185
1186static int
1187bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1188{
1189	struct ifbrparam *param = arg;
1190
1191	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1192
1193	return (0);
1194}
1195
1196static int
1197bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1198{
1199	struct ifbrparam *param = arg;
1200
1201	if (param->ifbrp_maxage == 0)
1202		return (EINVAL);
1203	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1204
1205	if (sc->sc_if.if_flags & IFF_RUNNING)
1206		bstp_initialization(sc);
1207
1208	return (0);
1209}
1210
1211static int
1212bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1213{
1214	struct ifbreq *req = arg;
1215	struct bridge_iflist *bif;
1216
1217	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1218	if (bif == NULL)
1219		return (ENOENT);
1220
1221	bif->bif_priority = req->ifbr_priority;
1222
1223	if (sc->sc_if.if_flags & IFF_RUNNING)
1224		bstp_initialization(sc);
1225
1226	bridge_release_member(sc, bif);
1227
1228	return (0);
1229}
1230
1231#if defined(BRIDGE_IPF)
1232static int
1233bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1234{
1235	struct ifbrparam *param = arg;
1236
1237	param->ifbrp_filter = sc->sc_filter_flags;
1238
1239	return (0);
1240}
1241
1242static int
1243bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1244{
1245	struct ifbrparam *param = arg;
1246	uint32_t nflags, oflags;
1247
1248	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1249		return (EINVAL);
1250
1251	nflags = param->ifbrp_filter;
1252	oflags = sc->sc_filter_flags;
1253
1254	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1255		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1256			sc->sc_if.if_pfil);
1257	}
1258	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1259		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1260			sc->sc_if.if_pfil);
1261	}
1262
1263	sc->sc_filter_flags = nflags;
1264
1265	return (0);
1266}
1267#endif /* BRIDGE_IPF */
1268
1269static int
1270bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1271{
1272	struct ifbreq *req = arg;
1273	struct bridge_iflist *bif;
1274
1275	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1276	if (bif == NULL)
1277		return (ENOENT);
1278
1279	bif->bif_path_cost = req->ifbr_path_cost;
1280
1281	if (sc->sc_if.if_flags & IFF_RUNNING)
1282		bstp_initialization(sc);
1283
1284	bridge_release_member(sc, bif);
1285
1286	return (0);
1287}
1288
1289/*
1290 * bridge_ifdetach:
1291 *
1292 *	Detach an interface from a bridge.  Called when a member
1293 *	interface is detaching.
1294 */
1295void
1296bridge_ifdetach(struct ifnet *ifp)
1297{
1298	struct bridge_softc *sc = ifp->if_bridge;
1299	struct ifbreq breq;
1300
1301	/* ioctl_lock should prevent this from happening */
1302	KASSERT(sc != NULL);
1303
1304	memset(&breq, 0, sizeof(breq));
1305	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1306
1307	(void) bridge_ioctl_del(sc, &breq);
1308}
1309
1310/*
1311 * bridge_init:
1312 *
1313 *	Initialize a bridge interface.
1314 */
1315static int
1316bridge_init(struct ifnet *ifp)
1317{
1318	struct bridge_softc *sc = ifp->if_softc;
1319
1320	if (ifp->if_flags & IFF_RUNNING)
1321		return (0);
1322
1323	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1324	    bridge_timer, sc);
1325
1326	ifp->if_flags |= IFF_RUNNING;
1327	bstp_initialization(sc);
1328	return (0);
1329}
1330
1331/*
1332 * bridge_stop:
1333 *
1334 *	Stop the bridge interface.
1335 */
1336static void
1337bridge_stop(struct ifnet *ifp, int disable)
1338{
1339	struct bridge_softc *sc = ifp->if_softc;
1340
1341	if ((ifp->if_flags & IFF_RUNNING) == 0)
1342		return;
1343
1344	callout_stop(&sc->sc_brcallout);
1345	bstp_stop(sc);
1346
1347	bridge_rtflush(sc, IFBF_FLUSHDYN);
1348
1349	ifp->if_flags &= ~IFF_RUNNING;
1350}
1351
1352/*
1353 * bridge_enqueue:
1354 *
1355 *	Enqueue a packet on a bridge member interface.
1356 */
1357void
1358bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1359    int runfilt)
1360{
1361	ALTQ_DECL(struct altq_pktattr pktattr;)
1362	int len, error;
1363	short mflags;
1364
1365	/*
1366	 * Clear any in-bound checksum flags for this packet.
1367	 */
1368	m->m_pkthdr.csum_flags = 0;
1369
1370	if (runfilt) {
1371		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1372		    dst_ifp, PFIL_OUT) != 0) {
1373			if (m != NULL)
1374				m_freem(m);
1375			return;
1376		}
1377		if (m == NULL)
1378			return;
1379	}
1380
1381#ifdef ALTQ
1382	/*
1383	 * If ALTQ is enabled on the member interface, do
1384	 * classification; the queueing discipline might
1385	 * not require classification, but might require
1386	 * the address family/header pointer in the pktattr.
1387	 */
1388	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1389		/* XXX IFT_ETHER */
1390		altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
1391	}
1392#endif /* ALTQ */
1393
1394	len = m->m_pkthdr.len;
1395	mflags = m->m_flags;
1396
1397	IFQ_ENQUEUE(&dst_ifp->if_snd, m, &pktattr, error);
1398
1399	if (error) {
1400		/* mbuf is already freed */
1401		sc->sc_if.if_oerrors++;
1402		return;
1403	}
1404
1405	sc->sc_if.if_opackets++;
1406	sc->sc_if.if_obytes += len;
1407
1408	dst_ifp->if_obytes += len;
1409
1410	if (mflags & M_MCAST) {
1411		sc->sc_if.if_omcasts++;
1412		dst_ifp->if_omcasts++;
1413	}
1414
1415	if ((dst_ifp->if_flags & IFF_OACTIVE) == 0)
1416		(*dst_ifp->if_start)(dst_ifp);
1417}
1418
1419/*
1420 * bridge_output:
1421 *
1422 *	Send output from a bridge member interface.  This
1423 *	performs the bridging function for locally originated
1424 *	packets.
1425 *
1426 *	The mbuf has the Ethernet header already attached.  We must
1427 *	enqueue or free the mbuf before returning.
1428 */
1429int
1430bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
1431    struct rtentry *rt)
1432{
1433	struct ether_header *eh;
1434	struct ifnet *dst_if;
1435	struct bridge_softc *sc;
1436#ifndef BRIDGE_MPSAFE
1437	int s;
1438#endif
1439
1440	if (m->m_len < ETHER_HDR_LEN) {
1441		m = m_pullup(m, ETHER_HDR_LEN);
1442		if (m == NULL)
1443			return (0);
1444	}
1445
1446	eh = mtod(m, struct ether_header *);
1447	sc = ifp->if_bridge;
1448
1449#ifndef BRIDGE_MPSAFE
1450	s = splnet();
1451#endif
1452
1453	/*
1454	 * If bridge is down, but the original output interface is up,
1455	 * go ahead and send out that interface.  Otherwise, the packet
1456	 * is dropped below.
1457	 */
1458	if (__predict_false(sc == NULL) ||
1459	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1460		dst_if = ifp;
1461		goto sendunicast;
1462	}
1463
1464	/*
1465	 * If the packet is a multicast, or we don't know a better way to
1466	 * get there, send to all interfaces.
1467	 */
1468	if (ETHER_IS_MULTICAST(eh->ether_dhost))
1469		dst_if = NULL;
1470	else
1471		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1472	if (dst_if == NULL) {
1473		struct bridge_iflist *bif;
1474		struct mbuf *mc;
1475		int used = 0;
1476		int ss;
1477
1478		BRIDGE_PSZ_RENTER(ss);
1479		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1480			bif = bridge_try_hold_bif(bif);
1481			if (bif == NULL)
1482				continue;
1483			BRIDGE_PSZ_REXIT(ss);
1484
1485			dst_if = bif->bif_ifp;
1486			if ((dst_if->if_flags & IFF_RUNNING) == 0)
1487				goto next;
1488
1489			/*
1490			 * If this is not the original output interface,
1491			 * and the interface is participating in spanning
1492			 * tree, make sure the port is in a state that
1493			 * allows forwarding.
1494			 */
1495			if (dst_if != ifp &&
1496			    (bif->bif_flags & IFBIF_STP) != 0) {
1497				switch (bif->bif_state) {
1498				case BSTP_IFSTATE_BLOCKING:
1499				case BSTP_IFSTATE_LISTENING:
1500				case BSTP_IFSTATE_DISABLED:
1501					goto next;
1502				}
1503			}
1504
1505			if (LIST_NEXT(bif, bif_next) == NULL) {
1506				used = 1;
1507				mc = m;
1508			} else {
1509				mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
1510				if (mc == NULL) {
1511					sc->sc_if.if_oerrors++;
1512					goto next;
1513				}
1514			}
1515
1516			bridge_enqueue(sc, dst_if, mc, 0);
1517next:
1518			bridge_release_member(sc, bif);
1519			BRIDGE_PSZ_RENTER(ss);
1520		}
1521		BRIDGE_PSZ_REXIT(ss);
1522
1523		if (used == 0)
1524			m_freem(m);
1525#ifndef BRIDGE_MPSAFE
1526		splx(s);
1527#endif
1528		return (0);
1529	}
1530
1531 sendunicast:
1532	/*
1533	 * XXX Spanning tree consideration here?
1534	 */
1535
1536	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1537		m_freem(m);
1538#ifndef BRIDGE_MPSAFE
1539		splx(s);
1540#endif
1541		return (0);
1542	}
1543
1544	bridge_enqueue(sc, dst_if, m, 0);
1545
1546#ifndef BRIDGE_MPSAFE
1547	splx(s);
1548#endif
1549	return (0);
1550}
1551
1552/*
1553 * bridge_start:
1554 *
1555 *	Start output on a bridge.
1556 *
1557 *	NOTE: This routine should never be called in this implementation.
1558 */
1559static void
1560bridge_start(struct ifnet *ifp)
1561{
1562
1563	printf("%s: bridge_start() called\n", ifp->if_xname);
1564}
1565
1566/*
1567 * bridge_forward:
1568 *
1569 *	The forwarding function of the bridge.
1570 */
1571static void
1572bridge_forward(struct bridge_softc *sc, struct mbuf *m)
1573{
1574	struct bridge_iflist *bif;
1575	struct ifnet *src_if, *dst_if;
1576	struct ether_header *eh;
1577#ifndef BRIDGE_MPSAFE
1578	int s;
1579
1580	KERNEL_LOCK(1, NULL);
1581	mutex_enter(softnet_lock);
1582#endif
1583
1584	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1585#ifndef BRIDGE_MPSAFE
1586		mutex_exit(softnet_lock);
1587		KERNEL_UNLOCK_ONE(NULL);
1588#endif
1589		return;
1590	}
1591
1592#ifndef BRIDGE_MPSAFE
1593	s = splnet();
1594#endif
1595
1596	src_if = m->m_pkthdr.rcvif;
1597
1598	sc->sc_if.if_ipackets++;
1599	sc->sc_if.if_ibytes += m->m_pkthdr.len;
1600
1601	/*
1602	 * Look up the bridge_iflist.
1603	 */
1604	bif = bridge_lookup_member_if(sc, src_if);
1605	if (bif == NULL) {
1606		/* Interface is not a bridge member (anymore?) */
1607		m_freem(m);
1608		goto out;
1609	}
1610
1611	if (bif->bif_flags & IFBIF_STP) {
1612		switch (bif->bif_state) {
1613		case BSTP_IFSTATE_BLOCKING:
1614		case BSTP_IFSTATE_LISTENING:
1615		case BSTP_IFSTATE_DISABLED:
1616			m_freem(m);
1617			bridge_release_member(sc, bif);
1618			goto out;
1619		}
1620	}
1621
1622	eh = mtod(m, struct ether_header *);
1623
1624	/*
1625	 * If the interface is learning, and the source
1626	 * address is valid and not multicast, record
1627	 * the address.
1628	 */
1629	if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1630	    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1631	    (eh->ether_shost[0] == 0 &&
1632	     eh->ether_shost[1] == 0 &&
1633	     eh->ether_shost[2] == 0 &&
1634	     eh->ether_shost[3] == 0 &&
1635	     eh->ether_shost[4] == 0 &&
1636	     eh->ether_shost[5] == 0) == 0) {
1637		(void) bridge_rtupdate(sc, eh->ether_shost,
1638		    src_if, 0, IFBAF_DYNAMIC);
1639	}
1640
1641	if ((bif->bif_flags & IFBIF_STP) != 0 &&
1642	    bif->bif_state == BSTP_IFSTATE_LEARNING) {
1643		m_freem(m);
1644		bridge_release_member(sc, bif);
1645		goto out;
1646	}
1647
1648	bridge_release_member(sc, bif);
1649
1650	/*
1651	 * At this point, the port either doesn't participate
1652	 * in spanning tree or it is in the forwarding state.
1653	 */
1654
1655	/*
1656	 * If the packet is unicast, destined for someone on
1657	 * "this" side of the bridge, drop it.
1658	 */
1659	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1660		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1661		if (src_if == dst_if) {
1662			m_freem(m);
1663			goto out;
1664		}
1665	} else {
1666		/* ...forward it to all interfaces. */
1667		sc->sc_if.if_imcasts++;
1668		dst_if = NULL;
1669	}
1670
1671	if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1672	    m->m_pkthdr.rcvif, PFIL_IN) != 0) {
1673		if (m != NULL)
1674			m_freem(m);
1675		goto out;
1676	}
1677	if (m == NULL)
1678		goto out;
1679
1680	if (dst_if == NULL) {
1681		bridge_broadcast(sc, src_if, m);
1682		goto out;
1683	}
1684
1685	/*
1686	 * At this point, we're dealing with a unicast frame
1687	 * going to a different interface.
1688	 */
1689	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1690		m_freem(m);
1691		goto out;
1692	}
1693
1694	bif = bridge_lookup_member_if(sc, dst_if);
1695	if (bif == NULL) {
1696		/* Not a member of the bridge (anymore?) */
1697		m_freem(m);
1698		goto out;
1699	}
1700
1701	if (bif->bif_flags & IFBIF_STP) {
1702		switch (bif->bif_state) {
1703		case BSTP_IFSTATE_DISABLED:
1704		case BSTP_IFSTATE_BLOCKING:
1705			m_freem(m);
1706			bridge_release_member(sc, bif);
1707			goto out;
1708		}
1709	}
1710
1711	bridge_release_member(sc, bif);
1712
1713	bridge_enqueue(sc, dst_if, m, 1);
1714out:
1715#ifndef BRIDGE_MPSAFE
1716	splx(s);
1717	mutex_exit(softnet_lock);
1718	KERNEL_UNLOCK_ONE(NULL);
1719#else
1720	/* XXX gcc */
1721	return;
1722#endif
1723}
1724
1725static bool
1726bstp_state_before_learning(struct bridge_iflist *bif)
1727{
1728	if (bif->bif_flags & IFBIF_STP) {
1729		switch (bif->bif_state) {
1730		case BSTP_IFSTATE_BLOCKING:
1731		case BSTP_IFSTATE_LISTENING:
1732		case BSTP_IFSTATE_DISABLED:
1733			return true;
1734		}
1735	}
1736	return false;
1737}
1738
1739static bool
1740bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1741{
1742	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1743
1744	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1745#if NCARP > 0
1746	    || (bif->bif_ifp->if_carp &&
1747	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1748#endif /* NCARP > 0 */
1749	    )
1750		return true;
1751
1752	return false;
1753}
1754
1755/*
1756 * bridge_input:
1757 *
1758 *	Receive input from a member interface.  Queue the packet for
1759 *	bridging if it is not for us.
1760 */
1761static void
1762bridge_input(struct ifnet *ifp, struct mbuf *m)
1763{
1764	struct bridge_softc *sc = ifp->if_bridge;
1765	struct bridge_iflist *bif;
1766	struct ether_header *eh;
1767
1768	KASSERT(!cpu_intr_p());
1769
1770	if (__predict_false(sc == NULL) ||
1771	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1772		ether_input(ifp, m);
1773		return;
1774	}
1775
1776	bif = bridge_lookup_member_if(sc, ifp);
1777	if (bif == NULL) {
1778		ether_input(ifp, m);
1779		return;
1780	}
1781
1782	eh = mtod(m, struct ether_header *);
1783
1784	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1785		if (memcmp(etherbroadcastaddr,
1786		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
1787			m->m_flags |= M_BCAST;
1788		else
1789			m->m_flags |= M_MCAST;
1790	}
1791
1792	/*
1793	 * A 'fast' path for packets addressed to interfaces that are
1794	 * part of this bridge.
1795	 */
1796	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
1797	    !bstp_state_before_learning(bif)) {
1798		struct bridge_iflist *_bif;
1799		struct ifnet *_ifp = NULL;
1800		int s;
1801
1802		BRIDGE_PSZ_RENTER(s);
1803		LIST_FOREACH(_bif, &sc->sc_iflist, bif_next) {
1804			/* It is destined for us. */
1805			if (bridge_ourether(_bif, eh, 0)) {
1806				_bif = bridge_try_hold_bif(_bif);
1807				BRIDGE_PSZ_REXIT(s);
1808				if (_bif == NULL)
1809					goto out;
1810				if (_bif->bif_flags & IFBIF_LEARNING)
1811					(void) bridge_rtupdate(sc,
1812					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
1813				_ifp = m->m_pkthdr.rcvif = _bif->bif_ifp;
1814				bridge_release_member(sc, _bif);
1815				goto out;
1816			}
1817
1818			/* We just received a packet that we sent out. */
1819			if (bridge_ourether(_bif, eh, 1))
1820				break;
1821		}
1822		BRIDGE_PSZ_REXIT(s);
1823out:
1824
1825		if (_bif != NULL) {
1826			bridge_release_member(sc, bif);
1827			if (_ifp != NULL) {
1828				m->m_flags &= ~M_PROMISC;
1829				ether_input(_ifp, m);
1830			} else
1831				m_freem(m);
1832			return;
1833		}
1834	}
1835
1836	/* Tap off 802.1D packets; they do not get forwarded. */
1837	if (bif->bif_flags & IFBIF_STP &&
1838	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
1839		bstp_input(sc, bif, m);
1840		bridge_release_member(sc, bif);
1841		return;
1842	}
1843
1844	/*
1845	 * A normal switch would discard the packet here, but that's not what
1846	 * we've done historically. This also prevents some obnoxious behaviour.
1847	 */
1848	if (bstp_state_before_learning(bif)) {
1849		bridge_release_member(sc, bif);
1850		ether_input(ifp, m);
1851		return;
1852	}
1853
1854	bridge_release_member(sc, bif);
1855
1856	bridge_forward(sc, m);
1857}
1858
1859/*
1860 * bridge_broadcast:
1861 *
1862 *	Send a frame to all interfaces that are members of
1863 *	the bridge, except for the one on which the packet
1864 *	arrived.
1865 */
1866static void
1867bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
1868    struct mbuf *m)
1869{
1870	struct bridge_iflist *bif;
1871	struct mbuf *mc;
1872	struct ifnet *dst_if;
1873	bool bmcast;
1874	int s;
1875
1876	bmcast = m->m_flags & (M_BCAST|M_MCAST);
1877
1878	BRIDGE_PSZ_RENTER(s);
1879	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1880		bif = bridge_try_hold_bif(bif);
1881		if (bif == NULL)
1882			continue;
1883		BRIDGE_PSZ_REXIT(s);
1884
1885		dst_if = bif->bif_ifp;
1886
1887		if (bif->bif_flags & IFBIF_STP) {
1888			switch (bif->bif_state) {
1889			case BSTP_IFSTATE_BLOCKING:
1890			case BSTP_IFSTATE_DISABLED:
1891				goto next;
1892			}
1893		}
1894
1895		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
1896			goto next;
1897
1898		if ((dst_if->if_flags & IFF_RUNNING) == 0)
1899			goto next;
1900
1901		if (dst_if != src_if) {
1902			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1903			if (mc == NULL) {
1904				sc->sc_if.if_oerrors++;
1905				goto next;
1906			}
1907			bridge_enqueue(sc, dst_if, mc, 1);
1908		}
1909
1910		if (bmcast) {
1911			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1912			if (mc == NULL) {
1913				sc->sc_if.if_oerrors++;
1914				goto next;
1915			}
1916
1917			mc->m_pkthdr.rcvif = dst_if;
1918			mc->m_flags &= ~M_PROMISC;
1919			ether_input(dst_if, mc);
1920		}
1921next:
1922		bridge_release_member(sc, bif);
1923		BRIDGE_PSZ_RENTER(s);
1924	}
1925	BRIDGE_PSZ_REXIT(s);
1926
1927	m_freem(m);
1928}
1929
1930static int
1931bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
1932    struct bridge_rtnode **brtp)
1933{
1934	struct bridge_rtnode *brt;
1935	int error;
1936
1937	if (sc->sc_brtcnt >= sc->sc_brtmax)
1938		return ENOSPC;
1939
1940	/*
1941	 * Allocate a new bridge forwarding node, and
1942	 * initialize the expiration time and Ethernet
1943	 * address.
1944	 */
1945	brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
1946	if (brt == NULL)
1947		return ENOMEM;
1948
1949	memset(brt, 0, sizeof(*brt));
1950	brt->brt_expire = time_uptime + sc->sc_brttimeout;
1951	brt->brt_flags = IFBAF_DYNAMIC;
1952	memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
1953
1954	BRIDGE_RT_LOCK(sc);
1955	error = bridge_rtnode_insert(sc, brt);
1956	BRIDGE_RT_UNLOCK(sc);
1957
1958	if (error != 0) {
1959		pool_put(&bridge_rtnode_pool, brt);
1960		return error;
1961	}
1962
1963	*brtp = brt;
1964	return 0;
1965}
1966
1967/*
1968 * bridge_rtupdate:
1969 *
1970 *	Add a bridge routing entry.
1971 */
1972static int
1973bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
1974    struct ifnet *dst_if, int setflags, uint8_t flags)
1975{
1976	struct bridge_rtnode *brt;
1977	int s;
1978
1979again:
1980	/*
1981	 * A route for this destination might already exist.  If so,
1982	 * update it, otherwise create a new one.
1983	 */
1984	BRIDGE_RT_RENTER(s);
1985	brt = bridge_rtnode_lookup(sc, dst);
1986
1987	if (brt != NULL) {
1988		brt->brt_ifp = dst_if;
1989		if (setflags) {
1990			brt->brt_flags = flags;
1991			if (flags & IFBAF_STATIC)
1992				brt->brt_expire = 0;
1993			else
1994				brt->brt_expire = time_uptime + sc->sc_brttimeout;
1995		} else {
1996			if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
1997				brt->brt_expire = time_uptime + sc->sc_brttimeout;
1998		}
1999	}
2000	BRIDGE_RT_REXIT(s);
2001
2002	if (brt == NULL) {
2003		int r;
2004
2005		r = bridge_rtalloc(sc, dst, &brt);
2006		if (r != 0)
2007			return r;
2008		goto again;
2009	}
2010
2011	return 0;
2012}
2013
2014/*
2015 * bridge_rtlookup:
2016 *
2017 *	Lookup the destination interface for an address.
2018 */
2019static struct ifnet *
2020bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2021{
2022	struct bridge_rtnode *brt;
2023	struct ifnet *ifs = NULL;
2024	int s;
2025
2026	BRIDGE_RT_RENTER(s);
2027	brt = bridge_rtnode_lookup(sc, addr);
2028	if (brt != NULL)
2029		ifs = brt->brt_ifp;
2030	BRIDGE_RT_REXIT(s);
2031
2032	return ifs;
2033}
2034
2035typedef bool (*bridge_iterate_cb_t)
2036    (struct bridge_softc *, struct bridge_rtnode *, bool *, void *);
2037
2038/*
2039 * bridge_rtlist_iterate_remove:
2040 *
2041 *	It iterates on sc->sc_rtlist and removes rtnodes of it which func
2042 *	callback judges to remove. Removals of rtnodes are done in a manner
2043 *	of pserialize. To this end, all kmem_* operations are placed out of
2044 *	mutexes.
2045 */
2046static void
2047bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2048{
2049	struct bridge_rtnode *brt, *nbrt;
2050	struct bridge_rtnode **brt_list;
2051	int i, count;
2052
2053retry:
2054	count = sc->sc_brtcnt;
2055	if (count == 0)
2056		return;
2057	brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2058
2059	BRIDGE_RT_LOCK(sc);
2060	if (__predict_false(sc->sc_brtcnt > count)) {
2061		/* The rtnodes increased, we need more memory */
2062		BRIDGE_RT_UNLOCK(sc);
2063		kmem_free(brt_list, sizeof(*brt_list) * count);
2064		goto retry;
2065	}
2066
2067	i = 0;
2068	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2069		bool need_break = false;
2070		if (func(sc, brt, &need_break, arg)) {
2071			bridge_rtnode_remove(sc, brt);
2072			brt_list[i++] = brt;
2073		}
2074		if (need_break)
2075			break;
2076	}
2077
2078	if (i > 0)
2079		BRIDGE_RT_PSZ_PERFORM(sc);
2080	BRIDGE_RT_UNLOCK(sc);
2081
2082	while (--i >= 0)
2083		bridge_rtnode_destroy(brt_list[i]);
2084
2085	kmem_free(brt_list, sizeof(*brt_list) * count);
2086}
2087
2088static bool
2089bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2090    bool *need_break, void *arg)
2091{
2092	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2093		/* Take into account of the subsequent removal */
2094		if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2095			*need_break = true;
2096		return true;
2097	} else
2098		return false;
2099}
2100
2101static void
2102bridge_rttrim0(struct bridge_softc *sc)
2103{
2104	bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2105}
2106
2107/*
2108 * bridge_rttrim:
2109 *
2110 *	Trim the routine table so that we have a number
2111 *	of routing entries less than or equal to the
2112 *	maximum number.
2113 */
2114static void
2115bridge_rttrim(struct bridge_softc *sc)
2116{
2117
2118	/* Make sure we actually need to do this. */
2119	if (sc->sc_brtcnt <= sc->sc_brtmax)
2120		return;
2121
2122	/* Force an aging cycle; this might trim enough addresses. */
2123	bridge_rtage(sc);
2124	if (sc->sc_brtcnt <= sc->sc_brtmax)
2125		return;
2126
2127	bridge_rttrim0(sc);
2128
2129	return;
2130}
2131
2132/*
2133 * bridge_timer:
2134 *
2135 *	Aging timer for the bridge.
2136 */
2137static void
2138bridge_timer(void *arg)
2139{
2140	struct bridge_softc *sc = arg;
2141
2142	workqueue_enqueue(sc->sc_rtage_wq, &sc->sc_rtage_wk, NULL);
2143}
2144
2145static void
2146bridge_rtage_work(struct work *wk, void *arg)
2147{
2148	struct bridge_softc *sc = arg;
2149
2150	KASSERT(wk == &sc->sc_rtage_wk);
2151
2152	bridge_rtage(sc);
2153
2154	if (sc->sc_if.if_flags & IFF_RUNNING)
2155		callout_reset(&sc->sc_brcallout,
2156		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2157}
2158
2159static bool
2160bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2161    bool *need_break, void *arg)
2162{
2163	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2164	    time_uptime >= brt->brt_expire)
2165		return true;
2166	else
2167		return false;
2168}
2169
2170/*
2171 * bridge_rtage:
2172 *
2173 *	Perform an aging cycle.
2174 */
2175static void
2176bridge_rtage(struct bridge_softc *sc)
2177{
2178	bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2179}
2180
2181
2182static bool
2183bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2184    bool *need_break, void *arg)
2185{
2186	int full = *(int*)arg;
2187
2188	if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2189		return true;
2190	else
2191		return false;
2192}
2193
2194/*
2195 * bridge_rtflush:
2196 *
2197 *	Remove all dynamic addresses from the bridge.
2198 */
2199static void
2200bridge_rtflush(struct bridge_softc *sc, int full)
2201{
2202	bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2203}
2204
2205/*
2206 * bridge_rtdaddr:
2207 *
2208 *	Remove an address from the table.
2209 */
2210static int
2211bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2212{
2213	struct bridge_rtnode *brt;
2214
2215	BRIDGE_RT_LOCK(sc);
2216	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL) {
2217		BRIDGE_RT_UNLOCK(sc);
2218		return ENOENT;
2219	}
2220	bridge_rtnode_remove(sc, brt);
2221	BRIDGE_RT_PSZ_PERFORM(sc);
2222	BRIDGE_RT_UNLOCK(sc);
2223
2224	bridge_rtnode_destroy(brt);
2225
2226	return 0;
2227}
2228
2229/*
2230 * bridge_rtdelete:
2231 *
2232 *	Delete routes to a speicifc member interface.
2233 */
2234static void
2235bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2236{
2237	struct bridge_rtnode *brt, *nbrt;
2238
2239	BRIDGE_RT_LOCK(sc);
2240	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2241		if (brt->brt_ifp == ifp)
2242			break;
2243	}
2244	if (brt == NULL) {
2245		BRIDGE_RT_UNLOCK(sc);
2246		return;
2247	}
2248	bridge_rtnode_remove(sc, brt);
2249	BRIDGE_RT_PSZ_PERFORM(sc);
2250	BRIDGE_RT_UNLOCK(sc);
2251
2252	bridge_rtnode_destroy(brt);
2253}
2254
2255/*
2256 * bridge_rtable_init:
2257 *
2258 *	Initialize the route table for this bridge.
2259 */
2260static void
2261bridge_rtable_init(struct bridge_softc *sc)
2262{
2263	int i;
2264
2265	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2266	    KM_SLEEP);
2267
2268	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2269		LIST_INIT(&sc->sc_rthash[i]);
2270
2271	sc->sc_rthash_key = cprng_fast32();
2272
2273	LIST_INIT(&sc->sc_rtlist);
2274
2275#ifdef BRIDGE_MPSAFE
2276	sc->sc_rtlist_psz = pserialize_create();
2277	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2278#else
2279	sc->sc_rtlist_psz = NULL;
2280	sc->sc_rtlist_lock = NULL;
2281#endif
2282}
2283
2284/*
2285 * bridge_rtable_fini:
2286 *
2287 *	Deconstruct the route table for this bridge.
2288 */
2289static void
2290bridge_rtable_fini(struct bridge_softc *sc)
2291{
2292
2293	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2294	if (sc->sc_rtlist_lock)
2295		mutex_obj_free(sc->sc_rtlist_lock);
2296	if (sc->sc_rtlist_psz)
2297		pserialize_destroy(sc->sc_rtlist_psz);
2298}
2299
2300/*
2301 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2302 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2303 */
2304#define	mix(a, b, c)							\
2305do {									\
2306	a -= b; a -= c; a ^= (c >> 13);					\
2307	b -= c; b -= a; b ^= (a << 8);					\
2308	c -= a; c -= b; c ^= (b >> 13);					\
2309	a -= b; a -= c; a ^= (c >> 12);					\
2310	b -= c; b -= a; b ^= (a << 16);					\
2311	c -= a; c -= b; c ^= (b >> 5);					\
2312	a -= b; a -= c; a ^= (c >> 3);					\
2313	b -= c; b -= a; b ^= (a << 10);					\
2314	c -= a; c -= b; c ^= (b >> 15);					\
2315} while (/*CONSTCOND*/0)
2316
2317static inline uint32_t
2318bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2319{
2320	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2321
2322	b += addr[5] << 8;
2323	b += addr[4];
2324	a += addr[3] << 24;
2325	a += addr[2] << 16;
2326	a += addr[1] << 8;
2327	a += addr[0];
2328
2329	mix(a, b, c);
2330
2331	return (c & BRIDGE_RTHASH_MASK);
2332}
2333
2334#undef mix
2335
2336/*
2337 * bridge_rtnode_lookup:
2338 *
2339 *	Look up a bridge route node for the specified destination.
2340 */
2341static struct bridge_rtnode *
2342bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2343{
2344	struct bridge_rtnode *brt;
2345	uint32_t hash;
2346	int dir;
2347
2348	hash = bridge_rthash(sc, addr);
2349	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
2350		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2351		if (dir == 0)
2352			return (brt);
2353		if (dir > 0)
2354			return (NULL);
2355	}
2356
2357	return (NULL);
2358}
2359
2360/*
2361 * bridge_rtnode_insert:
2362 *
2363 *	Insert the specified bridge node into the route table.  We
2364 *	assume the entry is not already in the table.
2365 */
2366static int
2367bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2368{
2369	struct bridge_rtnode *lbrt;
2370	uint32_t hash;
2371	int dir;
2372
2373	KASSERT(BRIDGE_RT_LOCKED(sc));
2374
2375	hash = bridge_rthash(sc, brt->brt_addr);
2376
2377	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
2378	if (lbrt == NULL) {
2379		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
2380		goto out;
2381	}
2382
2383	do {
2384		dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2385		if (dir == 0)
2386			return (EEXIST);
2387		if (dir > 0) {
2388			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
2389			goto out;
2390		}
2391		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
2392			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
2393			goto out;
2394		}
2395		lbrt = LIST_NEXT(lbrt, brt_hash);
2396	} while (lbrt != NULL);
2397
2398#ifdef DIAGNOSTIC
2399	panic("bridge_rtnode_insert: impossible");
2400#endif
2401
2402 out:
2403	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
2404	sc->sc_brtcnt++;
2405
2406	return (0);
2407}
2408
2409/*
2410 * bridge_rtnode_remove:
2411 *
2412 *	Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2413 */
2414static void
2415bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2416{
2417
2418	KASSERT(BRIDGE_RT_LOCKED(sc));
2419
2420	LIST_REMOVE(brt, brt_hash);
2421	LIST_REMOVE(brt, brt_list);
2422	sc->sc_brtcnt--;
2423}
2424
2425/*
2426 * bridge_rtnode_destroy:
2427 *
2428 *	Destroy a bridge rtnode.
2429 */
2430static void
2431bridge_rtnode_destroy(struct bridge_rtnode *brt)
2432{
2433
2434	pool_put(&bridge_rtnode_pool, brt);
2435}
2436
2437#if defined(BRIDGE_IPF)
2438extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2439extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2440
2441/*
2442 * Send bridge packets through IPF if they are one of the types IPF can deal
2443 * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2444 * question.)
2445 */
2446static int
2447bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2448{
2449	int snap, error;
2450	struct ether_header *eh1, eh2;
2451	struct llc llc1;
2452	uint16_t ether_type;
2453
2454	snap = 0;
2455	error = -1;	/* Default error if not error == 0 */
2456	eh1 = mtod(*mp, struct ether_header *);
2457	ether_type = ntohs(eh1->ether_type);
2458
2459	/*
2460	 * Check for SNAP/LLC.
2461	 */
2462        if (ether_type < ETHERMTU) {
2463                struct llc *llc2 = (struct llc *)(eh1 + 1);
2464
2465                if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2466                    llc2->llc_dsap == LLC_SNAP_LSAP &&
2467                    llc2->llc_ssap == LLC_SNAP_LSAP &&
2468                    llc2->llc_control == LLC_UI) {
2469                	ether_type = htons(llc2->llc_un.type_snap.ether_type);
2470			snap = 1;
2471                }
2472        }
2473
2474	/*
2475	 * If we're trying to filter bridge traffic, don't look at anything
2476	 * other than IP and ARP traffic.  If the filter doesn't understand
2477	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2478	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2479	 * but of course we don't have an AppleTalk filter to begin with.
2480	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2481	 * ARP traffic.)
2482	 */
2483	switch (ether_type) {
2484		case ETHERTYPE_ARP:
2485		case ETHERTYPE_REVARP:
2486			return 0; /* Automatically pass */
2487		case ETHERTYPE_IP:
2488# ifdef INET6
2489		case ETHERTYPE_IPV6:
2490# endif /* INET6 */
2491			break;
2492		default:
2493			goto bad;
2494	}
2495
2496	/* Strip off the Ethernet header and keep a copy. */
2497	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2498	m_adj(*mp, ETHER_HDR_LEN);
2499
2500	/* Strip off snap header, if present */
2501	if (snap) {
2502		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2503		m_adj(*mp, sizeof(struct llc));
2504	}
2505
2506	/*
2507	 * Check basic packet sanity and run IPF through pfil.
2508	 */
2509	KASSERT(!cpu_intr_p());
2510	switch (ether_type)
2511	{
2512	case ETHERTYPE_IP :
2513		error = (dir == PFIL_IN) ? bridge_ip_checkbasic(mp) : 0;
2514		if (error == 0)
2515			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2516		break;
2517# ifdef INET6
2518	case ETHERTYPE_IPV6 :
2519		error = (dir == PFIL_IN) ? bridge_ip6_checkbasic(mp) : 0;
2520		if (error == 0)
2521			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2522		break;
2523# endif
2524	default :
2525		error = 0;
2526		break;
2527	}
2528
2529	if (*mp == NULL)
2530		return error;
2531	if (error != 0)
2532		goto bad;
2533
2534	error = -1;
2535
2536	/*
2537	 * Finally, put everything back the way it was and return
2538	 */
2539	if (snap) {
2540		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2541		if (*mp == NULL)
2542			return error;
2543		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2544	}
2545
2546	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2547	if (*mp == NULL)
2548		return error;
2549	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2550
2551	return 0;
2552
2553    bad:
2554	m_freem(*mp);
2555	*mp = NULL;
2556	return error;
2557}
2558
2559/*
2560 * Perform basic checks on header size since
2561 * IPF assumes ip_input has already processed
2562 * it for it.  Cut-and-pasted from ip_input.c.
2563 * Given how simple the IPv6 version is,
2564 * does the IPv4 version really need to be
2565 * this complicated?
2566 *
2567 * XXX Should we update ipstat here, or not?
2568 * XXX Right now we update ipstat but not
2569 * XXX csum_counter.
2570 */
2571static int
2572bridge_ip_checkbasic(struct mbuf **mp)
2573{
2574	struct mbuf *m = *mp;
2575	struct ip *ip;
2576	int len, hlen;
2577
2578	if (*mp == NULL)
2579		return -1;
2580
2581	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2582		if ((m = m_copyup(m, sizeof(struct ip),
2583			(max_linkhdr + 3) & ~3)) == NULL) {
2584			/* XXXJRT new stat, please */
2585			ip_statinc(IP_STAT_TOOSMALL);
2586			goto bad;
2587		}
2588	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2589		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2590			ip_statinc(IP_STAT_TOOSMALL);
2591			goto bad;
2592		}
2593	}
2594	ip = mtod(m, struct ip *);
2595	if (ip == NULL) goto bad;
2596
2597	if (ip->ip_v != IPVERSION) {
2598		ip_statinc(IP_STAT_BADVERS);
2599		goto bad;
2600	}
2601	hlen = ip->ip_hl << 2;
2602	if (hlen < sizeof(struct ip)) { /* minimum header length */
2603		ip_statinc(IP_STAT_BADHLEN);
2604		goto bad;
2605	}
2606	if (hlen > m->m_len) {
2607		if ((m = m_pullup(m, hlen)) == 0) {
2608			ip_statinc(IP_STAT_BADHLEN);
2609			goto bad;
2610		}
2611		ip = mtod(m, struct ip *);
2612		if (ip == NULL) goto bad;
2613	}
2614
2615        switch (m->m_pkthdr.csum_flags &
2616                ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
2617                 M_CSUM_IPv4_BAD)) {
2618        case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2619                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2620                goto bad;
2621
2622        case M_CSUM_IPv4:
2623                /* Checksum was okay. */
2624                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2625                break;
2626
2627        default:
2628                /* Must compute it ourselves. */
2629                /* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2630                if (in_cksum(m, hlen) != 0)
2631                        goto bad;
2632                break;
2633        }
2634
2635        /* Retrieve the packet length. */
2636        len = ntohs(ip->ip_len);
2637
2638        /*
2639         * Check for additional length bogosity
2640         */
2641        if (len < hlen) {
2642		ip_statinc(IP_STAT_BADLEN);
2643                goto bad;
2644        }
2645
2646        /*
2647         * Check that the amount of data in the buffers
2648         * is as at least much as the IP header would have us expect.
2649         * Drop packet if shorter than we expect.
2650         */
2651        if (m->m_pkthdr.len < len) {
2652		ip_statinc(IP_STAT_TOOSHORT);
2653                goto bad;
2654        }
2655
2656	/* Checks out, proceed */
2657	*mp = m;
2658	return 0;
2659
2660    bad:
2661	*mp = m;
2662	return -1;
2663}
2664
2665# ifdef INET6
2666/*
2667 * Same as above, but for IPv6.
2668 * Cut-and-pasted from ip6_input.c.
2669 * XXX Should we update ip6stat, or not?
2670 */
2671static int
2672bridge_ip6_checkbasic(struct mbuf **mp)
2673{
2674	struct mbuf *m = *mp;
2675	struct ip6_hdr *ip6;
2676
2677        /*
2678         * If the IPv6 header is not aligned, slurp it up into a new
2679         * mbuf with space for link headers, in the event we forward
2680         * it.  Otherwise, if it is aligned, make sure the entire base
2681         * IPv6 header is in the first mbuf of the chain.
2682         */
2683        if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2684                struct ifnet *inifp = m->m_pkthdr.rcvif;
2685                if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2686                                  (max_linkhdr + 3) & ~3)) == NULL) {
2687                        /* XXXJRT new stat, please */
2688			ip6_statinc(IP6_STAT_TOOSMALL);
2689                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2690                        goto bad;
2691                }
2692        } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
2693                struct ifnet *inifp = m->m_pkthdr.rcvif;
2694                if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
2695			ip6_statinc(IP6_STAT_TOOSMALL);
2696                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2697                        goto bad;
2698                }
2699        }
2700
2701        ip6 = mtod(m, struct ip6_hdr *);
2702
2703        if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2704		ip6_statinc(IP6_STAT_BADVERS);
2705                in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
2706                goto bad;
2707        }
2708
2709	/* Checks out, proceed */
2710	*mp = m;
2711	return 0;
2712
2713    bad:
2714	*mp = m;
2715	return -1;
2716}
2717# endif /* INET6 */
2718#endif /* BRIDGE_IPF */
2719