if_bridge.c revision 1.95
1/*	$NetBSD: if_bridge.c,v 1.95 2014/12/31 17:36:24 ozaki-r Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by Jason L. Wright
53 * 4. The name of the author may not be used to endorse or promote products
54 *    derived from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 *
68 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69 */
70
71/*
72 * Network interface bridge support.
73 *
74 * TODO:
75 *
76 *	- Currently only supports Ethernet-like interfaces (Ethernet,
77 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
79 *	  consider heterogenous bridges).
80 */
81
82#include <sys/cdefs.h>
83__KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.95 2014/12/31 17:36:24 ozaki-r Exp $");
84
85#ifdef _KERNEL_OPT
86#include "opt_bridge_ipf.h"
87#include "opt_inet.h"
88#endif /* _KERNEL_OPT */
89
90#include <sys/param.h>
91#include <sys/kernel.h>
92#include <sys/mbuf.h>
93#include <sys/queue.h>
94#include <sys/socket.h>
95#include <sys/socketvar.h> /* for softnet_lock */
96#include <sys/sockio.h>
97#include <sys/systm.h>
98#include <sys/proc.h>
99#include <sys/pool.h>
100#include <sys/kauth.h>
101#include <sys/cpu.h>
102#include <sys/cprng.h>
103#include <sys/mutex.h>
104#include <sys/kmem.h>
105
106#include <net/bpf.h>
107#include <net/if.h>
108#include <net/if_dl.h>
109#include <net/if_types.h>
110#include <net/if_llc.h>
111#include <net/pktqueue.h>
112
113#include <net/if_ether.h>
114#include <net/if_bridgevar.h>
115
116#if defined(BRIDGE_IPF)
117/* Used for bridge_ip[6]_checkbasic */
118#include <netinet/in.h>
119#include <netinet/in_systm.h>
120#include <netinet/ip.h>
121#include <netinet/ip_var.h>
122#include <netinet/ip_private.h>		/* XXX */
123
124#include <netinet/ip6.h>
125#include <netinet6/in6_var.h>
126#include <netinet6/ip6_var.h>
127#include <netinet6/ip6_private.h>	/* XXX */
128#endif /* BRIDGE_IPF */
129
/*
 * Number of buckets in the route hash table.  BRIDGE_RTHASH_MASK is
 * derived as (size - 1), which is only usable as a bit mask when the
 * size is a power of two.
 */
#if !defined(BRIDGE_RTHASH_SIZE)
#define	BRIDGE_RTHASH_SIZE		1024
#endif

#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
138
139#include "carp.h"
140#if NCARP > 0
141#include <netinet/in.h>
142#include <netinet/in_var.h>
143#include <netinet/ip_carp.h>
144#endif
145
146/*
147 * Maximum number of addresses to cache.
148 */
149#ifndef BRIDGE_RTABLE_MAX
150#define	BRIDGE_RTABLE_MAX		100
151#endif
152
153/*
154 * Spanning tree defaults.
155 */
156#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
157#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
158#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
159#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
160#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
161#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
162#define	BSTP_DEFAULT_PATH_COST		55
163
164/*
165 * Timeout (in seconds) for entries learned dynamically.
166 */
167#ifndef BRIDGE_RTABLE_TIMEOUT
168#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
169#endif
170
171/*
172 * Number of seconds between walks of the route list.
173 */
174#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
175#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
176#endif
177
178int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
179
180static struct pool bridge_rtnode_pool;
181
182void	bridgeattach(int);
183
184static int	bridge_clone_create(struct if_clone *, int);
185static int	bridge_clone_destroy(struct ifnet *);
186
187static int	bridge_ioctl(struct ifnet *, u_long, void *);
188static int	bridge_init(struct ifnet *);
189static void	bridge_stop(struct ifnet *, int);
190static void	bridge_start(struct ifnet *);
191
192static void	bridge_input(struct ifnet *, struct mbuf *);
193static void	bridge_forward(void *);
194
195static void	bridge_timer(void *);
196
197static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
198				 struct mbuf *);
199
200static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
201				struct ifnet *, int, uint8_t);
202static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
203static void	bridge_rttrim(struct bridge_softc *);
204static void	bridge_rtage(struct bridge_softc *);
205static void	bridge_rtflush(struct bridge_softc *, int);
206static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
207static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
208
209static void	bridge_rtable_init(struct bridge_softc *);
210static void	bridge_rtable_fini(struct bridge_softc *);
211
212static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
213						  const uint8_t *);
214static int	bridge_rtnode_insert(struct bridge_softc *,
215				     struct bridge_rtnode *);
216static void	bridge_rtnode_destroy(struct bridge_softc *,
217				      struct bridge_rtnode *);
218
219static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
220						  const char *name);
221static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
222						     struct ifnet *ifp);
223static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *);
224static void	bridge_delete_member(struct bridge_softc *,
225				     struct bridge_iflist *);
226static struct bridge_iflist *bridge_try_hold_bif(struct bridge_iflist *);
227
228static int	bridge_ioctl_add(struct bridge_softc *, void *);
229static int	bridge_ioctl_del(struct bridge_softc *, void *);
230static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
231static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
232static int	bridge_ioctl_scache(struct bridge_softc *, void *);
233static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
234static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
235static int	bridge_ioctl_rts(struct bridge_softc *, void *);
236static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
237static int	bridge_ioctl_sto(struct bridge_softc *, void *);
238static int	bridge_ioctl_gto(struct bridge_softc *, void *);
239static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
240static int	bridge_ioctl_flush(struct bridge_softc *, void *);
241static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
242static int	bridge_ioctl_spri(struct bridge_softc *, void *);
243static int	bridge_ioctl_ght(struct bridge_softc *, void *);
244static int	bridge_ioctl_sht(struct bridge_softc *, void *);
245static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
246static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
247static int	bridge_ioctl_gma(struct bridge_softc *, void *);
248static int	bridge_ioctl_sma(struct bridge_softc *, void *);
249static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
250static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
251#if defined(BRIDGE_IPF)
252static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
253static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
254static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
255static int	bridge_ip_checkbasic(struct mbuf **mp);
256# ifdef INET6
257static int	bridge_ip6_checkbasic(struct mbuf **mp);
258# endif /* INET6 */
259#endif /* BRIDGE_IPF */
260
261static void bridge_sysctl_fwdq_setup(struct sysctllog **clog,
262    struct bridge_softc *sc);
263
/*
 * One entry of the SIOC[GS]DRVSPEC dispatch table: the handler to call,
 * the exact argument size the caller must supply, and BC_F_* flags.
 */
struct bridge_control {
	int	(*bc_func)(struct bridge_softc *, void *);	/* handler */
	int	bc_argsize;		/* required ifd_len */
	int	bc_flags;		/* BC_F_* */
};

#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
273
274static const struct bridge_control bridge_control_table[] = {
275[BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
276[BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
277
278[BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
279[BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
280
281[BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
282[BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},
283
284[BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
285[BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},
286
287[BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
288
289[BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
290[BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},
291
292[BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
293
294[BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
295
296[BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
297[BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
298
299[BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
300[BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
301
302[BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
303[BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
304
305[BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
306[BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
307
308[BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
309
310[BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
311#if defined(BRIDGE_IPF)
312[BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
313[BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
314#endif /* BRIDGE_IPF */
315};
316static const int bridge_control_table_size = __arraycount(bridge_control_table);
317
318static LIST_HEAD(, bridge_softc) bridge_list;
319static kmutex_t bridge_list_lock;
320
321static struct if_clone bridge_cloner =
322    IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
323
324/*
325 * bridgeattach:
326 *
327 *	Pseudo-device attach routine.
328 */
329void
330bridgeattach(int n)
331{
332
333	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
334	    0, 0, 0, "brtpl", NULL, IPL_NET);
335
336	LIST_INIT(&bridge_list);
337	mutex_init(&bridge_list_lock, MUTEX_DEFAULT, IPL_NET);
338	if_clone_attach(&bridge_cloner);
339}
340
341/*
342 * bridge_clone_create:
343 *
344 *	Create a new bridge instance.
345 */
346static int
347bridge_clone_create(struct if_clone *ifc, int unit)
348{
349	struct bridge_softc *sc;
350	struct ifnet *ifp;
351
352	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
353	ifp = &sc->sc_if;
354
355	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
356	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
357	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
358	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
359	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
360	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
361	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
362	sc->sc_filter_flags = 0;
363
364	/* Initialize our routing table. */
365	bridge_rtable_init(sc);
366
367	callout_init(&sc->sc_brcallout, 0);
368	callout_init(&sc->sc_bstpcallout, 0);
369
370	LIST_INIT(&sc->sc_iflist);
371#ifdef BRIDGE_MPSAFE
372	sc->sc_iflist_intr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
373	sc->sc_iflist_psz = pserialize_create();
374	sc->sc_iflist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
375#else
376	sc->sc_iflist_intr_lock = NULL;
377	sc->sc_iflist_psz = NULL;
378	sc->sc_iflist_lock = NULL;
379#endif
380	cv_init(&sc->sc_iflist_cv, "if_bridge_cv");
381
382	if_initname(ifp, ifc->ifc_name, unit);
383	ifp->if_softc = sc;
384	ifp->if_mtu = ETHERMTU;
385	ifp->if_ioctl = bridge_ioctl;
386	ifp->if_output = bridge_output;
387	ifp->if_start = bridge_start;
388	ifp->if_stop = bridge_stop;
389	ifp->if_init = bridge_init;
390	ifp->if_type = IFT_BRIDGE;
391	ifp->if_addrlen = 0;
392	ifp->if_dlt = DLT_EN10MB;
393	ifp->if_hdrlen = ETHER_HDR_LEN;
394
395	sc->sc_fwd_pktq = pktq_create(IFQ_MAXLEN, bridge_forward, sc);
396	KASSERT(sc->sc_fwd_pktq != NULL);
397
398	bridge_sysctl_fwdq_setup(&ifp->if_sysctl_log, sc);
399
400	if_attach(ifp);
401
402	if_alloc_sadl(ifp);
403
404	mutex_enter(&bridge_list_lock);
405	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
406	mutex_exit(&bridge_list_lock);
407
408	return (0);
409}
410
411/*
412 * bridge_clone_destroy:
413 *
414 *	Destroy a bridge instance.
415 */
416static int
417bridge_clone_destroy(struct ifnet *ifp)
418{
419	struct bridge_softc *sc = ifp->if_softc;
420	struct bridge_iflist *bif;
421	int s;
422
423	/* Must be called during IFF_RUNNING, i.e., before bridge_stop */
424	pktq_barrier(sc->sc_fwd_pktq);
425
426	s = splnet();
427
428	bridge_stop(ifp, 1);
429
430	while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL)
431		bridge_delete_member(sc, bif);
432
433	mutex_enter(&bridge_list_lock);
434	LIST_REMOVE(sc, sc_list);
435	mutex_exit(&bridge_list_lock);
436
437	splx(s);
438
439	if_detach(ifp);
440
441	/* Should be called after if_detach for safe */
442	pktq_flush(sc->sc_fwd_pktq);
443	pktq_destroy(sc->sc_fwd_pktq);
444
445	/* Tear down the routing table. */
446	bridge_rtable_fini(sc);
447
448	cv_destroy(&sc->sc_iflist_cv);
449	if (sc->sc_iflist_intr_lock)
450		mutex_obj_free(sc->sc_iflist_intr_lock);
451
452	if (sc->sc_iflist_psz)
453		pserialize_destroy(sc->sc_iflist_psz);
454	if (sc->sc_iflist_lock)
455		mutex_obj_free(sc->sc_iflist_lock);
456
457	kmem_free(sc, sizeof(*sc));
458
459	return (0);
460}
461
462static int
463bridge_sysctl_fwdq_maxlen(SYSCTLFN_ARGS)
464{
465	struct sysctlnode node = *rnode;
466	const struct bridge_softc *sc =	node.sysctl_data;
467	return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), sc->sc_fwd_pktq);
468}
469
470#define	SYSCTL_BRIDGE_PKTQ(cn, c)					\
471	static int							\
472	bridge_sysctl_fwdq_##cn(SYSCTLFN_ARGS)				\
473	{								\
474		struct sysctlnode node = *rnode;			\
475		const struct bridge_softc *sc =	node.sysctl_data;	\
476		return sysctl_pktq_count(SYSCTLFN_CALL(rnode),		\
477					 sc->sc_fwd_pktq, c);		\
478	}
479
480SYSCTL_BRIDGE_PKTQ(items, PKTQ_NITEMS)
481SYSCTL_BRIDGE_PKTQ(drops, PKTQ_DROPS)
482
483static void
484bridge_sysctl_fwdq_setup(struct sysctllog **clog, struct bridge_softc *sc)
485{
486	const struct sysctlnode *cnode, *rnode;
487	sysctlfn len_func = NULL, maxlen_func = NULL, drops_func = NULL;
488	const char *ifname = sc->sc_if.if_xname;
489
490	len_func = bridge_sysctl_fwdq_items;
491	maxlen_func = bridge_sysctl_fwdq_maxlen;
492	drops_func = bridge_sysctl_fwdq_drops;
493
494	if (sysctl_createv(clog, 0, NULL, &rnode,
495			   CTLFLAG_PERMANENT,
496			   CTLTYPE_NODE, "interfaces",
497			   SYSCTL_DESCR("Per-interface controls"),
498			   NULL, 0, NULL, 0,
499			   CTL_NET, CTL_CREATE, CTL_EOL) != 0)
500		goto bad;
501
502	if (sysctl_createv(clog, 0, &rnode, &rnode,
503			   CTLFLAG_PERMANENT,
504			   CTLTYPE_NODE, ifname,
505			   SYSCTL_DESCR("Interface controls"),
506			   NULL, 0, NULL, 0,
507			   CTL_CREATE, CTL_EOL) != 0)
508		goto bad;
509
510	if (sysctl_createv(clog, 0, &rnode, &rnode,
511			   CTLFLAG_PERMANENT,
512			   CTLTYPE_NODE, "fwdq",
513			   SYSCTL_DESCR("Protocol input queue controls"),
514			   NULL, 0, NULL, 0,
515			   CTL_CREATE, CTL_EOL) != 0)
516		goto bad;
517
518	if (sysctl_createv(clog, 0, &rnode, &cnode,
519			   CTLFLAG_PERMANENT,
520			   CTLTYPE_INT, "len",
521			   SYSCTL_DESCR("Current forwarding queue length"),
522			   len_func, 0, (void *)sc, 0,
523			   CTL_CREATE, IFQCTL_LEN, CTL_EOL) != 0)
524		goto bad;
525
526	if (sysctl_createv(clog, 0, &rnode, &cnode,
527			   CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
528			   CTLTYPE_INT, "maxlen",
529			   SYSCTL_DESCR("Maximum allowed forwarding queue length"),
530			   maxlen_func, 0, (void *)sc, 0,
531			   CTL_CREATE, IFQCTL_MAXLEN, CTL_EOL) != 0)
532		goto bad;
533
534	if (sysctl_createv(clog, 0, &rnode, &cnode,
535			   CTLFLAG_PERMANENT,
536			   CTLTYPE_INT, "drops",
537			   SYSCTL_DESCR("Packets dropped due to full forwarding queue"),
538			   drops_func, 0, (void *)sc, 0,
539			   CTL_CREATE, IFQCTL_DROPS, CTL_EOL) != 0)
540		goto bad;
541
542	return;
543bad:
544	aprint_error("%s: could not attach sysctl nodes\n", ifname);
545	return;
546}
547
548/*
549 * bridge_ioctl:
550 *
551 *	Handle a control request from the operator.
552 */
553static int
554bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
555{
556	struct bridge_softc *sc = ifp->if_softc;
557	struct lwp *l = curlwp;	/* XXX */
558	union {
559		struct ifbreq ifbreq;
560		struct ifbifconf ifbifconf;
561		struct ifbareq ifbareq;
562		struct ifbaconf ifbaconf;
563		struct ifbrparam ifbrparam;
564	} args;
565	struct ifdrv *ifd = (struct ifdrv *) data;
566	const struct bridge_control *bc = NULL; /* XXXGCC */
567	int s, error = 0;
568
569	/* Authorize command before calling splnet(). */
570	switch (cmd) {
571	case SIOCGDRVSPEC:
572	case SIOCSDRVSPEC:
573		if (ifd->ifd_cmd >= bridge_control_table_size) {
574			error = EINVAL;
575			return error;
576		}
577
578		bc = &bridge_control_table[ifd->ifd_cmd];
579
580		/* We only care about BC_F_SUSER at this point. */
581		if ((bc->bc_flags & BC_F_SUSER) == 0)
582			break;
583
584		error = kauth_authorize_network(l->l_cred,
585		    KAUTH_NETWORK_INTERFACE_BRIDGE,
586		    cmd == SIOCGDRVSPEC ?
587		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
588		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
589		     ifd, NULL, NULL);
590		if (error)
591			return (error);
592
593		break;
594	}
595
596	s = splnet();
597
598	switch (cmd) {
599	case SIOCGDRVSPEC:
600	case SIOCSDRVSPEC:
601		KASSERT(bc != NULL);
602		if (cmd == SIOCGDRVSPEC &&
603		    (bc->bc_flags & BC_F_COPYOUT) == 0) {
604			error = EINVAL;
605			break;
606		}
607		else if (cmd == SIOCSDRVSPEC &&
608		    (bc->bc_flags & BC_F_COPYOUT) != 0) {
609			error = EINVAL;
610			break;
611		}
612
613		/* BC_F_SUSER is checked above, before splnet(). */
614
615		if (ifd->ifd_len != bc->bc_argsize ||
616		    ifd->ifd_len > sizeof(args)) {
617			error = EINVAL;
618			break;
619		}
620
621		memset(&args, 0, sizeof(args));
622		if (bc->bc_flags & BC_F_COPYIN) {
623			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
624			if (error)
625				break;
626		}
627
628		error = (*bc->bc_func)(sc, &args);
629		if (error)
630			break;
631
632		if (bc->bc_flags & BC_F_COPYOUT)
633			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
634
635		break;
636
637	case SIOCSIFFLAGS:
638		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
639			break;
640		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
641		case IFF_RUNNING:
642			/*
643			 * If interface is marked down and it is running,
644			 * then stop and disable it.
645			 */
646			(*ifp->if_stop)(ifp, 1);
647			break;
648		case IFF_UP:
649			/*
650			 * If interface is marked up and it is stopped, then
651			 * start it.
652			 */
653			error = (*ifp->if_init)(ifp);
654			break;
655		default:
656			break;
657		}
658		break;
659
660	case SIOCSIFMTU:
661		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
662			error = 0;
663		break;
664
665	default:
666		error = ifioctl_common(ifp, cmd, data);
667		break;
668	}
669
670	splx(s);
671
672	return (error);
673}
674
675/*
676 * bridge_lookup_member:
677 *
678 *	Lookup a bridge member interface.
679 */
680static struct bridge_iflist *
681bridge_lookup_member(struct bridge_softc *sc, const char *name)
682{
683	struct bridge_iflist *bif;
684	struct ifnet *ifp;
685	int s;
686
687	BRIDGE_PSZ_RENTER(s);
688
689	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
690		ifp = bif->bif_ifp;
691		if (strcmp(ifp->if_xname, name) == 0)
692			break;
693	}
694	bif = bridge_try_hold_bif(bif);
695
696	BRIDGE_PSZ_REXIT(s);
697
698	return bif;
699}
700
701/*
702 * bridge_lookup_member_if:
703 *
704 *	Lookup a bridge member interface by ifnet*.
705 */
706static struct bridge_iflist *
707bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
708{
709	struct bridge_iflist *bif;
710	int s;
711
712	BRIDGE_PSZ_RENTER(s);
713
714	bif = member_ifp->if_bridgeif;
715	bif = bridge_try_hold_bif(bif);
716
717	BRIDGE_PSZ_REXIT(s);
718
719	return bif;
720}
721
/*
 * Try to take a reference on a member record.
 *
 * In BRIDGE_MPSAFE kernels a NULL record, or one flagged bif_waiting,
 * yields NULL; otherwise bif_refs is incremented and the record is
 * returned.  In other kernels the argument is returned unchanged.
 */
static struct bridge_iflist *
bridge_try_hold_bif(struct bridge_iflist *bif)
{
#ifdef BRIDGE_MPSAFE
	if (bif == NULL)
		return NULL;

	/* bridge_delete_member() sets this while it waits for references. */
	if (bif->bif_waiting)
		return NULL;

	atomic_inc_32(&bif->bif_refs);
#endif
	return bif;
}
735
/*
 * bridge_release_member:
 *
 *	Release the specified member interface, i.e. drop a reference
 *	taken by bridge_try_hold_bif().  In BRIDGE_MPSAFE kernels, when
 *	the last reference goes away while bif_waiting is set, waiters
 *	on sc_iflist_cv are woken (bridge_delete_member() sleeps there).
 *	In other kernels this is a no-op.
 */
static void
bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
#ifdef BRIDGE_MPSAFE
	uint32_t refs;

	/*
	 * Use the 32-bit primitive so the decrement matches both the
	 * atomic_inc_32() that took the reference and the type of the
	 * local holding the result.
	 */
	refs = atomic_dec_32_nv(&bif->bif_refs);
	if (__predict_false(refs == 0 && bif->bif_waiting)) {
		BRIDGE_INTR_LOCK(sc);
		cv_broadcast(&sc->sc_iflist_cv);
		BRIDGE_INTR_UNLOCK(sc);
	}
#else
	(void)sc;
	(void)bif;
#endif
}
758
759/*
760 * bridge_delete_member:
761 *
762 *	Delete the specified member interface.
763 */
764static void
765bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
766{
767	struct ifnet *ifs = bif->bif_ifp;
768
769	BRIDGE_LOCK(sc);
770
771	ifs->if_input = ether_input;
772	ifs->if_bridge = NULL;
773	ifs->if_bridgeif = NULL;
774
775	LIST_REMOVE(bif, bif_next);
776
777	BRIDGE_PSZ_PERFORM(sc);
778
779	BRIDGE_UNLOCK(sc);
780
781#ifdef BRIDGE_MPSAFE
782	BRIDGE_INTR_LOCK(sc);
783	bif->bif_waiting = true;
784	membar_sync();
785	while (bif->bif_refs > 0) {
786		aprint_debug("%s: cv_wait on iflist\n", __func__);
787		cv_wait(&sc->sc_iflist_cv, sc->sc_iflist_intr_lock);
788	}
789	bif->bif_waiting = false;
790	BRIDGE_INTR_UNLOCK(sc);
791#endif
792
793	kmem_free(bif, sizeof(*bif));
794}
795
796static int
797bridge_ioctl_add(struct bridge_softc *sc, void *arg)
798{
799	struct ifbreq *req = arg;
800	struct bridge_iflist *bif = NULL;
801	struct ifnet *ifs;
802	int error = 0;
803
804	ifs = ifunit(req->ifbr_ifsname);
805	if (ifs == NULL)
806		return (ENOENT);
807
808	if (sc->sc_if.if_mtu != ifs->if_mtu)
809		return (EINVAL);
810
811	if (ifs->if_bridge == sc)
812		return (EEXIST);
813
814	if (ifs->if_bridge != NULL)
815		return (EBUSY);
816
817	if (ifs->if_input != ether_input)
818		return EINVAL;
819
820	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
821	if ((ifs->if_flags & IFF_SIMPLEX) == 0)
822		return EINVAL;
823
824	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
825
826	switch (ifs->if_type) {
827	case IFT_ETHER:
828		/*
829		 * Place the interface into promiscuous mode.
830		 */
831		error = ifpromisc(ifs, 1);
832		if (error)
833			goto out;
834		break;
835	default:
836		error = EINVAL;
837		goto out;
838	}
839
840	bif->bif_ifp = ifs;
841	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
842	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
843	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
844	bif->bif_refs = 0;
845	bif->bif_waiting = false;
846
847	BRIDGE_LOCK(sc);
848
849	ifs->if_bridge = sc;
850	ifs->if_bridgeif = bif;
851	LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
852	ifs->if_input = bridge_input;
853
854	BRIDGE_UNLOCK(sc);
855
856	if (sc->sc_if.if_flags & IFF_RUNNING)
857		bstp_initialization(sc);
858	else
859		bstp_stop(sc);
860
861 out:
862	if (error) {
863		if (bif != NULL)
864			kmem_free(bif, sizeof(*bif));
865	}
866	return (error);
867}
868
869static int
870bridge_ioctl_del(struct bridge_softc *sc, void *arg)
871{
872	struct ifbreq *req = arg;
873	const char *name = req->ifbr_ifsname;
874	struct bridge_iflist *bif;
875	struct ifnet *ifs;
876
877	BRIDGE_LOCK(sc);
878
879	/*
880	 * Don't use bridge_lookup_member. We want to get a member
881	 * with bif_refs == 0.
882	 */
883	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
884		ifs = bif->bif_ifp;
885		if (strcmp(ifs->if_xname, name) == 0)
886			break;
887	}
888
889	if (bif == NULL) {
890		BRIDGE_UNLOCK(sc);
891		return ENOENT;
892	}
893
894	BRIDGE_UNLOCK(sc);
895
896	bridge_delete_member(sc, bif);
897
898
899	switch (ifs->if_type) {
900	case IFT_ETHER:
901		/*
902		 * Take the interface out of promiscuous mode.
903		 * Don't call it with holding a spin lock.
904		 */
905		(void) ifpromisc(ifs, 0);
906		break;
907	default:
908#ifdef DIAGNOSTIC
909		panic("bridge_delete_member: impossible");
910#endif
911		break;
912	}
913
914	bridge_rtdelete(sc, ifs);
915
916	if (sc->sc_if.if_flags & IFF_RUNNING)
917		bstp_initialization(sc);
918
919	return 0;
920}
921
922static int
923bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
924{
925	struct ifbreq *req = arg;
926	struct bridge_iflist *bif;
927
928	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
929	if (bif == NULL)
930		return (ENOENT);
931
932	req->ifbr_ifsflags = bif->bif_flags;
933	req->ifbr_state = bif->bif_state;
934	req->ifbr_priority = bif->bif_priority;
935	req->ifbr_path_cost = bif->bif_path_cost;
936	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
937
938	bridge_release_member(sc, bif);
939
940	return (0);
941}
942
943static int
944bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
945{
946	struct ifbreq *req = arg;
947	struct bridge_iflist *bif;
948
949	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
950	if (bif == NULL)
951		return (ENOENT);
952
953	if (req->ifbr_ifsflags & IFBIF_STP) {
954		switch (bif->bif_ifp->if_type) {
955		case IFT_ETHER:
956			/* These can do spanning tree. */
957			break;
958
959		default:
960			/* Nothing else can. */
961			bridge_release_member(sc, bif);
962			return (EINVAL);
963		}
964	}
965
966	bif->bif_flags = req->ifbr_ifsflags;
967
968	bridge_release_member(sc, bif);
969
970	if (sc->sc_if.if_flags & IFF_RUNNING)
971		bstp_initialization(sc);
972
973	return (0);
974}
975
976static int
977bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
978{
979	struct ifbrparam *param = arg;
980
981	sc->sc_brtmax = param->ifbrp_csize;
982	bridge_rttrim(sc);
983
984	return (0);
985}
986
987static int
988bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
989{
990	struct ifbrparam *param = arg;
991
992	param->ifbrp_csize = sc->sc_brtmax;
993
994	return (0);
995}
996
997static int
998bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
999{
1000	struct ifbifconf *bifc = arg;
1001	struct bridge_iflist *bif;
1002	struct ifbreq *breqs;
1003	int i, count, error = 0;
1004
1005retry:
1006	BRIDGE_LOCK(sc);
1007	count = 0;
1008	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
1009		count++;
1010	BRIDGE_UNLOCK(sc);
1011
1012	if (count == 0) {
1013		bifc->ifbic_len = 0;
1014		return 0;
1015	}
1016
1017	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
1018		/* Tell that a larger buffer is needed */
1019		bifc->ifbic_len = sizeof(*breqs) * count;
1020		return 0;
1021	}
1022
1023	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
1024
1025	BRIDGE_LOCK(sc);
1026
1027	i = 0;
1028	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
1029		i++;
1030	if (i > count) {
1031		/*
1032		 * The number of members has been increased.
1033		 * We need more memory!
1034		 */
1035		BRIDGE_UNLOCK(sc);
1036		kmem_free(breqs, sizeof(*breqs) * count);
1037		goto retry;
1038	}
1039
1040	i = 0;
1041	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1042		struct ifbreq *breq = &breqs[i++];
1043		memset(breq, 0, sizeof(*breq));
1044
1045		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1046		    sizeof(breq->ifbr_ifsname));
1047		breq->ifbr_ifsflags = bif->bif_flags;
1048		breq->ifbr_state = bif->bif_state;
1049		breq->ifbr_priority = bif->bif_priority;
1050		breq->ifbr_path_cost = bif->bif_path_cost;
1051		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1052	}
1053
1054	/* Don't call copyout with holding the mutex */
1055	BRIDGE_UNLOCK(sc);
1056
1057	for (i = 0; i < count; i++) {
1058		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1059		if (error)
1060			break;
1061	}
1062	bifc->ifbic_len = sizeof(*breqs) * i;
1063
1064	kmem_free(breqs, sizeof(*breqs) * count);
1065
1066	return error;
1067}
1068
/*
 * bridge_ioctl_rts:
 *
 *	Copy the bridge's address table out to the user buffer described
 *	by the ifbaconf argument, one struct ifbareq per entry.  On
 *	return ifbac_len holds the number of bytes actually written
 *	(a whole number of entries).  Returns 0, or the error reported
 *	by copyout().
 */
static int
bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
{
	struct ifbaconf *bac = arg;
	struct bridge_rtnode *brt;
	struct ifbareq bareq;
	int count = 0, error = 0, len;

	/* A zero-length buffer is not an error; ifbac_len is left at 0. */
	if (bac->ifbac_len == 0)
		return (0);

	/*
	 * NOTE(review): copyout() below is called with sc_rtlist_lock
	 * held.  The lock is allocated with IPL_NET in
	 * bridge_rtable_init(); confirm that touching user memory is
	 * permitted while holding this kind of mutex.
	 */
	mutex_enter(sc->sc_rtlist_lock);

	/* len tracks how much room is left in the user buffer. */
	len = bac->ifbac_len;
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		/* Stop quietly once the next entry would not fit. */
		if (len < sizeof(bareq))
			goto out;
		/* Zero first so that no stale stack bytes are copied out. */
		memset(&bareq, 0, sizeof(bareq));
		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
		    sizeof(bareq.ifba_ifsname));
		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
		/*
		 * Dynamic entries report the time remaining until expiry;
		 * all other entries report 0.
		 */
		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
			bareq.ifba_expire = brt->brt_expire - time_uptime;
		} else
			bareq.ifba_expire = 0;
		bareq.ifba_flags = brt->brt_flags;

		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
		if (error)
			goto out;
		count++;
		len -= sizeof(bareq);
	}
 out:
	mutex_exit(sc->sc_rtlist_lock);

	/* Report the bytes written, even when copyout() failed part way. */
	bac->ifbac_len = sizeof(bareq) * count;
	return (error);
}
1108
1109static int
1110bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1111{
1112	struct ifbareq *req = arg;
1113	struct bridge_iflist *bif;
1114	int error;
1115
1116	bif = bridge_lookup_member(sc, req->ifba_ifsname);
1117	if (bif == NULL)
1118		return (ENOENT);
1119
1120	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1121	    req->ifba_flags);
1122
1123	bridge_release_member(sc, bif);
1124
1125	return (error);
1126}
1127
1128static int
1129bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1130{
1131	struct ifbrparam *param = arg;
1132
1133	sc->sc_brttimeout = param->ifbrp_ctime;
1134
1135	return (0);
1136}
1137
1138static int
1139bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1140{
1141	struct ifbrparam *param = arg;
1142
1143	param->ifbrp_ctime = sc->sc_brttimeout;
1144
1145	return (0);
1146}
1147
1148static int
1149bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1150{
1151	struct ifbareq *req = arg;
1152
1153	return (bridge_rtdaddr(sc, req->ifba_dst));
1154}
1155
1156static int
1157bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1158{
1159	struct ifbreq *req = arg;
1160
1161	bridge_rtflush(sc, req->ifbr_ifsflags);
1162
1163	return (0);
1164}
1165
1166static int
1167bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1168{
1169	struct ifbrparam *param = arg;
1170
1171	param->ifbrp_prio = sc->sc_bridge_priority;
1172
1173	return (0);
1174}
1175
1176static int
1177bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1178{
1179	struct ifbrparam *param = arg;
1180
1181	sc->sc_bridge_priority = param->ifbrp_prio;
1182
1183	if (sc->sc_if.if_flags & IFF_RUNNING)
1184		bstp_initialization(sc);
1185
1186	return (0);
1187}
1188
1189static int
1190bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1191{
1192	struct ifbrparam *param = arg;
1193
1194	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1195
1196	return (0);
1197}
1198
1199static int
1200bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1201{
1202	struct ifbrparam *param = arg;
1203
1204	if (param->ifbrp_hellotime == 0)
1205		return (EINVAL);
1206	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1207
1208	if (sc->sc_if.if_flags & IFF_RUNNING)
1209		bstp_initialization(sc);
1210
1211	return (0);
1212}
1213
1214static int
1215bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1216{
1217	struct ifbrparam *param = arg;
1218
1219	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1220
1221	return (0);
1222}
1223
1224static int
1225bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1226{
1227	struct ifbrparam *param = arg;
1228
1229	if (param->ifbrp_fwddelay == 0)
1230		return (EINVAL);
1231	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1232
1233	if (sc->sc_if.if_flags & IFF_RUNNING)
1234		bstp_initialization(sc);
1235
1236	return (0);
1237}
1238
1239static int
1240bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1241{
1242	struct ifbrparam *param = arg;
1243
1244	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1245
1246	return (0);
1247}
1248
1249static int
1250bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1251{
1252	struct ifbrparam *param = arg;
1253
1254	if (param->ifbrp_maxage == 0)
1255		return (EINVAL);
1256	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1257
1258	if (sc->sc_if.if_flags & IFF_RUNNING)
1259		bstp_initialization(sc);
1260
1261	return (0);
1262}
1263
1264static int
1265bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1266{
1267	struct ifbreq *req = arg;
1268	struct bridge_iflist *bif;
1269
1270	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1271	if (bif == NULL)
1272		return (ENOENT);
1273
1274	bif->bif_priority = req->ifbr_priority;
1275
1276	if (sc->sc_if.if_flags & IFF_RUNNING)
1277		bstp_initialization(sc);
1278
1279	bridge_release_member(sc, bif);
1280
1281	return (0);
1282}
1283
1284#if defined(BRIDGE_IPF)
1285static int
1286bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1287{
1288	struct ifbrparam *param = arg;
1289
1290	param->ifbrp_filter = sc->sc_filter_flags;
1291
1292	return (0);
1293}
1294
1295static int
1296bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1297{
1298	struct ifbrparam *param = arg;
1299	uint32_t nflags, oflags;
1300
1301	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1302		return (EINVAL);
1303
1304	nflags = param->ifbrp_filter;
1305	oflags = sc->sc_filter_flags;
1306
1307	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1308		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1309			sc->sc_if.if_pfil);
1310	}
1311	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1312		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1313			sc->sc_if.if_pfil);
1314	}
1315
1316	sc->sc_filter_flags = nflags;
1317
1318	return (0);
1319}
1320#endif /* BRIDGE_IPF */
1321
1322static int
1323bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1324{
1325	struct ifbreq *req = arg;
1326	struct bridge_iflist *bif;
1327
1328	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1329	if (bif == NULL)
1330		return (ENOENT);
1331
1332	bif->bif_path_cost = req->ifbr_path_cost;
1333
1334	if (sc->sc_if.if_flags & IFF_RUNNING)
1335		bstp_initialization(sc);
1336
1337	bridge_release_member(sc, bif);
1338
1339	return (0);
1340}
1341
1342/*
1343 * bridge_ifdetach:
1344 *
1345 *	Detach an interface from a bridge.  Called when a member
1346 *	interface is detaching.
1347 */
1348void
1349bridge_ifdetach(struct ifnet *ifp)
1350{
1351	struct bridge_softc *sc = ifp->if_bridge;
1352	struct ifbreq breq;
1353
1354	/* ioctl_lock should prevent this from happening */
1355	KASSERT(sc != NULL);
1356
1357	memset(&breq, 0, sizeof(breq));
1358	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1359
1360	(void) bridge_ioctl_del(sc, &breq);
1361}
1362
1363/*
1364 * bridge_init:
1365 *
1366 *	Initialize a bridge interface.
1367 */
1368static int
1369bridge_init(struct ifnet *ifp)
1370{
1371	struct bridge_softc *sc = ifp->if_softc;
1372
1373	if (ifp->if_flags & IFF_RUNNING)
1374		return (0);
1375
1376	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1377	    bridge_timer, sc);
1378
1379	ifp->if_flags |= IFF_RUNNING;
1380	bstp_initialization(sc);
1381	return (0);
1382}
1383
1384/*
1385 * bridge_stop:
1386 *
1387 *	Stop the bridge interface.
1388 */
1389static void
1390bridge_stop(struct ifnet *ifp, int disable)
1391{
1392	struct bridge_softc *sc = ifp->if_softc;
1393
1394	if ((ifp->if_flags & IFF_RUNNING) == 0)
1395		return;
1396
1397	callout_stop(&sc->sc_brcallout);
1398	bstp_stop(sc);
1399
1400	bridge_rtflush(sc, IFBF_FLUSHDYN);
1401
1402	ifp->if_flags &= ~IFF_RUNNING;
1403}
1404
/*
 * bridge_enqueue:
 *
 *	Enqueue a packet on a bridge member interface.
 *
 *	sc is the bridge whose output counters are updated, dst_ifp is
 *	the member interface whose send queue receives the packet, and
 *	m is the packet.  When runfilt is non-zero the packet is first
 *	run through the bridge's PFIL_OUT hooks, which may drop or
 *	consume it.  The caller must not use m after this returns.
 */
void
bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
    int runfilt)
{
	ALTQ_DECL(struct altq_pktattr pktattr;)
	int len, error;
	short mflags;

	/*
	 * Clear any in-bound checksum flags for this packet.
	 */
	m->m_pkthdr.csum_flags = 0;

	if (runfilt) {
		/*
		 * A non-zero return means the packet was rejected; the
		 * hook may or may not have already freed it.  A zero
		 * return with m == NULL means the hook consumed it.
		 */
		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
		    dst_ifp, PFIL_OUT) != 0) {
			if (m != NULL)
				m_freem(m);
			return;
		}
		if (m == NULL)
			return;
	}

#ifdef ALTQ
	/*
	 * If ALTQ is enabled on the member interface, do
	 * classification; the queueing discipline might
	 * not require classification, but might require
	 * the address family/header pointer in the pktattr.
	 */
	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
		/* XXX IFT_ETHER */
		altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
	}
#endif /* ALTQ */

	/*
	 * Save what the statistics below need before m is handed to the
	 * queue; m is not referenced again after IFQ_ENQUEUE.
	 * NOTE(review): M_PROTO1 presumably marks the packet as having
	 * been through the bridge -- confirm against the consumers of
	 * this flag.
	 */
	len = m->m_pkthdr.len;
	m->m_flags |= M_PROTO1;
	mflags = m->m_flags;

	IFQ_ENQUEUE(&dst_ifp->if_snd, m, &pktattr, error);

	if (error) {
		/* mbuf is already freed */
		sc->sc_if.if_oerrors++;
		return;
	}

	/* Account the packet on the bridge and on the member interface. */
	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += len;

	dst_ifp->if_obytes += len;

	if (mflags & M_MCAST) {
		sc->sc_if.if_omcasts++;
		dst_ifp->if_omcasts++;
	}

	/* Kick the member's transmit routine unless it is already busy. */
	if ((dst_ifp->if_flags & IFF_OACTIVE) == 0)
		(*dst_ifp->if_start)(dst_ifp);
}
1472
1473/*
1474 * bridge_output:
1475 *
1476 *	Send output from a bridge member interface.  This
1477 *	performs the bridging function for locally originated
1478 *	packets.
1479 *
1480 *	The mbuf has the Ethernet header already attached.  We must
1481 *	enqueue or free the mbuf before returning.
1482 */
1483int
1484bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
1485    struct rtentry *rt)
1486{
1487	struct ether_header *eh;
1488	struct ifnet *dst_if;
1489	struct bridge_softc *sc;
1490#ifndef BRIDGE_MPSAFE
1491	int s;
1492#endif
1493
1494	if (m->m_len < ETHER_HDR_LEN) {
1495		m = m_pullup(m, ETHER_HDR_LEN);
1496		if (m == NULL)
1497			return (0);
1498	}
1499
1500	eh = mtod(m, struct ether_header *);
1501	sc = ifp->if_bridge;
1502
1503#ifndef BRIDGE_MPSAFE
1504	s = splnet();
1505#endif
1506
1507	/*
1508	 * If bridge is down, but the original output interface is up,
1509	 * go ahead and send out that interface.  Otherwise, the packet
1510	 * is dropped below.
1511	 */
1512	if (__predict_false(sc == NULL) ||
1513	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1514		dst_if = ifp;
1515		goto sendunicast;
1516	}
1517
1518	/*
1519	 * If the packet is a multicast, or we don't know a better way to
1520	 * get there, send to all interfaces.
1521	 */
1522	if (ETHER_IS_MULTICAST(eh->ether_dhost))
1523		dst_if = NULL;
1524	else
1525		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1526	if (dst_if == NULL) {
1527		struct bridge_iflist *bif;
1528		struct mbuf *mc;
1529		int used = 0;
1530		int ss;
1531
1532		BRIDGE_PSZ_RENTER(ss);
1533		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1534			bif = bridge_try_hold_bif(bif);
1535			if (bif == NULL)
1536				continue;
1537			BRIDGE_PSZ_REXIT(ss);
1538
1539			dst_if = bif->bif_ifp;
1540			if ((dst_if->if_flags & IFF_RUNNING) == 0)
1541				goto next;
1542
1543			/*
1544			 * If this is not the original output interface,
1545			 * and the interface is participating in spanning
1546			 * tree, make sure the port is in a state that
1547			 * allows forwarding.
1548			 */
1549			if (dst_if != ifp &&
1550			    (bif->bif_flags & IFBIF_STP) != 0) {
1551				switch (bif->bif_state) {
1552				case BSTP_IFSTATE_BLOCKING:
1553				case BSTP_IFSTATE_LISTENING:
1554				case BSTP_IFSTATE_DISABLED:
1555					goto next;
1556				}
1557			}
1558
1559			if (LIST_NEXT(bif, bif_next) == NULL) {
1560				used = 1;
1561				mc = m;
1562			} else {
1563				mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
1564				if (mc == NULL) {
1565					sc->sc_if.if_oerrors++;
1566					goto next;
1567				}
1568			}
1569
1570			bridge_enqueue(sc, dst_if, mc, 0);
1571next:
1572			bridge_release_member(sc, bif);
1573			BRIDGE_PSZ_RENTER(ss);
1574		}
1575		BRIDGE_PSZ_REXIT(ss);
1576
1577		if (used == 0)
1578			m_freem(m);
1579#ifndef BRIDGE_MPSAFE
1580		splx(s);
1581#endif
1582		return (0);
1583	}
1584
1585 sendunicast:
1586	/*
1587	 * XXX Spanning tree consideration here?
1588	 */
1589
1590	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1591		m_freem(m);
1592#ifndef BRIDGE_MPSAFE
1593		splx(s);
1594#endif
1595		return (0);
1596	}
1597
1598	bridge_enqueue(sc, dst_if, m, 0);
1599
1600#ifndef BRIDGE_MPSAFE
1601	splx(s);
1602#endif
1603	return (0);
1604}
1605
1606/*
1607 * bridge_start:
1608 *
1609 *	Start output on a bridge.
1610 *
1611 *	NOTE: This routine should never be called in this implementation.
1612 */
1613static void
1614bridge_start(struct ifnet *ifp)
1615{
1616
1617	printf("%s: bridge_start() called\n", ifp->if_xname);
1618}
1619
/*
 * bridge_forward:
 *
 *	The forwarding function of the bridge.
 *
 *	v is the bridge softc.  Every packet queued on sc_fwd_pktq is
 *	dequeued and either dropped, flooded via bridge_broadcast(), or
 *	enqueued on the member interface that the address table gives
 *	for its destination.  Along the way the source address is
 *	learned if the receiving member has IFBIF_LEARNING set.
 */
static void
bridge_forward(void *v)
{
	struct bridge_softc *sc = v;
	struct mbuf *m;
	struct bridge_iflist *bif;
	struct ifnet *src_if, *dst_if;
	struct ether_header *eh;
#ifndef BRIDGE_MPSAFE
	int s;

	KERNEL_LOCK(1, NULL);
	mutex_enter(softnet_lock);
#endif

	/* Nothing is forwarded while the bridge is down. */
	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0) {
#ifndef BRIDGE_MPSAFE
		mutex_exit(softnet_lock);
		KERNEL_UNLOCK_ONE(NULL);
#endif
		return;
	}

#ifndef BRIDGE_MPSAFE
	s = splnet();
#endif
	while ((m = pktq_dequeue(sc->sc_fwd_pktq)) != NULL) {
		src_if = m->m_pkthdr.rcvif;

		/* Every dequeued packet counts as bridge input. */
		sc->sc_if.if_ipackets++;
		sc->sc_if.if_ibytes += m->m_pkthdr.len;

		/*
		 * Look up the bridge_iflist.
		 */
		bif = bridge_lookup_member_if(sc, src_if);
		if (bif == NULL) {
			/* Interface is not a bridge member (anymore?) */
			m_freem(m);
			continue;
		}

		/* Drop if the receiving port's STP state is pre-learning. */
		if (bif->bif_flags & IFBIF_STP) {
			switch (bif->bif_state) {
			case BSTP_IFSTATE_BLOCKING:
			case BSTP_IFSTATE_LISTENING:
			case BSTP_IFSTATE_DISABLED:
				m_freem(m);
				bridge_release_member(sc, bif);
				continue;
			}
		}

		eh = mtod(m, struct ether_header *);

		/*
		 * If the interface is learning, and the source
		 * address is valid and not multicast, record
		 * the address.
		 */
		if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
		    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
		    (eh->ether_shost[0] == 0 &&
		     eh->ether_shost[1] == 0 &&
		     eh->ether_shost[2] == 0 &&
		     eh->ether_shost[3] == 0 &&
		     eh->ether_shost[4] == 0 &&
		     eh->ether_shost[5] == 0) == 0) {
			(void) bridge_rtupdate(sc, eh->ether_shost,
			    src_if, 0, IFBAF_DYNAMIC);
		}

		/* A port in the LEARNING state learns but does not forward. */
		if ((bif->bif_flags & IFBIF_STP) != 0 &&
		    bif->bif_state == BSTP_IFSTATE_LEARNING) {
			m_freem(m);
			bridge_release_member(sc, bif);
			continue;
		}

		/* Done with the source member; bif is reused further down. */
		bridge_release_member(sc, bif);

		/*
		 * At this point, the port either doesn't participate
		 * in spanning tree or it is in the forwarding state.
		 */

		/*
		 * If the packet is unicast, destined for someone on
		 * "this" side of the bridge, drop it.
		 */
		if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
			/* dst_if stays NULL if the address is unknown. */
			dst_if = bridge_rtlookup(sc, eh->ether_dhost);
			if (src_if == dst_if) {
				m_freem(m);
				continue;
			}
		} else {
			/* ...forward it to all interfaces. */
			sc->sc_if.if_imcasts++;
			dst_if = NULL;
		}

		/*
		 * Run the bridge's input filter hooks.  A non-zero return
		 * rejects the packet; m == NULL means a hook consumed it.
		 */
		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
		    m->m_pkthdr.rcvif, PFIL_IN) != 0) {
			if (m != NULL)
				m_freem(m);
			continue;
		}
		if (m == NULL)
			continue;

		/* No single destination known: flood. */
		if (dst_if == NULL) {
			bridge_broadcast(sc, src_if, m);
			continue;
		}

		/*
		 * At this point, we're dealing with a unicast frame
		 * going to a different interface.
		 */
		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			m_freem(m);
			continue;
		}

		bif = bridge_lookup_member_if(sc, dst_if);
		if (bif == NULL) {
			/* Not a member of the bridge (anymore?) */
			m_freem(m);
			continue;
		}

		/* Do not send out of a disabled or blocking port. */
		if (bif->bif_flags & IFBIF_STP) {
			switch (bif->bif_state) {
			case BSTP_IFSTATE_DISABLED:
			case BSTP_IFSTATE_BLOCKING:
				m_freem(m);
				bridge_release_member(sc, bif);
				continue;
			}
		}

		bridge_release_member(sc, bif);

		/* runfilt == 1: apply the output filter hooks as well. */
		bridge_enqueue(sc, dst_if, m, 1);
	}
#ifndef BRIDGE_MPSAFE
	splx(s);
	mutex_exit(softnet_lock);
	KERNEL_UNLOCK_ONE(NULL);
#endif
}
1777
1778static bool
1779bstp_state_before_learning(struct bridge_iflist *bif)
1780{
1781	if (bif->bif_flags & IFBIF_STP) {
1782		switch (bif->bif_state) {
1783		case BSTP_IFSTATE_BLOCKING:
1784		case BSTP_IFSTATE_LISTENING:
1785		case BSTP_IFSTATE_DISABLED:
1786			return true;
1787		}
1788	}
1789	return false;
1790}
1791
1792static bool
1793bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1794{
1795	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1796
1797	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1798#if NCARP > 0
1799	    || (bif->bif_ifp->if_carp &&
1800	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1801#endif /* NCARP > 0 */
1802	    )
1803		return true;
1804
1805	return false;
1806}
1807
/*
 * bridge_input:
 *
 *	Receive input from a member interface.  Queue the packet for
 *	bridging if it is not for us.
 *
 *	ifp is the receiving interface and m the packet, which starts
 *	with its Ethernet header.  The packet is either passed to
 *	ether_input(), passed to bstp_input(), freed, or queued on the
 *	bridge's forwarding packet queue.
 */
static void
bridge_input(struct ifnet *ifp, struct mbuf *m)
{
	struct bridge_softc *sc = ifp->if_bridge;
	struct bridge_iflist *bif;
	struct ether_header *eh;

	/* No bridge, or bridge down: behave as a plain Ethernet interface. */
	if (__predict_false(sc == NULL) ||
	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
		ether_input(ifp, m);
		return;
	}

	/* Holds a reference on the receiving member until released below. */
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		ether_input(ifp, m);
		return;
	}

	eh = mtod(m, struct ether_header *);

	/* Classify the destination so later stages can test m_flags. */
	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		if (memcmp(etherbroadcastaddr,
		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
	}

	/*
	 * A 'fast' path for packets addressed to interfaces that are
	 * part of this bridge.
	 */
	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
	    !bstp_state_before_learning(bif)) {
		struct bridge_iflist *_bif;
		struct ifnet *_ifp = NULL;
		int s;

		BRIDGE_PSZ_RENTER(s);
		LIST_FOREACH(_bif, &sc->sc_iflist, bif_next) {
			/* It is destined for us. */
			if (bridge_ourether(_bif, eh, 0)) {
				_bif = bridge_try_hold_bif(_bif);
				BRIDGE_PSZ_REXIT(s);
				/* Hold failed: fall back to the slow path. */
				if (_bif == NULL)
					goto out;
				if (_bif->bif_flags & IFBIF_LEARNING)
					(void) bridge_rtupdate(sc,
					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
				/* Deliver as if received on the owning member. */
				_ifp = m->m_pkthdr.rcvif = _bif->bif_ifp;
				bridge_release_member(sc, _bif);
				goto out;
			}

			/* We just received a packet that we sent out. */
			if (bridge_ourether(_bif, eh, 1))
				break;
		}
		BRIDGE_PSZ_REXIT(s);
out:

		/*
		 * _bif is non-NULL here only if the loop stopped on a
		 * member that matched (it is used as a flag only; the
		 * reference, if any, has already been dropped).  _ifp is
		 * set when the destination matched; otherwise the source
		 * matched and the packet is our own, so it is discarded.
		 */
		if (_bif != NULL) {
			bridge_release_member(sc, bif);
			if (_ifp != NULL)
				ether_input(_ifp, m);
			else
				m_freem(m);
			return;
		}
	}

	/* Tap off 802.1D packets; they do not get forwarded. */
	if (bif->bif_flags & IFBIF_STP &&
	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
		bstp_input(sc, bif, m);
		bridge_release_member(sc, bif);
		return;
	}

	/*
	 * A normal switch would discard the packet here, but that's not what
	 * we've done historically. This also prevents some obnoxious behaviour.
	 */
	if (bstp_state_before_learning(bif)) {
		bridge_release_member(sc, bif);
		ether_input(ifp, m);
		return;
	}

	bridge_release_member(sc, bif);

	/* Queue the packet for bridge forwarding. */
	if (__predict_false(!pktq_enqueue(sc->sc_fwd_pktq, m, 0)))
		m_freem(m);
}
1910
1911/*
1912 * bridge_broadcast:
1913 *
1914 *	Send a frame to all interfaces that are members of
1915 *	the bridge, except for the one on which the packet
1916 *	arrived.
1917 */
1918static void
1919bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
1920    struct mbuf *m)
1921{
1922	struct bridge_iflist *bif;
1923	struct mbuf *mc;
1924	struct ifnet *dst_if;
1925	bool used, bmcast;
1926	int s;
1927
1928	used = bmcast = m->m_flags & (M_BCAST|M_MCAST);
1929
1930	BRIDGE_PSZ_RENTER(s);
1931	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1932		bif = bridge_try_hold_bif(bif);
1933		if (bif == NULL)
1934			continue;
1935		BRIDGE_PSZ_REXIT(s);
1936
1937		dst_if = bif->bif_ifp;
1938		if (dst_if == src_if)
1939			goto next;
1940
1941		if (bif->bif_flags & IFBIF_STP) {
1942			switch (bif->bif_state) {
1943			case BSTP_IFSTATE_BLOCKING:
1944			case BSTP_IFSTATE_DISABLED:
1945				goto next;
1946			}
1947		}
1948
1949		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
1950			goto next;
1951
1952		if ((dst_if->if_flags & IFF_RUNNING) == 0)
1953			goto next;
1954
1955		if (!used && LIST_NEXT(bif, bif_next) == NULL) {
1956			mc = m;
1957			used = true;
1958		} else {
1959			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1960			if (mc == NULL) {
1961				sc->sc_if.if_oerrors++;
1962				goto next;
1963			}
1964		}
1965
1966		bridge_enqueue(sc, dst_if, mc, 1);
1967next:
1968		bridge_release_member(sc, bif);
1969		BRIDGE_PSZ_RENTER(s);
1970	}
1971	BRIDGE_PSZ_REXIT(s);
1972
1973	if (bmcast)
1974		ether_input(src_if, m);
1975	else if (!used)
1976		m_freem(m);
1977}
1978
1979/*
1980 * bridge_rtupdate:
1981 *
1982 *	Add a bridge routing entry.
1983 */
1984static int
1985bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
1986    struct ifnet *dst_if, int setflags, uint8_t flags)
1987{
1988	struct bridge_rtnode *brt;
1989	int error = 0;
1990
1991	mutex_enter(sc->sc_rtlist_lock);
1992
1993	/*
1994	 * A route for this destination might already exist.  If so,
1995	 * update it, otherwise create a new one.
1996	 */
1997	if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
1998		if (sc->sc_brtcnt >= sc->sc_brtmax) {
1999			error = ENOSPC;
2000			goto out;
2001		}
2002
2003		/*
2004		 * Allocate a new bridge forwarding node, and
2005		 * initialize the expiration time and Ethernet
2006		 * address.
2007		 */
2008		brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
2009		if (brt == NULL) {
2010			error = ENOMEM;
2011			goto out;
2012		}
2013
2014		memset(brt, 0, sizeof(*brt));
2015		brt->brt_expire = time_uptime + sc->sc_brttimeout;
2016		brt->brt_flags = IFBAF_DYNAMIC;
2017		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2018
2019		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
2020			pool_put(&bridge_rtnode_pool, brt);
2021			goto out;
2022		}
2023	}
2024
2025	brt->brt_ifp = dst_if;
2026	if (setflags) {
2027		brt->brt_flags = flags;
2028		if (flags & IFBAF_STATIC)
2029			brt->brt_expire = 0;
2030		else
2031			brt->brt_expire = time_uptime + sc->sc_brttimeout;
2032	}
2033
2034out:
2035	mutex_exit(sc->sc_rtlist_lock);
2036
2037	return error;
2038}
2039
2040/*
2041 * bridge_rtlookup:
2042 *
2043 *	Lookup the destination interface for an address.
2044 */
2045static struct ifnet *
2046bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2047{
2048	struct bridge_rtnode *brt;
2049	struct ifnet *ifs = NULL;
2050
2051	mutex_enter(sc->sc_rtlist_lock);
2052
2053	brt = bridge_rtnode_lookup(sc, addr);
2054	if (brt != NULL)
2055		ifs = brt->brt_ifp;
2056
2057	mutex_exit(sc->sc_rtlist_lock);
2058
2059	return ifs;
2060}
2061
2062/*
2063 * bridge_rttrim:
2064 *
2065 *	Trim the routine table so that we have a number
2066 *	of routing entries less than or equal to the
2067 *	maximum number.
2068 */
2069static void
2070bridge_rttrim(struct bridge_softc *sc)
2071{
2072	struct bridge_rtnode *brt, *nbrt;
2073
2074	mutex_enter(sc->sc_rtlist_lock);
2075
2076	/* Make sure we actually need to do this. */
2077	if (sc->sc_brtcnt <= sc->sc_brtmax)
2078		goto out;
2079
2080	/* Force an aging cycle; this might trim enough addresses. */
2081	bridge_rtage(sc);
2082	if (sc->sc_brtcnt <= sc->sc_brtmax)
2083		goto out;
2084
2085	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2086		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2087			bridge_rtnode_destroy(sc, brt);
2088			if (sc->sc_brtcnt <= sc->sc_brtmax)
2089				goto out;
2090		}
2091	}
2092
2093out:
2094	mutex_exit(sc->sc_rtlist_lock);
2095
2096	return;
2097}
2098
2099/*
2100 * bridge_timer:
2101 *
2102 *	Aging timer for the bridge.
2103 */
2104static void
2105bridge_timer(void *arg)
2106{
2107	struct bridge_softc *sc = arg;
2108
2109	mutex_enter(sc->sc_rtlist_lock);
2110
2111	bridge_rtage(sc);
2112
2113	if (sc->sc_if.if_flags & IFF_RUNNING)
2114		callout_reset(&sc->sc_brcallout,
2115		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2116
2117	mutex_exit(sc->sc_rtlist_lock);
2118}
2119
2120/*
2121 * bridge_rtage:
2122 *
2123 *	Perform an aging cycle.
2124 */
2125static void
2126bridge_rtage(struct bridge_softc *sc)
2127{
2128	struct bridge_rtnode *brt, *nbrt;
2129
2130	KASSERT(mutex_owned(sc->sc_rtlist_lock));
2131
2132	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2133		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2134			if (time_uptime >= brt->brt_expire)
2135				bridge_rtnode_destroy(sc, brt);
2136		}
2137	}
2138}
2139
2140/*
2141 * bridge_rtflush:
2142 *
2143 *	Remove all dynamic addresses from the bridge.
2144 */
2145static void
2146bridge_rtflush(struct bridge_softc *sc, int full)
2147{
2148	struct bridge_rtnode *brt, *nbrt;
2149
2150	mutex_enter(sc->sc_rtlist_lock);
2151
2152	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2153		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2154			bridge_rtnode_destroy(sc, brt);
2155	}
2156
2157	mutex_exit(sc->sc_rtlist_lock);
2158}
2159
2160/*
2161 * bridge_rtdaddr:
2162 *
2163 *	Remove an address from the table.
2164 */
2165static int
2166bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2167{
2168	struct bridge_rtnode *brt;
2169	int error = 0;
2170
2171	mutex_enter(sc->sc_rtlist_lock);
2172
2173	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL) {
2174		error = ENOENT;
2175		goto out;
2176	}
2177
2178	bridge_rtnode_destroy(sc, brt);
2179
2180out:
2181	mutex_exit(sc->sc_rtlist_lock);
2182
2183	return error;
2184}
2185
2186/*
2187 * bridge_rtdelete:
2188 *
2189 *	Delete routes to a speicifc member interface.
2190 */
2191static void
2192bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2193{
2194	struct bridge_rtnode *brt, *nbrt;
2195
2196	mutex_enter(sc->sc_rtlist_lock);
2197
2198	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2199		if (brt->brt_ifp == ifp)
2200			bridge_rtnode_destroy(sc, brt);
2201	}
2202
2203	mutex_exit(sc->sc_rtlist_lock);
2204}
2205
2206/*
2207 * bridge_rtable_init:
2208 *
2209 *	Initialize the route table for this bridge.
2210 */
2211static void
2212bridge_rtable_init(struct bridge_softc *sc)
2213{
2214	int i;
2215
2216	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2217	    KM_SLEEP);
2218
2219	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2220		LIST_INIT(&sc->sc_rthash[i]);
2221
2222	sc->sc_rthash_key = cprng_fast32();
2223
2224	LIST_INIT(&sc->sc_rtlist);
2225
2226	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
2227}
2228
2229/*
2230 * bridge_rtable_fini:
2231 *
2232 *	Deconstruct the route table for this bridge.
2233 */
2234static void
2235bridge_rtable_fini(struct bridge_softc *sc)
2236{
2237
2238	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2239	if (sc->sc_rtlist_lock)
2240		mutex_obj_free(sc->sc_rtlist_lock);
2241}
2242
2243/*
2244 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2245 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2246 */
2247#define	mix(a, b, c)							\
2248do {									\
2249	a -= b; a -= c; a ^= (c >> 13);					\
2250	b -= c; b -= a; b ^= (a << 8);					\
2251	c -= a; c -= b; c ^= (b >> 13);					\
2252	a -= b; a -= c; a ^= (c >> 12);					\
2253	b -= c; b -= a; b ^= (a << 16);					\
2254	c -= a; c -= b; c ^= (b >> 5);					\
2255	a -= b; a -= c; a ^= (c >> 3);					\
2256	b -= c; b -= a; b ^= (a << 10);					\
2257	c -= a; c -= b; c ^= (b >> 15);					\
2258} while (/*CONSTCOND*/0)
2259
2260static inline uint32_t
2261bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2262{
2263	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2264
2265	b += addr[5] << 8;
2266	b += addr[4];
2267	a += addr[3] << 24;
2268	a += addr[2] << 16;
2269	a += addr[1] << 8;
2270	a += addr[0];
2271
2272	mix(a, b, c);
2273
2274	return (c & BRIDGE_RTHASH_MASK);
2275}
2276
2277#undef mix
2278
2279/*
2280 * bridge_rtnode_lookup:
2281 *
2282 *	Look up a bridge route node for the specified destination.
2283 */
2284static struct bridge_rtnode *
2285bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2286{
2287	struct bridge_rtnode *brt;
2288	uint32_t hash;
2289	int dir;
2290
2291	KASSERT(mutex_owned(sc->sc_rtlist_lock));
2292
2293	hash = bridge_rthash(sc, addr);
2294	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
2295		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2296		if (dir == 0)
2297			return (brt);
2298		if (dir > 0)
2299			return (NULL);
2300	}
2301
2302	return (NULL);
2303}
2304
2305/*
2306 * bridge_rtnode_insert:
2307 *
2308 *	Insert the specified bridge node into the route table.  We
2309 *	assume the entry is not already in the table.
2310 */
2311static int
2312bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2313{
2314	struct bridge_rtnode *lbrt;
2315	uint32_t hash;
2316	int dir;
2317
2318	KASSERT(mutex_owned(sc->sc_rtlist_lock));
2319
2320	hash = bridge_rthash(sc, brt->brt_addr);
2321
2322	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
2323	if (lbrt == NULL) {
2324		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
2325		goto out;
2326	}
2327
2328	do {
2329		dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2330		if (dir == 0)
2331			return (EEXIST);
2332		if (dir > 0) {
2333			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
2334			goto out;
2335		}
2336		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
2337			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
2338			goto out;
2339		}
2340		lbrt = LIST_NEXT(lbrt, brt_hash);
2341	} while (lbrt != NULL);
2342
2343#ifdef DIAGNOSTIC
2344	panic("bridge_rtnode_insert: impossible");
2345#endif
2346
2347 out:
2348	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
2349	sc->sc_brtcnt++;
2350
2351	return (0);
2352}
2353
2354/*
2355 * bridge_rtnode_destroy:
2356 *
2357 *	Destroy a bridge rtnode.
2358 */
2359static void
2360bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
2361{
2362
2363	KASSERT(mutex_owned(sc->sc_rtlist_lock));
2364
2365	LIST_REMOVE(brt, brt_hash);
2366
2367	LIST_REMOVE(brt, brt_list);
2368	sc->sc_brtcnt--;
2369	pool_put(&bridge_rtnode_pool, brt);
2370}
2371
2372#if defined(BRIDGE_IPF)
2373extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2374extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2375
2376/*
2377 * Send bridge packets through IPF if they are one of the types IPF can deal
2378 * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2379 * question.)
2380 */
2381static int
2382bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2383{
2384	int snap, error;
2385	struct ether_header *eh1, eh2;
2386	struct llc llc1;
2387	uint16_t ether_type;
2388
2389	snap = 0;
2390	error = -1;	/* Default error if not error == 0 */
2391	eh1 = mtod(*mp, struct ether_header *);
2392	ether_type = ntohs(eh1->ether_type);
2393
2394	/*
2395	 * Check for SNAP/LLC.
2396	 */
2397        if (ether_type < ETHERMTU) {
2398                struct llc *llc2 = (struct llc *)(eh1 + 1);
2399
2400                if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2401                    llc2->llc_dsap == LLC_SNAP_LSAP &&
2402                    llc2->llc_ssap == LLC_SNAP_LSAP &&
2403                    llc2->llc_control == LLC_UI) {
2404                	ether_type = htons(llc2->llc_un.type_snap.ether_type);
2405			snap = 1;
2406                }
2407        }
2408
2409	/*
2410	 * If we're trying to filter bridge traffic, don't look at anything
2411	 * other than IP and ARP traffic.  If the filter doesn't understand
2412	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2413	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2414	 * but of course we don't have an AppleTalk filter to begin with.
2415	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2416	 * ARP traffic.)
2417	 */
2418	switch (ether_type) {
2419		case ETHERTYPE_ARP:
2420		case ETHERTYPE_REVARP:
2421			return 0; /* Automatically pass */
2422		case ETHERTYPE_IP:
2423# ifdef INET6
2424		case ETHERTYPE_IPV6:
2425# endif /* INET6 */
2426			break;
2427		default:
2428			goto bad;
2429	}
2430
2431	/* Strip off the Ethernet header and keep a copy. */
2432	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2433	m_adj(*mp, ETHER_HDR_LEN);
2434
2435	/* Strip off snap header, if present */
2436	if (snap) {
2437		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2438		m_adj(*mp, sizeof(struct llc));
2439	}
2440
2441	/*
2442	 * Check basic packet sanity and run IPF through pfil.
2443	 */
2444	KASSERT(!cpu_intr_p());
2445	switch (ether_type)
2446	{
2447	case ETHERTYPE_IP :
2448		error = (dir == PFIL_IN) ? bridge_ip_checkbasic(mp) : 0;
2449		if (error == 0)
2450			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2451		break;
2452# ifdef INET6
2453	case ETHERTYPE_IPV6 :
2454		error = (dir == PFIL_IN) ? bridge_ip6_checkbasic(mp) : 0;
2455		if (error == 0)
2456			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2457		break;
2458# endif
2459	default :
2460		error = 0;
2461		break;
2462	}
2463
2464	if (*mp == NULL)
2465		return error;
2466	if (error != 0)
2467		goto bad;
2468
2469	error = -1;
2470
2471	/*
2472	 * Finally, put everything back the way it was and return
2473	 */
2474	if (snap) {
2475		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2476		if (*mp == NULL)
2477			return error;
2478		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2479	}
2480
2481	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2482	if (*mp == NULL)
2483		return error;
2484	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2485
2486	return 0;
2487
2488    bad:
2489	m_freem(*mp);
2490	*mp = NULL;
2491	return error;
2492}
2493
2494/*
2495 * Perform basic checks on header size since
2496 * IPF assumes ip_input has already processed
2497 * it for it.  Cut-and-pasted from ip_input.c.
2498 * Given how simple the IPv6 version is,
2499 * does the IPv4 version really need to be
2500 * this complicated?
2501 *
2502 * XXX Should we update ipstat here, or not?
2503 * XXX Right now we update ipstat but not
2504 * XXX csum_counter.
2505 */
2506static int
2507bridge_ip_checkbasic(struct mbuf **mp)
2508{
2509	struct mbuf *m = *mp;
2510	struct ip *ip;
2511	int len, hlen;
2512
2513	if (*mp == NULL)
2514		return -1;
2515
2516	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2517		if ((m = m_copyup(m, sizeof(struct ip),
2518			(max_linkhdr + 3) & ~3)) == NULL) {
2519			/* XXXJRT new stat, please */
2520			ip_statinc(IP_STAT_TOOSMALL);
2521			goto bad;
2522		}
2523	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2524		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2525			ip_statinc(IP_STAT_TOOSMALL);
2526			goto bad;
2527		}
2528	}
2529	ip = mtod(m, struct ip *);
2530	if (ip == NULL) goto bad;
2531
2532	if (ip->ip_v != IPVERSION) {
2533		ip_statinc(IP_STAT_BADVERS);
2534		goto bad;
2535	}
2536	hlen = ip->ip_hl << 2;
2537	if (hlen < sizeof(struct ip)) { /* minimum header length */
2538		ip_statinc(IP_STAT_BADHLEN);
2539		goto bad;
2540	}
2541	if (hlen > m->m_len) {
2542		if ((m = m_pullup(m, hlen)) == 0) {
2543			ip_statinc(IP_STAT_BADHLEN);
2544			goto bad;
2545		}
2546		ip = mtod(m, struct ip *);
2547		if (ip == NULL) goto bad;
2548	}
2549
2550        switch (m->m_pkthdr.csum_flags &
2551                ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
2552                 M_CSUM_IPv4_BAD)) {
2553        case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2554                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2555                goto bad;
2556
2557        case M_CSUM_IPv4:
2558                /* Checksum was okay. */
2559                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2560                break;
2561
2562        default:
2563                /* Must compute it ourselves. */
2564                /* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2565                if (in_cksum(m, hlen) != 0)
2566                        goto bad;
2567                break;
2568        }
2569
2570        /* Retrieve the packet length. */
2571        len = ntohs(ip->ip_len);
2572
2573        /*
2574         * Check for additional length bogosity
2575         */
2576        if (len < hlen) {
2577		ip_statinc(IP_STAT_BADLEN);
2578                goto bad;
2579        }
2580
2581        /*
2582         * Check that the amount of data in the buffers
2583         * is as at least much as the IP header would have us expect.
2584         * Drop packet if shorter than we expect.
2585         */
2586        if (m->m_pkthdr.len < len) {
2587		ip_statinc(IP_STAT_TOOSHORT);
2588                goto bad;
2589        }
2590
2591	/* Checks out, proceed */
2592	*mp = m;
2593	return 0;
2594
2595    bad:
2596	*mp = m;
2597	return -1;
2598}
2599
# ifdef INET6
/*
 * IPv6 counterpart of bridge_ip_checkbasic().
 * Cut-and-pasted from ip6_input.c.
 *
 * Makes the base IPv6 header contiguous and aligned in the leading
 * mbuf and verifies the version field.  *mp is updated to the
 * (possibly replaced) chain and is NULL if m_copyup()/m_pullup()
 * failed.  Returns 0 if the packet checks out, -1 otherwise.
 *
 * XXX Should we update ip6stat, or not?
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6;
	int rv = -1;

	/*
	 * If the IPv6 header is not aligned, slurp it up into a new
	 * mbuf with space for link headers, in the event we forward
	 * it.  Otherwise, if it is aligned, make sure the entire base
	 * IPv6 header is in the first mbuf of the chain.
	 */
	if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
		/* Remember the interface; m is NULL if the copy fails. */
		struct ifnet *inifp = m->m_pkthdr.rcvif;

		m = m_copyup(m, sizeof(struct ip6_hdr),
		    (max_linkhdr + 3) & ~3);
		if (m == NULL) {
			/* XXXJRT new stat, please */
			ip6_statinc(IP6_STAT_TOOSMALL);
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto out;
		}
	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
		struct ifnet *inifp = m->m_pkthdr.rcvif;

		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			ip6_statinc(IP6_STAT_TOOSMALL);
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto out;
		}
	}

	ip6 = mtod(m, struct ip6_hdr *);

	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
		/* Checks out, proceed */
		rv = 0;
	} else {
		ip6_statinc(IP6_STAT_BADVERS);
		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
	}

 out:
	*mp = m;
	return rv;
}
# endif /* INET6 */
2653#endif /* BRIDGE_IPF */
2654