if_bridge.c revision 1.120
1/*	$NetBSD: if_bridge.c,v 1.120 2016/04/28 00:16:56 ozaki-r Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by Jason L. Wright
53 * 4. The name of the author may not be used to endorse or promote products
54 *    derived from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 *
68 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69 */
70
71/*
72 * Network interface bridge support.
73 *
74 * TODO:
75 *
76 *	- Currently only supports Ethernet-like interfaces (Ethernet,
77 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
79 *	  consider heterogeneous bridges).
80 */
81
82#include <sys/cdefs.h>
83__KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.120 2016/04/28 00:16:56 ozaki-r Exp $");
84
85#ifdef _KERNEL_OPT
86#include "opt_bridge_ipf.h"
87#include "opt_inet.h"
88#include "opt_net_mpsafe.h"
89#endif /* _KERNEL_OPT */
90
91#include <sys/param.h>
92#include <sys/kernel.h>
93#include <sys/mbuf.h>
94#include <sys/queue.h>
95#include <sys/socket.h>
96#include <sys/socketvar.h> /* for softnet_lock */
97#include <sys/sockio.h>
98#include <sys/systm.h>
99#include <sys/proc.h>
100#include <sys/pool.h>
101#include <sys/kauth.h>
102#include <sys/cpu.h>
103#include <sys/cprng.h>
104#include <sys/mutex.h>
105#include <sys/kmem.h>
106
107#include <net/bpf.h>
108#include <net/if.h>
109#include <net/if_dl.h>
110#include <net/if_types.h>
111#include <net/if_llc.h>
112
113#include <net/if_ether.h>
114#include <net/if_bridgevar.h>
115
116#if defined(BRIDGE_IPF)
117/* Used for bridge_ip[6]_checkbasic */
118#include <netinet/in.h>
119#include <netinet/in_systm.h>
120#include <netinet/ip.h>
121#include <netinet/ip_var.h>
122#include <netinet/ip_private.h>		/* XXX */
123
124#include <netinet/ip6.h>
125#include <netinet6/in6_var.h>
126#include <netinet6/ip6_var.h>
127#include <netinet6/ip6_private.h>	/* XXX */
128#endif /* BRIDGE_IPF */
129
130/*
131 * Size of the route hash table.  Must be a power of two.
132 */
133#ifndef BRIDGE_RTHASH_SIZE
134#define	BRIDGE_RTHASH_SIZE		1024
135#endif
136
137#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
138
139#include "carp.h"
140#if NCARP > 0
141#include <netinet/in.h>
142#include <netinet/in_var.h>
143#include <netinet/ip_carp.h>
144#endif
145
146#include "ioconf.h"
147
148__CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
149__CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
150__CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
151
152/*
153 * Maximum number of addresses to cache.
154 */
155#ifndef BRIDGE_RTABLE_MAX
156#define	BRIDGE_RTABLE_MAX		100
157#endif
158
159/*
160 * Spanning tree defaults.
161 */
162#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
163#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
164#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
165#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
166#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
167#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
168#define	BSTP_DEFAULT_PATH_COST		55
169
170/*
171 * Timeout (in seconds) for entries learned dynamically.
172 */
173#ifndef BRIDGE_RTABLE_TIMEOUT
174#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
175#endif
176
177/*
178 * Number of seconds between walks of the route list.
179 */
180#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
181#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
182#endif
183
/*
 * Routing-table list lock helpers.  Each one is a no-op when the softc
 * has no sc_rtlist_lock.  LOCK and UNLOCK are wrapped in do { } while (0)
 * so that each expands to exactly one statement and cannot capture a
 * following "else" when used as the body of an unbraced if.
 */
#define BRIDGE_RT_LOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_enter((_sc)->sc_rtlist_lock); \
				} while (0)
#define BRIDGE_RT_UNLOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_exit((_sc)->sc_rtlist_lock); \
				} while (0)
#define BRIDGE_RT_LOCKED(_sc)	(!(_sc)->sc_rtlist_lock || \
				 mutex_owned((_sc)->sc_rtlist_lock))

/*
 * NOTE(review): this still expands to a bare "if" carrying its own
 * trailing semicolon.  It is left untouched because not every call
 * site is visible from here; do not use it as an unbraced if/else body.
 */
#define BRIDGE_RT_PSZ_PERFORM(_sc) \
				if ((_sc)->sc_rtlist_psz != NULL) \
					pserialize_perform((_sc)->sc_rtlist_psz);

/* Enter/leave a pserialize read section; __s receives the saved state. */
#define BRIDGE_RT_RENTER(__s)	do { __s = pserialize_read_enter(); } while (0)
#define BRIDGE_RT_REXIT(__s)	do { pserialize_read_exit(__s); } while (0)


/*
 * Global lock bracket.  With NET_MPSAFE these expand to nothing;
 * otherwise they take the kernel lock and softnet_lock and raise the
 * IPL with splnet(), saving the previous level in the variable declared
 * by DECLARE_LOCK_VARIABLE.
 */
#ifdef NET_MPSAFE
#define DECLARE_LOCK_VARIABLE
#define ACQUIRE_GLOBAL_LOCKS()	do { } while (0)
#define RELEASE_GLOBAL_LOCKS()	do { } while (0)
#else
#define DECLARE_LOCK_VARIABLE	int __s
#define ACQUIRE_GLOBAL_LOCKS()	do {					\
					KERNEL_LOCK(1, NULL);		\
					mutex_enter(softnet_lock);	\
					__s = splnet();			\
				} while (0)
#define RELEASE_GLOBAL_LOCKS()	do {					\
					splx(__s);			\
					mutex_exit(softnet_lock);	\
					KERNEL_UNLOCK_ONE(NULL);	\
				} while (0)
#endif
216
/* psref class shared by all bridge member references; set in bridgeattach(). */
struct psref_class *bridge_psref_class __read_mostly;

/* Defaults to BRIDGE_RTABLE_PRUNE_PERIOD (seconds between route list walks). */
int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* Pool of struct bridge_rtnode; initialized in bridgeattach(). */
static struct pool bridge_rtnode_pool;
222
223static int	bridge_clone_create(struct if_clone *, int);
224static int	bridge_clone_destroy(struct ifnet *);
225
226static int	bridge_ioctl(struct ifnet *, u_long, void *);
227static int	bridge_init(struct ifnet *);
228static void	bridge_stop(struct ifnet *, int);
229static void	bridge_start(struct ifnet *);
230
231static void	bridge_input(struct ifnet *, struct mbuf *);
232static void	bridge_forward(struct bridge_softc *, struct mbuf *);
233
234static void	bridge_timer(void *);
235
236static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
237				 struct mbuf *);
238
239static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
240				struct ifnet *, int, uint8_t);
241static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
242static void	bridge_rttrim(struct bridge_softc *);
243static void	bridge_rtage(struct bridge_softc *);
244static void	bridge_rtage_work(struct work *, void *);
245static void	bridge_rtflush(struct bridge_softc *, int);
246static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
247static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
248
249static void	bridge_rtable_init(struct bridge_softc *);
250static void	bridge_rtable_fini(struct bridge_softc *);
251
252static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
253						  const uint8_t *);
254static int	bridge_rtnode_insert(struct bridge_softc *,
255				     struct bridge_rtnode *);
256static void	bridge_rtnode_remove(struct bridge_softc *,
257				     struct bridge_rtnode *);
258static void	bridge_rtnode_destroy(struct bridge_rtnode *);
259
260static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
261						  const char *name,
262						  struct psref *);
263static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
264						     struct ifnet *ifp,
265						     struct psref *);
266static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *,
267                                      struct psref *);
268static void	bridge_delete_member(struct bridge_softc *,
269				     struct bridge_iflist *);
270static void	bridge_acquire_member(struct bridge_softc *sc,
271                                      struct bridge_iflist *,
272                                      struct psref *);
273
274static int	bridge_ioctl_add(struct bridge_softc *, void *);
275static int	bridge_ioctl_del(struct bridge_softc *, void *);
276static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
277static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
278static int	bridge_ioctl_scache(struct bridge_softc *, void *);
279static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
280static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
281static int	bridge_ioctl_rts(struct bridge_softc *, void *);
282static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
283static int	bridge_ioctl_sto(struct bridge_softc *, void *);
284static int	bridge_ioctl_gto(struct bridge_softc *, void *);
285static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
286static int	bridge_ioctl_flush(struct bridge_softc *, void *);
287static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
288static int	bridge_ioctl_spri(struct bridge_softc *, void *);
289static int	bridge_ioctl_ght(struct bridge_softc *, void *);
290static int	bridge_ioctl_sht(struct bridge_softc *, void *);
291static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
292static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
293static int	bridge_ioctl_gma(struct bridge_softc *, void *);
294static int	bridge_ioctl_sma(struct bridge_softc *, void *);
295static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
296static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
297#if defined(BRIDGE_IPF)
298static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
299static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
300static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
301static int	bridge_ip_checkbasic(struct mbuf **mp);
302# ifdef INET6
303static int	bridge_ip6_checkbasic(struct mbuf **mp);
304# endif /* INET6 */
305#endif /* BRIDGE_IPF */
306
/*
 * One entry of the SIOC[GS]DRVSPEC dispatch table used by bridge_ioctl().
 */
struct bridge_control {
	/* Handler; receives the softc and a pointer to the argument union. */
	int	(*bc_func)(struct bridge_softc *, void *);
	/* Required ifd_len for non-translated (non-XLATE) commands. */
	int	bc_argsize;
	/* BC_F_* flags below. */
	int	bc_flags;
};

#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
#define BC_F_XLATEIN		0x08	/* xlate arguments in */
#define BC_F_XLATEOUT		0x10	/* xlate arguments out */
318
/*
 * Dispatch table for SIOC[GS]DRVSPEC, indexed by the ifd_cmd value.
 *
 * The table is built with designated initializers, so any index that is
 * not listed (including the BRIDGE_IPF slots when that option is off)
 * is zero-filled: bc_func == NULL, bc_argsize == 0, bc_flags == 0.
 */
static const struct bridge_control bridge_control_table[] = {
[BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
[BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
[BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
[BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},

[OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
[OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},

[BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
[BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},

[BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},

[BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
#if defined(BRIDGE_IPF)
[BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
#endif /* BRIDGE_IPF */
/*
 * The two entries below use the XLATE flags: bridge_ioctl() passes the
 * caller's ifd_len/ifd_data through the argument union instead of
 * copying a fixed-size structure in and out.
 */
[BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
[BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
};

/* Number of slots in bridge_control_table (used to bounds-check ifd_cmd). */
static const int bridge_control_table_size = __arraycount(bridge_control_table);

/* Cloner registered by bridgeattach() for "bridge" interfaces. */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
368
369/*
370 * bridgeattach:
371 *
372 *	Pseudo-device attach routine.
373 */
374void
375bridgeattach(int n)
376{
377
378	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
379	    0, 0, 0, "brtpl", NULL, IPL_NET);
380
381	bridge_psref_class = psref_class_create("bridge", IPL_SOFTNET);
382
383	if_clone_attach(&bridge_cloner);
384}
385
386/*
387 * bridge_clone_create:
388 *
389 *	Create a new bridge instance.
390 */
391static int
392bridge_clone_create(struct if_clone *ifc, int unit)
393{
394	struct bridge_softc *sc;
395	struct ifnet *ifp;
396	int error;
397
398	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
399	ifp = &sc->sc_if;
400
401	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
402	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
403	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
404	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
405	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
406	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
407	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
408	sc->sc_filter_flags = 0;
409
410	/* Initialize our routing table. */
411	bridge_rtable_init(sc);
412
413	error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
414	    bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
415	if (error)
416		panic("%s: workqueue_create %d\n", __func__, error);
417
418	callout_init(&sc->sc_brcallout, 0);
419	callout_init(&sc->sc_bstpcallout, 0);
420
421	mutex_init(&sc->sc_iflist_psref.bip_lock, MUTEX_DEFAULT, IPL_NONE);
422	PSLIST_INIT(&sc->sc_iflist_psref.bip_iflist);
423	sc->sc_iflist_psref.bip_psz = pserialize_create();
424
425	if_initname(ifp, ifc->ifc_name, unit);
426	ifp->if_softc = sc;
427	ifp->if_mtu = ETHERMTU;
428	ifp->if_ioctl = bridge_ioctl;
429	ifp->if_output = bridge_output;
430	ifp->if_start = bridge_start;
431	ifp->if_stop = bridge_stop;
432	ifp->if_init = bridge_init;
433	ifp->if_type = IFT_BRIDGE;
434	ifp->if_addrlen = 0;
435	ifp->if_dlt = DLT_EN10MB;
436	ifp->if_hdrlen = ETHER_HDR_LEN;
437
438	if_initialize(ifp);
439	if_register(ifp);
440
441	if_alloc_sadl(ifp);
442
443	return (0);
444}
445
446/*
447 * bridge_clone_destroy:
448 *
449 *	Destroy a bridge instance.
450 */
451static int
452bridge_clone_destroy(struct ifnet *ifp)
453{
454	struct bridge_softc *sc = ifp->if_softc;
455	struct bridge_iflist *bif;
456	int s;
457
458	s = splnet();
459
460	bridge_stop(ifp, 1);
461
462	BRIDGE_LOCK(sc);
463	for (;;) {
464		bif = PSLIST_WRITER_FIRST(&sc->sc_iflist_psref.bip_iflist, struct bridge_iflist,
465		    bif_next);
466		if (bif == NULL)
467			break;
468		bridge_delete_member(sc, bif);
469	}
470	PSLIST_DESTROY(&sc->sc_iflist_psref.bip_iflist);
471	BRIDGE_UNLOCK(sc);
472
473	splx(s);
474
475	if_detach(ifp);
476
477	/* Tear down the routing table. */
478	bridge_rtable_fini(sc);
479
480	pserialize_destroy(sc->sc_iflist_psref.bip_psz);
481	mutex_destroy(&sc->sc_iflist_psref.bip_lock);
482
483	workqueue_destroy(sc->sc_rtage_wq);
484
485	kmem_free(sc, sizeof(*sc));
486
487	return (0);
488}
489
490/*
491 * bridge_ioctl:
492 *
493 *	Handle a control request from the operator.
494 */
495static int
496bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
497{
498	struct bridge_softc *sc = ifp->if_softc;
499	struct lwp *l = curlwp;	/* XXX */
500	union {
501		struct ifbreq ifbreq;
502		struct ifbifconf ifbifconf;
503		struct ifbareq ifbareq;
504		struct ifbaconf ifbaconf;
505		struct ifbrparam ifbrparam;
506	} args;
507	struct ifdrv *ifd = (struct ifdrv *) data;
508	const struct bridge_control *bc = NULL; /* XXXGCC */
509	int s, error = 0;
510
511	/* Authorize command before calling splnet(). */
512	switch (cmd) {
513	case SIOCGDRVSPEC:
514	case SIOCSDRVSPEC:
515		if (ifd->ifd_cmd >= bridge_control_table_size
516		    || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
517			error = EINVAL;
518			return error;
519		}
520
521		/* We only care about BC_F_SUSER at this point. */
522		if ((bc->bc_flags & BC_F_SUSER) == 0)
523			break;
524
525		error = kauth_authorize_network(l->l_cred,
526		    KAUTH_NETWORK_INTERFACE_BRIDGE,
527		    cmd == SIOCGDRVSPEC ?
528		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
529		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
530		     ifd, NULL, NULL);
531		if (error)
532			return (error);
533
534		break;
535	}
536
537	s = splnet();
538
539	switch (cmd) {
540	case SIOCGDRVSPEC:
541	case SIOCSDRVSPEC:
542		KASSERT(bc != NULL);
543		if (cmd == SIOCGDRVSPEC &&
544		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
545			error = EINVAL;
546			break;
547		}
548		else if (cmd == SIOCSDRVSPEC &&
549		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
550			error = EINVAL;
551			break;
552		}
553
554		/* BC_F_SUSER is checked above, before splnet(). */
555
556		if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
557		    && (ifd->ifd_len != bc->bc_argsize
558			|| ifd->ifd_len > sizeof(args))) {
559			error = EINVAL;
560			break;
561		}
562
563		memset(&args, 0, sizeof(args));
564		if (bc->bc_flags & BC_F_COPYIN) {
565			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
566			if (error)
567				break;
568		} else if (bc->bc_flags & BC_F_XLATEIN) {
569			args.ifbifconf.ifbic_len = ifd->ifd_len;
570			args.ifbifconf.ifbic_buf = ifd->ifd_data;
571		}
572
573		error = (*bc->bc_func)(sc, &args);
574		if (error)
575			break;
576
577		if (bc->bc_flags & BC_F_COPYOUT) {
578			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
579		} else if (bc->bc_flags & BC_F_XLATEOUT) {
580			ifd->ifd_len = args.ifbifconf.ifbic_len;
581			ifd->ifd_data = args.ifbifconf.ifbic_buf;
582		}
583		break;
584
585	case SIOCSIFFLAGS:
586		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
587			break;
588		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
589		case IFF_RUNNING:
590			/*
591			 * If interface is marked down and it is running,
592			 * then stop and disable it.
593			 */
594			(*ifp->if_stop)(ifp, 1);
595			break;
596		case IFF_UP:
597			/*
598			 * If interface is marked up and it is stopped, then
599			 * start it.
600			 */
601			error = (*ifp->if_init)(ifp);
602			break;
603		default:
604			break;
605		}
606		break;
607
608	case SIOCSIFMTU:
609		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
610			error = 0;
611		break;
612
613	default:
614		error = ifioctl_common(ifp, cmd, data);
615		break;
616	}
617
618	splx(s);
619
620	return (error);
621}
622
623/*
624 * bridge_lookup_member:
625 *
626 *	Lookup a bridge member interface.
627 */
628static struct bridge_iflist *
629bridge_lookup_member(struct bridge_softc *sc, const char *name, struct psref *psref)
630{
631	struct bridge_iflist *bif;
632	struct ifnet *ifp;
633	int s;
634
635	BRIDGE_PSZ_RENTER(s);
636
637	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
638		ifp = bif->bif_ifp;
639		if (strcmp(ifp->if_xname, name) == 0)
640			break;
641	}
642	if (bif != NULL)
643		bridge_acquire_member(sc, bif, psref);
644
645	BRIDGE_PSZ_REXIT(s);
646
647	return bif;
648}
649
650/*
651 * bridge_lookup_member_if:
652 *
653 *	Lookup a bridge member interface by ifnet*.
654 */
655static struct bridge_iflist *
656bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp,
657    struct psref *psref)
658{
659	struct bridge_iflist *bif;
660	int s;
661
662	BRIDGE_PSZ_RENTER(s);
663
664	bif = member_ifp->if_bridgeif;
665	if (bif != NULL) {
666		psref_acquire(psref, &bif->bif_psref,
667		    bridge_psref_class);
668	}
669
670	BRIDGE_PSZ_REXIT(s);
671
672	return bif;
673}
674
/*
 * bridge_acquire_member:
 *
 *	Take a passive reference on member `bif' through `psref', using
 *	the bridge psref class.  The `sc' argument is not used.
 */
static void
bridge_acquire_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    struct psref *psref)
{

	psref_acquire(psref, &bif->bif_psref, bridge_psref_class);
}
682
/*
 * bridge_release_member:
 *
 *	Release the specified member interface: drop the passive
 *	reference held through `psref' on `bif'.  The `sc' argument is
 *	not used.
 */
static void
bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    struct psref *psref)
{

	psref_release(psref, &bif->bif_psref, bridge_psref_class);
}
695
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	Must be called with the bridge lock held (asserted).  The lock
 *	is dropped while the member is destroyed and freed, and is held
 *	again on return, so callers must not assume the member list is
 *	unchanged across this call.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
	struct ifnet *ifs = bif->bif_ifp;

	KASSERT(BRIDGE_LOCKED(sc));

	/* Detach the interface from the bridge: restore its input hook. */
	ifs->_if_input = ether_input;
	ifs->if_bridge = NULL;
	ifs->if_bridgeif = NULL;

	/* Unlink from the member list, then run BRIDGE_PSZ_PERFORM. */
	PSLIST_WRITER_REMOVE(bif, bif_next);
	BRIDGE_PSZ_PERFORM(sc);
	/*
	 * NOTE(review): the lock is presumably dropped because
	 * psref_target_destroy() may block until references drain --
	 * confirm against psref(9).
	 */
	BRIDGE_UNLOCK(sc);

	psref_target_destroy(&bif->bif_psref, bridge_psref_class);

	PSLIST_ENTRY_DESTROY(bif, bif_next);
	kmem_free(bif, sizeof(*bif));

	/* Re-take the lock so the caller's locking state is unchanged. */
	BRIDGE_LOCK(sc);
}
723
724static int
725bridge_ioctl_add(struct bridge_softc *sc, void *arg)
726{
727	struct ifbreq *req = arg;
728	struct bridge_iflist *bif = NULL;
729	struct ifnet *ifs;
730	int error = 0;
731
732	ifs = ifunit(req->ifbr_ifsname);
733	if (ifs == NULL)
734		return (ENOENT);
735
736	if (sc->sc_if.if_mtu != ifs->if_mtu)
737		return (EINVAL);
738
739	if (ifs->if_bridge == sc)
740		return (EEXIST);
741
742	if (ifs->if_bridge != NULL)
743		return (EBUSY);
744
745	if (ifs->_if_input != ether_input)
746		return EINVAL;
747
748	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
749	if ((ifs->if_flags & IFF_SIMPLEX) == 0)
750		return EINVAL;
751
752	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
753
754	switch (ifs->if_type) {
755	case IFT_ETHER:
756		if ((error = ether_enable_vlan_mtu(ifs)) > 0)
757			goto out;
758		/*
759		 * Place the interface into promiscuous mode.
760		 */
761		error = ifpromisc(ifs, 1);
762		if (error)
763			goto out;
764		break;
765	default:
766		error = EINVAL;
767		goto out;
768	}
769
770	bif->bif_ifp = ifs;
771	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
772	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
773	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
774	PSLIST_ENTRY_INIT(bif, bif_next);
775	psref_target_init(&bif->bif_psref, bridge_psref_class);
776
777	BRIDGE_LOCK(sc);
778
779	ifs->if_bridge = sc;
780	ifs->if_bridgeif = bif;
781	PSLIST_WRITER_INSERT_HEAD(&sc->sc_iflist_psref.bip_iflist, bif, bif_next);
782	ifs->_if_input = bridge_input;
783
784	BRIDGE_UNLOCK(sc);
785
786	if (sc->sc_if.if_flags & IFF_RUNNING)
787		bstp_initialization(sc);
788	else
789		bstp_stop(sc);
790
791 out:
792	if (error) {
793		if (bif != NULL)
794			kmem_free(bif, sizeof(*bif));
795	}
796	return (error);
797}
798
799static int
800bridge_ioctl_del(struct bridge_softc *sc, void *arg)
801{
802	struct ifbreq *req = arg;
803	const char *name = req->ifbr_ifsname;
804	struct bridge_iflist *bif;
805	struct ifnet *ifs;
806
807	BRIDGE_LOCK(sc);
808
809	/*
810	 * Don't use bridge_lookup_member. We want to get a member
811	 * with bif_refs == 0.
812	 */
813	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
814		ifs = bif->bif_ifp;
815		if (strcmp(ifs->if_xname, name) == 0)
816			break;
817	}
818
819	if (bif == NULL) {
820		BRIDGE_UNLOCK(sc);
821		return ENOENT;
822	}
823
824	bridge_delete_member(sc, bif);
825
826	BRIDGE_UNLOCK(sc);
827
828	switch (ifs->if_type) {
829	case IFT_ETHER:
830		/*
831		 * Take the interface out of promiscuous mode.
832		 * Don't call it with holding a spin lock.
833		 */
834		(void) ifpromisc(ifs, 0);
835		(void) ether_disable_vlan_mtu(ifs);
836		break;
837	default:
838#ifdef DIAGNOSTIC
839		panic("bridge_delete_member: impossible");
840#endif
841		break;
842	}
843
844	bridge_rtdelete(sc, ifs);
845
846	if (sc->sc_if.if_flags & IFF_RUNNING)
847		bstp_initialization(sc);
848
849	return 0;
850}
851
852static int
853bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
854{
855	struct ifbreq *req = arg;
856	struct bridge_iflist *bif;
857	struct psref psref;
858
859	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
860	if (bif == NULL)
861		return (ENOENT);
862
863	req->ifbr_ifsflags = bif->bif_flags;
864	req->ifbr_state = bif->bif_state;
865	req->ifbr_priority = bif->bif_priority;
866	req->ifbr_path_cost = bif->bif_path_cost;
867	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
868
869	bridge_release_member(sc, bif, &psref);
870
871	return (0);
872}
873
874static int
875bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
876{
877	struct ifbreq *req = arg;
878	struct bridge_iflist *bif;
879	struct psref psref;
880
881	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
882	if (bif == NULL)
883		return (ENOENT);
884
885	if (req->ifbr_ifsflags & IFBIF_STP) {
886		switch (bif->bif_ifp->if_type) {
887		case IFT_ETHER:
888			/* These can do spanning tree. */
889			break;
890
891		default:
892			/* Nothing else can. */
893			bridge_release_member(sc, bif, &psref);
894			return (EINVAL);
895		}
896	}
897
898	bif->bif_flags = req->ifbr_ifsflags;
899
900	bridge_release_member(sc, bif, &psref);
901
902	if (sc->sc_if.if_flags & IFF_RUNNING)
903		bstp_initialization(sc);
904
905	return (0);
906}
907
908static int
909bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
910{
911	struct ifbrparam *param = arg;
912
913	sc->sc_brtmax = param->ifbrp_csize;
914	bridge_rttrim(sc);
915
916	return (0);
917}
918
919static int
920bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
921{
922	struct ifbrparam *param = arg;
923
924	param->ifbrp_csize = sc->sc_brtmax;
925
926	return (0);
927}
928
929static int
930bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
931{
932	struct ifbifconf *bifc = arg;
933	struct bridge_iflist *bif;
934	struct ifbreq *breqs;
935	int i, count, error = 0;
936
937retry:
938	BRIDGE_LOCK(sc);
939	count = 0;
940	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
941		count++;
942	BRIDGE_UNLOCK(sc);
943
944	if (count == 0) {
945		bifc->ifbic_len = 0;
946		return 0;
947	}
948
949	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
950		/* Tell that a larger buffer is needed */
951		bifc->ifbic_len = sizeof(*breqs) * count;
952		return 0;
953	}
954
955	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
956
957	BRIDGE_LOCK(sc);
958
959	i = 0;
960	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
961		i++;
962	if (i > count) {
963		/*
964		 * The number of members has been increased.
965		 * We need more memory!
966		 */
967		BRIDGE_UNLOCK(sc);
968		kmem_free(breqs, sizeof(*breqs) * count);
969		goto retry;
970	}
971
972	i = 0;
973	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
974		struct ifbreq *breq = &breqs[i++];
975		memset(breq, 0, sizeof(*breq));
976
977		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
978		    sizeof(breq->ifbr_ifsname));
979		breq->ifbr_ifsflags = bif->bif_flags;
980		breq->ifbr_state = bif->bif_state;
981		breq->ifbr_priority = bif->bif_priority;
982		breq->ifbr_path_cost = bif->bif_path_cost;
983		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
984	}
985
986	/* Don't call copyout with holding the mutex */
987	BRIDGE_UNLOCK(sc);
988
989	for (i = 0; i < count; i++) {
990		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
991		if (error)
992			break;
993	}
994	bifc->ifbic_len = sizeof(*breqs) * i;
995
996	kmem_free(breqs, sizeof(*breqs) * count);
997
998	return error;
999}
1000
1001static int
1002bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1003{
1004	struct ifbaconf *bac = arg;
1005	struct bridge_rtnode *brt;
1006	struct ifbareq bareq;
1007	int count = 0, error = 0, len;
1008
1009	if (bac->ifbac_len == 0)
1010		return (0);
1011
1012	BRIDGE_RT_LOCK(sc);
1013
1014	len = bac->ifbac_len;
1015	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1016		if (len < sizeof(bareq))
1017			goto out;
1018		memset(&bareq, 0, sizeof(bareq));
1019		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1020		    sizeof(bareq.ifba_ifsname));
1021		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1022		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1023			bareq.ifba_expire = brt->brt_expire - time_uptime;
1024		} else
1025			bareq.ifba_expire = 0;
1026		bareq.ifba_flags = brt->brt_flags;
1027
1028		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1029		if (error)
1030			goto out;
1031		count++;
1032		len -= sizeof(bareq);
1033	}
1034 out:
1035	BRIDGE_RT_UNLOCK(sc);
1036
1037	bac->ifbac_len = sizeof(bareq) * count;
1038	return (error);
1039}
1040
1041static int
1042bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1043{
1044	struct ifbareq *req = arg;
1045	struct bridge_iflist *bif;
1046	int error;
1047	struct psref psref;
1048
1049	bif = bridge_lookup_member(sc, req->ifba_ifsname, &psref);
1050	if (bif == NULL)
1051		return (ENOENT);
1052
1053	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1054	    req->ifba_flags);
1055
1056	bridge_release_member(sc, bif, &psref);
1057
1058	return (error);
1059}
1060
1061static int
1062bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1063{
1064	struct ifbrparam *param = arg;
1065
1066	sc->sc_brttimeout = param->ifbrp_ctime;
1067
1068	return (0);
1069}
1070
1071static int
1072bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1073{
1074	struct ifbrparam *param = arg;
1075
1076	param->ifbrp_ctime = sc->sc_brttimeout;
1077
1078	return (0);
1079}
1080
1081static int
1082bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1083{
1084	struct ifbareq *req = arg;
1085
1086	return (bridge_rtdaddr(sc, req->ifba_dst));
1087}
1088
1089static int
1090bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1091{
1092	struct ifbreq *req = arg;
1093
1094	bridge_rtflush(sc, req->ifbr_ifsflags);
1095
1096	return (0);
1097}
1098
1099static int
1100bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1101{
1102	struct ifbrparam *param = arg;
1103
1104	param->ifbrp_prio = sc->sc_bridge_priority;
1105
1106	return (0);
1107}
1108
1109static int
1110bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1111{
1112	struct ifbrparam *param = arg;
1113
1114	sc->sc_bridge_priority = param->ifbrp_prio;
1115
1116	if (sc->sc_if.if_flags & IFF_RUNNING)
1117		bstp_initialization(sc);
1118
1119	return (0);
1120}
1121
1122static int
1123bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1124{
1125	struct ifbrparam *param = arg;
1126
1127	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1128
1129	return (0);
1130}
1131
1132static int
1133bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1134{
1135	struct ifbrparam *param = arg;
1136
1137	if (param->ifbrp_hellotime == 0)
1138		return (EINVAL);
1139	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1140
1141	if (sc->sc_if.if_flags & IFF_RUNNING)
1142		bstp_initialization(sc);
1143
1144	return (0);
1145}
1146
1147static int
1148bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1149{
1150	struct ifbrparam *param = arg;
1151
1152	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1153
1154	return (0);
1155}
1156
1157static int
1158bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1159{
1160	struct ifbrparam *param = arg;
1161
1162	if (param->ifbrp_fwddelay == 0)
1163		return (EINVAL);
1164	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1165
1166	if (sc->sc_if.if_flags & IFF_RUNNING)
1167		bstp_initialization(sc);
1168
1169	return (0);
1170}
1171
1172static int
1173bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1174{
1175	struct ifbrparam *param = arg;
1176
1177	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1178
1179	return (0);
1180}
1181
1182static int
1183bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1184{
1185	struct ifbrparam *param = arg;
1186
1187	if (param->ifbrp_maxage == 0)
1188		return (EINVAL);
1189	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1190
1191	if (sc->sc_if.if_flags & IFF_RUNNING)
1192		bstp_initialization(sc);
1193
1194	return (0);
1195}
1196
1197static int
1198bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1199{
1200	struct ifbreq *req = arg;
1201	struct bridge_iflist *bif;
1202	struct psref psref;
1203
1204	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1205	if (bif == NULL)
1206		return (ENOENT);
1207
1208	bif->bif_priority = req->ifbr_priority;
1209
1210	if (sc->sc_if.if_flags & IFF_RUNNING)
1211		bstp_initialization(sc);
1212
1213	bridge_release_member(sc, bif, &psref);
1214
1215	return (0);
1216}
1217
1218#if defined(BRIDGE_IPF)
1219static int
1220bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1221{
1222	struct ifbrparam *param = arg;
1223
1224	param->ifbrp_filter = sc->sc_filter_flags;
1225
1226	return (0);
1227}
1228
1229static int
1230bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1231{
1232	struct ifbrparam *param = arg;
1233	uint32_t nflags, oflags;
1234
1235	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1236		return (EINVAL);
1237
1238	nflags = param->ifbrp_filter;
1239	oflags = sc->sc_filter_flags;
1240
1241	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1242		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1243			sc->sc_if.if_pfil);
1244	}
1245	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1246		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1247			sc->sc_if.if_pfil);
1248	}
1249
1250	sc->sc_filter_flags = nflags;
1251
1252	return (0);
1253}
1254#endif /* BRIDGE_IPF */
1255
1256static int
1257bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1258{
1259	struct ifbreq *req = arg;
1260	struct bridge_iflist *bif;
1261	struct psref psref;
1262
1263	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1264	if (bif == NULL)
1265		return (ENOENT);
1266
1267	bif->bif_path_cost = req->ifbr_path_cost;
1268
1269	if (sc->sc_if.if_flags & IFF_RUNNING)
1270		bstp_initialization(sc);
1271
1272	bridge_release_member(sc, bif, &psref);
1273
1274	return (0);
1275}
1276
1277/*
1278 * bridge_ifdetach:
1279 *
1280 *	Detach an interface from a bridge.  Called when a member
1281 *	interface is detaching.
1282 */
1283void
1284bridge_ifdetach(struct ifnet *ifp)
1285{
1286	struct bridge_softc *sc = ifp->if_bridge;
1287	struct ifbreq breq;
1288
1289	/* ioctl_lock should prevent this from happening */
1290	KASSERT(sc != NULL);
1291
1292	memset(&breq, 0, sizeof(breq));
1293	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1294
1295	(void) bridge_ioctl_del(sc, &breq);
1296}
1297
1298/*
1299 * bridge_init:
1300 *
1301 *	Initialize a bridge interface.
1302 */
1303static int
1304bridge_init(struct ifnet *ifp)
1305{
1306	struct bridge_softc *sc = ifp->if_softc;
1307
1308	if (ifp->if_flags & IFF_RUNNING)
1309		return (0);
1310
1311	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1312	    bridge_timer, sc);
1313
1314	ifp->if_flags |= IFF_RUNNING;
1315	bstp_initialization(sc);
1316	return (0);
1317}
1318
1319/*
1320 * bridge_stop:
1321 *
1322 *	Stop the bridge interface.
1323 */
1324static void
1325bridge_stop(struct ifnet *ifp, int disable)
1326{
1327	struct bridge_softc *sc = ifp->if_softc;
1328
1329	if ((ifp->if_flags & IFF_RUNNING) == 0)
1330		return;
1331
1332	callout_stop(&sc->sc_brcallout);
1333	bstp_stop(sc);
1334
1335	bridge_rtflush(sc, IFBF_FLUSHDYN);
1336
1337	ifp->if_flags &= ~IFF_RUNNING;
1338}
1339
1340/*
1341 * bridge_enqueue:
1342 *
1343 *	Enqueue a packet on a bridge member interface.
1344 */
1345void
1346bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1347    int runfilt)
1348{
1349	int len, error;
1350	short mflags;
1351
1352	/*
1353	 * Clear any in-bound checksum flags for this packet.
1354	 */
1355	m->m_pkthdr.csum_flags = 0;
1356
1357	if (runfilt) {
1358		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1359		    dst_ifp, PFIL_OUT) != 0) {
1360			if (m != NULL)
1361				m_freem(m);
1362			return;
1363		}
1364		if (m == NULL)
1365			return;
1366	}
1367
1368#ifdef ALTQ
1369	/*
1370	 * If ALTQ is enabled on the member interface, do
1371	 * classification; the queueing discipline might
1372	 * not require classification, but might require
1373	 * the address family/header pointer in the pktattr.
1374	 */
1375	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1376		/* XXX IFT_ETHER */
1377		altq_etherclassify(&dst_ifp->if_snd, m);
1378	}
1379#endif /* ALTQ */
1380
1381	len = m->m_pkthdr.len;
1382	mflags = m->m_flags;
1383
1384	IFQ_ENQUEUE(&dst_ifp->if_snd, m, error);
1385
1386	if (error) {
1387		/* mbuf is already freed */
1388		sc->sc_if.if_oerrors++;
1389		return;
1390	}
1391
1392	sc->sc_if.if_opackets++;
1393	sc->sc_if.if_obytes += len;
1394
1395	dst_ifp->if_obytes += len;
1396
1397	if (mflags & M_MCAST) {
1398		sc->sc_if.if_omcasts++;
1399		dst_ifp->if_omcasts++;
1400	}
1401
1402	if ((dst_ifp->if_flags & IFF_OACTIVE) == 0)
1403		(*dst_ifp->if_start)(dst_ifp);
1404}
1405
1406/*
1407 * bridge_output:
1408 *
1409 *	Send output from a bridge member interface.  This
1410 *	performs the bridging function for locally originated
1411 *	packets.
1412 *
1413 *	The mbuf has the Ethernet header already attached.  We must
1414 *	enqueue or free the mbuf before returning.
1415 */
1416int
1417bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
1418    const struct rtentry *rt)
1419{
1420	struct ether_header *eh;
1421	struct ifnet *dst_if;
1422	struct bridge_softc *sc;
1423	int s;
1424
1425	if (m->m_len < ETHER_HDR_LEN) {
1426		m = m_pullup(m, ETHER_HDR_LEN);
1427		if (m == NULL)
1428			return (0);
1429	}
1430
1431	eh = mtod(m, struct ether_header *);
1432	sc = ifp->if_bridge;
1433
1434	/*
1435	 * If bridge is down, but the original output interface is up,
1436	 * go ahead and send out that interface.  Otherwise, the packet
1437	 * is dropped below.
1438	 */
1439	if (__predict_false(sc == NULL) ||
1440	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1441		dst_if = ifp;
1442		goto sendunicast;
1443	}
1444
1445	/*
1446	 * If the packet is a multicast, or we don't know a better way to
1447	 * get there, send to all interfaces.
1448	 */
1449	if (ETHER_IS_MULTICAST(eh->ether_dhost))
1450		dst_if = NULL;
1451	else
1452		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1453	if (dst_if == NULL) {
1454		struct bridge_iflist *bif;
1455		struct mbuf *mc;
1456		bool used = false;
1457
1458		BRIDGE_PSZ_RENTER(s);
1459		BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
1460			struct psref psref;
1461
1462			bridge_acquire_member(sc, bif, &psref);
1463			BRIDGE_PSZ_REXIT(s);
1464
1465			dst_if = bif->bif_ifp;
1466			if ((dst_if->if_flags & IFF_RUNNING) == 0)
1467				goto next;
1468
1469			/*
1470			 * If this is not the original output interface,
1471			 * and the interface is participating in spanning
1472			 * tree, make sure the port is in a state that
1473			 * allows forwarding.
1474			 */
1475			if (dst_if != ifp &&
1476			    (bif->bif_flags & IFBIF_STP) != 0) {
1477				switch (bif->bif_state) {
1478				case BSTP_IFSTATE_BLOCKING:
1479				case BSTP_IFSTATE_LISTENING:
1480				case BSTP_IFSTATE_DISABLED:
1481					goto next;
1482				}
1483			}
1484
1485			if (PSLIST_READER_NEXT(bif, struct bridge_iflist,
1486			    bif_next) == NULL) {
1487				used = true;
1488				mc = m;
1489			} else {
1490				mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
1491				if (mc == NULL) {
1492					sc->sc_if.if_oerrors++;
1493					goto next;
1494				}
1495			}
1496
1497#ifndef NET_MPSAFE
1498			s = splnet();
1499#endif
1500			bridge_enqueue(sc, dst_if, mc, 0);
1501#ifndef NET_MPSAFE
1502			splx(s);
1503#endif
1504next:
1505			BRIDGE_PSZ_RENTER(s);
1506			bridge_release_member(sc, bif, &psref);
1507
1508			/* Guarantee we don't re-enter the loop as we already
1509			 * decided we're at the end. */
1510			if (used)
1511				break;
1512		}
1513		BRIDGE_PSZ_REXIT(s);
1514
1515		if (!used)
1516			m_freem(m);
1517		return (0);
1518	}
1519
1520 sendunicast:
1521	/*
1522	 * XXX Spanning tree consideration here?
1523	 */
1524
1525	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1526		m_freem(m);
1527		return (0);
1528	}
1529
1530#ifndef NET_MPSAFE
1531	s = splnet();
1532#endif
1533	bridge_enqueue(sc, dst_if, m, 0);
1534#ifndef NET_MPSAFE
1535	splx(s);
1536#endif
1537
1538	return (0);
1539}
1540
1541/*
1542 * bridge_start:
1543 *
1544 *	Start output on a bridge.
1545 *
1546 *	NOTE: This routine should never be called in this implementation.
1547 */
1548static void
1549bridge_start(struct ifnet *ifp)
1550{
1551
1552	printf("%s: bridge_start() called\n", ifp->if_xname);
1553}
1554
1555/*
1556 * bridge_forward:
1557 *
1558 *	The forwarding function of the bridge.
1559 */
1560static void
1561bridge_forward(struct bridge_softc *sc, struct mbuf *m)
1562{
1563	struct bridge_iflist *bif;
1564	struct ifnet *src_if, *dst_if;
1565	struct ether_header *eh;
1566	struct psref psref;
1567	DECLARE_LOCK_VARIABLE;
1568
1569	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0)
1570		return;
1571
1572	src_if = m->m_pkthdr.rcvif;
1573
1574	sc->sc_if.if_ipackets++;
1575	sc->sc_if.if_ibytes += m->m_pkthdr.len;
1576
1577	/*
1578	 * Look up the bridge_iflist.
1579	 */
1580	bif = bridge_lookup_member_if(sc, src_if, &psref);
1581	if (bif == NULL) {
1582		/* Interface is not a bridge member (anymore?) */
1583		m_freem(m);
1584		goto out;
1585	}
1586
1587	if (bif->bif_flags & IFBIF_STP) {
1588		switch (bif->bif_state) {
1589		case BSTP_IFSTATE_BLOCKING:
1590		case BSTP_IFSTATE_LISTENING:
1591		case BSTP_IFSTATE_DISABLED:
1592			m_freem(m);
1593			bridge_release_member(sc, bif, &psref);
1594			goto out;
1595		}
1596	}
1597
1598	eh = mtod(m, struct ether_header *);
1599
1600	/*
1601	 * If the interface is learning, and the source
1602	 * address is valid and not multicast, record
1603	 * the address.
1604	 */
1605	if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1606	    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1607	    (eh->ether_shost[0] == 0 &&
1608	     eh->ether_shost[1] == 0 &&
1609	     eh->ether_shost[2] == 0 &&
1610	     eh->ether_shost[3] == 0 &&
1611	     eh->ether_shost[4] == 0 &&
1612	     eh->ether_shost[5] == 0) == 0) {
1613		(void) bridge_rtupdate(sc, eh->ether_shost,
1614		    src_if, 0, IFBAF_DYNAMIC);
1615	}
1616
1617	if ((bif->bif_flags & IFBIF_STP) != 0 &&
1618	    bif->bif_state == BSTP_IFSTATE_LEARNING) {
1619		m_freem(m);
1620		bridge_release_member(sc, bif, &psref);
1621		goto out;
1622	}
1623
1624	bridge_release_member(sc, bif, &psref);
1625
1626	/*
1627	 * At this point, the port either doesn't participate
1628	 * in spanning tree or it is in the forwarding state.
1629	 */
1630
1631	/*
1632	 * If the packet is unicast, destined for someone on
1633	 * "this" side of the bridge, drop it.
1634	 */
1635	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1636		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1637		if (src_if == dst_if) {
1638			m_freem(m);
1639			goto out;
1640		}
1641	} else {
1642		/* ...forward it to all interfaces. */
1643		sc->sc_if.if_imcasts++;
1644		dst_if = NULL;
1645	}
1646
1647	if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1648	    m->m_pkthdr.rcvif, PFIL_IN) != 0) {
1649		if (m != NULL)
1650			m_freem(m);
1651		goto out;
1652	}
1653	if (m == NULL)
1654		goto out;
1655
1656	if (dst_if == NULL) {
1657		bridge_broadcast(sc, src_if, m);
1658		goto out;
1659	}
1660
1661	/*
1662	 * At this point, we're dealing with a unicast frame
1663	 * going to a different interface.
1664	 */
1665	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1666		m_freem(m);
1667		goto out;
1668	}
1669
1670	bif = bridge_lookup_member_if(sc, dst_if, &psref);
1671	if (bif == NULL) {
1672		/* Not a member of the bridge (anymore?) */
1673		m_freem(m);
1674		goto out;
1675	}
1676
1677	if (bif->bif_flags & IFBIF_STP) {
1678		switch (bif->bif_state) {
1679		case BSTP_IFSTATE_DISABLED:
1680		case BSTP_IFSTATE_BLOCKING:
1681			m_freem(m);
1682			bridge_release_member(sc, bif, &psref);
1683			goto out;
1684		}
1685	}
1686
1687	bridge_release_member(sc, bif, &psref);
1688
1689	ACQUIRE_GLOBAL_LOCKS();
1690	bridge_enqueue(sc, dst_if, m, 1);
1691	RELEASE_GLOBAL_LOCKS();
1692out:
1693	/* XXX gcc */
1694	return;
1695}
1696
1697static bool
1698bstp_state_before_learning(struct bridge_iflist *bif)
1699{
1700	if (bif->bif_flags & IFBIF_STP) {
1701		switch (bif->bif_state) {
1702		case BSTP_IFSTATE_BLOCKING:
1703		case BSTP_IFSTATE_LISTENING:
1704		case BSTP_IFSTATE_DISABLED:
1705			return true;
1706		}
1707	}
1708	return false;
1709}
1710
1711static bool
1712bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1713{
1714	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1715
1716	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1717#if NCARP > 0
1718	    || (bif->bif_ifp->if_carp &&
1719	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1720#endif /* NCARP > 0 */
1721	    )
1722		return true;
1723
1724	return false;
1725}
1726
1727/*
1728 * bridge_input:
1729 *
1730 *	Receive input from a member interface.  Queue the packet for
1731 *	bridging if it is not for us.
1732 */
1733static void
1734bridge_input(struct ifnet *ifp, struct mbuf *m)
1735{
1736	struct bridge_softc *sc = ifp->if_bridge;
1737	struct bridge_iflist *bif;
1738	struct ether_header *eh;
1739	struct psref psref;
1740	DECLARE_LOCK_VARIABLE;
1741
1742	KASSERT(!cpu_intr_p());
1743
1744	if (__predict_false(sc == NULL) ||
1745	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1746		ACQUIRE_GLOBAL_LOCKS();
1747		ether_input(ifp, m);
1748		RELEASE_GLOBAL_LOCKS();
1749		return;
1750	}
1751
1752	bif = bridge_lookup_member_if(sc, ifp, &psref);
1753	if (bif == NULL) {
1754		ACQUIRE_GLOBAL_LOCKS();
1755		ether_input(ifp, m);
1756		RELEASE_GLOBAL_LOCKS();
1757		return;
1758	}
1759
1760	eh = mtod(m, struct ether_header *);
1761
1762	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1763		if (memcmp(etherbroadcastaddr,
1764		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
1765			m->m_flags |= M_BCAST;
1766		else
1767			m->m_flags |= M_MCAST;
1768	}
1769
1770	/*
1771	 * A 'fast' path for packets addressed to interfaces that are
1772	 * part of this bridge.
1773	 */
1774	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
1775	    !bstp_state_before_learning(bif)) {
1776		struct bridge_iflist *_bif;
1777		struct ifnet *_ifp = NULL;
1778		int s;
1779		struct psref _psref;
1780
1781		BRIDGE_PSZ_RENTER(s);
1782		BRIDGE_IFLIST_READER_FOREACH(_bif, sc) {
1783			/* It is destined for us. */
1784			if (bridge_ourether(_bif, eh, 0)) {
1785				bridge_acquire_member(sc, _bif, &_psref);
1786				BRIDGE_PSZ_REXIT(s);
1787				if (_bif->bif_flags & IFBIF_LEARNING)
1788					(void) bridge_rtupdate(sc,
1789					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
1790				_ifp = m->m_pkthdr.rcvif = _bif->bif_ifp;
1791				bridge_release_member(sc, _bif, &_psref);
1792				goto out;
1793			}
1794
1795			/* We just received a packet that we sent out. */
1796			if (bridge_ourether(_bif, eh, 1))
1797				break;
1798		}
1799		BRIDGE_PSZ_REXIT(s);
1800out:
1801
1802		if (_bif != NULL) {
1803			bridge_release_member(sc, bif, &psref);
1804			if (_ifp != NULL) {
1805				m->m_flags &= ~M_PROMISC;
1806				ACQUIRE_GLOBAL_LOCKS();
1807				ether_input(_ifp, m);
1808				RELEASE_GLOBAL_LOCKS();
1809			} else
1810				m_freem(m);
1811			return;
1812		}
1813	}
1814
1815	/* Tap off 802.1D packets; they do not get forwarded. */
1816	if (bif->bif_flags & IFBIF_STP &&
1817	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
1818		bstp_input(sc, bif, m);
1819		bridge_release_member(sc, bif, &psref);
1820		return;
1821	}
1822
1823	/*
1824	 * A normal switch would discard the packet here, but that's not what
1825	 * we've done historically. This also prevents some obnoxious behaviour.
1826	 */
1827	if (bstp_state_before_learning(bif)) {
1828		bridge_release_member(sc, bif, &psref);
1829		ACQUIRE_GLOBAL_LOCKS();
1830		ether_input(ifp, m);
1831		RELEASE_GLOBAL_LOCKS();
1832		return;
1833	}
1834
1835	bridge_release_member(sc, bif, &psref);
1836
1837	bridge_forward(sc, m);
1838}
1839
1840/*
1841 * bridge_broadcast:
1842 *
1843 *	Send a frame to all interfaces that are members of
1844 *	the bridge, except for the one on which the packet
1845 *	arrived.
1846 */
1847static void
1848bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
1849    struct mbuf *m)
1850{
1851	struct bridge_iflist *bif;
1852	struct mbuf *mc;
1853	struct ifnet *dst_if;
1854	bool bmcast;
1855	int s;
1856	DECLARE_LOCK_VARIABLE;
1857
1858	bmcast = m->m_flags & (M_BCAST|M_MCAST);
1859
1860	BRIDGE_PSZ_RENTER(s);
1861	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
1862		struct psref psref;
1863
1864		bridge_acquire_member(sc, bif, &psref);
1865		BRIDGE_PSZ_REXIT(s);
1866
1867		dst_if = bif->bif_ifp;
1868
1869		if (bif->bif_flags & IFBIF_STP) {
1870			switch (bif->bif_state) {
1871			case BSTP_IFSTATE_BLOCKING:
1872			case BSTP_IFSTATE_DISABLED:
1873				goto next;
1874			}
1875		}
1876
1877		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
1878			goto next;
1879
1880		if ((dst_if->if_flags & IFF_RUNNING) == 0)
1881			goto next;
1882
1883		if (dst_if != src_if) {
1884			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1885			if (mc == NULL) {
1886				sc->sc_if.if_oerrors++;
1887				goto next;
1888			}
1889			ACQUIRE_GLOBAL_LOCKS();
1890			bridge_enqueue(sc, dst_if, mc, 1);
1891			RELEASE_GLOBAL_LOCKS();
1892		}
1893
1894		if (bmcast) {
1895			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1896			if (mc == NULL) {
1897				sc->sc_if.if_oerrors++;
1898				goto next;
1899			}
1900
1901			mc->m_pkthdr.rcvif = dst_if;
1902			mc->m_flags &= ~M_PROMISC;
1903
1904			ACQUIRE_GLOBAL_LOCKS();
1905			ether_input(dst_if, mc);
1906			RELEASE_GLOBAL_LOCKS();
1907		}
1908next:
1909		BRIDGE_PSZ_RENTER(s);
1910		bridge_release_member(sc, bif, &psref);
1911	}
1912	BRIDGE_PSZ_REXIT(s);
1913
1914	m_freem(m);
1915}
1916
1917static int
1918bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
1919    struct bridge_rtnode **brtp)
1920{
1921	struct bridge_rtnode *brt;
1922	int error;
1923
1924	if (sc->sc_brtcnt >= sc->sc_brtmax)
1925		return ENOSPC;
1926
1927	/*
1928	 * Allocate a new bridge forwarding node, and
1929	 * initialize the expiration time and Ethernet
1930	 * address.
1931	 */
1932	brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
1933	if (brt == NULL)
1934		return ENOMEM;
1935
1936	memset(brt, 0, sizeof(*brt));
1937	brt->brt_expire = time_uptime + sc->sc_brttimeout;
1938	brt->brt_flags = IFBAF_DYNAMIC;
1939	memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
1940
1941	BRIDGE_RT_LOCK(sc);
1942	error = bridge_rtnode_insert(sc, brt);
1943	BRIDGE_RT_UNLOCK(sc);
1944
1945	if (error != 0) {
1946		pool_put(&bridge_rtnode_pool, brt);
1947		return error;
1948	}
1949
1950	*brtp = brt;
1951	return 0;
1952}
1953
1954/*
1955 * bridge_rtupdate:
1956 *
1957 *	Add a bridge routing entry.
1958 */
1959static int
1960bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
1961    struct ifnet *dst_if, int setflags, uint8_t flags)
1962{
1963	struct bridge_rtnode *brt;
1964	int s;
1965
1966again:
1967	/*
1968	 * A route for this destination might already exist.  If so,
1969	 * update it, otherwise create a new one.
1970	 */
1971	BRIDGE_RT_RENTER(s);
1972	brt = bridge_rtnode_lookup(sc, dst);
1973
1974	if (brt != NULL) {
1975		brt->brt_ifp = dst_if;
1976		if (setflags) {
1977			brt->brt_flags = flags;
1978			if (flags & IFBAF_STATIC)
1979				brt->brt_expire = 0;
1980			else
1981				brt->brt_expire = time_uptime + sc->sc_brttimeout;
1982		} else {
1983			if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
1984				brt->brt_expire = time_uptime + sc->sc_brttimeout;
1985		}
1986	}
1987	BRIDGE_RT_REXIT(s);
1988
1989	if (brt == NULL) {
1990		int r;
1991
1992		r = bridge_rtalloc(sc, dst, &brt);
1993		if (r != 0)
1994			return r;
1995		goto again;
1996	}
1997
1998	return 0;
1999}
2000
2001/*
2002 * bridge_rtlookup:
2003 *
2004 *	Lookup the destination interface for an address.
2005 */
2006static struct ifnet *
2007bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2008{
2009	struct bridge_rtnode *brt;
2010	struct ifnet *ifs = NULL;
2011	int s;
2012
2013	BRIDGE_RT_RENTER(s);
2014	brt = bridge_rtnode_lookup(sc, addr);
2015	if (brt != NULL)
2016		ifs = brt->brt_ifp;
2017	BRIDGE_RT_REXIT(s);
2018
2019	return ifs;
2020}
2021
2022typedef bool (*bridge_iterate_cb_t)
2023    (struct bridge_softc *, struct bridge_rtnode *, bool *, void *);
2024
2025/*
2026 * bridge_rtlist_iterate_remove:
2027 *
2028 *	It iterates on sc->sc_rtlist and removes rtnodes of it which func
2029 *	callback judges to remove. Removals of rtnodes are done in a manner
2030 *	of pserialize. To this end, all kmem_* operations are placed out of
2031 *	mutexes.
2032 */
2033static void
2034bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2035{
2036	struct bridge_rtnode *brt, *nbrt;
2037	struct bridge_rtnode **brt_list;
2038	int i, count;
2039
2040retry:
2041	count = sc->sc_brtcnt;
2042	if (count == 0)
2043		return;
2044	brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2045
2046	BRIDGE_RT_LOCK(sc);
2047	if (__predict_false(sc->sc_brtcnt > count)) {
2048		/* The rtnodes increased, we need more memory */
2049		BRIDGE_RT_UNLOCK(sc);
2050		kmem_free(brt_list, sizeof(*brt_list) * count);
2051		goto retry;
2052	}
2053
2054	i = 0;
2055	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2056		bool need_break = false;
2057		if (func(sc, brt, &need_break, arg)) {
2058			bridge_rtnode_remove(sc, brt);
2059			brt_list[i++] = brt;
2060		}
2061		if (need_break)
2062			break;
2063	}
2064
2065	if (i > 0)
2066		BRIDGE_RT_PSZ_PERFORM(sc);
2067	BRIDGE_RT_UNLOCK(sc);
2068
2069	while (--i >= 0)
2070		bridge_rtnode_destroy(brt_list[i]);
2071
2072	kmem_free(brt_list, sizeof(*brt_list) * count);
2073}
2074
2075static bool
2076bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2077    bool *need_break, void *arg)
2078{
2079	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2080		/* Take into account of the subsequent removal */
2081		if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2082			*need_break = true;
2083		return true;
2084	} else
2085		return false;
2086}
2087
2088static void
2089bridge_rttrim0(struct bridge_softc *sc)
2090{
2091	bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2092}
2093
2094/*
2095 * bridge_rttrim:
2096 *
2097 *	Trim the routine table so that we have a number
2098 *	of routing entries less than or equal to the
2099 *	maximum number.
2100 */
2101static void
2102bridge_rttrim(struct bridge_softc *sc)
2103{
2104
2105	/* Make sure we actually need to do this. */
2106	if (sc->sc_brtcnt <= sc->sc_brtmax)
2107		return;
2108
2109	/* Force an aging cycle; this might trim enough addresses. */
2110	bridge_rtage(sc);
2111	if (sc->sc_brtcnt <= sc->sc_brtmax)
2112		return;
2113
2114	bridge_rttrim0(sc);
2115
2116	return;
2117}
2118
2119/*
2120 * bridge_timer:
2121 *
2122 *	Aging timer for the bridge.
2123 */
2124static void
2125bridge_timer(void *arg)
2126{
2127	struct bridge_softc *sc = arg;
2128
2129	workqueue_enqueue(sc->sc_rtage_wq, &sc->sc_rtage_wk, NULL);
2130}
2131
2132static void
2133bridge_rtage_work(struct work *wk, void *arg)
2134{
2135	struct bridge_softc *sc = arg;
2136
2137	KASSERT(wk == &sc->sc_rtage_wk);
2138
2139	bridge_rtage(sc);
2140
2141	if (sc->sc_if.if_flags & IFF_RUNNING)
2142		callout_reset(&sc->sc_brcallout,
2143		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2144}
2145
2146static bool
2147bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2148    bool *need_break, void *arg)
2149{
2150	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2151	    time_uptime >= brt->brt_expire)
2152		return true;
2153	else
2154		return false;
2155}
2156
2157/*
2158 * bridge_rtage:
2159 *
2160 *	Perform an aging cycle.
2161 */
2162static void
2163bridge_rtage(struct bridge_softc *sc)
2164{
2165	bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2166}
2167
2168
2169static bool
2170bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2171    bool *need_break, void *arg)
2172{
2173	int full = *(int*)arg;
2174
2175	if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2176		return true;
2177	else
2178		return false;
2179}
2180
2181/*
2182 * bridge_rtflush:
2183 *
2184 *	Remove all dynamic addresses from the bridge.
2185 */
2186static void
2187bridge_rtflush(struct bridge_softc *sc, int full)
2188{
2189	bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2190}
2191
2192/*
2193 * bridge_rtdaddr:
2194 *
2195 *	Remove an address from the table.
2196 */
2197static int
2198bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2199{
2200	struct bridge_rtnode *brt;
2201
2202	BRIDGE_RT_LOCK(sc);
2203	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL) {
2204		BRIDGE_RT_UNLOCK(sc);
2205		return ENOENT;
2206	}
2207	bridge_rtnode_remove(sc, brt);
2208	BRIDGE_RT_PSZ_PERFORM(sc);
2209	BRIDGE_RT_UNLOCK(sc);
2210
2211	bridge_rtnode_destroy(brt);
2212
2213	return 0;
2214}
2215
2216/*
2217 * bridge_rtdelete:
2218 *
2219 *	Delete routes to a speicifc member interface.
2220 */
2221static void
2222bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2223{
2224	struct bridge_rtnode *brt;
2225
2226	BRIDGE_RT_LOCK(sc);
2227	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
2228		if (brt->brt_ifp == ifp)
2229			break;
2230	}
2231	if (brt == NULL) {
2232		BRIDGE_RT_UNLOCK(sc);
2233		return;
2234	}
2235	bridge_rtnode_remove(sc, brt);
2236	BRIDGE_RT_PSZ_PERFORM(sc);
2237	BRIDGE_RT_UNLOCK(sc);
2238
2239	bridge_rtnode_destroy(brt);
2240}
2241
2242/*
2243 * bridge_rtable_init:
2244 *
2245 *	Initialize the route table for this bridge.
2246 */
2247static void
2248bridge_rtable_init(struct bridge_softc *sc)
2249{
2250	int i;
2251
2252	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2253	    KM_SLEEP);
2254
2255	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2256		LIST_INIT(&sc->sc_rthash[i]);
2257
2258	sc->sc_rthash_key = cprng_fast32();
2259
2260	LIST_INIT(&sc->sc_rtlist);
2261
2262	sc->sc_rtlist_psz = pserialize_create();
2263	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2264}
2265
2266/*
2267 * bridge_rtable_fini:
2268 *
2269 *	Deconstruct the route table for this bridge.
2270 */
2271static void
2272bridge_rtable_fini(struct bridge_softc *sc)
2273{
2274
2275	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2276	if (sc->sc_rtlist_lock)
2277		mutex_obj_free(sc->sc_rtlist_lock);
2278	if (sc->sc_rtlist_psz)
2279		pserialize_destroy(sc->sc_rtlist_psz);
2280}
2281
2282/*
2283 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2284 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2285 */
2286#define	mix(a, b, c)							\
2287do {									\
2288	a -= b; a -= c; a ^= (c >> 13);					\
2289	b -= c; b -= a; b ^= (a << 8);					\
2290	c -= a; c -= b; c ^= (b >> 13);					\
2291	a -= b; a -= c; a ^= (c >> 12);					\
2292	b -= c; b -= a; b ^= (a << 16);					\
2293	c -= a; c -= b; c ^= (b >> 5);					\
2294	a -= b; a -= c; a ^= (c >> 3);					\
2295	b -= c; b -= a; b ^= (a << 10);					\
2296	c -= a; c -= b; c ^= (b >> 15);					\
2297} while (/*CONSTCOND*/0)
2298
2299static inline uint32_t
2300bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2301{
2302	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2303
2304	b += addr[5] << 8;
2305	b += addr[4];
2306	a += addr[3] << 24;
2307	a += addr[2] << 16;
2308	a += addr[1] << 8;
2309	a += addr[0];
2310
2311	mix(a, b, c);
2312
2313	return (c & BRIDGE_RTHASH_MASK);
2314}
2315
2316#undef mix
2317
2318/*
2319 * bridge_rtnode_lookup:
2320 *
2321 *	Look up a bridge route node for the specified destination.
2322 */
2323static struct bridge_rtnode *
2324bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2325{
2326	struct bridge_rtnode *brt;
2327	uint32_t hash;
2328	int dir;
2329
2330	hash = bridge_rthash(sc, addr);
2331	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
2332		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2333		if (dir == 0)
2334			return (brt);
2335		if (dir > 0)
2336			return (NULL);
2337	}
2338
2339	return (NULL);
2340}
2341
2342/*
2343 * bridge_rtnode_insert:
2344 *
2345 *	Insert the specified bridge node into the route table.  We
2346 *	assume the entry is not already in the table.
2347 */
2348static int
2349bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2350{
2351	struct bridge_rtnode *lbrt;
2352	uint32_t hash;
2353	int dir;
2354
2355	KASSERT(BRIDGE_RT_LOCKED(sc));
2356
2357	hash = bridge_rthash(sc, brt->brt_addr);
2358
2359	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
2360	if (lbrt == NULL) {
2361		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
2362		goto out;
2363	}
2364
2365	do {
2366		dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2367		if (dir == 0)
2368			return (EEXIST);
2369		if (dir > 0) {
2370			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
2371			goto out;
2372		}
2373		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
2374			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
2375			goto out;
2376		}
2377		lbrt = LIST_NEXT(lbrt, brt_hash);
2378	} while (lbrt != NULL);
2379
2380#ifdef DIAGNOSTIC
2381	panic("bridge_rtnode_insert: impossible");
2382#endif
2383
2384 out:
2385	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
2386	sc->sc_brtcnt++;
2387
2388	return (0);
2389}
2390
2391/*
2392 * bridge_rtnode_remove:
2393 *
2394 *	Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2395 */
2396static void
2397bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2398{
2399
2400	KASSERT(BRIDGE_RT_LOCKED(sc));
2401
2402	LIST_REMOVE(brt, brt_hash);
2403	LIST_REMOVE(brt, brt_list);
2404	sc->sc_brtcnt--;
2405}
2406
2407/*
2408 * bridge_rtnode_destroy:
2409 *
2410 *	Destroy a bridge rtnode.
2411 */
2412static void
2413bridge_rtnode_destroy(struct bridge_rtnode *brt)
2414{
2415
2416	pool_put(&bridge_rtnode_pool, brt);
2417}
2418
2419#if defined(BRIDGE_IPF)
2420extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2421extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2422
2423/*
2424 * Send bridge packets through IPF if they are one of the types IPF can deal
2425 * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2426 * question.)
2427 */
2428static int
2429bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2430{
2431	int snap, error;
2432	struct ether_header *eh1, eh2;
2433	struct llc llc1;
2434	uint16_t ether_type;
2435
2436	snap = 0;
2437	error = -1;	/* Default error if not error == 0 */
2438	eh1 = mtod(*mp, struct ether_header *);
2439	ether_type = ntohs(eh1->ether_type);
2440
2441	/*
2442	 * Check for SNAP/LLC.
2443	 */
2444        if (ether_type < ETHERMTU) {
2445                struct llc *llc2 = (struct llc *)(eh1 + 1);
2446
2447                if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2448                    llc2->llc_dsap == LLC_SNAP_LSAP &&
2449                    llc2->llc_ssap == LLC_SNAP_LSAP &&
2450                    llc2->llc_control == LLC_UI) {
2451                	ether_type = htons(llc2->llc_un.type_snap.ether_type);
2452			snap = 1;
2453                }
2454        }
2455
2456	/*
2457	 * If we're trying to filter bridge traffic, don't look at anything
2458	 * other than IP and ARP traffic.  If the filter doesn't understand
2459	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2460	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2461	 * but of course we don't have an AppleTalk filter to begin with.
2462	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2463	 * ARP traffic.)
2464	 */
2465	switch (ether_type) {
2466		case ETHERTYPE_ARP:
2467		case ETHERTYPE_REVARP:
2468			return 0; /* Automatically pass */
2469		case ETHERTYPE_IP:
2470# ifdef INET6
2471		case ETHERTYPE_IPV6:
2472# endif /* INET6 */
2473			break;
2474		default:
2475			goto bad;
2476	}
2477
2478	/* Strip off the Ethernet header and keep a copy. */
2479	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2480	m_adj(*mp, ETHER_HDR_LEN);
2481
2482	/* Strip off snap header, if present */
2483	if (snap) {
2484		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2485		m_adj(*mp, sizeof(struct llc));
2486	}
2487
2488	/*
2489	 * Check basic packet sanity and run IPF through pfil.
2490	 */
2491	KASSERT(!cpu_intr_p());
2492	switch (ether_type)
2493	{
2494	case ETHERTYPE_IP :
2495		error = (dir == PFIL_IN) ? bridge_ip_checkbasic(mp) : 0;
2496		if (error == 0)
2497			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2498		break;
2499# ifdef INET6
2500	case ETHERTYPE_IPV6 :
2501		error = (dir == PFIL_IN) ? bridge_ip6_checkbasic(mp) : 0;
2502		if (error == 0)
2503			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2504		break;
2505# endif
2506	default :
2507		error = 0;
2508		break;
2509	}
2510
2511	if (*mp == NULL)
2512		return error;
2513	if (error != 0)
2514		goto bad;
2515
2516	error = -1;
2517
2518	/*
2519	 * Finally, put everything back the way it was and return
2520	 */
2521	if (snap) {
2522		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2523		if (*mp == NULL)
2524			return error;
2525		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2526	}
2527
2528	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2529	if (*mp == NULL)
2530		return error;
2531	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2532
2533	return 0;
2534
2535    bad:
2536	m_freem(*mp);
2537	*mp = NULL;
2538	return error;
2539}
2540
2541/*
2542 * Perform basic checks on header size since
2543 * IPF assumes ip_input has already processed
2544 * it for it.  Cut-and-pasted from ip_input.c.
2545 * Given how simple the IPv6 version is,
2546 * does the IPv4 version really need to be
2547 * this complicated?
2548 *
2549 * XXX Should we update ipstat here, or not?
2550 * XXX Right now we update ipstat but not
2551 * XXX csum_counter.
2552 */
2553static int
2554bridge_ip_checkbasic(struct mbuf **mp)
2555{
2556	struct mbuf *m = *mp;
2557	struct ip *ip;
2558	int len, hlen;
2559
2560	if (*mp == NULL)
2561		return -1;
2562
2563	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2564		if ((m = m_copyup(m, sizeof(struct ip),
2565			(max_linkhdr + 3) & ~3)) == NULL) {
2566			/* XXXJRT new stat, please */
2567			ip_statinc(IP_STAT_TOOSMALL);
2568			goto bad;
2569		}
2570	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2571		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2572			ip_statinc(IP_STAT_TOOSMALL);
2573			goto bad;
2574		}
2575	}
2576	ip = mtod(m, struct ip *);
2577	if (ip == NULL) goto bad;
2578
2579	if (ip->ip_v != IPVERSION) {
2580		ip_statinc(IP_STAT_BADVERS);
2581		goto bad;
2582	}
2583	hlen = ip->ip_hl << 2;
2584	if (hlen < sizeof(struct ip)) { /* minimum header length */
2585		ip_statinc(IP_STAT_BADHLEN);
2586		goto bad;
2587	}
2588	if (hlen > m->m_len) {
2589		if ((m = m_pullup(m, hlen)) == 0) {
2590			ip_statinc(IP_STAT_BADHLEN);
2591			goto bad;
2592		}
2593		ip = mtod(m, struct ip *);
2594		if (ip == NULL) goto bad;
2595	}
2596
2597        switch (m->m_pkthdr.csum_flags &
2598                ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
2599                 M_CSUM_IPv4_BAD)) {
2600        case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2601                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2602                goto bad;
2603
2604        case M_CSUM_IPv4:
2605                /* Checksum was okay. */
2606                /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2607                break;
2608
2609        default:
2610                /* Must compute it ourselves. */
2611                /* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2612                if (in_cksum(m, hlen) != 0)
2613                        goto bad;
2614                break;
2615        }
2616
2617        /* Retrieve the packet length. */
2618        len = ntohs(ip->ip_len);
2619
2620        /*
2621         * Check for additional length bogosity
2622         */
2623        if (len < hlen) {
2624		ip_statinc(IP_STAT_BADLEN);
2625                goto bad;
2626        }
2627
2628        /*
2629         * Check that the amount of data in the buffers
2630         * is as at least much as the IP header would have us expect.
2631         * Drop packet if shorter than we expect.
2632         */
2633        if (m->m_pkthdr.len < len) {
2634		ip_statinc(IP_STAT_TOOSHORT);
2635                goto bad;
2636        }
2637
2638	/* Checks out, proceed */
2639	*mp = m;
2640	return 0;
2641
2642    bad:
2643	*mp = m;
2644	return -1;
2645}
2646
2647# ifdef INET6
2648/*
2649 * Same as above, but for IPv6.
2650 * Cut-and-pasted from ip6_input.c.
2651 * XXX Should we update ip6stat, or not?
2652 */
2653static int
2654bridge_ip6_checkbasic(struct mbuf **mp)
2655{
2656	struct mbuf *m = *mp;
2657	struct ip6_hdr *ip6;
2658
2659        /*
2660         * If the IPv6 header is not aligned, slurp it up into a new
2661         * mbuf with space for link headers, in the event we forward
2662         * it.  Otherwise, if it is aligned, make sure the entire base
2663         * IPv6 header is in the first mbuf of the chain.
2664         */
2665        if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2666                struct ifnet *inifp = m->m_pkthdr.rcvif;
2667                if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2668                                  (max_linkhdr + 3) & ~3)) == NULL) {
2669                        /* XXXJRT new stat, please */
2670			ip6_statinc(IP6_STAT_TOOSMALL);
2671                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2672                        goto bad;
2673                }
2674        } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
2675                struct ifnet *inifp = m->m_pkthdr.rcvif;
2676                if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
2677			ip6_statinc(IP6_STAT_TOOSMALL);
2678                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2679                        goto bad;
2680                }
2681        }
2682
2683        ip6 = mtod(m, struct ip6_hdr *);
2684
2685        if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2686		ip6_statinc(IP6_STAT_BADVERS);
2687                in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
2688                goto bad;
2689        }
2690
2691	/* Checks out, proceed */
2692	*mp = m;
2693	return 0;
2694
2695    bad:
2696	*mp = m;
2697	return -1;
2698}
2699# endif /* INET6 */
2700#endif /* BRIDGE_IPF */
2701