if_bridge.c revision 1.162
1/*	$NetBSD: if_bridge.c,v 1.162 2018/12/14 12:27:22 martin Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by Jason L. Wright
53 * 4. The name of the author may not be used to endorse or promote products
54 *    derived from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 *
68 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69 */
70
71/*
72 * Network interface bridge support.
73 *
74 * TODO:
75 *
76 *	- Currently only supports Ethernet-like interfaces (Ethernet,
77 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
79 *	  consider heterogenous bridges).
80 */
81
82#include <sys/cdefs.h>
83__KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.162 2018/12/14 12:27:22 martin Exp $");
84
85#ifdef _KERNEL_OPT
86#include "opt_bridge_ipf.h"
87#include "opt_inet.h"
88#include "opt_net_mpsafe.h"
89#endif /* _KERNEL_OPT */
90
91#include <sys/param.h>
92#include <sys/kernel.h>
93#include <sys/mbuf.h>
94#include <sys/queue.h>
95#include <sys/socket.h>
96#include <sys/socketvar.h> /* for softnet_lock */
97#include <sys/sockio.h>
98#include <sys/systm.h>
99#include <sys/proc.h>
100#include <sys/pool.h>
101#include <sys/kauth.h>
102#include <sys/cpu.h>
103#include <sys/cprng.h>
104#include <sys/mutex.h>
105#include <sys/kmem.h>
106
107#include <net/bpf.h>
108#include <net/if.h>
109#include <net/if_dl.h>
110#include <net/if_types.h>
111#include <net/if_llc.h>
112
113#include <net/if_ether.h>
114#include <net/if_bridgevar.h>
115#include <net/ether_sw_offload.h>
116
117#if defined(BRIDGE_IPF)
118/* Used for bridge_ip[6]_checkbasic */
119#include <netinet/in.h>
120#include <netinet/in_systm.h>
121#include <netinet/ip.h>
122#include <netinet/ip_var.h>
123#include <netinet/ip_private.h>		/* XXX */
124
125#include <netinet/ip6.h>
126#include <netinet6/in6_var.h>
127#include <netinet6/ip6_var.h>
128#include <netinet6/ip6_private.h>	/* XXX */
129#endif /* BRIDGE_IPF */
130
/*
 * Size of the route hash table.  Must be a power of two, because the
 * mask below is derived from it as "size - 1".  The size may be
 * overridden at build time.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define	BRIDGE_RTHASH_SIZE		1024
#endif

/*
 * Mask for reducing a hash value to a table index.  The size is
 * parenthesized so that an override written as an expression
 * (e.g. "1 << 10") still produces the intended mask.
 */
#define	BRIDGE_RTHASH_MASK		((BRIDGE_RTHASH_SIZE) - 1)
139
140#include "carp.h"
141#if NCARP > 0
142#include <netinet/in.h>
143#include <netinet/in_var.h>
144#include <netinet/ip_carp.h>
145#endif
146
147#include "ioconf.h"
148
149__CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
150__CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
151__CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
152
/*
 * Default upper bound on the number of cached addresses.
 */
#ifndef BRIDGE_RTABLE_MAX
#define	BRIDGE_RTABLE_MAX		100
#endif

/*
 * Defaults loaded into a new bridge's spanning tree parameters.
 */
#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
#define	BSTP_DEFAULT_PATH_COST		55

/*
 * Lifetime, in seconds, of a dynamically learned entry.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
#endif

/*
 * Interval, in seconds, between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
#endif
184
/*
 * Writer-side lock of the address cache (sc_rtlist_lock) and the
 * pserialize(9) wrappers used around lockless reads of it.
 */
#define BRIDGE_RT_LOCK(_sc)		mutex_enter((_sc)->sc_rtlist_lock)
#define BRIDGE_RT_UNLOCK(_sc)		mutex_exit((_sc)->sc_rtlist_lock)
#define BRIDGE_RT_LOCKED(_sc)		mutex_owned((_sc)->sc_rtlist_lock)

#define BRIDGE_RT_PSZ_PERFORM(_sc)					\
	pserialize_perform((_sc)->sc_rtlist_psz)

#define BRIDGE_RT_RENTER(__s)						\
	do {								\
		__s = pserialize_read_enter();				\
	} while (0)
#define BRIDGE_RT_REXIT(__s)						\
	do {								\
		pserialize_read_exit(__s);				\
	} while (0)
194
/*
 * Helpers for walking and updating the address cache.  Every node is
 * linked twice: on the per-bridge list sc_rtlist through brt_list, and
 * on one bucket of sc_rthash[] through brt_hash.  The READER variants
 * wrap the PSLIST reader primitives and the WRITER variants wrap the
 * PSLIST writer primitives.
 *
 * The INSERT_HEAD macros use their _brt argument; they must not depend
 * on the caller's variable happening to be called "brt".
 */
#define BRIDGE_RTLIST_READER_FOREACH(_brt, _sc)			\
	PSLIST_READER_FOREACH((_brt), &((_sc)->sc_rtlist),		\
	    struct bridge_rtnode, brt_list)
#define BRIDGE_RTLIST_WRITER_FOREACH(_brt, _sc)			\
	PSLIST_WRITER_FOREACH((_brt), &((_sc)->sc_rtlist),		\
	    struct bridge_rtnode, brt_list)
#define BRIDGE_RTLIST_WRITER_INSERT_HEAD(_sc, _brt)			\
	PSLIST_WRITER_INSERT_HEAD(&(_sc)->sc_rtlist, (_brt), brt_list)
#define BRIDGE_RTLIST_WRITER_REMOVE(_brt)				\
	PSLIST_WRITER_REMOVE((_brt), brt_list)

#define BRIDGE_RTHASH_READER_FOREACH(_brt, _sc, _hash)			\
	PSLIST_READER_FOREACH((_brt), &(_sc)->sc_rthash[(_hash)],	\
	    struct bridge_rtnode, brt_hash)
#define BRIDGE_RTHASH_WRITER_FOREACH(_brt, _sc, _hash)			\
	PSLIST_WRITER_FOREACH((_brt), &(_sc)->sc_rthash[(_hash)],	\
	    struct bridge_rtnode, brt_hash)
#define BRIDGE_RTHASH_WRITER_INSERT_HEAD(_sc, _hash, _brt)		\
	PSLIST_WRITER_INSERT_HEAD(&(_sc)->sc_rthash[(_hash)], (_brt),	\
	    brt_hash)
#define BRIDGE_RTHASH_WRITER_INSERT_AFTER(_brt, _new)			\
	PSLIST_WRITER_INSERT_AFTER((_brt), (_new), brt_hash)
#define BRIDGE_RTHASH_WRITER_REMOVE(_brt)				\
	PSLIST_WRITER_REMOVE((_brt), brt_hash)
218
/*
 * Global locking around calls out of the bridge code.  With NET_MPSAFE
 * these expand to nothing; otherwise the kernel lock and softnet_lock
 * are taken and the IPL is raised to softnet, and RELEASE_GLOBAL_LOCKS()
 * undoes the three steps in reverse order.  DECLARE_LOCK_VARIABLE
 * provides the variable that carries the saved IPL between the two.
 */
#ifndef NET_MPSAFE
#define DECLARE_LOCK_VARIABLE	int __s
#define ACQUIRE_GLOBAL_LOCKS()						\
	do {								\
		KERNEL_LOCK(1, NULL);					\
		mutex_enter(softnet_lock);				\
		__s = splsoftnet();					\
	} while (0)
#define RELEASE_GLOBAL_LOCKS()						\
	do {								\
		splx(__s);						\
		mutex_exit(softnet_lock);				\
		KERNEL_UNLOCK_ONE(NULL);				\
	} while (0)
#else /* NET_MPSAFE */
#define DECLARE_LOCK_VARIABLE
#define ACQUIRE_GLOBAL_LOCKS()	do { } while (0)
#define RELEASE_GLOBAL_LOCKS()	do { } while (0)
#endif /* NET_MPSAFE */
236
237struct psref_class *bridge_psref_class __read_mostly;
238
239int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
240
241static struct pool bridge_rtnode_pool;
242
243static int	bridge_clone_create(struct if_clone *, int);
244static int	bridge_clone_destroy(struct ifnet *);
245
246static int	bridge_ioctl(struct ifnet *, u_long, void *);
247static int	bridge_init(struct ifnet *);
248static void	bridge_stop(struct ifnet *, int);
249static void	bridge_start(struct ifnet *);
250
251static void	bridge_input(struct ifnet *, struct mbuf *);
252static void	bridge_forward(struct bridge_softc *, struct mbuf *);
253
254static void	bridge_timer(void *);
255
256static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
257				 struct mbuf *);
258
259static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
260				struct ifnet *, int, uint8_t);
261static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
262static void	bridge_rttrim(struct bridge_softc *);
263static void	bridge_rtage(struct bridge_softc *);
264static void	bridge_rtage_work(struct work *, void *);
265static void	bridge_rtflush(struct bridge_softc *, int);
266static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
267static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
268
269static void	bridge_rtable_init(struct bridge_softc *);
270static void	bridge_rtable_fini(struct bridge_softc *);
271
272static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
273						  const uint8_t *);
274static int	bridge_rtnode_insert(struct bridge_softc *,
275				     struct bridge_rtnode *);
276static void	bridge_rtnode_remove(struct bridge_softc *,
277				     struct bridge_rtnode *);
278static void	bridge_rtnode_destroy(struct bridge_rtnode *);
279
280static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
281						  const char *name,
282						  struct psref *);
283static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
284						     struct ifnet *ifp,
285						     struct psref *);
286static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *,
287                                      struct psref *);
288static void	bridge_delete_member(struct bridge_softc *,
289				     struct bridge_iflist *);
290static void	bridge_acquire_member(struct bridge_softc *sc,
291                                      struct bridge_iflist *,
292                                      struct psref *);
293
294static int	bridge_ioctl_add(struct bridge_softc *, void *);
295static int	bridge_ioctl_del(struct bridge_softc *, void *);
296static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
297static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
298static int	bridge_ioctl_scache(struct bridge_softc *, void *);
299static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
300static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
301static int	bridge_ioctl_rts(struct bridge_softc *, void *);
302static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
303static int	bridge_ioctl_sto(struct bridge_softc *, void *);
304static int	bridge_ioctl_gto(struct bridge_softc *, void *);
305static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
306static int	bridge_ioctl_flush(struct bridge_softc *, void *);
307static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
308static int	bridge_ioctl_spri(struct bridge_softc *, void *);
309static int	bridge_ioctl_ght(struct bridge_softc *, void *);
310static int	bridge_ioctl_sht(struct bridge_softc *, void *);
311static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
312static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
313static int	bridge_ioctl_gma(struct bridge_softc *, void *);
314static int	bridge_ioctl_sma(struct bridge_softc *, void *);
315static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
316static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
317#if defined(BRIDGE_IPF)
318static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
319static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
320static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
321static int	bridge_ip_checkbasic(struct mbuf **mp);
322# ifdef INET6
323static int	bridge_ip6_checkbasic(struct mbuf **mp);
324# endif /* INET6 */
325#endif /* BRIDGE_IPF */
326
/*
 * One entry of bridge_control_table[]: how bridge_ioctl() services a
 * single SIOC[GS]DRVSPEC sub-command.
 */
struct bridge_control {
	/* handler, called with the softc and the in-kernel argument */
	int	(*bc_func)(struct bridge_softc *, void *);
	/* size the caller's ifd_len must match for copied arguments */
	int	bc_argsize;
	/* BC_F_* bits below */
	int	bc_flags;
};

#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
#define	BC_F_XLATEIN		0x08	/* xlate arguments in */
#define	BC_F_XLATEOUT		0x10	/* xlate arguments out */
338
339static const struct bridge_control bridge_control_table[] = {
340[BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
341[BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
342
343[BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
344[BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
345
346[BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
347[BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},
348
349[OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
350[OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},
351
352[BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
353
354[BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
355[BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},
356
357[BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
358
359[BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
360
361[BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
362[BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
363
364[BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
365[BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
366
367[BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
368[BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
369
370[BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
371[BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
372
373[BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
374
375[BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
376#if defined(BRIDGE_IPF)
377[BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
378[BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
379#endif /* BRIDGE_IPF */
380[BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
381[BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
382};
383
384static const int bridge_control_table_size = __arraycount(bridge_control_table);
385
386static struct if_clone bridge_cloner =
387    IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
388
389/*
390 * bridgeattach:
391 *
392 *	Pseudo-device attach routine.
393 */
394void
395bridgeattach(int n)
396{
397
398	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
399	    0, 0, 0, "brtpl", NULL, IPL_NET);
400
401	bridge_psref_class = psref_class_create("bridge", IPL_SOFTNET);
402
403	if_clone_attach(&bridge_cloner);
404}
405
406/*
407 * bridge_clone_create:
408 *
409 *	Create a new bridge instance.
410 */
411static int
412bridge_clone_create(struct if_clone *ifc, int unit)
413{
414	struct bridge_softc *sc;
415	struct ifnet *ifp;
416	int error;
417
418	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
419	ifp = &sc->sc_if;
420
421	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
422	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
423	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
424	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
425	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
426	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
427	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
428	sc->sc_filter_flags = 0;
429
430	/* Initialize our routing table. */
431	bridge_rtable_init(sc);
432
433	error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
434	    bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
435	if (error)
436		panic("%s: workqueue_create %d\n", __func__, error);
437
438	callout_init(&sc->sc_brcallout, CALLOUT_MPSAFE);
439	callout_init(&sc->sc_bstpcallout, CALLOUT_MPSAFE);
440
441	mutex_init(&sc->sc_iflist_psref.bip_lock, MUTEX_DEFAULT, IPL_NONE);
442	PSLIST_INIT(&sc->sc_iflist_psref.bip_iflist);
443	sc->sc_iflist_psref.bip_psz = pserialize_create();
444
445	if_initname(ifp, ifc->ifc_name, unit);
446	ifp->if_softc = sc;
447	ifp->if_extflags = IFEF_NO_LINK_STATE_CHANGE;
448#ifdef NET_MPSAFE
449	ifp->if_extflags |= IFEF_MPSAFE;
450#endif
451	ifp->if_mtu = ETHERMTU;
452	ifp->if_ioctl = bridge_ioctl;
453	ifp->if_output = bridge_output;
454	ifp->if_start = bridge_start;
455	ifp->if_stop = bridge_stop;
456	ifp->if_init = bridge_init;
457	ifp->if_type = IFT_BRIDGE;
458	ifp->if_addrlen = 0;
459	ifp->if_dlt = DLT_EN10MB;
460	ifp->if_hdrlen = ETHER_HDR_LEN;
461
462	error = if_initialize(ifp);
463	if (error != 0) {
464		pserialize_destroy(sc->sc_iflist_psref.bip_psz);
465		mutex_destroy(&sc->sc_iflist_psref.bip_lock);
466		callout_destroy(&sc->sc_brcallout);
467		callout_destroy(&sc->sc_bstpcallout);
468		workqueue_destroy(sc->sc_rtage_wq);
469		bridge_rtable_fini(sc);
470		kmem_free(sc, sizeof(*sc));
471
472		return error;
473	}
474	if_alloc_sadl(ifp);
475	if_register(ifp);
476
477	return 0;
478}
479
480/*
481 * bridge_clone_destroy:
482 *
483 *	Destroy a bridge instance.
484 */
485static int
486bridge_clone_destroy(struct ifnet *ifp)
487{
488	struct bridge_softc *sc = ifp->if_softc;
489	struct bridge_iflist *bif;
490
491	if ((ifp->if_flags & IFF_RUNNING) != 0)
492		bridge_stop(ifp, 1);
493
494	BRIDGE_LOCK(sc);
495	for (;;) {
496		bif = PSLIST_WRITER_FIRST(&sc->sc_iflist_psref.bip_iflist, struct bridge_iflist,
497		    bif_next);
498		if (bif == NULL)
499			break;
500		bridge_delete_member(sc, bif);
501	}
502	PSLIST_DESTROY(&sc->sc_iflist_psref.bip_iflist);
503	BRIDGE_UNLOCK(sc);
504
505	if_detach(ifp);
506
507	/* Tear down the routing table. */
508	bridge_rtable_fini(sc);
509
510	pserialize_destroy(sc->sc_iflist_psref.bip_psz);
511	mutex_destroy(&sc->sc_iflist_psref.bip_lock);
512	callout_destroy(&sc->sc_brcallout);
513	callout_destroy(&sc->sc_bstpcallout);
514	workqueue_destroy(sc->sc_rtage_wq);
515	kmem_free(sc, sizeof(*sc));
516
517	return 0;
518}
519
520/*
521 * bridge_ioctl:
522 *
523 *	Handle a control request from the operator.
524 */
525static int
526bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
527{
528	struct bridge_softc *sc = ifp->if_softc;
529	struct lwp *l = curlwp;	/* XXX */
530	union {
531		struct ifbreq ifbreq;
532		struct ifbifconf ifbifconf;
533		struct ifbareq ifbareq;
534		struct ifbaconf ifbaconf;
535		struct ifbrparam ifbrparam;
536	} args;
537	struct ifdrv *ifd = (struct ifdrv *) data;
538	const struct bridge_control *bc = NULL; /* XXXGCC */
539	int s, error = 0;
540
541	/* Authorize command before calling splsoftnet(). */
542	switch (cmd) {
543	case SIOCGDRVSPEC:
544	case SIOCSDRVSPEC:
545		if (ifd->ifd_cmd >= bridge_control_table_size
546		    || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
547			error = EINVAL;
548			return error;
549		}
550
551		/* We only care about BC_F_SUSER at this point. */
552		if ((bc->bc_flags & BC_F_SUSER) == 0)
553			break;
554
555		error = kauth_authorize_network(l->l_cred,
556		    KAUTH_NETWORK_INTERFACE_BRIDGE,
557		    cmd == SIOCGDRVSPEC ?
558		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
559		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
560		     ifd, NULL, NULL);
561		if (error)
562			return error;
563
564		break;
565	}
566
567	s = splsoftnet();
568
569	switch (cmd) {
570	case SIOCGDRVSPEC:
571	case SIOCSDRVSPEC:
572		KASSERT(bc != NULL);
573		if (cmd == SIOCGDRVSPEC &&
574		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
575			error = EINVAL;
576			break;
577		}
578		else if (cmd == SIOCSDRVSPEC &&
579		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
580			error = EINVAL;
581			break;
582		}
583
584		/* BC_F_SUSER is checked above, before splsoftnet(). */
585
586		if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
587		    && (ifd->ifd_len != bc->bc_argsize
588			|| ifd->ifd_len > sizeof(args))) {
589			error = EINVAL;
590			break;
591		}
592
593		memset(&args, 0, sizeof(args));
594		if (bc->bc_flags & BC_F_COPYIN) {
595			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
596			if (error)
597				break;
598		} else if (bc->bc_flags & BC_F_XLATEIN) {
599			args.ifbifconf.ifbic_len = ifd->ifd_len;
600			args.ifbifconf.ifbic_buf = ifd->ifd_data;
601		}
602
603		error = (*bc->bc_func)(sc, &args);
604		if (error)
605			break;
606
607		if (bc->bc_flags & BC_F_COPYOUT) {
608			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
609		} else if (bc->bc_flags & BC_F_XLATEOUT) {
610			ifd->ifd_len = args.ifbifconf.ifbic_len;
611			ifd->ifd_data = args.ifbifconf.ifbic_buf;
612		}
613		break;
614
615	case SIOCSIFFLAGS:
616		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
617			break;
618		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
619		case IFF_RUNNING:
620			/*
621			 * If interface is marked down and it is running,
622			 * then stop and disable it.
623			 */
624			(*ifp->if_stop)(ifp, 1);
625			break;
626		case IFF_UP:
627			/*
628			 * If interface is marked up and it is stopped, then
629			 * start it.
630			 */
631			error = (*ifp->if_init)(ifp);
632			break;
633		default:
634			break;
635		}
636		break;
637
638	case SIOCSIFMTU:
639		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
640			error = 0;
641		break;
642
643	default:
644		error = ifioctl_common(ifp, cmd, data);
645		break;
646	}
647
648	splx(s);
649
650	return error;
651}
652
653/*
654 * bridge_lookup_member:
655 *
656 *	Lookup a bridge member interface.
657 */
658static struct bridge_iflist *
659bridge_lookup_member(struct bridge_softc *sc, const char *name, struct psref *psref)
660{
661	struct bridge_iflist *bif;
662	struct ifnet *ifp;
663	int s;
664
665	BRIDGE_PSZ_RENTER(s);
666
667	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
668		ifp = bif->bif_ifp;
669		if (strcmp(ifp->if_xname, name) == 0)
670			break;
671	}
672	if (bif != NULL)
673		bridge_acquire_member(sc, bif, psref);
674
675	BRIDGE_PSZ_REXIT(s);
676
677	return bif;
678}
679
680/*
681 * bridge_lookup_member_if:
682 *
683 *	Lookup a bridge member interface by ifnet*.
684 */
685static struct bridge_iflist *
686bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp,
687    struct psref *psref)
688{
689	struct bridge_iflist *bif;
690	int s;
691
692	BRIDGE_PSZ_RENTER(s);
693
694	bif = member_ifp->if_bridgeif;
695	if (bif != NULL) {
696		psref_acquire(psref, &bif->bif_psref,
697		    bridge_psref_class);
698	}
699
700	BRIDGE_PSZ_REXIT(s);
701
702	return bif;
703}
704
705static void
706bridge_acquire_member(struct bridge_softc *sc, struct bridge_iflist *bif,
707    struct psref *psref)
708{
709
710	psref_acquire(psref, &bif->bif_psref, bridge_psref_class);
711}
712
713/*
714 * bridge_release_member:
715 *
716 *	Release the specified member interface.
717 */
718static void
719bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif,
720    struct psref *psref)
721{
722
723	psref_release(psref, &bif->bif_psref, bridge_psref_class);
724}
725
726/*
727 * bridge_delete_member:
728 *
729 *	Delete the specified member interface.
730 */
731static void
732bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
733{
734	struct ifnet *ifs = bif->bif_ifp;
735
736	KASSERT(BRIDGE_LOCKED(sc));
737
738	ifs->_if_input = ether_input;
739	ifs->if_bridge = NULL;
740	ifs->if_bridgeif = NULL;
741
742	PSLIST_WRITER_REMOVE(bif, bif_next);
743	BRIDGE_PSZ_PERFORM(sc);
744	BRIDGE_UNLOCK(sc);
745
746	psref_target_destroy(&bif->bif_psref, bridge_psref_class);
747
748	PSLIST_ENTRY_DESTROY(bif, bif_next);
749	kmem_free(bif, sizeof(*bif));
750
751	BRIDGE_LOCK(sc);
752}
753
754/*
755 * bridge_calc_csum_flags:
756 *
757 *	Calculate logical and b/w csum flags each member interface supports.
758 */
759void
760bridge_calc_csum_flags(struct bridge_softc *sc)
761{
762	struct bridge_iflist *bif;
763	struct ifnet *ifs;
764	int flags = ~0;
765
766	BRIDGE_LOCK(sc);
767	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
768		ifs = bif->bif_ifp;
769		flags &= ifs->if_csum_flags_tx;
770	}
771	sc->sc_csum_flags_tx = flags;
772	BRIDGE_UNLOCK(sc);
773#ifdef DEBUG
774	printf("%s: 0x%x\n", __func__, flags);
775#endif
776}
777
778static int
779bridge_ioctl_add(struct bridge_softc *sc, void *arg)
780{
781	struct ifbreq *req = arg;
782	struct bridge_iflist *bif = NULL;
783	struct ifnet *ifs;
784	int error = 0;
785	struct psref psref;
786
787	ifs = if_get(req->ifbr_ifsname, &psref);
788	if (ifs == NULL)
789		return ENOENT;
790
791	if (ifs->if_bridge == sc) {
792		error = EEXIST;
793		goto out;
794	}
795
796	if (ifs->if_bridge != NULL) {
797		error = EBUSY;
798		goto out;
799	}
800
801	if (ifs->_if_input != ether_input) {
802		error = EINVAL;
803		goto out;
804	}
805
806	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
807	if ((ifs->if_flags & IFF_SIMPLEX) == 0) {
808		error = EINVAL;
809		goto out;
810	}
811
812	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
813
814	switch (ifs->if_type) {
815	case IFT_ETHER:
816		if (sc->sc_if.if_mtu != ifs->if_mtu) {
817			error = EINVAL;
818			goto out;
819		}
820		/* FALLTHROUGH */
821	case IFT_L2TP:
822		IFNET_LOCK(ifs);
823		error = ether_enable_vlan_mtu(ifs);
824		IFNET_UNLOCK(ifs);
825		if (error > 0)
826			goto out;
827		/*
828		 * Place the interface into promiscuous mode.
829		 */
830		error = ifpromisc(ifs, 1);
831		if (error)
832			goto out;
833		break;
834	default:
835		error = EINVAL;
836		goto out;
837	}
838
839	bif->bif_ifp = ifs;
840	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
841	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
842	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
843	PSLIST_ENTRY_INIT(bif, bif_next);
844	psref_target_init(&bif->bif_psref, bridge_psref_class);
845
846	BRIDGE_LOCK(sc);
847
848	ifs->if_bridge = sc;
849	ifs->if_bridgeif = bif;
850	PSLIST_WRITER_INSERT_HEAD(&sc->sc_iflist_psref.bip_iflist, bif, bif_next);
851	ifs->_if_input = bridge_input;
852
853	BRIDGE_UNLOCK(sc);
854
855	bridge_calc_csum_flags(sc);
856
857	if (sc->sc_if.if_flags & IFF_RUNNING)
858		bstp_initialization(sc);
859	else
860		bstp_stop(sc);
861
862out:
863	if_put(ifs, &psref);
864	if (error) {
865		if (bif != NULL)
866			kmem_free(bif, sizeof(*bif));
867	}
868	return error;
869}
870
871static int
872bridge_ioctl_del(struct bridge_softc *sc, void *arg)
873{
874	struct ifbreq *req = arg;
875	const char *name = req->ifbr_ifsname;
876	struct bridge_iflist *bif;
877	struct ifnet *ifs;
878
879	BRIDGE_LOCK(sc);
880
881	/*
882	 * Don't use bridge_lookup_member. We want to get a member
883	 * with bif_refs == 0.
884	 */
885	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
886		ifs = bif->bif_ifp;
887		if (strcmp(ifs->if_xname, name) == 0)
888			break;
889	}
890
891	if (bif == NULL) {
892		BRIDGE_UNLOCK(sc);
893		return ENOENT;
894	}
895
896	bridge_delete_member(sc, bif);
897
898	BRIDGE_UNLOCK(sc);
899
900	switch (ifs->if_type) {
901	case IFT_ETHER:
902	case IFT_L2TP:
903		/*
904		 * Take the interface out of promiscuous mode.
905		 * Don't call it with holding a spin lock.
906		 */
907		(void) ifpromisc(ifs, 0);
908		IFNET_LOCK(ifs);
909		(void) ether_disable_vlan_mtu(ifs);
910		IFNET_UNLOCK(ifs);
911		break;
912	default:
913#ifdef DIAGNOSTIC
914		panic("bridge_delete_member: impossible");
915#endif
916		break;
917	}
918
919	bridge_rtdelete(sc, ifs);
920	bridge_calc_csum_flags(sc);
921
922	if (sc->sc_if.if_flags & IFF_RUNNING)
923		bstp_initialization(sc);
924
925	return 0;
926}
927
928static int
929bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
930{
931	struct ifbreq *req = arg;
932	struct bridge_iflist *bif;
933	struct psref psref;
934
935	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
936	if (bif == NULL)
937		return ENOENT;
938
939	req->ifbr_ifsflags = bif->bif_flags;
940	req->ifbr_state = bif->bif_state;
941	req->ifbr_priority = bif->bif_priority;
942	req->ifbr_path_cost = bif->bif_path_cost;
943	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
944
945	bridge_release_member(sc, bif, &psref);
946
947	return 0;
948}
949
950static int
951bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
952{
953	struct ifbreq *req = arg;
954	struct bridge_iflist *bif;
955	struct psref psref;
956
957	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
958	if (bif == NULL)
959		return ENOENT;
960
961	if (req->ifbr_ifsflags & IFBIF_STP) {
962		switch (bif->bif_ifp->if_type) {
963		case IFT_ETHER:
964		case IFT_L2TP:
965			/* These can do spanning tree. */
966			break;
967
968		default:
969			/* Nothing else can. */
970			bridge_release_member(sc, bif, &psref);
971			return EINVAL;
972		}
973	}
974
975	bif->bif_flags = req->ifbr_ifsflags;
976
977	bridge_release_member(sc, bif, &psref);
978
979	if (sc->sc_if.if_flags & IFF_RUNNING)
980		bstp_initialization(sc);
981
982	return 0;
983}
984
985static int
986bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
987{
988	struct ifbrparam *param = arg;
989
990	sc->sc_brtmax = param->ifbrp_csize;
991	bridge_rttrim(sc);
992
993	return 0;
994}
995
996static int
997bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
998{
999	struct ifbrparam *param = arg;
1000
1001	param->ifbrp_csize = sc->sc_brtmax;
1002
1003	return 0;
1004}
1005
1006static int
1007bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1008{
1009	struct ifbifconf *bifc = arg;
1010	struct bridge_iflist *bif;
1011	struct ifbreq *breqs;
1012	int i, count, error = 0;
1013
1014retry:
1015	BRIDGE_LOCK(sc);
1016	count = 0;
1017	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
1018		count++;
1019	BRIDGE_UNLOCK(sc);
1020
1021	if (count == 0) {
1022		bifc->ifbic_len = 0;
1023		return 0;
1024	}
1025
1026	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
1027		/* Tell that a larger buffer is needed */
1028		bifc->ifbic_len = sizeof(*breqs) * count;
1029		return 0;
1030	}
1031
1032	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
1033
1034	BRIDGE_LOCK(sc);
1035
1036	i = 0;
1037	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
1038		i++;
1039	if (i > count) {
1040		/*
1041		 * The number of members has been increased.
1042		 * We need more memory!
1043		 */
1044		BRIDGE_UNLOCK(sc);
1045		kmem_free(breqs, sizeof(*breqs) * count);
1046		goto retry;
1047	}
1048
1049	i = 0;
1050	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
1051		struct ifbreq *breq = &breqs[i++];
1052		memset(breq, 0, sizeof(*breq));
1053
1054		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1055		    sizeof(breq->ifbr_ifsname));
1056		breq->ifbr_ifsflags = bif->bif_flags;
1057		breq->ifbr_state = bif->bif_state;
1058		breq->ifbr_priority = bif->bif_priority;
1059		breq->ifbr_path_cost = bif->bif_path_cost;
1060		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1061	}
1062
1063	/* Don't call copyout with holding the mutex */
1064	BRIDGE_UNLOCK(sc);
1065
1066	for (i = 0; i < count; i++) {
1067		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1068		if (error)
1069			break;
1070	}
1071	bifc->ifbic_len = sizeof(*breqs) * i;
1072
1073	kmem_free(breqs, sizeof(*breqs) * count);
1074
1075	return error;
1076}
1077
1078static int
1079bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1080{
1081	struct ifbaconf *bac = arg;
1082	struct bridge_rtnode *brt;
1083	struct ifbareq bareq;
1084	int count = 0, error = 0, len;
1085
1086	if (bac->ifbac_len == 0)
1087		return 0;
1088
1089	BRIDGE_RT_LOCK(sc);
1090
1091	/* The passed buffer is not enough, tell a required size. */
1092	if (bac->ifbac_len < (sizeof(bareq) * sc->sc_brtcnt)) {
1093		count = sc->sc_brtcnt;
1094		goto out;
1095	}
1096
1097	len = bac->ifbac_len;
1098	BRIDGE_RTLIST_WRITER_FOREACH(brt, sc) {
1099		if (len < sizeof(bareq))
1100			goto out;
1101		memset(&bareq, 0, sizeof(bareq));
1102		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1103		    sizeof(bareq.ifba_ifsname));
1104		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1105		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1106			bareq.ifba_expire = brt->brt_expire - time_uptime;
1107		} else
1108			bareq.ifba_expire = 0;
1109		bareq.ifba_flags = brt->brt_flags;
1110
1111		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1112		if (error)
1113			goto out;
1114		count++;
1115		len -= sizeof(bareq);
1116	}
1117out:
1118	BRIDGE_RT_UNLOCK(sc);
1119
1120	bac->ifbac_len = sizeof(bareq) * count;
1121	return error;
1122}
1123
1124static int
1125bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1126{
1127	struct ifbareq *req = arg;
1128	struct bridge_iflist *bif;
1129	int error;
1130	struct psref psref;
1131
1132	bif = bridge_lookup_member(sc, req->ifba_ifsname, &psref);
1133	if (bif == NULL)
1134		return ENOENT;
1135
1136	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1137	    req->ifba_flags);
1138
1139	bridge_release_member(sc, bif, &psref);
1140
1141	return error;
1142}
1143
1144static int
1145bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1146{
1147	struct ifbrparam *param = arg;
1148
1149	sc->sc_brttimeout = param->ifbrp_ctime;
1150
1151	return 0;
1152}
1153
1154static int
1155bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1156{
1157	struct ifbrparam *param = arg;
1158
1159	param->ifbrp_ctime = sc->sc_brttimeout;
1160
1161	return 0;
1162}
1163
1164static int
1165bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1166{
1167	struct ifbareq *req = arg;
1168
1169	return (bridge_rtdaddr(sc, req->ifba_dst));
1170}
1171
1172static int
1173bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1174{
1175	struct ifbreq *req = arg;
1176
1177	bridge_rtflush(sc, req->ifbr_ifsflags);
1178
1179	return 0;
1180}
1181
1182static int
1183bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1184{
1185	struct ifbrparam *param = arg;
1186
1187	param->ifbrp_prio = sc->sc_bridge_priority;
1188
1189	return 0;
1190}
1191
1192static int
1193bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1194{
1195	struct ifbrparam *param = arg;
1196
1197	sc->sc_bridge_priority = param->ifbrp_prio;
1198
1199	if (sc->sc_if.if_flags & IFF_RUNNING)
1200		bstp_initialization(sc);
1201
1202	return 0;
1203}
1204
1205static int
1206bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1207{
1208	struct ifbrparam *param = arg;
1209
1210	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1211
1212	return 0;
1213}
1214
1215static int
1216bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1217{
1218	struct ifbrparam *param = arg;
1219
1220	if (param->ifbrp_hellotime == 0)
1221		return EINVAL;
1222	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1223
1224	if (sc->sc_if.if_flags & IFF_RUNNING)
1225		bstp_initialization(sc);
1226
1227	return 0;
1228}
1229
1230static int
1231bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1232{
1233	struct ifbrparam *param = arg;
1234
1235	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1236
1237	return 0;
1238}
1239
1240static int
1241bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1242{
1243	struct ifbrparam *param = arg;
1244
1245	if (param->ifbrp_fwddelay == 0)
1246		return EINVAL;
1247	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1248
1249	if (sc->sc_if.if_flags & IFF_RUNNING)
1250		bstp_initialization(sc);
1251
1252	return 0;
1253}
1254
1255static int
1256bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1257{
1258	struct ifbrparam *param = arg;
1259
1260	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1261
1262	return 0;
1263}
1264
1265static int
1266bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1267{
1268	struct ifbrparam *param = arg;
1269
1270	if (param->ifbrp_maxage == 0)
1271		return EINVAL;
1272	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1273
1274	if (sc->sc_if.if_flags & IFF_RUNNING)
1275		bstp_initialization(sc);
1276
1277	return 0;
1278}
1279
1280static int
1281bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1282{
1283	struct ifbreq *req = arg;
1284	struct bridge_iflist *bif;
1285	struct psref psref;
1286
1287	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1288	if (bif == NULL)
1289		return ENOENT;
1290
1291	bif->bif_priority = req->ifbr_priority;
1292
1293	if (sc->sc_if.if_flags & IFF_RUNNING)
1294		bstp_initialization(sc);
1295
1296	bridge_release_member(sc, bif, &psref);
1297
1298	return 0;
1299}
1300
1301#if defined(BRIDGE_IPF)
1302static int
1303bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1304{
1305	struct ifbrparam *param = arg;
1306
1307	param->ifbrp_filter = sc->sc_filter_flags;
1308
1309	return 0;
1310}
1311
1312static int
1313bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1314{
1315	struct ifbrparam *param = arg;
1316	uint32_t nflags, oflags;
1317
1318	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1319		return EINVAL;
1320
1321	nflags = param->ifbrp_filter;
1322	oflags = sc->sc_filter_flags;
1323
1324	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1325		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1326			sc->sc_if.if_pfil);
1327	}
1328	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1329		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1330			sc->sc_if.if_pfil);
1331	}
1332
1333	sc->sc_filter_flags = nflags;
1334
1335	return 0;
1336}
1337#endif /* BRIDGE_IPF */
1338
1339static int
1340bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1341{
1342	struct ifbreq *req = arg;
1343	struct bridge_iflist *bif;
1344	struct psref psref;
1345
1346	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1347	if (bif == NULL)
1348		return ENOENT;
1349
1350	bif->bif_path_cost = req->ifbr_path_cost;
1351
1352	if (sc->sc_if.if_flags & IFF_RUNNING)
1353		bstp_initialization(sc);
1354
1355	bridge_release_member(sc, bif, &psref);
1356
1357	return 0;
1358}
1359
1360/*
1361 * bridge_ifdetach:
1362 *
1363 *	Detach an interface from a bridge.  Called when a member
1364 *	interface is detaching.
1365 */
1366void
1367bridge_ifdetach(struct ifnet *ifp)
1368{
1369	struct bridge_softc *sc = ifp->if_bridge;
1370	struct ifbreq breq;
1371
1372	/* ioctl_lock should prevent this from happening */
1373	KASSERT(sc != NULL);
1374
1375	memset(&breq, 0, sizeof(breq));
1376	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1377
1378	(void) bridge_ioctl_del(sc, &breq);
1379}
1380
1381/*
1382 * bridge_init:
1383 *
1384 *	Initialize a bridge interface.
1385 */
1386static int
1387bridge_init(struct ifnet *ifp)
1388{
1389	struct bridge_softc *sc = ifp->if_softc;
1390
1391	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1392
1393	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1394	    bridge_timer, sc);
1395	bstp_initialization(sc);
1396
1397	ifp->if_flags |= IFF_RUNNING;
1398	return 0;
1399}
1400
1401/*
1402 * bridge_stop:
1403 *
1404 *	Stop the bridge interface.
1405 */
1406static void
1407bridge_stop(struct ifnet *ifp, int disable)
1408{
1409	struct bridge_softc *sc = ifp->if_softc;
1410
1411	KASSERT((ifp->if_flags & IFF_RUNNING) != 0);
1412	ifp->if_flags &= ~IFF_RUNNING;
1413
1414	callout_halt(&sc->sc_brcallout, NULL);
1415	workqueue_wait(sc->sc_rtage_wq, &sc->sc_rtage_wk);
1416	bstp_stop(sc);
1417	bridge_rtflush(sc, IFBF_FLUSHDYN);
1418}
1419
1420/*
1421 * bridge_enqueue:
1422 *
1423 *	Enqueue a packet on a bridge member interface.
1424 */
1425void
1426bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1427    int runfilt)
1428{
1429	int len, error;
1430	short mflags;
1431
1432	if (runfilt) {
1433		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1434		    dst_ifp, PFIL_OUT) != 0) {
1435			if (m != NULL)
1436				m_freem(m);
1437			return;
1438		}
1439		if (m == NULL)
1440			return;
1441	}
1442
1443#ifdef ALTQ
1444	KERNEL_LOCK(1, NULL);
1445	/*
1446	 * If ALTQ is enabled on the member interface, do
1447	 * classification; the queueing discipline might
1448	 * not require classification, but might require
1449	 * the address family/header pointer in the pktattr.
1450	 */
1451	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1452		/* XXX IFT_ETHER */
1453		altq_etherclassify(&dst_ifp->if_snd, m);
1454	}
1455	KERNEL_UNLOCK_ONE(NULL);
1456#endif /* ALTQ */
1457
1458	len = m->m_pkthdr.len;
1459	mflags = m->m_flags;
1460
1461	error = if_transmit_lock(dst_ifp, m);
1462	if (error) {
1463		/* mbuf is already freed */
1464		sc->sc_if.if_oerrors++;
1465		return;
1466	}
1467
1468	sc->sc_if.if_opackets++;
1469	sc->sc_if.if_obytes += len;
1470	if (mflags & M_MCAST)
1471		sc->sc_if.if_omcasts++;
1472}
1473
1474/*
1475 * bridge_output:
1476 *
1477 *	Send output from a bridge member interface.  This
1478 *	performs the bridging function for locally originated
1479 *	packets.
1480 *
1481 *	The mbuf has the Ethernet header already attached.  We must
1482 *	enqueue or free the mbuf before returning.
1483 */
1484int
1485bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
1486    const struct rtentry *rt)
1487{
1488	struct ether_header *eh;
1489	struct ifnet *dst_if;
1490	struct bridge_softc *sc;
1491	struct mbuf *n;
1492	int s;
1493
1494	/*
1495	 * bridge_output() is called from ether_output(), furthermore
1496	 * ifp argument doesn't point to bridge(4). So, don't assert
1497	 * IFEF_MPSAFE here.
1498	 */
1499
1500	if (m->m_len < ETHER_HDR_LEN) {
1501		m = m_pullup(m, ETHER_HDR_LEN);
1502		if (m == NULL)
1503			return 0;
1504	}
1505
1506	eh = mtod(m, struct ether_header *);
1507	sc = ifp->if_bridge;
1508
1509	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1510		if (memcmp(etherbroadcastaddr,
1511		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
1512			m->m_flags |= M_BCAST;
1513		else
1514			m->m_flags |= M_MCAST;
1515	}
1516
1517	/*
1518	 * If bridge is down, but the original output interface is up,
1519	 * go ahead and send out that interface.  Otherwise, the packet
1520	 * is dropped below.
1521	 */
1522	if (__predict_false(sc == NULL) ||
1523	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1524		dst_if = ifp;
1525		goto unicast_asis;
1526	}
1527
1528	/*
1529	 * If the packet is a multicast, or we don't know a better way to
1530	 * get there, send to all interfaces.
1531	 */
1532	if ((m->m_flags & (M_MCAST | M_BCAST)) != 0)
1533		dst_if = NULL;
1534	else
1535		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1536
1537	/*
1538	 * In general, we need to handle TX offload in software before
1539	 * enqueueing a packet. However, we can send it as is in the
1540	 * cases of unicast via (1) the source interface, or (2) an
1541	 * interface which supports the specified offload options.
1542	 * For multicast or broadcast, send it as is only if (3) all
1543	 * the member interfaces support the specified options.
1544	 */
1545
1546	/*
1547	 * Unicast via the source interface.
1548	 */
1549	if (dst_if == ifp)
1550		goto unicast_asis;
1551
1552	/*
1553	 * Unicast via other interface.
1554	 */
1555	if (dst_if != NULL) {
1556		KASSERT(m->m_flags & M_PKTHDR);
1557		if (TX_OFFLOAD_SUPPORTED(dst_if->if_csum_flags_tx,
1558		    m->m_pkthdr.csum_flags)) {
1559			/*
1560			 * Unicast via an interface which supports the
1561			 * specified offload options.
1562			 */
1563			goto unicast_asis;
1564		}
1565
1566		/*
1567		 * Handle TX offload in software. For TSO, a packet is
1568		 * split into multiple chunks. Thus, the return value of
1569		 * ether_sw_offload_tx() is mbuf chain consists of them.
1570		 */
1571		m = ether_sw_offload_tx(ifp, m);
1572		if (m == NULL)
1573			return 0;
1574
1575		do {
1576			n = m->m_nextpkt;
1577			if ((dst_if->if_flags & IFF_RUNNING) == 0)
1578				m_freem(m);
1579			else
1580				bridge_enqueue(sc, dst_if, m, 0);
1581			m = n;
1582		} while (m != NULL);
1583
1584		return 0;
1585	}
1586
1587	/*
1588	 * Multicast or broadcast.
1589	 */
1590	if (TX_OFFLOAD_SUPPORTED(sc->sc_csum_flags_tx,
1591	    m->m_pkthdr.csum_flags)) {
1592		/*
1593		 * Specified TX offload options are supported by all
1594		 * the member interfaces of this bridge.
1595		 */
1596		m->m_nextpkt = NULL;	/* XXX */
1597	} else {
1598		/*
1599		 * Otherwise, handle TX offload in software.
1600		 */
1601		m = ether_sw_offload_tx(ifp, m);
1602		if (m == NULL)
1603			return 0;
1604	}
1605
1606	do {
1607		/* XXX Should call bridge_broadcast, but there are locking
1608		 * issues which need resolving first. */
1609		struct bridge_iflist *bif;
1610		struct mbuf *mc;
1611		bool used = false;
1612
1613		n = m->m_nextpkt;
1614
1615		BRIDGE_PSZ_RENTER(s);
1616		BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
1617			struct psref psref;
1618
1619			bridge_acquire_member(sc, bif, &psref);
1620			BRIDGE_PSZ_REXIT(s);
1621
1622			dst_if = bif->bif_ifp;
1623			if ((dst_if->if_flags & IFF_RUNNING) == 0)
1624				goto next;
1625
1626			/*
1627			 * If this is not the original output interface,
1628			 * and the interface is participating in spanning
1629			 * tree, make sure the port is in a state that
1630			 * allows forwarding.
1631			 */
1632			if (dst_if != ifp &&
1633			    (bif->bif_flags & IFBIF_STP) != 0) {
1634				switch (bif->bif_state) {
1635				case BSTP_IFSTATE_BLOCKING:
1636				case BSTP_IFSTATE_LISTENING:
1637				case BSTP_IFSTATE_DISABLED:
1638					goto next;
1639				}
1640			}
1641
1642			if (PSLIST_READER_NEXT(bif, struct bridge_iflist,
1643			    bif_next) == NULL &&
1644			    ((m->m_flags & (M_MCAST | M_BCAST)) == 0 ||
1645			    dst_if == ifp))
1646			{
1647				used = true;
1648				mc = m;
1649			} else {
1650				mc = m_copypacket(m, M_DONTWAIT);
1651				if (mc == NULL) {
1652					sc->sc_if.if_oerrors++;
1653					goto next;
1654				}
1655			}
1656
1657			bridge_enqueue(sc, dst_if, mc, 0);
1658
1659			if ((m->m_flags & (M_MCAST | M_BCAST)) != 0 &&
1660			    dst_if != ifp)
1661			{
1662				if (PSLIST_READER_NEXT(bif,
1663				    struct bridge_iflist, bif_next) == NULL)
1664				{
1665					used = true;
1666					mc = m;
1667				} else {
1668					mc = m_copypacket(m, M_DONTWAIT);
1669					if (mc == NULL) {
1670						sc->sc_if.if_oerrors++;
1671						goto next;
1672					}
1673				}
1674
1675				m_set_rcvif(mc, dst_if);
1676				mc->m_flags &= ~M_PROMISC;
1677
1678				s = splsoftnet();
1679				KERNEL_LOCK_UNLESS_IFP_MPSAFE(dst_if);
1680				ether_input(dst_if, mc);
1681				KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(dst_if);
1682				splx(s);
1683			}
1684
1685next:
1686			BRIDGE_PSZ_RENTER(s);
1687			bridge_release_member(sc, bif, &psref);
1688
1689			/* Guarantee we don't re-enter the loop as we already
1690			 * decided we're at the end. */
1691			if (used)
1692				break;
1693		}
1694		BRIDGE_PSZ_REXIT(s);
1695
1696		if (!used)
1697			m_freem(m);
1698
1699		m = n;
1700	} while (m != NULL);
1701	return 0;
1702
1703unicast_asis:
1704	/*
1705	 * XXX Spanning tree consideration here?
1706	 */
1707	if ((dst_if->if_flags & IFF_RUNNING) == 0)
1708		m_freem(m);
1709	else
1710		bridge_enqueue(sc, dst_if, m, 0);
1711	return 0;
1712}
1713
1714/*
1715 * bridge_start:
1716 *
1717 *	Start output on a bridge.
1718 *
1719 *	NOTE: This routine should never be called in this implementation.
1720 */
1721static void
1722bridge_start(struct ifnet *ifp)
1723{
1724
1725	printf("%s: bridge_start() called\n", ifp->if_xname);
1726}
1727
1728/*
1729 * bridge_forward:
1730 *
1731 *	The forwarding function of the bridge.
1732 */
1733static void
1734bridge_forward(struct bridge_softc *sc, struct mbuf *m)
1735{
1736	struct bridge_iflist *bif;
1737	struct ifnet *src_if, *dst_if;
1738	struct ether_header *eh;
1739	struct psref psref;
1740	struct psref psref_src;
1741	DECLARE_LOCK_VARIABLE;
1742
1743	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0)
1744		return;
1745
1746	src_if = m_get_rcvif_psref(m, &psref_src);
1747	if (src_if == NULL) {
1748		/* Interface is being destroyed? */
1749		m_freem(m);
1750		goto out;
1751	}
1752
1753	sc->sc_if.if_ipackets++;
1754	sc->sc_if.if_ibytes += m->m_pkthdr.len;
1755
1756	/*
1757	 * Look up the bridge_iflist.
1758	 */
1759	bif = bridge_lookup_member_if(sc, src_if, &psref);
1760	if (bif == NULL) {
1761		/* Interface is not a bridge member (anymore?) */
1762		m_freem(m);
1763		goto out;
1764	}
1765
1766	if (bif->bif_flags & IFBIF_STP) {
1767		switch (bif->bif_state) {
1768		case BSTP_IFSTATE_BLOCKING:
1769		case BSTP_IFSTATE_LISTENING:
1770		case BSTP_IFSTATE_DISABLED:
1771			m_freem(m);
1772			bridge_release_member(sc, bif, &psref);
1773			goto out;
1774		}
1775	}
1776
1777	eh = mtod(m, struct ether_header *);
1778
1779	/*
1780	 * If the interface is learning, and the source
1781	 * address is valid and not multicast, record
1782	 * the address.
1783	 */
1784	if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1785	    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1786	    (eh->ether_shost[0] == 0 &&
1787	     eh->ether_shost[1] == 0 &&
1788	     eh->ether_shost[2] == 0 &&
1789	     eh->ether_shost[3] == 0 &&
1790	     eh->ether_shost[4] == 0 &&
1791	     eh->ether_shost[5] == 0) == 0) {
1792		(void) bridge_rtupdate(sc, eh->ether_shost,
1793		    src_if, 0, IFBAF_DYNAMIC);
1794	}
1795
1796	if ((bif->bif_flags & IFBIF_STP) != 0 &&
1797	    bif->bif_state == BSTP_IFSTATE_LEARNING) {
1798		m_freem(m);
1799		bridge_release_member(sc, bif, &psref);
1800		goto out;
1801	}
1802
1803	bridge_release_member(sc, bif, &psref);
1804
1805	/*
1806	 * At this point, the port either doesn't participate
1807	 * in spanning tree or it is in the forwarding state.
1808	 */
1809
1810	/*
1811	 * If the packet is unicast, destined for someone on
1812	 * "this" side of the bridge, drop it.
1813	 */
1814	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1815		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1816		if (src_if == dst_if) {
1817			m_freem(m);
1818			goto out;
1819		}
1820	} else {
1821		/* ...forward it to all interfaces. */
1822		sc->sc_if.if_imcasts++;
1823		dst_if = NULL;
1824	}
1825
1826	if (pfil_run_hooks(sc->sc_if.if_pfil, &m, src_if, PFIL_IN) != 0) {
1827		if (m != NULL)
1828			m_freem(m);
1829		goto out;
1830	}
1831	if (m == NULL)
1832		goto out;
1833
1834	if (dst_if == NULL) {
1835		bridge_broadcast(sc, src_if, m);
1836		goto out;
1837	}
1838
1839	m_put_rcvif_psref(src_if, &psref_src);
1840	src_if = NULL;
1841
1842	/*
1843	 * At this point, we're dealing with a unicast frame
1844	 * going to a different interface.
1845	 */
1846	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1847		m_freem(m);
1848		goto out;
1849	}
1850
1851	bif = bridge_lookup_member_if(sc, dst_if, &psref);
1852	if (bif == NULL) {
1853		/* Not a member of the bridge (anymore?) */
1854		m_freem(m);
1855		goto out;
1856	}
1857
1858	if (bif->bif_flags & IFBIF_STP) {
1859		switch (bif->bif_state) {
1860		case BSTP_IFSTATE_DISABLED:
1861		case BSTP_IFSTATE_BLOCKING:
1862			m_freem(m);
1863			bridge_release_member(sc, bif, &psref);
1864			goto out;
1865		}
1866	}
1867
1868	bridge_release_member(sc, bif, &psref);
1869
1870	/*
1871	 * Before enqueueing this packet to the destination interface,
1872	 * clear any in-bound checksum flags to prevent them from being
1873	 * misused as out-bound flags.
1874	 */
1875	m->m_pkthdr.csum_flags = 0;
1876
1877	ACQUIRE_GLOBAL_LOCKS();
1878	bridge_enqueue(sc, dst_if, m, 1);
1879	RELEASE_GLOBAL_LOCKS();
1880out:
1881	if (src_if != NULL)
1882		m_put_rcvif_psref(src_if, &psref_src);
1883	return;
1884}
1885
1886static bool
1887bstp_state_before_learning(struct bridge_iflist *bif)
1888{
1889	if (bif->bif_flags & IFBIF_STP) {
1890		switch (bif->bif_state) {
1891		case BSTP_IFSTATE_BLOCKING:
1892		case BSTP_IFSTATE_LISTENING:
1893		case BSTP_IFSTATE_DISABLED:
1894			return true;
1895		}
1896	}
1897	return false;
1898}
1899
1900static bool
1901bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1902{
1903	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1904
1905	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1906#if NCARP > 0
1907	    || (bif->bif_ifp->if_carp &&
1908	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1909#endif /* NCARP > 0 */
1910	    )
1911		return true;
1912
1913	return false;
1914}
1915
1916/*
1917 * bridge_input:
1918 *
1919 *	Receive input from a member interface.  Queue the packet for
1920 *	bridging if it is not for us.
1921 */
1922static void
1923bridge_input(struct ifnet *ifp, struct mbuf *m)
1924{
1925	struct bridge_softc *sc = ifp->if_bridge;
1926	struct bridge_iflist *bif;
1927	struct ether_header *eh;
1928	struct psref psref;
1929	int bound;
1930	DECLARE_LOCK_VARIABLE;
1931
1932	KASSERT(!cpu_intr_p());
1933
1934	if (__predict_false(sc == NULL) ||
1935	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1936		ACQUIRE_GLOBAL_LOCKS();
1937		ether_input(ifp, m);
1938		RELEASE_GLOBAL_LOCKS();
1939		return;
1940	}
1941
1942	bound = curlwp_bind();
1943	bif = bridge_lookup_member_if(sc, ifp, &psref);
1944	if (bif == NULL) {
1945		curlwp_bindx(bound);
1946		ACQUIRE_GLOBAL_LOCKS();
1947		ether_input(ifp, m);
1948		RELEASE_GLOBAL_LOCKS();
1949		return;
1950	}
1951
1952	eh = mtod(m, struct ether_header *);
1953
1954	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1955		if (memcmp(etherbroadcastaddr,
1956		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
1957			m->m_flags |= M_BCAST;
1958		else
1959			m->m_flags |= M_MCAST;
1960	}
1961
1962	/*
1963	 * A 'fast' path for packets addressed to interfaces that are
1964	 * part of this bridge.
1965	 */
1966	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
1967	    !bstp_state_before_learning(bif)) {
1968		struct bridge_iflist *_bif;
1969		struct ifnet *_ifp = NULL;
1970		int s;
1971		struct psref _psref;
1972
1973		BRIDGE_PSZ_RENTER(s);
1974		BRIDGE_IFLIST_READER_FOREACH(_bif, sc) {
1975			/* It is destined for us. */
1976			if (bridge_ourether(_bif, eh, 0)) {
1977				bridge_acquire_member(sc, _bif, &_psref);
1978				BRIDGE_PSZ_REXIT(s);
1979				if (_bif->bif_flags & IFBIF_LEARNING)
1980					(void) bridge_rtupdate(sc,
1981					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
1982				m_set_rcvif(m, _bif->bif_ifp);
1983				_ifp = _bif->bif_ifp;
1984				bridge_release_member(sc, _bif, &_psref);
1985				goto out;
1986			}
1987
1988			/* We just received a packet that we sent out. */
1989			if (bridge_ourether(_bif, eh, 1))
1990				break;
1991		}
1992		BRIDGE_PSZ_REXIT(s);
1993out:
1994
1995		if (_bif != NULL) {
1996			bridge_release_member(sc, bif, &psref);
1997			curlwp_bindx(bound);
1998			if (_ifp != NULL) {
1999				m->m_flags &= ~M_PROMISC;
2000				ACQUIRE_GLOBAL_LOCKS();
2001				ether_input(_ifp, m);
2002				RELEASE_GLOBAL_LOCKS();
2003			} else
2004				m_freem(m);
2005			return;
2006		}
2007	}
2008
2009	/* Tap off 802.1D packets; they do not get forwarded. */
2010	if (bif->bif_flags & IFBIF_STP &&
2011	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
2012		bstp_input(sc, bif, m);
2013		bridge_release_member(sc, bif, &psref);
2014		curlwp_bindx(bound);
2015		return;
2016	}
2017
2018	/*
2019	 * A normal switch would discard the packet here, but that's not what
2020	 * we've done historically. This also prevents some obnoxious behaviour.
2021	 */
2022	if (bstp_state_before_learning(bif)) {
2023		bridge_release_member(sc, bif, &psref);
2024		curlwp_bindx(bound);
2025		ACQUIRE_GLOBAL_LOCKS();
2026		ether_input(ifp, m);
2027		RELEASE_GLOBAL_LOCKS();
2028		return;
2029	}
2030
2031	bridge_release_member(sc, bif, &psref);
2032
2033	bridge_forward(sc, m);
2034
2035	curlwp_bindx(bound);
2036}
2037
2038/*
2039 * bridge_broadcast:
2040 *
2041 *	Send a frame to all interfaces that are members of
2042 *	the bridge, except for the one on which the packet
2043 *	arrived.
2044 */
2045static void
2046bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2047    struct mbuf *m)
2048{
2049	struct bridge_iflist *bif;
2050	struct mbuf *mc;
2051	struct ifnet *dst_if;
2052	bool bmcast;
2053	int s;
2054	DECLARE_LOCK_VARIABLE;
2055
2056	bmcast = m->m_flags & (M_BCAST|M_MCAST);
2057
2058	BRIDGE_PSZ_RENTER(s);
2059	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
2060		struct psref psref;
2061
2062		bridge_acquire_member(sc, bif, &psref);
2063		BRIDGE_PSZ_REXIT(s);
2064
2065		dst_if = bif->bif_ifp;
2066
2067		if (bif->bif_flags & IFBIF_STP) {
2068			switch (bif->bif_state) {
2069			case BSTP_IFSTATE_BLOCKING:
2070			case BSTP_IFSTATE_DISABLED:
2071				goto next;
2072			}
2073		}
2074
2075		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
2076			goto next;
2077
2078		if ((dst_if->if_flags & IFF_RUNNING) == 0)
2079			goto next;
2080
2081		if (dst_if != src_if) {
2082			mc = m_copypacket(m, M_DONTWAIT);
2083			if (mc == NULL) {
2084				sc->sc_if.if_oerrors++;
2085				goto next;
2086			}
2087			/*
2088			 * Before enqueueing this packet to the destination
2089			 * interface, clear any in-bound checksum flags to
2090			 * prevent them from being misused as out-bound flags.
2091			 */
2092			mc->m_pkthdr.csum_flags = 0;
2093
2094			ACQUIRE_GLOBAL_LOCKS();
2095			bridge_enqueue(sc, dst_if, mc, 1);
2096			RELEASE_GLOBAL_LOCKS();
2097		}
2098
2099		if (bmcast) {
2100			mc = m_copypacket(m, M_DONTWAIT);
2101			if (mc == NULL) {
2102				sc->sc_if.if_oerrors++;
2103				goto next;
2104			}
2105
2106			m_set_rcvif(mc, dst_if);
2107			mc->m_flags &= ~M_PROMISC;
2108
2109			ACQUIRE_GLOBAL_LOCKS();
2110			ether_input(dst_if, mc);
2111			RELEASE_GLOBAL_LOCKS();
2112		}
2113next:
2114		BRIDGE_PSZ_RENTER(s);
2115		bridge_release_member(sc, bif, &psref);
2116	}
2117	BRIDGE_PSZ_REXIT(s);
2118
2119	m_freem(m);
2120}
2121
2122static int
2123bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
2124    struct bridge_rtnode **brtp)
2125{
2126	struct bridge_rtnode *brt;
2127	int error;
2128
2129	if (sc->sc_brtcnt >= sc->sc_brtmax)
2130		return ENOSPC;
2131
2132	/*
2133	 * Allocate a new bridge forwarding node, and
2134	 * initialize the expiration time and Ethernet
2135	 * address.
2136	 */
2137	brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
2138	if (brt == NULL)
2139		return ENOMEM;
2140
2141	memset(brt, 0, sizeof(*brt));
2142	brt->brt_expire = time_uptime + sc->sc_brttimeout;
2143	brt->brt_flags = IFBAF_DYNAMIC;
2144	memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2145	PSLIST_ENTRY_INIT(brt, brt_list);
2146	PSLIST_ENTRY_INIT(brt, brt_hash);
2147
2148	BRIDGE_RT_LOCK(sc);
2149	error = bridge_rtnode_insert(sc, brt);
2150	BRIDGE_RT_UNLOCK(sc);
2151
2152	if (error != 0) {
2153		pool_put(&bridge_rtnode_pool, brt);
2154		return error;
2155	}
2156
2157	*brtp = brt;
2158	return 0;
2159}
2160
2161/*
2162 * bridge_rtupdate:
2163 *
2164 *	Add a bridge routing entry.
2165 */
2166static int
2167bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2168    struct ifnet *dst_if, int setflags, uint8_t flags)
2169{
2170	struct bridge_rtnode *brt;
2171	int s;
2172
2173again:
2174	/*
2175	 * A route for this destination might already exist.  If so,
2176	 * update it, otherwise create a new one.
2177	 */
2178	BRIDGE_RT_RENTER(s);
2179	brt = bridge_rtnode_lookup(sc, dst);
2180
2181	if (brt != NULL) {
2182		brt->brt_ifp = dst_if;
2183		if (setflags) {
2184			brt->brt_flags = flags;
2185			if (flags & IFBAF_STATIC)
2186				brt->brt_expire = 0;
2187			else
2188				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2189		} else {
2190			if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2191				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2192		}
2193	}
2194	BRIDGE_RT_REXIT(s);
2195
2196	if (brt == NULL) {
2197		int r;
2198
2199		r = bridge_rtalloc(sc, dst, &brt);
2200		if (r != 0)
2201			return r;
2202		goto again;
2203	}
2204
2205	return 0;
2206}
2207
2208/*
2209 * bridge_rtlookup:
2210 *
2211 *	Lookup the destination interface for an address.
2212 */
2213static struct ifnet *
2214bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2215{
2216	struct bridge_rtnode *brt;
2217	struct ifnet *ifs = NULL;
2218	int s;
2219
2220	BRIDGE_RT_RENTER(s);
2221	brt = bridge_rtnode_lookup(sc, addr);
2222	if (brt != NULL)
2223		ifs = brt->brt_ifp;
2224	BRIDGE_RT_REXIT(s);
2225
2226	return ifs;
2227}
2228
2229typedef bool (*bridge_iterate_cb_t)
2230    (struct bridge_softc *, struct bridge_rtnode *, bool *, void *);
2231
2232/*
2233 * bridge_rtlist_iterate_remove:
2234 *
2235 *	It iterates on sc->sc_rtlist and removes rtnodes of it which func
2236 *	callback judges to remove. Removals of rtnodes are done in a manner
2237 *	of pserialize. To this end, all kmem_* operations are placed out of
2238 *	mutexes.
2239 */
2240static void
2241bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2242{
2243	struct bridge_rtnode *brt;
2244	struct bridge_rtnode **brt_list;
2245	int i, count;
2246
2247retry:
2248	count = sc->sc_brtcnt;
2249	if (count == 0)
2250		return;
2251	brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2252
2253	BRIDGE_RT_LOCK(sc);
2254	if (__predict_false(sc->sc_brtcnt > count)) {
2255		/* The rtnodes increased, we need more memory */
2256		BRIDGE_RT_UNLOCK(sc);
2257		kmem_free(brt_list, sizeof(*brt_list) * count);
2258		goto retry;
2259	}
2260
2261	i = 0;
2262	/*
2263	 * We don't need to use a _SAFE variant here because we know
2264	 * that a removed item keeps its next pointer as-is thanks to
2265	 * pslist(9) and isn't freed in the loop.
2266	 */
2267	BRIDGE_RTLIST_WRITER_FOREACH(brt, sc) {
2268		bool need_break = false;
2269		if (func(sc, brt, &need_break, arg)) {
2270			bridge_rtnode_remove(sc, brt);
2271			brt_list[i++] = brt;
2272		}
2273		if (need_break)
2274			break;
2275	}
2276
2277	if (i > 0)
2278		BRIDGE_RT_PSZ_PERFORM(sc);
2279	BRIDGE_RT_UNLOCK(sc);
2280
2281	while (--i >= 0)
2282		bridge_rtnode_destroy(brt_list[i]);
2283
2284	kmem_free(brt_list, sizeof(*brt_list) * count);
2285}
2286
2287static bool
2288bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2289    bool *need_break, void *arg)
2290{
2291	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2292		/* Take into account of the subsequent removal */
2293		if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2294			*need_break = true;
2295		return true;
2296	} else
2297		return false;
2298}
2299
2300static void
2301bridge_rttrim0(struct bridge_softc *sc)
2302{
2303	bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2304}
2305
2306/*
2307 * bridge_rttrim:
2308 *
2309 *	Trim the routine table so that we have a number
2310 *	of routing entries less than or equal to the
2311 *	maximum number.
2312 */
2313static void
2314bridge_rttrim(struct bridge_softc *sc)
2315{
2316
2317	/* Make sure we actually need to do this. */
2318	if (sc->sc_brtcnt <= sc->sc_brtmax)
2319		return;
2320
2321	/* Force an aging cycle; this might trim enough addresses. */
2322	bridge_rtage(sc);
2323	if (sc->sc_brtcnt <= sc->sc_brtmax)
2324		return;
2325
2326	bridge_rttrim0(sc);
2327
2328	return;
2329}
2330
2331/*
2332 * bridge_timer:
2333 *
2334 *	Aging timer for the bridge.
2335 */
2336static void
2337bridge_timer(void *arg)
2338{
2339	struct bridge_softc *sc = arg;
2340
2341	workqueue_enqueue(sc->sc_rtage_wq, &sc->sc_rtage_wk, NULL);
2342}
2343
2344static void
2345bridge_rtage_work(struct work *wk, void *arg)
2346{
2347	struct bridge_softc *sc = arg;
2348
2349	KASSERT(wk == &sc->sc_rtage_wk);
2350
2351	bridge_rtage(sc);
2352
2353	if (sc->sc_if.if_flags & IFF_RUNNING)
2354		callout_reset(&sc->sc_brcallout,
2355		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2356}
2357
2358static bool
2359bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2360    bool *need_break, void *arg)
2361{
2362	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2363	    time_uptime >= brt->brt_expire)
2364		return true;
2365	else
2366		return false;
2367}
2368
2369/*
2370 * bridge_rtage:
2371 *
2372 *	Perform an aging cycle.
2373 */
2374static void
2375bridge_rtage(struct bridge_softc *sc)
2376{
2377	bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2378}
2379
2380
2381static bool
2382bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2383    bool *need_break, void *arg)
2384{
2385	int full = *(int*)arg;
2386
2387	if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2388		return true;
2389	else
2390		return false;
2391}
2392
2393/*
2394 * bridge_rtflush:
2395 *
2396 *	Remove all dynamic addresses from the bridge.
2397 */
2398static void
2399bridge_rtflush(struct bridge_softc *sc, int full)
2400{
2401	bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2402}
2403
/*
 * bridge_rtdaddr:
 *
 *	Remove the entry for the given Ethernet address from the table.
 *	Returns 0 on success, or ENOENT if no entry for "addr" exists.
 */
static int
bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
{
	struct bridge_rtnode *victim;
	int error = 0;

	BRIDGE_RT_LOCK(sc);
	victim = bridge_rtnode_lookup(sc, addr);
	if (victim != NULL) {
		/* Unlink the node and run the pserialize step under the lock. */
		bridge_rtnode_remove(sc, victim);
		BRIDGE_RT_PSZ_PERFORM(sc);
	} else {
		error = ENOENT;
	}
	BRIDGE_RT_UNLOCK(sc);

	/* The node itself is released only after the lock is dropped. */
	if (victim != NULL)
		bridge_rtnode_destroy(victim);

	return error;
}
2427
2428/*
2429 * bridge_rtdelete:
2430 *
2431 *	Delete routes to a speicifc member interface.
2432 */
2433static void
2434bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2435{
2436	struct bridge_rtnode *brt;
2437
2438	/* XXX pserialize_perform for each entry is slow */
2439again:
2440	BRIDGE_RT_LOCK(sc);
2441	BRIDGE_RTLIST_WRITER_FOREACH(brt, sc) {
2442		if (brt->brt_ifp == ifp)
2443			break;
2444	}
2445	if (brt == NULL) {
2446		BRIDGE_RT_UNLOCK(sc);
2447		return;
2448	}
2449	bridge_rtnode_remove(sc, brt);
2450	BRIDGE_RT_PSZ_PERFORM(sc);
2451	BRIDGE_RT_UNLOCK(sc);
2452
2453	bridge_rtnode_destroy(brt);
2454
2455	goto again;
2456}
2457
2458/*
2459 * bridge_rtable_init:
2460 *
2461 *	Initialize the route table for this bridge.
2462 */
2463static void
2464bridge_rtable_init(struct bridge_softc *sc)
2465{
2466	int i;
2467
2468	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2469	    KM_SLEEP);
2470
2471	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2472		PSLIST_INIT(&sc->sc_rthash[i]);
2473
2474	sc->sc_rthash_key = cprng_fast32();
2475
2476	PSLIST_INIT(&sc->sc_rtlist);
2477
2478	sc->sc_rtlist_psz = pserialize_create();
2479	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2480}
2481
2482/*
2483 * bridge_rtable_fini:
2484 *
2485 *	Deconstruct the route table for this bridge.
2486 */
2487static void
2488bridge_rtable_fini(struct bridge_softc *sc)
2489{
2490
2491	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2492	mutex_obj_free(sc->sc_rtlist_lock);
2493	pserialize_destroy(sc->sc_rtlist_psz);
2494}
2495
2496/*
2497 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2498 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2499 */
2500#define	mix(a, b, c)							\
2501do {									\
2502	a -= b; a -= c; a ^= (c >> 13);					\
2503	b -= c; b -= a; b ^= (a << 8);					\
2504	c -= a; c -= b; c ^= (b >> 13);					\
2505	a -= b; a -= c; a ^= (c >> 12);					\
2506	b -= c; b -= a; b ^= (a << 16);					\
2507	c -= a; c -= b; c ^= (b >> 5);					\
2508	a -= b; a -= c; a ^= (c >> 3);					\
2509	b -= c; b -= a; b ^= (a << 10);					\
2510	c -= a; c -= b; c ^= (b >> 15);					\
2511} while (/*CONSTCOND*/0)
2512
2513static inline uint32_t
2514bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2515{
2516	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2517
2518	b += addr[5] << 8;
2519	b += addr[4];
2520	a += addr[3] << 24;
2521	a += addr[2] << 16;
2522	a += addr[1] << 8;
2523	a += addr[0];
2524
2525	mix(a, b, c);
2526
2527	return (c & BRIDGE_RTHASH_MASK);
2528}
2529
2530#undef mix
2531
2532/*
2533 * bridge_rtnode_lookup:
2534 *
2535 *	Look up a bridge route node for the specified destination.
2536 */
2537static struct bridge_rtnode *
2538bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2539{
2540	struct bridge_rtnode *brt;
2541	uint32_t hash;
2542	int dir;
2543
2544	hash = bridge_rthash(sc, addr);
2545	BRIDGE_RTHASH_READER_FOREACH(brt, sc, hash) {
2546		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2547		if (dir == 0)
2548			return brt;
2549		if (dir > 0)
2550			return NULL;
2551	}
2552
2553	return NULL;
2554}
2555
2556/*
2557 * bridge_rtnode_insert:
2558 *
2559 *	Insert the specified bridge node into the route table.  We
2560 *	assume the entry is not already in the table.
2561 */
2562static int
2563bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2564{
2565	struct bridge_rtnode *lbrt, *prev = NULL;
2566	uint32_t hash;
2567
2568	KASSERT(BRIDGE_RT_LOCKED(sc));
2569
2570	hash = bridge_rthash(sc, brt->brt_addr);
2571	BRIDGE_RTHASH_WRITER_FOREACH(lbrt, sc, hash) {
2572		int dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2573		if (dir == 0)
2574			return EEXIST;
2575		if (dir > 0)
2576			break;
2577		prev = lbrt;
2578	}
2579	if (prev == NULL)
2580		BRIDGE_RTHASH_WRITER_INSERT_HEAD(sc, hash, brt);
2581	else
2582		BRIDGE_RTHASH_WRITER_INSERT_AFTER(prev, brt);
2583
2584	BRIDGE_RTLIST_WRITER_INSERT_HEAD(sc, brt);
2585	sc->sc_brtcnt++;
2586
2587	return 0;
2588}
2589
2590/*
2591 * bridge_rtnode_remove:
2592 *
2593 *	Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2594 */
2595static void
2596bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2597{
2598
2599	KASSERT(BRIDGE_RT_LOCKED(sc));
2600
2601	BRIDGE_RTHASH_WRITER_REMOVE(brt);
2602	BRIDGE_RTLIST_WRITER_REMOVE(brt);
2603	sc->sc_brtcnt--;
2604}
2605
2606/*
2607 * bridge_rtnode_destroy:
2608 *
2609 *	Destroy a bridge rtnode.
2610 */
2611static void
2612bridge_rtnode_destroy(struct bridge_rtnode *brt)
2613{
2614
2615	PSLIST_ENTRY_DESTROY(brt, brt_list);
2616	PSLIST_ENTRY_DESTROY(brt, brt_hash);
2617	pool_put(&bridge_rtnode_pool, brt);
2618}
2619
2620#if defined(BRIDGE_IPF)
2621extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2622extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2623
2624/*
2625 * Send bridge packets through IPF if they are one of the types IPF can deal
2626 * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2627 * question.)
2628 */
2629static int
2630bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2631{
2632	int snap, error;
2633	struct ether_header *eh1, eh2;
2634	struct llc llc1;
2635	uint16_t ether_type;
2636
2637	snap = 0;
2638	error = -1;	/* Default error if not error == 0 */
2639	eh1 = mtod(*mp, struct ether_header *);
2640	ether_type = ntohs(eh1->ether_type);
2641
2642	/*
2643	 * Check for SNAP/LLC.
2644	 */
2645	if (ether_type < ETHERMTU) {
2646		struct llc *llc2 = (struct llc *)(eh1 + 1);
2647
2648		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2649		    llc2->llc_dsap == LLC_SNAP_LSAP &&
2650		    llc2->llc_ssap == LLC_SNAP_LSAP &&
2651		    llc2->llc_control == LLC_UI) {
2652			ether_type = htons(llc2->llc_un.type_snap.ether_type);
2653			snap = 1;
2654		}
2655	}
2656
2657	/*
2658	 * If we're trying to filter bridge traffic, don't look at anything
2659	 * other than IP and ARP traffic.  If the filter doesn't understand
2660	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2661	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2662	 * but of course we don't have an AppleTalk filter to begin with.
2663	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2664	 * ARP traffic.)
2665	 */
2666	switch (ether_type) {
2667		case ETHERTYPE_ARP:
2668		case ETHERTYPE_REVARP:
2669			return 0; /* Automatically pass */
2670		case ETHERTYPE_IP:
2671# ifdef INET6
2672		case ETHERTYPE_IPV6:
2673# endif /* INET6 */
2674			break;
2675		default:
2676			goto bad;
2677	}
2678
2679	/* Strip off the Ethernet header and keep a copy. */
2680	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2681	m_adj(*mp, ETHER_HDR_LEN);
2682
2683	/* Strip off snap header, if present */
2684	if (snap) {
2685		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2686		m_adj(*mp, sizeof(struct llc));
2687	}
2688
2689	/*
2690	 * Check basic packet sanity and run IPF through pfil.
2691	 */
2692	KASSERT(!cpu_intr_p());
2693	switch (ether_type)
2694	{
2695	case ETHERTYPE_IP :
2696		error = bridge_ip_checkbasic(mp);
2697		if (error == 0)
2698			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2699		break;
2700# ifdef INET6
2701	case ETHERTYPE_IPV6 :
2702		error = bridge_ip6_checkbasic(mp);
2703		if (error == 0)
2704			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2705		break;
2706# endif
2707	default :
2708		error = 0;
2709		break;
2710	}
2711
2712	if (*mp == NULL)
2713		return error;
2714	if (error != 0)
2715		goto bad;
2716
2717	error = -1;
2718
2719	/*
2720	 * Finally, put everything back the way it was and return
2721	 */
2722	if (snap) {
2723		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2724		if (*mp == NULL)
2725			return error;
2726		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2727	}
2728
2729	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2730	if (*mp == NULL)
2731		return error;
2732	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2733
2734	return 0;
2735
2736    bad:
2737	m_freem(*mp);
2738	*mp = NULL;
2739	return error;
2740}
2741
2742/*
2743 * Perform basic checks on header size since
2744 * IPF assumes ip_input has already processed
2745 * it for it.  Cut-and-pasted from ip_input.c.
2746 * Given how simple the IPv6 version is,
2747 * does the IPv4 version really need to be
2748 * this complicated?
2749 *
2750 * XXX Should we update ipstat here, or not?
2751 * XXX Right now we update ipstat but not
2752 * XXX csum_counter.
2753 */
2754static int
2755bridge_ip_checkbasic(struct mbuf **mp)
2756{
2757	struct mbuf *m = *mp;
2758	struct ip *ip;
2759	int len, hlen;
2760
2761	if (*mp == NULL)
2762		return -1;
2763
2764	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2765		if ((m = m_copyup(m, sizeof(struct ip),
2766			(max_linkhdr + 3) & ~3)) == NULL) {
2767			/* XXXJRT new stat, please */
2768			ip_statinc(IP_STAT_TOOSMALL);
2769			goto bad;
2770		}
2771	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2772		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2773			ip_statinc(IP_STAT_TOOSMALL);
2774			goto bad;
2775		}
2776	}
2777	ip = mtod(m, struct ip *);
2778	if (ip == NULL) goto bad;
2779
2780	if (ip->ip_v != IPVERSION) {
2781		ip_statinc(IP_STAT_BADVERS);
2782		goto bad;
2783	}
2784	hlen = ip->ip_hl << 2;
2785	if (hlen < sizeof(struct ip)) { /* minimum header length */
2786		ip_statinc(IP_STAT_BADHLEN);
2787		goto bad;
2788	}
2789	if (hlen > m->m_len) {
2790		if ((m = m_pullup(m, hlen)) == 0) {
2791			ip_statinc(IP_STAT_BADHLEN);
2792			goto bad;
2793		}
2794		ip = mtod(m, struct ip *);
2795		if (ip == NULL) goto bad;
2796	}
2797
2798	switch (m->m_pkthdr.csum_flags &
2799	        ((m_get_rcvif_NOMPSAFE(m)->if_csum_flags_rx & M_CSUM_IPv4) |
2800	         M_CSUM_IPv4_BAD)) {
2801	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2802		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2803		goto bad;
2804
2805	case M_CSUM_IPv4:
2806		/* Checksum was okay. */
2807		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2808		break;
2809
2810	default:
2811		/* Must compute it ourselves. */
2812		/* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2813		if (in_cksum(m, hlen) != 0)
2814			goto bad;
2815		break;
2816	}
2817
2818	/* Retrieve the packet length. */
2819	len = ntohs(ip->ip_len);
2820
2821	/*
2822	 * Check for additional length bogosity
2823	 */
2824	if (len < hlen) {
2825		ip_statinc(IP_STAT_BADLEN);
2826		goto bad;
2827	}
2828
2829	/*
2830	 * Check that the amount of data in the buffers
2831	 * is as at least much as the IP header would have us expect.
2832	 * Drop packet if shorter than we expect.
2833	 */
2834	if (m->m_pkthdr.len < len) {
2835		ip_statinc(IP_STAT_TOOSHORT);
2836		goto bad;
2837	}
2838
2839	/* Checks out, proceed */
2840	*mp = m;
2841	return 0;
2842
2843    bad:
2844	*mp = m;
2845	return -1;
2846}
2847
2848# ifdef INET6
2849/*
2850 * Same as above, but for IPv6.
2851 * Cut-and-pasted from ip6_input.c.
2852 * XXX Should we update ip6stat, or not?
2853 */
2854static int
2855bridge_ip6_checkbasic(struct mbuf **mp)
2856{
2857	struct mbuf *m = *mp;
2858	struct ip6_hdr *ip6;
2859
2860	/*
2861	 * If the IPv6 header is not aligned, slurp it up into a new
2862	 * mbuf with space for link headers, in the event we forward
2863	 * it.  Otherwise, if it is aligned, make sure the entire base
2864	 * IPv6 header is in the first mbuf of the chain.
2865	 */
2866	if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2867		struct ifnet *inifp = m_get_rcvif_NOMPSAFE(m);
2868		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2869		                  (max_linkhdr + 3) & ~3)) == NULL) {
2870			/* XXXJRT new stat, please */
2871			ip6_statinc(IP6_STAT_TOOSMALL);
2872			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2873			goto bad;
2874		}
2875	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
2876		struct ifnet *inifp = m_get_rcvif_NOMPSAFE(m);
2877		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
2878			ip6_statinc(IP6_STAT_TOOSMALL);
2879			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2880			goto bad;
2881		}
2882	}
2883
2884	ip6 = mtod(m, struct ip6_hdr *);
2885
2886	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2887		ip6_statinc(IP6_STAT_BADVERS);
2888		in6_ifstat_inc(m_get_rcvif_NOMPSAFE(m), ifs6_in_hdrerr);
2889		goto bad;
2890	}
2891
2892	/* Checks out, proceed */
2893	*mp = m;
2894	return 0;
2895
2896    bad:
2897	*mp = m;
2898	return -1;
2899}
2900# endif /* INET6 */
2901#endif /* BRIDGE_IPF */
2902