if_bridge.c revision 1.137
1/*	$NetBSD: if_bridge.c,v 1.137 2017/10/25 04:17:34 ozaki-r Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by Jason L. Wright
53 * 4. The name of the author may not be used to endorse or promote products
54 *    derived from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 *
68 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69 */
70
71/*
72 * Network interface bridge support.
73 *
74 * TODO:
75 *
76 *	- Currently only supports Ethernet-like interfaces (Ethernet,
77 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
79 *	  consider heterogenous bridges).
80 */
81
82#include <sys/cdefs.h>
83__KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.137 2017/10/25 04:17:34 ozaki-r Exp $");
84
85#ifdef _KERNEL_OPT
86#include "opt_bridge_ipf.h"
87#include "opt_inet.h"
88#include "opt_net_mpsafe.h"
89#endif /* _KERNEL_OPT */
90
91#include <sys/param.h>
92#include <sys/kernel.h>
93#include <sys/mbuf.h>
94#include <sys/queue.h>
95#include <sys/socket.h>
96#include <sys/socketvar.h> /* for softnet_lock */
97#include <sys/sockio.h>
98#include <sys/systm.h>
99#include <sys/proc.h>
100#include <sys/pool.h>
101#include <sys/kauth.h>
102#include <sys/cpu.h>
103#include <sys/cprng.h>
104#include <sys/mutex.h>
105#include <sys/kmem.h>
106
107#include <net/bpf.h>
108#include <net/if.h>
109#include <net/if_dl.h>
110#include <net/if_types.h>
111#include <net/if_llc.h>
112
113#include <net/if_ether.h>
114#include <net/if_bridgevar.h>
115
116#if defined(BRIDGE_IPF)
117/* Used for bridge_ip[6]_checkbasic */
118#include <netinet/in.h>
119#include <netinet/in_systm.h>
120#include <netinet/ip.h>
121#include <netinet/ip_var.h>
122#include <netinet/ip_private.h>		/* XXX */
123
124#include <netinet/ip6.h>
125#include <netinet6/in6_var.h>
126#include <netinet6/ip6_var.h>
127#include <netinet6/ip6_private.h>	/* XXX */
128#endif /* BRIDGE_IPF */
129
130/*
131 * Size of the route hash table.  Must be a power of two.
132 */
133#ifndef BRIDGE_RTHASH_SIZE
134#define	BRIDGE_RTHASH_SIZE		1024
135#endif
136
137#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
138
139#include "carp.h"
140#if NCARP > 0
141#include <netinet/in.h>
142#include <netinet/in_var.h>
143#include <netinet/ip_carp.h>
144#endif
145
146#include "ioconf.h"
147
148__CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
149__CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
150__CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
151
152/*
153 * Maximum number of addresses to cache.
154 */
155#ifndef BRIDGE_RTABLE_MAX
156#define	BRIDGE_RTABLE_MAX		100
157#endif
158
159/*
160 * Spanning tree defaults.
161 */
162#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
163#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
164#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
165#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
166#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
167#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
168#define	BSTP_DEFAULT_PATH_COST		55
169
170/*
171 * Timeout (in seconds) for entries learned dynamically.
172 */
173#ifndef BRIDGE_RTABLE_TIMEOUT
174#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
175#endif
176
177/*
178 * Number of seconds between walks of the route list.
179 */
180#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
181#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
182#endif
183
/*
 * Locking helpers for the bridge routing (address) table.
 *
 * BRIDGE_RT_LOCK/BRIDGE_RT_UNLOCK take and drop sc_rtlist_lock, and are
 * no-ops while that pointer is NULL.  BRIDGE_RT_LOCKED is an expression
 * that is true when no lock exists or the caller owns it.
 * BRIDGE_RT_PSZ_PERFORM runs pserialize_perform() on sc_rtlist_psz when
 * one exists.
 *
 * The statement-like macros are wrapped in do { } while (0) so that they
 * behave as a single statement: a bare "if" expansion would capture the
 * "else" of an enclosing if/else at the call site.  Callers supply the
 * trailing semicolon.
 */
#define BRIDGE_RT_LOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_enter((_sc)->sc_rtlist_lock); \
				} while (0)
#define BRIDGE_RT_UNLOCK(_sc)	do {					\
					if ((_sc)->sc_rtlist_lock)	\
						mutex_exit((_sc)->sc_rtlist_lock); \
				} while (0)
#define BRIDGE_RT_LOCKED(_sc)	(!(_sc)->sc_rtlist_lock || \
				 mutex_owned((_sc)->sc_rtlist_lock))

#define BRIDGE_RT_PSZ_PERFORM(_sc) do {					\
					if ((_sc)->sc_rtlist_psz != NULL) \
						pserialize_perform((_sc)->sc_rtlist_psz); \
				} while (0)

/* Enter/leave a pserialize read section; __s holds the saved state. */
#define BRIDGE_RT_RENTER(__s)	do { __s = pserialize_read_enter(); } while (0)
#define BRIDGE_RT_REXIT(__s)	do { pserialize_read_exit(__s); } while (0)
197
198
/*
 * Global-lock helpers.
 *
 * With NET_MPSAFE defined all three macros expand to nothing.  Otherwise
 * DECLARE_LOCK_VARIABLE declares the local "__s" used to save the
 * interrupt priority level, ACQUIRE_GLOBAL_LOCKS() takes the kernel lock,
 * then softnet_lock, then raises to splsoftnet(), and
 * RELEASE_GLOBAL_LOCKS() undoes those steps in the reverse order.
 */
#ifdef NET_MPSAFE
#define DECLARE_LOCK_VARIABLE
#define ACQUIRE_GLOBAL_LOCKS()	do { } while (0)
#define RELEASE_GLOBAL_LOCKS()	do { } while (0)
#else
#define DECLARE_LOCK_VARIABLE	int __s
#define ACQUIRE_GLOBAL_LOCKS()	do {					\
					KERNEL_LOCK(1, NULL);		\
					mutex_enter(softnet_lock);	\
					__s = splsoftnet();		\
				} while (0)
#define RELEASE_GLOBAL_LOCKS()	do {					\
					splx(__s);			\
					mutex_exit(softnet_lock);	\
					KERNEL_UNLOCK_ONE(NULL);	\
				} while (0)
#endif
216
/* psref class used for member-interface references; set in bridgeattach(). */
struct psref_class *bridge_psref_class __read_mostly;

/* Seconds between walks of the route list (see BRIDGE_RTABLE_PRUNE_PERIOD). */
int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* Pool of struct bridge_rtnode; initialised in bridgeattach(). */
static struct pool bridge_rtnode_pool;
222
223static int	bridge_clone_create(struct if_clone *, int);
224static int	bridge_clone_destroy(struct ifnet *);
225
226static int	bridge_ioctl(struct ifnet *, u_long, void *);
227static int	bridge_init(struct ifnet *);
228static void	bridge_stop(struct ifnet *, int);
229static void	bridge_start(struct ifnet *);
230
231static void	bridge_input(struct ifnet *, struct mbuf *);
232static void	bridge_forward(struct bridge_softc *, struct mbuf *);
233
234static void	bridge_timer(void *);
235
236static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
237				 struct mbuf *);
238
239static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
240				struct ifnet *, int, uint8_t);
241static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
242static void	bridge_rttrim(struct bridge_softc *);
243static void	bridge_rtage(struct bridge_softc *);
244static void	bridge_rtage_work(struct work *, void *);
245static void	bridge_rtflush(struct bridge_softc *, int);
246static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
247static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
248
249static void	bridge_rtable_init(struct bridge_softc *);
250static void	bridge_rtable_fini(struct bridge_softc *);
251
252static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
253						  const uint8_t *);
254static int	bridge_rtnode_insert(struct bridge_softc *,
255				     struct bridge_rtnode *);
256static void	bridge_rtnode_remove(struct bridge_softc *,
257				     struct bridge_rtnode *);
258static void	bridge_rtnode_destroy(struct bridge_rtnode *);
259
260static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
261						  const char *name,
262						  struct psref *);
263static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
264						     struct ifnet *ifp,
265						     struct psref *);
266static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *,
267                                      struct psref *);
268static void	bridge_delete_member(struct bridge_softc *,
269				     struct bridge_iflist *);
270static void	bridge_acquire_member(struct bridge_softc *sc,
271                                      struct bridge_iflist *,
272                                      struct psref *);
273
274static int	bridge_ioctl_add(struct bridge_softc *, void *);
275static int	bridge_ioctl_del(struct bridge_softc *, void *);
276static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
277static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
278static int	bridge_ioctl_scache(struct bridge_softc *, void *);
279static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
280static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
281static int	bridge_ioctl_rts(struct bridge_softc *, void *);
282static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
283static int	bridge_ioctl_sto(struct bridge_softc *, void *);
284static int	bridge_ioctl_gto(struct bridge_softc *, void *);
285static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
286static int	bridge_ioctl_flush(struct bridge_softc *, void *);
287static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
288static int	bridge_ioctl_spri(struct bridge_softc *, void *);
289static int	bridge_ioctl_ght(struct bridge_softc *, void *);
290static int	bridge_ioctl_sht(struct bridge_softc *, void *);
291static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
292static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
293static int	bridge_ioctl_gma(struct bridge_softc *, void *);
294static int	bridge_ioctl_sma(struct bridge_softc *, void *);
295static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
296static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
297#if defined(BRIDGE_IPF)
298static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
299static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
300static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
301static int	bridge_ip_checkbasic(struct mbuf **mp);
302# ifdef INET6
303static int	bridge_ip6_checkbasic(struct mbuf **mp);
304# endif /* INET6 */
305#endif /* BRIDGE_IPF */
306
/*
 * Descriptor for one SIOC[GS]DRVSPEC sub-command; see bridge_ioctl()
 * for how each field is interpreted.
 */
struct bridge_control {
	int	(*bc_func)(struct bridge_softc *, void *);	/* handler */
	int	bc_argsize;	/* required ifd_len for non-XLATE commands */
	int	bc_flags;	/* BC_F_* flags below */
};

#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
#define BC_F_XLATEIN		0x08	/* xlate arguments in */
#define BC_F_XLATEOUT		0x10	/* xlate arguments out */
318
/*
 * Dispatch table for SIOC[GS]DRVSPEC, indexed by ifd_cmd.
 *
 * Designated initialisers are used, so any index that is not listed
 * (including BRDGGFILT/BRDGSFILT when BRIDGE_IPF is not defined) is
 * zero-filled and has a NULL bc_func.
 */
static const struct bridge_control bridge_control_table[] = {
[BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
[BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
[BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
[BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},

[OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
[OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},

[BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
[BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},

[BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},

[BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},

[BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},

[BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
#if defined(BRIDGE_IPF)
[BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
[BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
#endif /* BRIDGE_IPF */
[BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
[BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
};

/* Number of slots in bridge_control_table[]; upper bound for ifd_cmd. */
static const int bridge_control_table_size = __arraycount(bridge_control_table);

/* Cloner registered by bridgeattach() for "bridge" interfaces. */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
368
369/*
370 * bridgeattach:
371 *
372 *	Pseudo-device attach routine.
373 */
374void
375bridgeattach(int n)
376{
377
378	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
379	    0, 0, 0, "brtpl", NULL, IPL_NET);
380
381	bridge_psref_class = psref_class_create("bridge", IPL_SOFTNET);
382
383	if_clone_attach(&bridge_cloner);
384}
385
386/*
387 * bridge_clone_create:
388 *
389 *	Create a new bridge instance.
390 */
391static int
392bridge_clone_create(struct if_clone *ifc, int unit)
393{
394	struct bridge_softc *sc;
395	struct ifnet *ifp;
396	int error;
397
398	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
399	ifp = &sc->sc_if;
400
401	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
402	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
403	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
404	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
405	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
406	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
407	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
408	sc->sc_filter_flags = 0;
409
410	/* Initialize our routing table. */
411	bridge_rtable_init(sc);
412
413	error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
414	    bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
415	if (error)
416		panic("%s: workqueue_create %d\n", __func__, error);
417
418	callout_init(&sc->sc_brcallout, 0);
419	callout_init(&sc->sc_bstpcallout, 0);
420
421	mutex_init(&sc->sc_iflist_psref.bip_lock, MUTEX_DEFAULT, IPL_NONE);
422	PSLIST_INIT(&sc->sc_iflist_psref.bip_iflist);
423	sc->sc_iflist_psref.bip_psz = pserialize_create();
424
425	if_initname(ifp, ifc->ifc_name, unit);
426	ifp->if_softc = sc;
427	ifp->if_extflags = IFEF_OUTPUT_MPSAFE;
428	ifp->if_mtu = ETHERMTU;
429	ifp->if_ioctl = bridge_ioctl;
430	ifp->if_output = bridge_output;
431	ifp->if_start = bridge_start;
432	ifp->if_stop = bridge_stop;
433	ifp->if_init = bridge_init;
434	ifp->if_type = IFT_BRIDGE;
435	ifp->if_addrlen = 0;
436	ifp->if_dlt = DLT_EN10MB;
437	ifp->if_hdrlen = ETHER_HDR_LEN;
438
439	error = if_initialize(ifp);
440	if (error != 0) {
441		pserialize_destroy(sc->sc_iflist_psref.bip_psz);
442		mutex_destroy(&sc->sc_iflist_psref.bip_lock);
443		callout_destroy(&sc->sc_brcallout);
444		callout_destroy(&sc->sc_bstpcallout);
445		workqueue_destroy(sc->sc_rtage_wq);
446		bridge_rtable_fini(sc);
447		kmem_free(sc, sizeof(*sc));
448
449		return error;
450	}
451	if_register(ifp);
452
453	if_alloc_sadl(ifp);
454
455	return 0;
456}
457
458/*
459 * bridge_clone_destroy:
460 *
461 *	Destroy a bridge instance.
462 */
463static int
464bridge_clone_destroy(struct ifnet *ifp)
465{
466	struct bridge_softc *sc = ifp->if_softc;
467	struct bridge_iflist *bif;
468	int s;
469
470	s = splsoftnet();
471
472	bridge_stop(ifp, 1);
473
474	BRIDGE_LOCK(sc);
475	for (;;) {
476		bif = PSLIST_WRITER_FIRST(&sc->sc_iflist_psref.bip_iflist, struct bridge_iflist,
477		    bif_next);
478		if (bif == NULL)
479			break;
480		bridge_delete_member(sc, bif);
481	}
482	PSLIST_DESTROY(&sc->sc_iflist_psref.bip_iflist);
483	BRIDGE_UNLOCK(sc);
484
485	splx(s);
486
487	if_detach(ifp);
488
489	/* Tear down the routing table. */
490	bridge_rtable_fini(sc);
491
492	pserialize_destroy(sc->sc_iflist_psref.bip_psz);
493	mutex_destroy(&sc->sc_iflist_psref.bip_lock);
494	callout_destroy(&sc->sc_brcallout);
495	callout_destroy(&sc->sc_bstpcallout);
496	workqueue_destroy(sc->sc_rtage_wq);
497	kmem_free(sc, sizeof(*sc));
498
499	return 0;
500}
501
502/*
503 * bridge_ioctl:
504 *
505 *	Handle a control request from the operator.
506 */
507static int
508bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
509{
510	struct bridge_softc *sc = ifp->if_softc;
511	struct lwp *l = curlwp;	/* XXX */
512	union {
513		struct ifbreq ifbreq;
514		struct ifbifconf ifbifconf;
515		struct ifbareq ifbareq;
516		struct ifbaconf ifbaconf;
517		struct ifbrparam ifbrparam;
518	} args;
519	struct ifdrv *ifd = (struct ifdrv *) data;
520	const struct bridge_control *bc = NULL; /* XXXGCC */
521	int s, error = 0;
522
523	/* Authorize command before calling splsoftnet(). */
524	switch (cmd) {
525	case SIOCGDRVSPEC:
526	case SIOCSDRVSPEC:
527		if (ifd->ifd_cmd >= bridge_control_table_size
528		    || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
529			error = EINVAL;
530			return error;
531		}
532
533		/* We only care about BC_F_SUSER at this point. */
534		if ((bc->bc_flags & BC_F_SUSER) == 0)
535			break;
536
537		error = kauth_authorize_network(l->l_cred,
538		    KAUTH_NETWORK_INTERFACE_BRIDGE,
539		    cmd == SIOCGDRVSPEC ?
540		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
541		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
542		     ifd, NULL, NULL);
543		if (error)
544			return error;
545
546		break;
547	}
548
549	s = splsoftnet();
550
551	switch (cmd) {
552	case SIOCGDRVSPEC:
553	case SIOCSDRVSPEC:
554		KASSERT(bc != NULL);
555		if (cmd == SIOCGDRVSPEC &&
556		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
557			error = EINVAL;
558			break;
559		}
560		else if (cmd == SIOCSDRVSPEC &&
561		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
562			error = EINVAL;
563			break;
564		}
565
566		/* BC_F_SUSER is checked above, before splsoftnet(). */
567
568		if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
569		    && (ifd->ifd_len != bc->bc_argsize
570			|| ifd->ifd_len > sizeof(args))) {
571			error = EINVAL;
572			break;
573		}
574
575		memset(&args, 0, sizeof(args));
576		if (bc->bc_flags & BC_F_COPYIN) {
577			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
578			if (error)
579				break;
580		} else if (bc->bc_flags & BC_F_XLATEIN) {
581			args.ifbifconf.ifbic_len = ifd->ifd_len;
582			args.ifbifconf.ifbic_buf = ifd->ifd_data;
583		}
584
585		error = (*bc->bc_func)(sc, &args);
586		if (error)
587			break;
588
589		if (bc->bc_flags & BC_F_COPYOUT) {
590			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
591		} else if (bc->bc_flags & BC_F_XLATEOUT) {
592			ifd->ifd_len = args.ifbifconf.ifbic_len;
593			ifd->ifd_data = args.ifbifconf.ifbic_buf;
594		}
595		break;
596
597	case SIOCSIFFLAGS:
598		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
599			break;
600		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
601		case IFF_RUNNING:
602			/*
603			 * If interface is marked down and it is running,
604			 * then stop and disable it.
605			 */
606			(*ifp->if_stop)(ifp, 1);
607			break;
608		case IFF_UP:
609			/*
610			 * If interface is marked up and it is stopped, then
611			 * start it.
612			 */
613			error = (*ifp->if_init)(ifp);
614			break;
615		default:
616			break;
617		}
618		break;
619
620	case SIOCSIFMTU:
621		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
622			error = 0;
623		break;
624
625	default:
626		error = ifioctl_common(ifp, cmd, data);
627		break;
628	}
629
630	splx(s);
631
632	return error;
633}
634
635/*
636 * bridge_lookup_member:
637 *
638 *	Lookup a bridge member interface.
639 */
640static struct bridge_iflist *
641bridge_lookup_member(struct bridge_softc *sc, const char *name, struct psref *psref)
642{
643	struct bridge_iflist *bif;
644	struct ifnet *ifp;
645	int s;
646
647	BRIDGE_PSZ_RENTER(s);
648
649	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
650		ifp = bif->bif_ifp;
651		if (strcmp(ifp->if_xname, name) == 0)
652			break;
653	}
654	if (bif != NULL)
655		bridge_acquire_member(sc, bif, psref);
656
657	BRIDGE_PSZ_REXIT(s);
658
659	return bif;
660}
661
662/*
663 * bridge_lookup_member_if:
664 *
665 *	Lookup a bridge member interface by ifnet*.
666 */
667static struct bridge_iflist *
668bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp,
669    struct psref *psref)
670{
671	struct bridge_iflist *bif;
672	int s;
673
674	BRIDGE_PSZ_RENTER(s);
675
676	bif = member_ifp->if_bridgeif;
677	if (bif != NULL) {
678		psref_acquire(psref, &bif->bif_psref,
679		    bridge_psref_class);
680	}
681
682	BRIDGE_PSZ_REXIT(s);
683
684	return bif;
685}
686
/*
 * bridge_acquire_member:
 *
 *	Take a passive reference on the specified member interface using
 *	bridge_psref_class.  The sc argument is not used.
 */
static void
bridge_acquire_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    struct psref *psref)
{

	psref_acquire(psref, &bif->bif_psref, bridge_psref_class);
}
694
/*
 * bridge_release_member:
 *
 *	Release the specified member interface, i.e. drop the passive
 *	reference recorded in psref.  The sc argument is not used.
 */
static void
bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    struct psref *psref)
{

	psref_release(psref, &bif->bif_psref, bridge_psref_class);
}
707
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	Must be called with the bridge lock held (asserted).  The lock is
 *	dropped while the psref target is destroyed and the member is
 *	freed, and is taken again before returning, so it is not held
 *	continuously across the call.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
	struct ifnet *ifs = bif->bif_ifp;

	KASSERT(BRIDGE_LOCKED(sc));

	/* Detach the interface from the bridge: restore its input hook. */
	ifs->_if_input = ether_input;
	ifs->if_bridge = NULL;
	ifs->if_bridgeif = NULL;

	/* Unlink from the member list, then run the pserialize step. */
	PSLIST_WRITER_REMOVE(bif, bif_next);
	BRIDGE_PSZ_PERFORM(sc);
	BRIDGE_UNLOCK(sc);

	/* Done with the bridge lock released. */
	psref_target_destroy(&bif->bif_psref, bridge_psref_class);

	PSLIST_ENTRY_DESTROY(bif, bif_next);
	kmem_free(bif, sizeof(*bif));

	/* Hand the lock back to the caller in the state it expects. */
	BRIDGE_LOCK(sc);
}
735
736static int
737bridge_ioctl_add(struct bridge_softc *sc, void *arg)
738{
739	struct ifbreq *req = arg;
740	struct bridge_iflist *bif = NULL;
741	struct ifnet *ifs;
742	int error = 0;
743	struct psref psref;
744
745	ifs = if_get(req->ifbr_ifsname, &psref);
746	if (ifs == NULL)
747		return ENOENT;
748
749	if (ifs->if_bridge == sc) {
750		error = EEXIST;
751		goto out;
752	}
753
754	if (ifs->if_bridge != NULL) {
755		error = EBUSY;
756		goto out;
757	}
758
759	if (ifs->_if_input != ether_input) {
760		error = EINVAL;
761		goto out;
762	}
763
764	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
765	if ((ifs->if_flags & IFF_SIMPLEX) == 0) {
766		error = EINVAL;
767		goto out;
768	}
769
770	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
771
772	switch (ifs->if_type) {
773	case IFT_ETHER:
774		if (sc->sc_if.if_mtu != ifs->if_mtu) {
775			error = EINVAL;
776			goto out;
777		}
778		/* FALLTHROUGH */
779	case IFT_L2TP:
780		if ((error = ether_enable_vlan_mtu(ifs)) > 0)
781			goto out;
782		/*
783		 * Place the interface into promiscuous mode.
784		 */
785		error = ifpromisc(ifs, 1);
786		if (error)
787			goto out;
788		break;
789	default:
790		error = EINVAL;
791		goto out;
792	}
793
794	bif->bif_ifp = ifs;
795	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
796	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
797	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
798	PSLIST_ENTRY_INIT(bif, bif_next);
799	psref_target_init(&bif->bif_psref, bridge_psref_class);
800
801	BRIDGE_LOCK(sc);
802
803	ifs->if_bridge = sc;
804	ifs->if_bridgeif = bif;
805	PSLIST_WRITER_INSERT_HEAD(&sc->sc_iflist_psref.bip_iflist, bif, bif_next);
806	ifs->_if_input = bridge_input;
807
808	BRIDGE_UNLOCK(sc);
809
810	if (sc->sc_if.if_flags & IFF_RUNNING)
811		bstp_initialization(sc);
812	else
813		bstp_stop(sc);
814
815 out:
816	if_put(ifs, &psref);
817	if (error) {
818		if (bif != NULL)
819			kmem_free(bif, sizeof(*bif));
820	}
821	return error;
822}
823
824static int
825bridge_ioctl_del(struct bridge_softc *sc, void *arg)
826{
827	struct ifbreq *req = arg;
828	const char *name = req->ifbr_ifsname;
829	struct bridge_iflist *bif;
830	struct ifnet *ifs;
831
832	BRIDGE_LOCK(sc);
833
834	/*
835	 * Don't use bridge_lookup_member. We want to get a member
836	 * with bif_refs == 0.
837	 */
838	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
839		ifs = bif->bif_ifp;
840		if (strcmp(ifs->if_xname, name) == 0)
841			break;
842	}
843
844	if (bif == NULL) {
845		BRIDGE_UNLOCK(sc);
846		return ENOENT;
847	}
848
849	bridge_delete_member(sc, bif);
850
851	BRIDGE_UNLOCK(sc);
852
853	switch (ifs->if_type) {
854	case IFT_ETHER:
855	case IFT_L2TP:
856		/*
857		 * Take the interface out of promiscuous mode.
858		 * Don't call it with holding a spin lock.
859		 */
860		(void) ifpromisc(ifs, 0);
861		(void) ether_disable_vlan_mtu(ifs);
862		break;
863	default:
864#ifdef DIAGNOSTIC
865		panic("bridge_delete_member: impossible");
866#endif
867		break;
868	}
869
870	bridge_rtdelete(sc, ifs);
871
872	if (sc->sc_if.if_flags & IFF_RUNNING)
873		bstp_initialization(sc);
874
875	return 0;
876}
877
878static int
879bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
880{
881	struct ifbreq *req = arg;
882	struct bridge_iflist *bif;
883	struct psref psref;
884
885	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
886	if (bif == NULL)
887		return ENOENT;
888
889	req->ifbr_ifsflags = bif->bif_flags;
890	req->ifbr_state = bif->bif_state;
891	req->ifbr_priority = bif->bif_priority;
892	req->ifbr_path_cost = bif->bif_path_cost;
893	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
894
895	bridge_release_member(sc, bif, &psref);
896
897	return 0;
898}
899
900static int
901bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
902{
903	struct ifbreq *req = arg;
904	struct bridge_iflist *bif;
905	struct psref psref;
906
907	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
908	if (bif == NULL)
909		return ENOENT;
910
911	if (req->ifbr_ifsflags & IFBIF_STP) {
912		switch (bif->bif_ifp->if_type) {
913		case IFT_ETHER:
914		case IFT_L2TP:
915			/* These can do spanning tree. */
916			break;
917
918		default:
919			/* Nothing else can. */
920			bridge_release_member(sc, bif, &psref);
921			return EINVAL;
922		}
923	}
924
925	bif->bif_flags = req->ifbr_ifsflags;
926
927	bridge_release_member(sc, bif, &psref);
928
929	if (sc->sc_if.if_flags & IFF_RUNNING)
930		bstp_initialization(sc);
931
932	return 0;
933}
934
935static int
936bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
937{
938	struct ifbrparam *param = arg;
939
940	sc->sc_brtmax = param->ifbrp_csize;
941	bridge_rttrim(sc);
942
943	return 0;
944}
945
946static int
947bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
948{
949	struct ifbrparam *param = arg;
950
951	param->ifbrp_csize = sc->sc_brtmax;
952
953	return 0;
954}
955
956static int
957bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
958{
959	struct ifbifconf *bifc = arg;
960	struct bridge_iflist *bif;
961	struct ifbreq *breqs;
962	int i, count, error = 0;
963
964retry:
965	BRIDGE_LOCK(sc);
966	count = 0;
967	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
968		count++;
969	BRIDGE_UNLOCK(sc);
970
971	if (count == 0) {
972		bifc->ifbic_len = 0;
973		return 0;
974	}
975
976	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
977		/* Tell that a larger buffer is needed */
978		bifc->ifbic_len = sizeof(*breqs) * count;
979		return 0;
980	}
981
982	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
983
984	BRIDGE_LOCK(sc);
985
986	i = 0;
987	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
988		i++;
989	if (i > count) {
990		/*
991		 * The number of members has been increased.
992		 * We need more memory!
993		 */
994		BRIDGE_UNLOCK(sc);
995		kmem_free(breqs, sizeof(*breqs) * count);
996		goto retry;
997	}
998
999	i = 0;
1000	BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
1001		struct ifbreq *breq = &breqs[i++];
1002		memset(breq, 0, sizeof(*breq));
1003
1004		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1005		    sizeof(breq->ifbr_ifsname));
1006		breq->ifbr_ifsflags = bif->bif_flags;
1007		breq->ifbr_state = bif->bif_state;
1008		breq->ifbr_priority = bif->bif_priority;
1009		breq->ifbr_path_cost = bif->bif_path_cost;
1010		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1011	}
1012
1013	/* Don't call copyout with holding the mutex */
1014	BRIDGE_UNLOCK(sc);
1015
1016	for (i = 0; i < count; i++) {
1017		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1018		if (error)
1019			break;
1020	}
1021	bifc->ifbic_len = sizeof(*breqs) * i;
1022
1023	kmem_free(breqs, sizeof(*breqs) * count);
1024
1025	return error;
1026}
1027
1028static int
1029bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1030{
1031	struct ifbaconf *bac = arg;
1032	struct bridge_rtnode *brt;
1033	struct ifbareq bareq;
1034	int count = 0, error = 0, len;
1035
1036	if (bac->ifbac_len == 0)
1037		return 0;
1038
1039	BRIDGE_RT_LOCK(sc);
1040
1041	len = bac->ifbac_len;
1042	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1043		if (len < sizeof(bareq))
1044			goto out;
1045		memset(&bareq, 0, sizeof(bareq));
1046		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1047		    sizeof(bareq.ifba_ifsname));
1048		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1049		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1050			bareq.ifba_expire = brt->brt_expire - time_uptime;
1051		} else
1052			bareq.ifba_expire = 0;
1053		bareq.ifba_flags = brt->brt_flags;
1054
1055		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1056		if (error)
1057			goto out;
1058		count++;
1059		len -= sizeof(bareq);
1060	}
1061 out:
1062	BRIDGE_RT_UNLOCK(sc);
1063
1064	bac->ifbac_len = sizeof(bareq) * count;
1065	return error;
1066}
1067
1068static int
1069bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1070{
1071	struct ifbareq *req = arg;
1072	struct bridge_iflist *bif;
1073	int error;
1074	struct psref psref;
1075
1076	bif = bridge_lookup_member(sc, req->ifba_ifsname, &psref);
1077	if (bif == NULL)
1078		return ENOENT;
1079
1080	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1081	    req->ifba_flags);
1082
1083	bridge_release_member(sc, bif, &psref);
1084
1085	return error;
1086}
1087
1088static int
1089bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1090{
1091	struct ifbrparam *param = arg;
1092
1093	sc->sc_brttimeout = param->ifbrp_ctime;
1094
1095	return 0;
1096}
1097
1098static int
1099bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1100{
1101	struct ifbrparam *param = arg;
1102
1103	param->ifbrp_ctime = sc->sc_brttimeout;
1104
1105	return 0;
1106}
1107
1108static int
1109bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1110{
1111	struct ifbareq *req = arg;
1112
1113	return (bridge_rtdaddr(sc, req->ifba_dst));
1114}
1115
1116static int
1117bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1118{
1119	struct ifbreq *req = arg;
1120
1121	bridge_rtflush(sc, req->ifbr_ifsflags);
1122
1123	return 0;
1124}
1125
1126static int
1127bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1128{
1129	struct ifbrparam *param = arg;
1130
1131	param->ifbrp_prio = sc->sc_bridge_priority;
1132
1133	return 0;
1134}
1135
1136static int
1137bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1138{
1139	struct ifbrparam *param = arg;
1140
1141	sc->sc_bridge_priority = param->ifbrp_prio;
1142
1143	if (sc->sc_if.if_flags & IFF_RUNNING)
1144		bstp_initialization(sc);
1145
1146	return 0;
1147}
1148
1149static int
1150bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1151{
1152	struct ifbrparam *param = arg;
1153
1154	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1155
1156	return 0;
1157}
1158
1159static int
1160bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1161{
1162	struct ifbrparam *param = arg;
1163
1164	if (param->ifbrp_hellotime == 0)
1165		return EINVAL;
1166	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1167
1168	if (sc->sc_if.if_flags & IFF_RUNNING)
1169		bstp_initialization(sc);
1170
1171	return 0;
1172}
1173
1174static int
1175bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1176{
1177	struct ifbrparam *param = arg;
1178
1179	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1180
1181	return 0;
1182}
1183
1184static int
1185bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1186{
1187	struct ifbrparam *param = arg;
1188
1189	if (param->ifbrp_fwddelay == 0)
1190		return EINVAL;
1191	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1192
1193	if (sc->sc_if.if_flags & IFF_RUNNING)
1194		bstp_initialization(sc);
1195
1196	return 0;
1197}
1198
1199static int
1200bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1201{
1202	struct ifbrparam *param = arg;
1203
1204	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1205
1206	return 0;
1207}
1208
1209static int
1210bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1211{
1212	struct ifbrparam *param = arg;
1213
1214	if (param->ifbrp_maxage == 0)
1215		return EINVAL;
1216	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1217
1218	if (sc->sc_if.if_flags & IFF_RUNNING)
1219		bstp_initialization(sc);
1220
1221	return 0;
1222}
1223
1224static int
1225bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1226{
1227	struct ifbreq *req = arg;
1228	struct bridge_iflist *bif;
1229	struct psref psref;
1230
1231	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1232	if (bif == NULL)
1233		return ENOENT;
1234
1235	bif->bif_priority = req->ifbr_priority;
1236
1237	if (sc->sc_if.if_flags & IFF_RUNNING)
1238		bstp_initialization(sc);
1239
1240	bridge_release_member(sc, bif, &psref);
1241
1242	return 0;
1243}
1244
1245#if defined(BRIDGE_IPF)
1246static int
1247bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1248{
1249	struct ifbrparam *param = arg;
1250
1251	param->ifbrp_filter = sc->sc_filter_flags;
1252
1253	return 0;
1254}
1255
1256static int
1257bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1258{
1259	struct ifbrparam *param = arg;
1260	uint32_t nflags, oflags;
1261
1262	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1263		return EINVAL;
1264
1265	nflags = param->ifbrp_filter;
1266	oflags = sc->sc_filter_flags;
1267
1268	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1269		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1270			sc->sc_if.if_pfil);
1271	}
1272	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1273		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1274			sc->sc_if.if_pfil);
1275	}
1276
1277	sc->sc_filter_flags = nflags;
1278
1279	return 0;
1280}
1281#endif /* BRIDGE_IPF */
1282
1283static int
1284bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1285{
1286	struct ifbreq *req = arg;
1287	struct bridge_iflist *bif;
1288	struct psref psref;
1289
1290	bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1291	if (bif == NULL)
1292		return ENOENT;
1293
1294	bif->bif_path_cost = req->ifbr_path_cost;
1295
1296	if (sc->sc_if.if_flags & IFF_RUNNING)
1297		bstp_initialization(sc);
1298
1299	bridge_release_member(sc, bif, &psref);
1300
1301	return 0;
1302}
1303
1304/*
1305 * bridge_ifdetach:
1306 *
1307 *	Detach an interface from a bridge.  Called when a member
1308 *	interface is detaching.
1309 */
1310void
1311bridge_ifdetach(struct ifnet *ifp)
1312{
1313	struct bridge_softc *sc = ifp->if_bridge;
1314	struct ifbreq breq;
1315
1316	/* ioctl_lock should prevent this from happening */
1317	KASSERT(sc != NULL);
1318
1319	memset(&breq, 0, sizeof(breq));
1320	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1321
1322	(void) bridge_ioctl_del(sc, &breq);
1323}
1324
1325/*
1326 * bridge_init:
1327 *
1328 *	Initialize a bridge interface.
1329 */
1330static int
1331bridge_init(struct ifnet *ifp)
1332{
1333	struct bridge_softc *sc = ifp->if_softc;
1334
1335	if (ifp->if_flags & IFF_RUNNING)
1336		return 0;
1337
1338	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1339	    bridge_timer, sc);
1340
1341	ifp->if_flags |= IFF_RUNNING;
1342	bstp_initialization(sc);
1343	return 0;
1344}
1345
1346/*
1347 * bridge_stop:
1348 *
1349 *	Stop the bridge interface.
1350 */
1351static void
1352bridge_stop(struct ifnet *ifp, int disable)
1353{
1354	struct bridge_softc *sc = ifp->if_softc;
1355
1356	if ((ifp->if_flags & IFF_RUNNING) == 0)
1357		return;
1358
1359	callout_stop(&sc->sc_brcallout);
1360	bstp_stop(sc);
1361
1362	bridge_rtflush(sc, IFBF_FLUSHDYN);
1363
1364	ifp->if_flags &= ~IFF_RUNNING;
1365}
1366
1367/*
1368 * bridge_enqueue:
1369 *
1370 *	Enqueue a packet on a bridge member interface.
1371 */
1372void
1373bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1374    int runfilt)
1375{
1376	int len, error;
1377	short mflags;
1378
1379	/*
1380	 * Clear any in-bound checksum flags for this packet.
1381	 */
1382	m->m_pkthdr.csum_flags = 0;
1383
1384	if (runfilt) {
1385		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1386		    dst_ifp, PFIL_OUT) != 0) {
1387			if (m != NULL)
1388				m_freem(m);
1389			return;
1390		}
1391		if (m == NULL)
1392			return;
1393	}
1394
1395#ifdef ALTQ
1396	KERNEL_LOCK(1, NULL);
1397	/*
1398	 * If ALTQ is enabled on the member interface, do
1399	 * classification; the queueing discipline might
1400	 * not require classification, but might require
1401	 * the address family/header pointer in the pktattr.
1402	 */
1403	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1404		/* XXX IFT_ETHER */
1405		altq_etherclassify(&dst_ifp->if_snd, m);
1406	}
1407	KERNEL_UNLOCK_ONE(NULL);
1408#endif /* ALTQ */
1409
1410	len = m->m_pkthdr.len;
1411	mflags = m->m_flags;
1412
1413	error = if_transmit_lock(dst_ifp, m);
1414	if (error) {
1415		/* mbuf is already freed */
1416		sc->sc_if.if_oerrors++;
1417		return;
1418	}
1419
1420	sc->sc_if.if_opackets++;
1421	sc->sc_if.if_obytes += len;
1422	if (mflags & M_MCAST)
1423		sc->sc_if.if_omcasts++;
1424}
1425
/*
 * bridge_output:
 *
 *	Send output from a bridge member interface.  This
 *	performs the bridging function for locally originated
 *	packets.
 *
 *	The mbuf has the Ethernet header already attached.  We must
 *	enqueue or free the mbuf before returning.
 *
 *	ifp is the member interface the packet was originally being
 *	sent on.  The sa and rt arguments are not used.  Every return
 *	path returns 0.
 */
int
bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
    const struct rtentry *rt)
{
	struct ether_header *eh;
	struct ifnet *dst_if;
	struct bridge_softc *sc;
	int s;

	/*
	 * bridge_output() is called from ether_output(), furthermore
	 * ifp argument doesn't point to bridge(4). So, don't assert
	 * IFEF_OUTPUT_MPSAFE here.
	 */

	/* The Ethernet header is read through mtod() below. */
	if (m->m_len < ETHER_HDR_LEN) {
		m = m_pullup(m, ETHER_HDR_LEN);
		if (m == NULL)
			return 0;
	}

	eh = mtod(m, struct ether_header *);
	sc = ifp->if_bridge;

	/* Tag the mbuf as broadcast or multicast from its destination. */
	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		if (memcmp(etherbroadcastaddr,
		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
	}

	/*
	 * If bridge is down, but the original output interface is up,
	 * go ahead and send out that interface.  Otherwise, the packet
	 * is dropped below.
	 */
	if (__predict_false(sc == NULL) ||
	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
		dst_if = ifp;
		goto sendunicast;
	}

	/*
	 * If the packet is a multicast, or we don't know a better way to
	 * get there, send to all interfaces.
	 */
	if ((m->m_flags & (M_MCAST | M_BCAST)) != 0)
		dst_if = NULL;
	else
		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
	if (dst_if == NULL) {
		/* XXX Should call bridge_broadcast, but there are locking
		 * issues which need resolving first. */
		struct bridge_iflist *bif;
		struct mbuf *mc;
		/* Set once the original mbuf m itself has been handed off. */
		bool used = false;

		BRIDGE_PSZ_RENTER(s);
		BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
			struct psref psref;

			/*
			 * Take a reference on the member, then leave the
			 * read section while working on it; the section is
			 * re-entered at "next" before moving to the next
			 * list element.
			 */
			bridge_acquire_member(sc, bif, &psref);
			BRIDGE_PSZ_REXIT(s);

			dst_if = bif->bif_ifp;
			if ((dst_if->if_flags & IFF_RUNNING) == 0)
				goto next;

			/*
			 * If this is not the original output interface,
			 * and the interface is participating in spanning
			 * tree, make sure the port is in a state that
			 * allows forwarding.
			 */
			if (dst_if != ifp &&
			    (bif->bif_flags & IFBIF_STP) != 0) {
				switch (bif->bif_state) {
				case BSTP_IFSTATE_BLOCKING:
				case BSTP_IFSTATE_LISTENING:
				case BSTP_IFSTATE_DISABLED:
					goto next;
				}
			}

			/*
			 * On the last member the original mbuf can be
			 * transmitted directly, unless it is still needed
			 * for the local-input copy made further down
			 * (multicast/broadcast on a member other than ifp).
			 * Otherwise transmit a copy.
			 */
			if (PSLIST_READER_NEXT(bif, struct bridge_iflist,
			    bif_next) == NULL &&
			    ((m->m_flags & (M_MCAST | M_BCAST)) == 0 ||
			    dst_if == ifp))
			{
				used = true;
				mc = m;
			} else {
				mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
				if (mc == NULL) {
					sc->sc_if.if_oerrors++;
					goto next;
				}
			}

			bridge_enqueue(sc, dst_if, mc, 0);

			/*
			 * Multicast/broadcast frames are additionally fed to
			 * ether_input() of every other member, as if they
			 * had been received on it.
			 */
			if ((m->m_flags & (M_MCAST | M_BCAST)) != 0 &&
			    dst_if != ifp)
			{
				if (PSLIST_READER_NEXT(bif,
				    struct bridge_iflist, bif_next) == NULL)
				{
					used = true;
					mc = m;
				} else {
					mc = m_copym(m, 0, M_COPYALL,
					    M_DONTWAIT);
					if (mc == NULL) {
						sc->sc_if.if_oerrors++;
						goto next;
					}
				}

				m_set_rcvif(mc, dst_if);
				mc->m_flags &= ~M_PROMISC;

				/*
				 * s is reused for the spl cookie here; it is
				 * assigned again by BRIDGE_PSZ_RENTER at
				 * "next".
				 */
#ifndef NET_MPSAFE
				s = splsoftnet();
#endif
				ether_input(dst_if, mc);
#ifndef NET_MPSAFE
				splx(s);
#endif
			}

next:
			BRIDGE_PSZ_RENTER(s);
			bridge_release_member(sc, bif, &psref);

			/* Guarantee we don't re-enter the loop as we already
			 * decided we're at the end. */
			if (used)
				break;
		}
		BRIDGE_PSZ_REXIT(s);

		/* Nobody took the original mbuf, so it is still ours. */
		if (!used)
			m_freem(m);
		return 0;
	}

 sendunicast:
	/*
	 * XXX Spanning tree consideration here?
	 */

	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
		m_freem(m);
		return 0;
	}

	/* NOTE(review): sc may be NULL here when reached via the goto above. */
	bridge_enqueue(sc, dst_if, m, 0);

	return 0;
}
1597
1598/*
1599 * bridge_start:
1600 *
1601 *	Start output on a bridge.
1602 *
1603 *	NOTE: This routine should never be called in this implementation.
1604 */
1605static void
1606bridge_start(struct ifnet *ifp)
1607{
1608
1609	printf("%s: bridge_start() called\n", ifp->if_xname);
1610}
1611
1612/*
1613 * bridge_forward:
1614 *
1615 *	The forwarding function of the bridge.
1616 */
1617static void
1618bridge_forward(struct bridge_softc *sc, struct mbuf *m)
1619{
1620	struct bridge_iflist *bif;
1621	struct ifnet *src_if, *dst_if;
1622	struct ether_header *eh;
1623	struct psref psref;
1624	struct psref psref_src;
1625	DECLARE_LOCK_VARIABLE;
1626
1627	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0)
1628		return;
1629
1630	src_if = m_get_rcvif_psref(m, &psref_src);
1631	if (src_if == NULL) {
1632		/* Interface is being destroyed? */
1633		m_freem(m);
1634		goto out;
1635	}
1636
1637	sc->sc_if.if_ipackets++;
1638	sc->sc_if.if_ibytes += m->m_pkthdr.len;
1639
1640	/*
1641	 * Look up the bridge_iflist.
1642	 */
1643	bif = bridge_lookup_member_if(sc, src_if, &psref);
1644	if (bif == NULL) {
1645		/* Interface is not a bridge member (anymore?) */
1646		m_freem(m);
1647		goto out;
1648	}
1649
1650	if (bif->bif_flags & IFBIF_STP) {
1651		switch (bif->bif_state) {
1652		case BSTP_IFSTATE_BLOCKING:
1653		case BSTP_IFSTATE_LISTENING:
1654		case BSTP_IFSTATE_DISABLED:
1655			m_freem(m);
1656			bridge_release_member(sc, bif, &psref);
1657			goto out;
1658		}
1659	}
1660
1661	eh = mtod(m, struct ether_header *);
1662
1663	/*
1664	 * If the interface is learning, and the source
1665	 * address is valid and not multicast, record
1666	 * the address.
1667	 */
1668	if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1669	    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1670	    (eh->ether_shost[0] == 0 &&
1671	     eh->ether_shost[1] == 0 &&
1672	     eh->ether_shost[2] == 0 &&
1673	     eh->ether_shost[3] == 0 &&
1674	     eh->ether_shost[4] == 0 &&
1675	     eh->ether_shost[5] == 0) == 0) {
1676		(void) bridge_rtupdate(sc, eh->ether_shost,
1677		    src_if, 0, IFBAF_DYNAMIC);
1678	}
1679
1680	if ((bif->bif_flags & IFBIF_STP) != 0 &&
1681	    bif->bif_state == BSTP_IFSTATE_LEARNING) {
1682		m_freem(m);
1683		bridge_release_member(sc, bif, &psref);
1684		goto out;
1685	}
1686
1687	bridge_release_member(sc, bif, &psref);
1688
1689	/*
1690	 * At this point, the port either doesn't participate
1691	 * in spanning tree or it is in the forwarding state.
1692	 */
1693
1694	/*
1695	 * If the packet is unicast, destined for someone on
1696	 * "this" side of the bridge, drop it.
1697	 */
1698	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1699		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1700		if (src_if == dst_if) {
1701			m_freem(m);
1702			goto out;
1703		}
1704	} else {
1705		/* ...forward it to all interfaces. */
1706		sc->sc_if.if_imcasts++;
1707		dst_if = NULL;
1708	}
1709
1710	if (pfil_run_hooks(sc->sc_if.if_pfil, &m, src_if, PFIL_IN) != 0) {
1711		if (m != NULL)
1712			m_freem(m);
1713		goto out;
1714	}
1715	if (m == NULL)
1716		goto out;
1717
1718	if (dst_if == NULL) {
1719		bridge_broadcast(sc, src_if, m);
1720		goto out;
1721	}
1722
1723	m_put_rcvif_psref(src_if, &psref_src);
1724	src_if = NULL;
1725
1726	/*
1727	 * At this point, we're dealing with a unicast frame
1728	 * going to a different interface.
1729	 */
1730	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1731		m_freem(m);
1732		goto out;
1733	}
1734
1735	bif = bridge_lookup_member_if(sc, dst_if, &psref);
1736	if (bif == NULL) {
1737		/* Not a member of the bridge (anymore?) */
1738		m_freem(m);
1739		goto out;
1740	}
1741
1742	if (bif->bif_flags & IFBIF_STP) {
1743		switch (bif->bif_state) {
1744		case BSTP_IFSTATE_DISABLED:
1745		case BSTP_IFSTATE_BLOCKING:
1746			m_freem(m);
1747			bridge_release_member(sc, bif, &psref);
1748			goto out;
1749		}
1750	}
1751
1752	bridge_release_member(sc, bif, &psref);
1753
1754	ACQUIRE_GLOBAL_LOCKS();
1755	bridge_enqueue(sc, dst_if, m, 1);
1756	RELEASE_GLOBAL_LOCKS();
1757out:
1758	if (src_if != NULL)
1759		m_put_rcvif_psref(src_if, &psref_src);
1760	return;
1761}
1762
1763static bool
1764bstp_state_before_learning(struct bridge_iflist *bif)
1765{
1766	if (bif->bif_flags & IFBIF_STP) {
1767		switch (bif->bif_state) {
1768		case BSTP_IFSTATE_BLOCKING:
1769		case BSTP_IFSTATE_LISTENING:
1770		case BSTP_IFSTATE_DISABLED:
1771			return true;
1772		}
1773	}
1774	return false;
1775}
1776
1777static bool
1778bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1779{
1780	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1781
1782	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1783#if NCARP > 0
1784	    || (bif->bif_ifp->if_carp &&
1785	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1786#endif /* NCARP > 0 */
1787	    )
1788		return true;
1789
1790	return false;
1791}
1792
/*
 * bridge_input:
 *
 *	Receive input from a member interface.  Queue the packet for
 *	bridging if it is not for us.
 *
 *	ifp is the member interface the packet arrived on.  The packet
 *	is always consumed: it is passed to ether_input(), bstp_input()
 *	or bridge_forward(), or it is freed.  Must not be called from
 *	hard interrupt context (asserted below).
 */
static void
bridge_input(struct ifnet *ifp, struct mbuf *m)
{
	struct bridge_softc *sc = ifp->if_bridge;
	struct bridge_iflist *bif;
	struct ether_header *eh;
	struct psref psref;
	int bound;
	DECLARE_LOCK_VARIABLE;

	KASSERT(!cpu_intr_p());

	/* No bridge, or bridge not running: plain Ethernet input. */
	if (__predict_false(sc == NULL) ||
	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
		ACQUIRE_GLOBAL_LOCKS();
		ether_input(ifp, m);
		RELEASE_GLOBAL_LOCKS();
		return;
	}

	/*
	 * curlwp_bind() is paired with curlwp_bindx() on every exit
	 * path below.
	 */
	bound = curlwp_bind();
	bif = bridge_lookup_member_if(sc, ifp, &psref);
	if (bif == NULL) {
		/* ifp is not a member: plain Ethernet input. */
		curlwp_bindx(bound);
		ACQUIRE_GLOBAL_LOCKS();
		ether_input(ifp, m);
		RELEASE_GLOBAL_LOCKS();
		return;
	}

	eh = mtod(m, struct ether_header *);

	/* Tag the mbuf as broadcast or multicast from its destination. */
	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		if (memcmp(etherbroadcastaddr,
		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
	}

	/*
	 * A 'fast' path for packets addressed to interfaces that are
	 * part of this bridge.
	 */
	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
	    !bstp_state_before_learning(bif)) {
		struct bridge_iflist *_bif;
		/* Stays NULL unless a member owns the destination address. */
		struct ifnet *_ifp = NULL;
		int s;
		struct psref _psref;

		BRIDGE_PSZ_RENTER(s);
		BRIDGE_IFLIST_READER_FOREACH(_bif, sc) {
			/* It is destined for us. */
			if (bridge_ourether(_bif, eh, 0)) {
				bridge_acquire_member(sc, _bif, &_psref);
				BRIDGE_PSZ_REXIT(s);
				if (_bif->bif_flags & IFBIF_LEARNING)
					(void) bridge_rtupdate(sc,
					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
				/* Deliver as if received on the owning member. */
				m_set_rcvif(m, _bif->bif_ifp);
				_ifp = _bif->bif_ifp;
				bridge_release_member(sc, _bif, &_psref);
				goto out;
			}

			/* We just received a packet that we sent out. */
			if (bridge_ourether(_bif, eh, 1))
				break;
		}
		BRIDGE_PSZ_REXIT(s);
out:

		/*
		 * _bif is non-NULL when the loop was left early: either
		 * the destination is ours (_ifp set, deliver locally) or
		 * the source is ours (_ifp still NULL, drop the packet).
		 */
		if (_bif != NULL) {
			bridge_release_member(sc, bif, &psref);
			curlwp_bindx(bound);
			if (_ifp != NULL) {
				m->m_flags &= ~M_PROMISC;
				ACQUIRE_GLOBAL_LOCKS();
				ether_input(_ifp, m);
				RELEASE_GLOBAL_LOCKS();
			} else
				m_freem(m);
			return;
		}
	}

	/* Tap off 802.1D packets; they do not get forwarded. */
	if (bif->bif_flags & IFBIF_STP &&
	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
		bstp_input(sc, bif, m);
		bridge_release_member(sc, bif, &psref);
		curlwp_bindx(bound);
		return;
	}

	/*
	 * A normal switch would discard the packet here, but that's not what
	 * we've done historically. This also prevents some obnoxious behaviour.
	 */
	if (bstp_state_before_learning(bif)) {
		bridge_release_member(sc, bif, &psref);
		curlwp_bindx(bound);
		ACQUIRE_GLOBAL_LOCKS();
		ether_input(ifp, m);
		RELEASE_GLOBAL_LOCKS();
		return;
	}

	bridge_release_member(sc, bif, &psref);

	/* Everything else goes through the forwarding path. */
	bridge_forward(sc, m);

	curlwp_bindx(bound);
}
1914
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	For broadcast/multicast frames a copy is additionally passed
 *	to ether_input() of each eligible member, including the one
 *	the packet arrived on.  Every recipient gets its own copy; the
 *	original mbuf m is always freed before returning.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
    struct mbuf *m)
{
	struct bridge_iflist *bif;
	struct mbuf *mc;
	struct ifnet *dst_if;
	bool bmcast;
	int s;
	DECLARE_LOCK_VARIABLE;

	/* True when the frame is broadcast or multicast. */
	bmcast = m->m_flags & (M_BCAST|M_MCAST);

	BRIDGE_PSZ_RENTER(s);
	BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
		struct psref psref;

		/*
		 * Take a reference on the member, then leave the read
		 * section while working on it; the section is re-entered
		 * at "next" before moving to the next list element.
		 */
		bridge_acquire_member(sc, bif, &psref);
		BRIDGE_PSZ_REXIT(s);

		dst_if = bif->bif_ifp;

		/* Skip spanning tree ports that are blocking or disabled. */
		if (bif->bif_flags & IFBIF_STP) {
			switch (bif->bif_state) {
			case BSTP_IFSTATE_BLOCKING:
			case BSTP_IFSTATE_DISABLED:
				goto next;
			}
		}

		/*
		 * Unicast frames are only flooded to members that have
		 * IFBIF_DISCOVER set.
		 */
		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
			goto next;

		if ((dst_if->if_flags & IFF_RUNNING) == 0)
			goto next;

		/* Transmit a copy on every member but the receiving one. */
		if (dst_if != src_if) {
			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
			if (mc == NULL) {
				sc->sc_if.if_oerrors++;
				goto next;
			}
			ACQUIRE_GLOBAL_LOCKS();
			bridge_enqueue(sc, dst_if, mc, 1);
			RELEASE_GLOBAL_LOCKS();
		}

		/*
		 * Broadcast/multicast: also input a copy locally, marked
		 * as received on this member.
		 */
		if (bmcast) {
			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
			if (mc == NULL) {
				sc->sc_if.if_oerrors++;
				goto next;
			}

			m_set_rcvif(mc, dst_if);
			mc->m_flags &= ~M_PROMISC;

			ACQUIRE_GLOBAL_LOCKS();
			ether_input(dst_if, mc);
			RELEASE_GLOBAL_LOCKS();
		}
next:
		BRIDGE_PSZ_RENTER(s);
		bridge_release_member(sc, bif, &psref);
	}
	BRIDGE_PSZ_REXIT(s);

	/* Only copies were handed out; the original is still ours. */
	m_freem(m);
}
1991
1992static int
1993bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
1994    struct bridge_rtnode **brtp)
1995{
1996	struct bridge_rtnode *brt;
1997	int error;
1998
1999	if (sc->sc_brtcnt >= sc->sc_brtmax)
2000		return ENOSPC;
2001
2002	/*
2003	 * Allocate a new bridge forwarding node, and
2004	 * initialize the expiration time and Ethernet
2005	 * address.
2006	 */
2007	brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
2008	if (brt == NULL)
2009		return ENOMEM;
2010
2011	memset(brt, 0, sizeof(*brt));
2012	brt->brt_expire = time_uptime + sc->sc_brttimeout;
2013	brt->brt_flags = IFBAF_DYNAMIC;
2014	memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2015
2016	BRIDGE_RT_LOCK(sc);
2017	error = bridge_rtnode_insert(sc, brt);
2018	BRIDGE_RT_UNLOCK(sc);
2019
2020	if (error != 0) {
2021		pool_put(&bridge_rtnode_pool, brt);
2022		return error;
2023	}
2024
2025	*brtp = brt;
2026	return 0;
2027}
2028
2029/*
2030 * bridge_rtupdate:
2031 *
2032 *	Add a bridge routing entry.
2033 */
2034static int
2035bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2036    struct ifnet *dst_if, int setflags, uint8_t flags)
2037{
2038	struct bridge_rtnode *brt;
2039	int s;
2040
2041again:
2042	/*
2043	 * A route for this destination might already exist.  If so,
2044	 * update it, otherwise create a new one.
2045	 */
2046	BRIDGE_RT_RENTER(s);
2047	brt = bridge_rtnode_lookup(sc, dst);
2048
2049	if (brt != NULL) {
2050		brt->brt_ifp = dst_if;
2051		if (setflags) {
2052			brt->brt_flags = flags;
2053			if (flags & IFBAF_STATIC)
2054				brt->brt_expire = 0;
2055			else
2056				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2057		} else {
2058			if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2059				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2060		}
2061	}
2062	BRIDGE_RT_REXIT(s);
2063
2064	if (brt == NULL) {
2065		int r;
2066
2067		r = bridge_rtalloc(sc, dst, &brt);
2068		if (r != 0)
2069			return r;
2070		goto again;
2071	}
2072
2073	return 0;
2074}
2075
2076/*
2077 * bridge_rtlookup:
2078 *
2079 *	Lookup the destination interface for an address.
2080 */
2081static struct ifnet *
2082bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2083{
2084	struct bridge_rtnode *brt;
2085	struct ifnet *ifs = NULL;
2086	int s;
2087
2088	BRIDGE_RT_RENTER(s);
2089	brt = bridge_rtnode_lookup(sc, addr);
2090	if (brt != NULL)
2091		ifs = brt->brt_ifp;
2092	BRIDGE_RT_REXIT(s);
2093
2094	return ifs;
2095}
2096
/*
 * Callback type used by bridge_rtlist_iterate_remove().  It is called
 * with the bridge, the route node being visited, a flag the callback may
 * set to true to end the iteration after this node, and the opaque
 * argument supplied by the caller.  Returning true asks for the node to
 * be removed.
 */
typedef bool (*bridge_iterate_cb_t)
    (struct bridge_softc *, struct bridge_rtnode *, bool *, void *);
2099
2100/*
2101 * bridge_rtlist_iterate_remove:
2102 *
2103 *	It iterates on sc->sc_rtlist and removes rtnodes of it which func
2104 *	callback judges to remove. Removals of rtnodes are done in a manner
2105 *	of pserialize. To this end, all kmem_* operations are placed out of
2106 *	mutexes.
2107 */
2108static void
2109bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2110{
2111	struct bridge_rtnode *brt, *nbrt;
2112	struct bridge_rtnode **brt_list;
2113	int i, count;
2114
2115retry:
2116	count = sc->sc_brtcnt;
2117	if (count == 0)
2118		return;
2119	brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2120
2121	BRIDGE_RT_LOCK(sc);
2122	if (__predict_false(sc->sc_brtcnt > count)) {
2123		/* The rtnodes increased, we need more memory */
2124		BRIDGE_RT_UNLOCK(sc);
2125		kmem_free(brt_list, sizeof(*brt_list) * count);
2126		goto retry;
2127	}
2128
2129	i = 0;
2130	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2131		bool need_break = false;
2132		if (func(sc, brt, &need_break, arg)) {
2133			bridge_rtnode_remove(sc, brt);
2134			brt_list[i++] = brt;
2135		}
2136		if (need_break)
2137			break;
2138	}
2139
2140	if (i > 0)
2141		BRIDGE_RT_PSZ_PERFORM(sc);
2142	BRIDGE_RT_UNLOCK(sc);
2143
2144	while (--i >= 0)
2145		bridge_rtnode_destroy(brt_list[i]);
2146
2147	kmem_free(brt_list, sizeof(*brt_list) * count);
2148}
2149
2150static bool
2151bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2152    bool *need_break, void *arg)
2153{
2154	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2155		/* Take into account of the subsequent removal */
2156		if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2157			*need_break = true;
2158		return true;
2159	} else
2160		return false;
2161}
2162
2163static void
2164bridge_rttrim0(struct bridge_softc *sc)
2165{
2166	bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2167}
2168
2169/*
2170 * bridge_rttrim:
2171 *
2172 *	Trim the routine table so that we have a number
2173 *	of routing entries less than or equal to the
2174 *	maximum number.
2175 */
2176static void
2177bridge_rttrim(struct bridge_softc *sc)
2178{
2179
2180	/* Make sure we actually need to do this. */
2181	if (sc->sc_brtcnt <= sc->sc_brtmax)
2182		return;
2183
2184	/* Force an aging cycle; this might trim enough addresses. */
2185	bridge_rtage(sc);
2186	if (sc->sc_brtcnt <= sc->sc_brtmax)
2187		return;
2188
2189	bridge_rttrim0(sc);
2190
2191	return;
2192}
2193
2194/*
2195 * bridge_timer:
2196 *
2197 *	Aging timer for the bridge.
2198 */
2199static void
2200bridge_timer(void *arg)
2201{
2202	struct bridge_softc *sc = arg;
2203
2204	workqueue_enqueue(sc->sc_rtage_wq, &sc->sc_rtage_wk, NULL);
2205}
2206
2207static void
2208bridge_rtage_work(struct work *wk, void *arg)
2209{
2210	struct bridge_softc *sc = arg;
2211
2212	KASSERT(wk == &sc->sc_rtage_wk);
2213
2214	bridge_rtage(sc);
2215
2216	if (sc->sc_if.if_flags & IFF_RUNNING)
2217		callout_reset(&sc->sc_brcallout,
2218		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2219}
2220
2221static bool
2222bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2223    bool *need_break, void *arg)
2224{
2225	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2226	    time_uptime >= brt->brt_expire)
2227		return true;
2228	else
2229		return false;
2230}
2231
2232/*
2233 * bridge_rtage:
2234 *
2235 *	Perform an aging cycle.
2236 */
2237static void
2238bridge_rtage(struct bridge_softc *sc)
2239{
2240	bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2241}
2242
2243
2244static bool
2245bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2246    bool *need_break, void *arg)
2247{
2248	int full = *(int*)arg;
2249
2250	if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2251		return true;
2252	else
2253		return false;
2254}
2255
2256/*
2257 * bridge_rtflush:
2258 *
2259 *	Remove all dynamic addresses from the bridge.
2260 */
2261static void
2262bridge_rtflush(struct bridge_softc *sc, int full)
2263{
2264	bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2265}
2266
2267/*
2268 * bridge_rtdaddr:
2269 *
2270 *	Remove an address from the table.
2271 */
2272static int
2273bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2274{
2275	struct bridge_rtnode *brt;
2276
2277	BRIDGE_RT_LOCK(sc);
2278	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL) {
2279		BRIDGE_RT_UNLOCK(sc);
2280		return ENOENT;
2281	}
2282	bridge_rtnode_remove(sc, brt);
2283	BRIDGE_RT_PSZ_PERFORM(sc);
2284	BRIDGE_RT_UNLOCK(sc);
2285
2286	bridge_rtnode_destroy(brt);
2287
2288	return 0;
2289}
2290
2291/*
2292 * bridge_rtdelete:
2293 *
2294 *	Delete routes to a speicifc member interface.
2295 */
2296static void
2297bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2298{
2299	struct bridge_rtnode *brt;
2300
2301	BRIDGE_RT_LOCK(sc);
2302	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
2303		if (brt->brt_ifp == ifp)
2304			break;
2305	}
2306	if (brt == NULL) {
2307		BRIDGE_RT_UNLOCK(sc);
2308		return;
2309	}
2310	bridge_rtnode_remove(sc, brt);
2311	BRIDGE_RT_PSZ_PERFORM(sc);
2312	BRIDGE_RT_UNLOCK(sc);
2313
2314	bridge_rtnode_destroy(brt);
2315}
2316
2317/*
2318 * bridge_rtable_init:
2319 *
2320 *	Initialize the route table for this bridge.
2321 */
2322static void
2323bridge_rtable_init(struct bridge_softc *sc)
2324{
2325	int i;
2326
2327	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2328	    KM_SLEEP);
2329
2330	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2331		LIST_INIT(&sc->sc_rthash[i]);
2332
2333	sc->sc_rthash_key = cprng_fast32();
2334
2335	LIST_INIT(&sc->sc_rtlist);
2336
2337	sc->sc_rtlist_psz = pserialize_create();
2338	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2339}
2340
2341/*
2342 * bridge_rtable_fini:
2343 *
2344 *	Deconstruct the route table for this bridge.
2345 */
2346static void
2347bridge_rtable_fini(struct bridge_softc *sc)
2348{
2349
2350	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2351	if (sc->sc_rtlist_lock)
2352		mutex_obj_free(sc->sc_rtlist_lock);
2353	if (sc->sc_rtlist_psz)
2354		pserialize_destroy(sc->sc_rtlist_psz);
2355}
2356
2357/*
2358 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2359 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2360 */
2361#define	mix(a, b, c)							\
2362do {									\
2363	a -= b; a -= c; a ^= (c >> 13);					\
2364	b -= c; b -= a; b ^= (a << 8);					\
2365	c -= a; c -= b; c ^= (b >> 13);					\
2366	a -= b; a -= c; a ^= (c >> 12);					\
2367	b -= c; b -= a; b ^= (a << 16);					\
2368	c -= a; c -= b; c ^= (b >> 5);					\
2369	a -= b; a -= c; a ^= (c >> 3);					\
2370	b -= c; b -= a; b ^= (a << 10);					\
2371	c -= a; c -= b; c ^= (b >> 15);					\
2372} while (/*CONSTCOND*/0)
2373
2374static inline uint32_t
2375bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2376{
2377	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2378
2379	b += addr[5] << 8;
2380	b += addr[4];
2381	a += addr[3] << 24;
2382	a += addr[2] << 16;
2383	a += addr[1] << 8;
2384	a += addr[0];
2385
2386	mix(a, b, c);
2387
2388	return (c & BRIDGE_RTHASH_MASK);
2389}
2390
2391#undef mix
2392
2393/*
2394 * bridge_rtnode_lookup:
2395 *
2396 *	Look up a bridge route node for the specified destination.
2397 */
2398static struct bridge_rtnode *
2399bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2400{
2401	struct bridge_rtnode *brt;
2402	uint32_t hash;
2403	int dir;
2404
2405	hash = bridge_rthash(sc, addr);
2406	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
2407		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2408		if (dir == 0)
2409			return brt;
2410		if (dir > 0)
2411			return NULL;
2412	}
2413
2414	return NULL;
2415}
2416
2417/*
2418 * bridge_rtnode_insert:
2419 *
2420 *	Insert the specified bridge node into the route table.  We
2421 *	assume the entry is not already in the table.
2422 */
2423static int
2424bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2425{
2426	struct bridge_rtnode *lbrt;
2427	uint32_t hash;
2428	int dir;
2429
2430	KASSERT(BRIDGE_RT_LOCKED(sc));
2431
2432	hash = bridge_rthash(sc, brt->brt_addr);
2433
2434	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
2435	if (lbrt == NULL) {
2436		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
2437		goto out;
2438	}
2439
2440	do {
2441		dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2442		if (dir == 0)
2443			return EEXIST;
2444		if (dir > 0) {
2445			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
2446			goto out;
2447		}
2448		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
2449			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
2450			goto out;
2451		}
2452		lbrt = LIST_NEXT(lbrt, brt_hash);
2453	} while (lbrt != NULL);
2454
2455#ifdef DIAGNOSTIC
2456	panic("bridge_rtnode_insert: impossible");
2457#endif
2458
2459 out:
2460	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
2461	sc->sc_brtcnt++;
2462
2463	return 0;
2464}
2465
2466/*
2467 * bridge_rtnode_remove:
2468 *
2469 *	Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2470 */
2471static void
2472bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2473{
2474
2475	KASSERT(BRIDGE_RT_LOCKED(sc));
2476
2477	LIST_REMOVE(brt, brt_hash);
2478	LIST_REMOVE(brt, brt_list);
2479	sc->sc_brtcnt--;
2480}
2481
2482/*
2483 * bridge_rtnode_destroy:
2484 *
2485 *	Destroy a bridge rtnode.
2486 */
2487static void
2488bridge_rtnode_destroy(struct bridge_rtnode *brt)
2489{
2490
2491	pool_put(&bridge_rtnode_pool, brt);
2492}
2493
2494#if defined(BRIDGE_IPF)
2495extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2496extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2497
2498/*
2499 * Send bridge packets through IPF if they are one of the types IPF can deal
2500 * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2501 * question.)
2502 */
2503static int
2504bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2505{
2506	int snap, error;
2507	struct ether_header *eh1, eh2;
2508	struct llc llc1;
2509	uint16_t ether_type;
2510
2511	snap = 0;
2512	error = -1;	/* Default error if not error == 0 */
2513	eh1 = mtod(*mp, struct ether_header *);
2514	ether_type = ntohs(eh1->ether_type);
2515
2516	/*
2517	 * Check for SNAP/LLC.
2518	 */
2519	if (ether_type < ETHERMTU) {
2520		struct llc *llc2 = (struct llc *)(eh1 + 1);
2521
2522		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2523		    llc2->llc_dsap == LLC_SNAP_LSAP &&
2524		    llc2->llc_ssap == LLC_SNAP_LSAP &&
2525		    llc2->llc_control == LLC_UI) {
2526			ether_type = htons(llc2->llc_un.type_snap.ether_type);
2527			snap = 1;
2528		}
2529	}
2530
2531	/*
2532	 * If we're trying to filter bridge traffic, don't look at anything
2533	 * other than IP and ARP traffic.  If the filter doesn't understand
2534	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2535	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2536	 * but of course we don't have an AppleTalk filter to begin with.
2537	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2538	 * ARP traffic.)
2539	 */
2540	switch (ether_type) {
2541		case ETHERTYPE_ARP:
2542		case ETHERTYPE_REVARP:
2543			return 0; /* Automatically pass */
2544		case ETHERTYPE_IP:
2545# ifdef INET6
2546		case ETHERTYPE_IPV6:
2547# endif /* INET6 */
2548			break;
2549		default:
2550			goto bad;
2551	}
2552
2553	/* Strip off the Ethernet header and keep a copy. */
2554	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2555	m_adj(*mp, ETHER_HDR_LEN);
2556
2557	/* Strip off snap header, if present */
2558	if (snap) {
2559		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2560		m_adj(*mp, sizeof(struct llc));
2561	}
2562
2563	/*
2564	 * Check basic packet sanity and run IPF through pfil.
2565	 */
2566	KASSERT(!cpu_intr_p());
2567	switch (ether_type)
2568	{
2569	case ETHERTYPE_IP :
2570		error = bridge_ip_checkbasic(mp);
2571		if (error == 0)
2572			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2573		break;
2574# ifdef INET6
2575	case ETHERTYPE_IPV6 :
2576		error = bridge_ip6_checkbasic(mp);
2577		if (error == 0)
2578			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2579		break;
2580# endif
2581	default :
2582		error = 0;
2583		break;
2584	}
2585
2586	if (*mp == NULL)
2587		return error;
2588	if (error != 0)
2589		goto bad;
2590
2591	error = -1;
2592
2593	/*
2594	 * Finally, put everything back the way it was and return
2595	 */
2596	if (snap) {
2597		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2598		if (*mp == NULL)
2599			return error;
2600		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2601	}
2602
2603	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2604	if (*mp == NULL)
2605		return error;
2606	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2607
2608	return 0;
2609
2610    bad:
2611	m_freem(*mp);
2612	*mp = NULL;
2613	return error;
2614}
2615
2616/*
2617 * Perform basic checks on header size since
2618 * IPF assumes ip_input has already processed
2619 * it for it.  Cut-and-pasted from ip_input.c.
2620 * Given how simple the IPv6 version is,
2621 * does the IPv4 version really need to be
2622 * this complicated?
2623 *
2624 * XXX Should we update ipstat here, or not?
2625 * XXX Right now we update ipstat but not
2626 * XXX csum_counter.
2627 */
2628static int
2629bridge_ip_checkbasic(struct mbuf **mp)
2630{
2631	struct mbuf *m = *mp;
2632	struct ip *ip;
2633	int len, hlen;
2634
2635	if (*mp == NULL)
2636		return -1;
2637
2638	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2639		if ((m = m_copyup(m, sizeof(struct ip),
2640			(max_linkhdr + 3) & ~3)) == NULL) {
2641			/* XXXJRT new stat, please */
2642			ip_statinc(IP_STAT_TOOSMALL);
2643			goto bad;
2644		}
2645	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2646		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2647			ip_statinc(IP_STAT_TOOSMALL);
2648			goto bad;
2649		}
2650	}
2651	ip = mtod(m, struct ip *);
2652	if (ip == NULL) goto bad;
2653
2654	if (ip->ip_v != IPVERSION) {
2655		ip_statinc(IP_STAT_BADVERS);
2656		goto bad;
2657	}
2658	hlen = ip->ip_hl << 2;
2659	if (hlen < sizeof(struct ip)) { /* minimum header length */
2660		ip_statinc(IP_STAT_BADHLEN);
2661		goto bad;
2662	}
2663	if (hlen > m->m_len) {
2664		if ((m = m_pullup(m, hlen)) == 0) {
2665			ip_statinc(IP_STAT_BADHLEN);
2666			goto bad;
2667		}
2668		ip = mtod(m, struct ip *);
2669		if (ip == NULL) goto bad;
2670	}
2671
2672	switch (m->m_pkthdr.csum_flags &
2673	        ((m_get_rcvif_NOMPSAFE(m)->if_csum_flags_rx & M_CSUM_IPv4) |
2674	         M_CSUM_IPv4_BAD)) {
2675	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2676		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2677		goto bad;
2678
2679	case M_CSUM_IPv4:
2680		/* Checksum was okay. */
2681		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2682		break;
2683
2684	default:
2685		/* Must compute it ourselves. */
2686		/* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2687		if (in_cksum(m, hlen) != 0)
2688			goto bad;
2689		break;
2690	}
2691
2692	/* Retrieve the packet length. */
2693	len = ntohs(ip->ip_len);
2694
2695	/*
2696	 * Check for additional length bogosity
2697	 */
2698	if (len < hlen) {
2699		ip_statinc(IP_STAT_BADLEN);
2700		goto bad;
2701	}
2702
2703	/*
2704	 * Check that the amount of data in the buffers
2705	 * is as at least much as the IP header would have us expect.
2706	 * Drop packet if shorter than we expect.
2707	 */
2708	if (m->m_pkthdr.len < len) {
2709		ip_statinc(IP_STAT_TOOSHORT);
2710		goto bad;
2711	}
2712
2713	/* Checks out, proceed */
2714	*mp = m;
2715	return 0;
2716
2717    bad:
2718	*mp = m;
2719	return -1;
2720}
2721
2722# ifdef INET6
2723/*
2724 * Same as above, but for IPv6.
2725 * Cut-and-pasted from ip6_input.c.
2726 * XXX Should we update ip6stat, or not?
2727 */
2728static int
2729bridge_ip6_checkbasic(struct mbuf **mp)
2730{
2731	struct mbuf *m = *mp;
2732	struct ip6_hdr *ip6;
2733
2734	/*
2735	 * If the IPv6 header is not aligned, slurp it up into a new
2736	 * mbuf with space for link headers, in the event we forward
2737	 * it.  Otherwise, if it is aligned, make sure the entire base
2738	 * IPv6 header is in the first mbuf of the chain.
2739	 */
2740	if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2741		struct ifnet *inifp = m_get_rcvif_NOMPSAFE(m);
2742		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2743		                  (max_linkhdr + 3) & ~3)) == NULL) {
2744			/* XXXJRT new stat, please */
2745			ip6_statinc(IP6_STAT_TOOSMALL);
2746			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2747			goto bad;
2748		}
2749	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
2750		struct ifnet *inifp = m_get_rcvif_NOMPSAFE(m);
2751		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
2752			ip6_statinc(IP6_STAT_TOOSMALL);
2753			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2754			goto bad;
2755		}
2756	}
2757
2758	ip6 = mtod(m, struct ip6_hdr *);
2759
2760	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2761		ip6_statinc(IP6_STAT_BADVERS);
2762		in6_ifstat_inc(m_get_rcvif_NOMPSAFE(m), ifs6_in_hdrerr);
2763		goto bad;
2764	}
2765
2766	/* Checks out, proceed */
2767	*mp = m;
2768	return 0;
2769
2770    bad:
2771	*mp = m;
2772	return -1;
2773}
2774# endif /* INET6 */
2775#endif /* BRIDGE_IPF */
2776