if_vxlan.c revision 284365
1/*-
2 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "opt_inet.h"
28#include "opt_inet6.h"
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/10/sys/net/if_vxlan.c 284365 2015-06-14 03:14:45Z bryanv $");
32
33#include <sys/param.h>
34#include <sys/eventhandler.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/hash.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/module.h>
41#include <sys/refcount.h>
42#include <sys/rmlock.h>
43#include <sys/priv.h>
44#include <sys/proc.h>
45#include <sys/queue.h>
46#include <sys/sbuf.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/sockio.h>
50#include <sys/sysctl.h>
51#include <sys/systm.h>
52
53#include <net/bpf.h>
54#include <net/ethernet.h>
55#include <net/if.h>
56#include <net/if_var.h>
57#include <net/if_clone.h>
58#include <net/if_dl.h>
59#include <net/if_types.h>
60#include <net/if_vxlan.h>
61#include <net/netisr.h>
62
63#include <netinet/in.h>
64#include <netinet/in_systm.h>
65#include <netinet/in_var.h>
66#include <netinet/in_pcb.h>
67#include <netinet/ip.h>
68#include <netinet/ip6.h>
69#include <netinet/ip_var.h>
70#include <netinet6/ip6_var.h>
71#include <netinet/udp.h>
72#include <netinet/udp_var.h>
73
/* Forward declaration so the list head type below can refer to the softc. */
struct vxlan_softc;
/* Declares struct vxlan_softc_head, the head of a list of vxlan_softc. */
LIST_HEAD(vxlan_softc_head, vxlan_softc);
76
/*
 * One multicast group slot of a vxlan_socket (see vxlso_mc[]).
 *
 * vxlan_sockaddr_mc_info_match() compares vxlsomc_saddr against the
 * caller's local address, vxlsomc_gaddr against the group address and
 * vxlsomc_ifidx against the interface index.  vxlan_socket_destroy()
 * asserts that the group address family of every slot is AF_UNSPEC.
 */
struct vxlan_socket_mc_info {
	union vxlan_sockaddr		 vxlsomc_saddr;
	union vxlan_sockaddr		 vxlsomc_gaddr;
	int				 vxlsomc_ifidx;
	/* NOTE(review): presumably a per-slot use count -- confirm at users. */
	int				 vxlsomc_users;
};
83
/* Number of multicast group slots per socket (size of vxlso_mc[]). */
#define VXLAN_SO_MC_MAX_GROUPS		32

/*
 * The per-socket VNI hash has (1 << 6) = 64 buckets; a VNI is mapped to
 * its bucket by taking it modulo the bucket count.
 */
#define VXLAN_SO_VNI_HASH_SHIFT		6
#define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
#define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)
89
/*
 * State for one kernel UDP socket used by vxlan interfaces.
 *
 * Instances are linked on the global vxlan_socket_list through
 * vxlso_entry and are reference counted through vxlso_refcnt (see
 * VXLAN_SO_ACQUIRE()/VXLAN_SO_RELEASE()).  vxlso_lock is the rmlock
 * taken by the VXLAN_SO_*LOCK() macros.
 */
struct vxlan_socket {
	/* Socket created by vxlan_socket_init(); closed on destroy. */
	struct socket			*vxlso_sock;
	struct rmlock			 vxlso_lock;
	u_int				 vxlso_refcnt;
	/* Local address the socket is bound to; key for socket lookup. */
	union vxlan_sockaddr		 vxlso_laddr;
	LIST_ENTRY(vxlan_socket)	 vxlso_entry;
	/* Buckets of softcs (linked via vxl_entry), see VXLAN_SO_VNI_HASH(). */
	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
};
99
/*
 * Wrappers around the rmlock(9) operations on a socket's vxlso_lock.
 * The read variants take a struct rm_priotracker pointer (_p).
 */
#define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_LOCK_ASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
#define VXLAN_SO_LOCK_WASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)

/*
 * Reference counting on vxlso_refcnt.  The result of VXLAN_SO_RELEASE()
 * is used by vxlan_socket_release() to decide whether to destroy the
 * socket.
 */
#define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
#define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)
111
/*
 * One forwarding table entry: maps a MAC address to the remote address
 * to which frames for that MAC are sent.
 */
struct vxlan_ftable_entry {
	/* Linkage on one bucket of the softc's vxl_ftable[]. */
	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;
	/* VXLAN_FE_FLAG_* bits. */
	uint16_t			 vxlfe_flags;
	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];
	union vxlan_sockaddr		 vxlfe_raddr;
	/* time_uptime value at or after which a dynamic entry is expired. */
	time_t				 vxlfe_expire;
};
119
/* Values for vxlfe_flags. */
#define VXLAN_FE_FLAG_DYNAMIC		0x01
#define VXLAN_FE_FLAG_STATIC		0x02

/* Non-zero when the entry has the DYNAMIC flag set. */
#define VXLAN_FE_IS_DYNAMIC(_fe) \
    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)

/*
 * The forwarding table has (1 << 9) = 512 buckets.  The bucket index is
 * vxlan_mac_hash() of the MAC address modulo the bucket count.
 */
#define VXLAN_SC_FTABLE_SHIFT		9
#define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
#define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
#define VXLAN_SC_FTABLE_HASH(_sc, _mac)	\
    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)

/* Declares struct vxlan_ftable_head, the type of one table bucket. */
LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
133
/* Per-interface event counters. */
struct vxlan_statistics {
	/* Incremented when a new entry is refused because the table is full. */
	uint32_t	ftable_nospace;
	/*
	 * Incremented each time vxlan_ftable_update_locked() drops the read
	 * lock and takes the write lock instead.
	 */
	uint32_t	ftable_lock_upgrade_failed;
};
138
/*
 * Per-interface software context.
 *
 * vxl_lock is the rmlock taken by the VXLAN_*LOCK() macros and
 * vxl_refcnt is the counter used by VXLAN_ACQUIRE()/VXLAN_RELEASE().
 */
struct vxlan_softc {
	struct ifnet			*vxl_ifp;
	struct vxlan_socket		*vxl_sock;
	uint32_t			 vxl_vni;
	union vxlan_sockaddr		 vxl_src_addr;
	/* The port of this address is reused for learned entries. */
	union vxlan_sockaddr		 vxl_dst_addr;
	uint32_t			 vxl_flags;
#define VXLAN_FLAG_INIT		0x0001
#define VXLAN_FLAG_TEARDOWN	0x0002
#define VXLAN_FLAG_LEARN	0x0004

	uint32_t			 vxl_port_hash_key;
	uint16_t			 vxl_min_port;
	uint16_t			 vxl_max_port;
	uint8_t				 vxl_ttl;

	/* Lookup table from MAC address to forwarding entry. */
	/* Current entry count, entry limit, and lifetime of learned entries. */
	uint32_t			 vxl_ftable_cnt;
	uint32_t			 vxl_ftable_max;
	uint32_t			 vxl_ftable_timeout;
	/* Set from arc4random() in vxlan_ftable_init(). */
	uint32_t			 vxl_ftable_hash_key;
	/* Array of VXLAN_SC_FTABLE_SIZE buckets. */
	struct vxlan_ftable_head	*vxl_ftable;

	/* Derived from vxl_dst_addr. */
	struct vxlan_ftable_entry	 vxl_default_fe;

	struct ip_moptions		*vxl_im4o;
	struct ip6_moptions		*vxl_im6o;

	struct rmlock			 vxl_lock;
	volatile u_int			 vxl_refcnt;

	int				 vxl_unit;
	int				 vxl_vso_mc_index;
	struct vxlan_statistics		 vxl_stats;
	struct sysctl_oid		*vxl_sysctl_node;
	struct sysctl_ctx_list		 vxl_sysctl_ctx;
	struct callout			 vxl_callout;
	uint8_t				 vxl_hwaddr[ETHER_ADDR_LEN];
	int				 vxl_mc_ifindex;
	struct ifnet			*vxl_mc_ifp;
	char				 vxl_mc_ifname[IFNAMSIZ];
	/* Linkage on a bucket of the owning socket's vxlso_vni_hash[]. */
	LIST_ENTRY(vxlan_softc)		 vxl_entry;
	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;
};
184
/*
 * Wrappers around the rmlock(9) operations on a softc's vxl_lock.  The
 * read variants take a struct rm_priotracker pointer (_p).
 */
#define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
#define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
#define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
#define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
#define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
/*
 * Release the lock in whichever mode it is currently held: as a writer
 * if this thread owns the write lock, otherwise as a reader using the
 * given tracker.  Used after code paths that may have switched from the
 * read lock to the write lock.
 */
#define VXLAN_UNLOCK(_sc, _p) do {		\
    if (VXLAN_LOCK_WOWNED(_sc))			\
	VXLAN_WUNLOCK(_sc);			\
    else					\
	VXLAN_RUNLOCK(_sc, _p);			\
} while (0)

/* Reference counting on vxl_refcnt. */
#define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
#define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)
201
/* Cast a sockaddr pointer to a const IPv4 / IPv6 sockaddr pointer. */
#define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
#define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))
204
/* A UDP header immediately followed by a VXLAN header, with no padding. */
struct vxlanudphdr {
	struct udphdr		vxlh_udp;
	struct vxlan_header	vxlh_hdr;
} __packed;
209
/* Forward declarations: forwarding table (ftable) routines. */
static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
static void	vxlan_ftable_init(struct vxlan_softc *);
static void	vxlan_ftable_fini(struct vxlan_softc *);
static void	vxlan_ftable_flush(struct vxlan_softc *, int);
static void	vxlan_ftable_expire(struct vxlan_softc *);
static int	vxlan_ftable_update_locked(struct vxlan_softc *,
		    const struct sockaddr *, const uint8_t *,
		    struct rm_priotracker *);
static int	vxlan_ftable_update(struct vxlan_softc *,
		    const struct sockaddr *, const uint8_t *);
static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);

/* Forward declarations: individual forwarding table entries. */
static struct vxlan_ftable_entry *
		vxlan_ftable_entry_alloc(void);
static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
static void	vxlan_ftable_entry_init(struct vxlan_softc *,
		    struct vxlan_ftable_entry *, const uint8_t *,
		    const struct sockaddr *, uint32_t);
static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static struct vxlan_ftable_entry *
		vxlan_ftable_entry_lookup(struct vxlan_softc *,
		    const uint8_t *);
static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
		    struct sbuf *);
237
/* Forward declarations: vxlan_socket lifetime and lookup. */
static struct vxlan_socket *
		vxlan_socket_alloc(const union vxlan_sockaddr *);
static void	vxlan_socket_destroy(struct vxlan_socket *);
static void	vxlan_socket_release(struct vxlan_socket *);
static struct vxlan_socket *
		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
static void	vxlan_socket_insert(struct vxlan_socket *);
static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_create(struct ifnet *, int,
		    const union vxlan_sockaddr *, struct vxlan_socket **);
static void	vxlan_socket_ifdetach(struct vxlan_socket *,
		    struct ifnet *, struct vxlan_softc_head *);

/* Forward declarations: per-socket multicast group handling. */
static struct vxlan_socket *
		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_mc_info_match(
		    const struct vxlan_socket_mc_info *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int *, union vxlan_sockaddr *);
static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int, int *);
static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
		    int);

/* Forward declarations: softc membership of a socket. */
static struct vxlan_softc *
		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
		    uint32_t);
static struct vxlan_softc *
		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
static int	vxlan_socket_insert_softc(struct vxlan_socket *,
		    struct vxlan_softc *);
static void	vxlan_socket_remove_softc(struct vxlan_socket *,
		    struct vxlan_softc *);
279
/* Forward declarations: multicast interface handling. */
static struct ifnet *
		vxlan_multicast_if_ref(struct vxlan_softc *, int);
static void	vxlan_free_multicast(struct vxlan_softc *);
static int	vxlan_setup_multicast_interface(struct vxlan_softc *);

/* Forward declarations: interface setup, teardown and timer. */
static int	vxlan_setup_multicast(struct vxlan_softc *);
static int	vxlan_setup_socket(struct vxlan_softc *);
static void	vxlan_setup_interface(struct vxlan_softc *);
static int	vxlan_valid_init_config(struct vxlan_softc *);
static void	vxlan_init_wait(struct vxlan_softc *);
static void	vxlan_init_complete(struct vxlan_softc *);
static void	vxlan_init(void *);
static void	vxlan_release(struct vxlan_softc *);
static void	vxlan_teardown_wait(struct vxlan_softc *);
static void	vxlan_teardown_complete(struct vxlan_softc *);
static void	vxlan_teardown_locked(struct vxlan_softc *);
static void	vxlan_teardown(struct vxlan_softc *);
static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
		    struct vxlan_softc_head *);
static void	vxlan_timer(void *);
300
/*
 * Forward declarations: control command handlers (referenced from
 * vxlan_control_table[]) and the ioctl entry points.
 */
static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
		    struct ifdrv *, int);
static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
320
/*
 * Forward declarations: encapsulation, transmit and receive.  The two
 * helpers below are only declared when INET or INET6 is configured.
 */
#if defined(INET) || defined(INET6)
static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
		    int, uint16_t, uint16_t);
#endif
static int	vxlan_encap4(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_encap6(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_transmit(struct ifnet *, struct mbuf *);
static void	vxlan_qflush(struct ifnet *);
static void	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
		    const struct sockaddr *, void *);
static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
		    const struct sockaddr *);

/* Forward declarations: configuration and interface cloning. */
static void	vxlan_set_default_config(struct vxlan_softc *);
static int	vxlan_set_user_config(struct vxlan_softc *,
		     struct ifvxlanparam *);
static int	vxlan_clone_create(struct if_clone *, int, caddr_t);
static void	vxlan_clone_destroy(struct ifnet *);
342
/* Forward declarations: MAC hashing and address generation. */
static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
static void	vxlan_fakeaddr(struct vxlan_softc *);

/* Forward declarations: union vxlan_sockaddr helpers. */
static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);

/* Forward declarations: configuration validation. */
static int	vxlan_can_change_config(struct vxlan_softc *);
static int	vxlan_check_vni(uint32_t);
static int	vxlan_check_ttl(int);
static int	vxlan_check_ftable_timeout(uint32_t);
static int	vxlan_check_ftable_max(uint32_t);

/* Forward declarations: sysctl and tunables. */
static void	vxlan_sysctl_setup(struct vxlan_softc *);
static void	vxlan_sysctl_destroy(struct vxlan_softc *);
static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);

/* Forward declarations: ifnet detach event and module load/unload. */
static void	vxlan_ifdetach_event(void *, struct ifnet *);
static void	vxlan_load(void);
static void	vxlan_unload(void);
static int	vxlan_modevent(module_t, int, void *);
372
/* Driver name; also used as the short name of the M_VXLAN malloc type. */
static const char vxlan_name[] = "vxlan";
static MALLOC_DEFINE(M_VXLAN, vxlan_name,
    "Virtual eXtensible LAN Interface");
static struct if_clone *vxlan_cloner;
/* Held while walking or modifying vxlan_socket_list. */
static struct mtx vxlan_list_mtx;
static LIST_HEAD(, vxlan_socket) vxlan_socket_list;

static eventhandler_tag vxlan_ifdetach_event_tag;

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0,
    "Virtual eXtensible Local Area Network");

/* NOTE(review): consumer of legacy_port is outside this view -- confirm. */
static int vxlan_legacy_port = 0;
TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
/* When non-zero, vxlan_socket_init() sets SO_REUSEPORT on new sockets. */
static int vxlan_reuse_port = 0;
TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
390
/* Default maximum number of addresses in the forwarding table. */
#ifndef VXLAN_FTABLE_MAX
#define VXLAN_FTABLE_MAX	2000
#endif

/* Timeout (in seconds) of addresses learned in the forwarding table. */
#ifndef VXLAN_FTABLE_TIMEOUT
#define VXLAN_FTABLE_TIMEOUT	(20 * 60)
#endif

/*
 * Maximum timeout (in seconds) of addresses learned in the forwarding
 * table.
 */
#ifndef VXLAN_FTABLE_MAX_TIMEOUT
#define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
#endif

/* Number of seconds between pruning attempts of the forwarding table. */
#ifndef VXLAN_FTABLE_PRUNE
#define VXLAN_FTABLE_PRUNE	(5 * 60)
#endif

/* Run-time copy of the prune interval, initialised to the default above. */
static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
415
/*
 * Descriptor for one control command: the handler, the size of its
 * argument, and VXLAN_CTRL_FLAG_* bits.
 * NOTE(review): the flags are interpreted by the ioctl dispatcher, which
 * is outside this view; by their names they select copyin/copyout of the
 * argument and a privilege check -- confirm there.
 */
struct vxlan_control {
	int	(*vxlc_func)(struct vxlan_softc *, void *);
	int	vxlc_argsize;
	int	vxlc_flags;
#define VXLAN_CTRL_FLAG_COPYIN	0x01
#define VXLAN_CTRL_FLAG_COPYOUT	0x02
#define VXLAN_CTRL_FLAG_SUSER	0x04
};
424
/*
 * Control command table, indexed by VXLAN_CMD_* value.  GET_CONFIG is the
 * only COPYOUT command and the only one without the SUSER flag; every
 * other command takes a struct ifvxlancmd with COPYIN | SUSER.
 */
static const struct vxlan_control vxlan_control_table[] = {
	[VXLAN_CMD_GET_CONFIG] =
	    {	vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
		VXLAN_CTRL_FLAG_COPYOUT
	    },

	[VXLAN_CMD_SET_VNI] =
	    {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_ADDR] =
	    {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_ADDR] =
	    {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_PORT] =
	    {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_PORT] =
	    {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_PORT_RANGE] =
	    {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
	    {	vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_MAX] =
	    {	vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_MULTICAST_IF] =
	    {	vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_TTL] =
	    {	vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LEARN] =
	    {	vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
	    {	vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_REM] =
	    {	vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FLUSH] =
	    {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },
};

/* Number of slots in vxlan_control_table[]. */
static const int vxlan_control_table_size = nitems(vxlan_control_table);
503
/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns zero when all ETHER_ADDR_LEN bytes match; otherwise returns the
 * difference a[i] - b[i] at the first differing byte, so the sign tells
 * which address sorts first.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, diff;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			return (diff);
	}

	return (0);
}
514
515static void
516vxlan_ftable_init(struct vxlan_softc *sc)
517{
518	int i;
519
520	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
521	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
522
523	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
524		LIST_INIT(&sc->vxl_ftable[i]);
525	sc->vxl_ftable_hash_key = arc4random();
526}
527
528static void
529vxlan_ftable_fini(struct vxlan_softc *sc)
530{
531	int i;
532
533	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
534		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
535		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
536	}
537	MPASS(sc->vxl_ftable_cnt == 0);
538
539	free(sc->vxl_ftable, M_VXLAN);
540	sc->vxl_ftable = NULL;
541}
542
543static void
544vxlan_ftable_flush(struct vxlan_softc *sc, int all)
545{
546	struct vxlan_ftable_entry *fe, *tfe;
547	int i;
548
549	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
550		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
551			if (all || VXLAN_FE_IS_DYNAMIC(fe))
552				vxlan_ftable_entry_destroy(sc, fe);
553		}
554	}
555}
556
557static void
558vxlan_ftable_expire(struct vxlan_softc *sc)
559{
560	struct vxlan_ftable_entry *fe, *tfe;
561	int i;
562
563	VXLAN_LOCK_WASSERT(sc);
564
565	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
566		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
567			if (VXLAN_FE_IS_DYNAMIC(fe) &&
568			    time_uptime >= fe->vxlfe_expire)
569				vxlan_ftable_entry_destroy(sc, fe);
570		}
571	}
572}
573
574static int
575vxlan_ftable_update_locked(struct vxlan_softc *sc, const struct sockaddr *sa,
576    const uint8_t *mac, struct rm_priotracker *tracker)
577{
578	union vxlan_sockaddr vxlsa;
579	struct vxlan_ftable_entry *fe;
580	int error;
581
582	VXLAN_LOCK_ASSERT(sc);
583
584again:
585	/*
586	 * A forwarding entry for this MAC address might already exist. If
587	 * so, update it, otherwise create a new one. We may have to upgrade
588	 * the lock if we have to change or create an entry.
589	 */
590	fe = vxlan_ftable_entry_lookup(sc, mac);
591	if (fe != NULL) {
592		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
593
594		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
595		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, sa))
596			return (0);
597		if (!VXLAN_LOCK_WOWNED(sc)) {
598			VXLAN_RUNLOCK(sc, tracker);
599			VXLAN_WLOCK(sc);
600			sc->vxl_stats.ftable_lock_upgrade_failed++;
601			goto again;
602		}
603		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, sa);
604		return (0);
605	}
606
607	if (!VXLAN_LOCK_WOWNED(sc)) {
608		VXLAN_RUNLOCK(sc, tracker);
609		VXLAN_WLOCK(sc);
610		sc->vxl_stats.ftable_lock_upgrade_failed++;
611		goto again;
612	}
613
614	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
615		sc->vxl_stats.ftable_nospace++;
616		return (ENOSPC);
617	}
618
619	fe = vxlan_ftable_entry_alloc();
620	if (fe == NULL)
621		return (ENOMEM);
622
623	/*
624	 * The source port may be randomly select by the remove host, so
625	 * use the port of the default destination address.
626	 */
627	vxlan_sockaddr_copy(&vxlsa, sa);
628	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
629
630	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa.sa,
631	    VXLAN_FE_FLAG_DYNAMIC);
632
633	/* The prior lookup failed, so the insert should not. */
634	error = vxlan_ftable_entry_insert(sc, fe);
635	MPASS(error == 0);
636
637	return (0);
638}
639
640static int
641vxlan_ftable_update(struct vxlan_softc *sc, const struct sockaddr *sa,
642    const uint8_t *mac)
643{
644	struct rm_priotracker tracker;
645	int error;
646
647	VXLAN_RLOCK(sc, &tracker);
648	error = vxlan_ftable_update_locked(sc, sa, mac, &tracker);
649	VXLAN_UNLOCK(sc, &tracker);
650
651	return (error);
652}
653
654static int
655vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
656{
657	struct rm_priotracker tracker;
658	struct sbuf sb;
659	struct vxlan_softc *sc;
660	struct vxlan_ftable_entry *fe;
661	size_t size;
662	int i, error;
663
664	/*
665	 * This is mostly intended for debugging during development. It is
666	 * not practical to dump an entire large table this way.
667	 */
668
669	sc = arg1;
670	size = PAGE_SIZE;	/* Calculate later. */
671
672	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
673	sbuf_putc(&sb, '\n');
674
675	VXLAN_RLOCK(sc, &tracker);
676	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
677		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
678			if (sbuf_error(&sb) != 0)
679				break;
680			vxlan_ftable_entry_dump(fe, &sb);
681		}
682	}
683	VXLAN_RUNLOCK(sc, &tracker);
684
685	if (sbuf_len(&sb) == 1)
686		sbuf_setpos(&sb, 0);
687
688	sbuf_finish(&sb);
689	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
690	sbuf_delete(&sb);
691
692	return (error);
693}
694
695static struct vxlan_ftable_entry *
696vxlan_ftable_entry_alloc(void)
697{
698	struct vxlan_ftable_entry *fe;
699
700	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
701
702	return (fe);
703}
704
705static void
706vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
707{
708
709	free(fe, M_VXLAN);
710}
711
712static void
713vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
714    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
715{
716
717	fe->vxlfe_flags = flags;
718	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
719	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
720	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
721}
722
723static void
724vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
725    struct vxlan_ftable_entry *fe)
726{
727
728	sc->vxl_ftable_cnt--;
729	LIST_REMOVE(fe, vxlfe_hash);
730	vxlan_ftable_entry_free(fe);
731}
732
733static int
734vxlan_ftable_entry_insert(struct vxlan_softc *sc,
735    struct vxlan_ftable_entry *fe)
736{
737	struct vxlan_ftable_entry *lfe;
738	uint32_t hash;
739	int dir;
740
741	VXLAN_LOCK_WASSERT(sc);
742	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
743
744	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
745	if (lfe == NULL) {
746		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
747		goto out;
748	}
749
750	do {
751		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
752		if (dir == 0)
753			return (EEXIST);
754		if (dir > 0) {
755			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
756			goto out;
757		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
758			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
759			goto out;
760		} else
761			lfe = LIST_NEXT(lfe, vxlfe_hash);
762	} while (lfe != NULL);
763
764out:
765	sc->vxl_ftable_cnt++;
766
767	return (0);
768}
769
770static struct vxlan_ftable_entry *
771vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
772{
773	struct vxlan_ftable_entry *fe;
774	uint32_t hash;
775	int dir;
776
777	VXLAN_LOCK_ASSERT(sc);
778	hash = VXLAN_SC_FTABLE_HASH(sc, mac);
779
780	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
781		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, mac);
782		if (dir == 0)
783			return (fe);
784		if (dir > 0)
785			break;
786	}
787
788	return (NULL);
789}
790
791static void
792vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
793{
794	char buf[64];
795	const union vxlan_sockaddr *sa;
796	const void *addr;
797	int i, len, af, width;
798
799	sa = &fe->vxlfe_raddr;
800	af = sa->sa.sa_family;
801	len = sbuf_len(sb);
802
803	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
804	    fe->vxlfe_flags);
805
806	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
807		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
808	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
809
810	if (af == AF_INET) {
811		addr = &sa->in4.sin_addr;
812		width = INET_ADDRSTRLEN - 1;
813	} else {
814		addr = &sa->in6.sin6_addr;
815		width = INET6_ADDRSTRLEN - 1;
816	}
817	inet_ntop(af, addr, buf, sizeof(buf));
818	sbuf_printf(sb, "%*s ", width, buf);
819
820	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
821
822	sbuf_putc(sb, '\n');
823
824	/* Truncate a partial line. */
825	if (sbuf_error(sb) != 0)
826		sbuf_setpos(sb, len);
827}
828
829static struct vxlan_socket *
830vxlan_socket_alloc(const union vxlan_sockaddr *sa)
831{
832	struct vxlan_socket *vso;
833	int i;
834
835	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
836	rm_init(&vso->vxlso_lock, "vxlansorm");
837	refcount_init(&vso->vxlso_refcnt, 0);
838	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
839		LIST_INIT(&vso->vxlso_vni_hash[i]);
840	vso->vxlso_laddr = *sa;
841
842	return (vso);
843}
844
845static void
846vxlan_socket_destroy(struct vxlan_socket *vso)
847{
848	struct socket *so;
849	struct vxlan_socket_mc_info *mc;
850	int i;
851
852	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
853		mc = &vso->vxlso_mc[i];
854		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
855		    ("%s: socket %p mc[%d] still has address",
856		     __func__, vso, i));
857	}
858
859	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
860		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
861		    ("%s: socket %p vni_hash[%d] not empty",
862		     __func__, vso, i));
863	}
864
865	so = vso->vxlso_sock;
866	if (so != NULL) {
867		vso->vxlso_sock = NULL;
868		soclose(so);
869	}
870
871	rm_destroy(&vso->vxlso_lock);
872	free(vso, M_VXLAN);
873}
874
875static void
876vxlan_socket_release(struct vxlan_socket *vso)
877{
878	int destroy;
879
880	mtx_lock(&vxlan_list_mtx);
881	destroy = VXLAN_SO_RELEASE(vso);
882	if (destroy != 0)
883		LIST_REMOVE(vso, vxlso_entry);
884	mtx_unlock(&vxlan_list_mtx);
885
886	if (destroy != 0)
887		vxlan_socket_destroy(vso);
888}
889
890static struct vxlan_socket *
891vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
892{
893	struct vxlan_socket *vso;
894
895	mtx_lock(&vxlan_list_mtx);
896	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
897		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
898			VXLAN_SO_ACQUIRE(vso);
899			break;
900		}
901	}
902	mtx_unlock(&vxlan_list_mtx);
903
904	return (vso);
905}
906
907static void
908vxlan_socket_insert(struct vxlan_socket *vso)
909{
910
911	mtx_lock(&vxlan_list_mtx);
912	VXLAN_SO_ACQUIRE(vso);
913	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
914	mtx_unlock(&vxlan_list_mtx);
915}
916
917static int
918vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
919{
920	struct thread *td;
921	int error;
922
923	td = curthread;
924
925	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
926	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
927	if (error) {
928		if_printf(ifp, "cannot create socket: %d\n", error);
929		return (error);
930	}
931
932	error = udp_set_kernel_tunneling(vso->vxlso_sock,
933	    vxlan_rcv_udp_packet, vso);
934	if (error) {
935		if_printf(ifp, "cannot set tunneling function: %d\n", error);
936		return (error);
937	}
938
939	if (vxlan_reuse_port != 0) {
940		struct sockopt sopt;
941		int val = 1;
942
943		bzero(&sopt, sizeof(sopt));
944		sopt.sopt_dir = SOPT_SET;
945		sopt.sopt_level = IPPROTO_IP;
946		sopt.sopt_name = SO_REUSEPORT;
947		sopt.sopt_val = &val;
948		sopt.sopt_valsize = sizeof(val);
949		error = sosetopt(vso->vxlso_sock, &sopt);
950		if (error) {
951			if_printf(ifp,
952			    "cannot set REUSEADDR socket opt: %d\n", error);
953			return (error);
954		}
955	}
956
957	return (0);
958}
959
960static int
961vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
962{
963	union vxlan_sockaddr laddr;
964	struct thread *td;
965	int error;
966
967	td = curthread;
968	laddr = vso->vxlso_laddr;
969
970	error = sobind(vso->vxlso_sock, &laddr.sa, td);
971	if (error) {
972		if (error != EADDRINUSE)
973			if_printf(ifp, "cannot bind socket: %d\n", error);
974		return (error);
975	}
976
977	return (0);
978}
979
980static int
981vxlan_socket_create(struct ifnet *ifp, int multicast,
982    const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
983{
984	union vxlan_sockaddr laddr;
985	struct vxlan_socket *vso;
986	int error;
987
988	laddr = *saddr;
989
990	/*
991	 * If this socket will be multicast, then only the local port
992	 * must be specified when binding.
993	 */
994	if (multicast != 0) {
995		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
996			laddr.in4.sin_addr.s_addr = INADDR_ANY;
997#ifdef INET6
998		else
999			laddr.in6.sin6_addr = in6addr_any;
1000#endif
1001	}
1002
1003	vso = vxlan_socket_alloc(&laddr);
1004	if (vso == NULL)
1005		return (ENOMEM);
1006
1007	error = vxlan_socket_init(vso, ifp);
1008	if (error)
1009		goto fail;
1010
1011	error = vxlan_socket_bind(vso, ifp);
1012	if (error)
1013		goto fail;
1014
1015	/*
1016	 * There is a small window between the bind completing and
1017	 * inserting the socket, so that a concurrent create may fail.
1018	 * Let's not worry about that for now.
1019	 */
1020	vxlan_socket_insert(vso);
1021	*vsop = vso;
1022
1023	return (0);
1024
1025fail:
1026	vxlan_socket_destroy(vso);
1027
1028	return (error);
1029}
1030
1031static void
1032vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
1033    struct vxlan_softc_head *list)
1034{
1035	struct rm_priotracker tracker;
1036	struct vxlan_softc *sc;
1037	int i;
1038
1039	VXLAN_SO_RLOCK(vso, &tracker);
1040	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
1041		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
1042			vxlan_ifdetach(sc, ifp, list);
1043	}
1044	VXLAN_SO_RUNLOCK(vso, &tracker);
1045}
1046
1047static struct vxlan_socket *
1048vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
1049{
1050	struct vxlan_socket *vso;
1051	union vxlan_sockaddr laddr;
1052
1053	laddr = *vxlsa;
1054
1055	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1056		laddr.in4.sin_addr.s_addr = INADDR_ANY;
1057#ifdef INET6
1058	else
1059		laddr.in6.sin6_addr = in6addr_any;
1060#endif
1061
1062	vso = vxlan_socket_lookup(&laddr);
1063
1064	return (vso);
1065}
1066
1067static int
1068vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
1069    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1070    int ifidx)
1071{
1072
1073	if (!vxlan_sockaddr_in_any(local) &&
1074	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
1075		return (0);
1076	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
1077		return (0);
1078	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
1079		return (0);
1080
1081	return (1);
1082}
1083
1084static int
1085vxlan_socket_mc_join_group(struct vxlan_socket *vso,
1086    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1087    int *ifidx, union vxlan_sockaddr *source)
1088{
1089	struct sockopt sopt;
1090	int error;
1091
1092	*source = *local;
1093
1094	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1095		struct ip_mreq mreq;
1096
1097		mreq.imr_multiaddr = group->in4.sin_addr;
1098		mreq.imr_interface = local->in4.sin_addr;
1099
1100		bzero(&sopt, sizeof(sopt));
1101		sopt.sopt_dir = SOPT_SET;
1102		sopt.sopt_level = IPPROTO_IP;
1103		sopt.sopt_name = IP_ADD_MEMBERSHIP;
1104		sopt.sopt_val = &mreq;
1105		sopt.sopt_valsize = sizeof(mreq);
1106		error = sosetopt(vso->vxlso_sock, &sopt);
1107		if (error)
1108			return (error);
1109
1110		/*
1111		 * BMV: Ideally, there would be a formal way for us to get
1112		 * the local interface that was selected based on the
1113		 * imr_interface address. We could then update *ifidx so
1114		 * vxlan_sockaddr_mc_info_match() would return a match for
1115		 * later creates that explicitly set the multicast interface.
1116		 *
1117		 * If we really need to, we can of course look in the INP's
1118		 * membership list:
1119		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
1120		 *         imo_membership[]->inm_ifp
1121		 * similarly to imo_match_group().
1122		 */
1123		source->in4.sin_addr = local->in4.sin_addr;
1124
1125	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1126		struct ipv6_mreq mreq;
1127
1128		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1129		mreq.ipv6mr_interface = *ifidx;
1130
1131		bzero(&sopt, sizeof(sopt));
1132		sopt.sopt_dir = SOPT_SET;
1133		sopt.sopt_level = IPPROTO_IPV6;
1134		sopt.sopt_name = IPV6_JOIN_GROUP;
1135		sopt.sopt_val = &mreq;
1136		sopt.sopt_valsize = sizeof(mreq);
1137		error = sosetopt(vso->vxlso_sock, &sopt);
1138		if (error)
1139			return (error);
1140
1141		/*
1142		 * BMV: As with IPv4, we would really like to know what
1143		 * interface in6p_lookup_mcast_ifp() selected.
1144		 */
1145	} else
1146		error = EAFNOSUPPORT;
1147
1148	return (error);
1149}
1150
1151static int
1152vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
1153    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
1154    int ifidx)
1155{
1156	struct sockopt sopt;
1157	int error;
1158
1159	bzero(&sopt, sizeof(sopt));
1160	sopt.sopt_dir = SOPT_SET;
1161
1162	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1163		struct ip_mreq mreq;
1164
1165		mreq.imr_multiaddr = group->in4.sin_addr;
1166		mreq.imr_interface = source->in4.sin_addr;
1167
1168		sopt.sopt_level = IPPROTO_IP;
1169		sopt.sopt_name = IP_DROP_MEMBERSHIP;
1170		sopt.sopt_val = &mreq;
1171		sopt.sopt_valsize = sizeof(mreq);
1172		error = sosetopt(vso->vxlso_sock, &sopt);
1173
1174	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1175		struct ipv6_mreq mreq;
1176
1177		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1178		mreq.ipv6mr_interface = ifidx;
1179
1180		sopt.sopt_level = IPPROTO_IPV6;
1181		sopt.sopt_name = IPV6_LEAVE_GROUP;
1182		sopt.sopt_val = &mreq;
1183		sopt.sopt_valsize = sizeof(mreq);
1184		error = sosetopt(vso->vxlso_sock, &sopt);
1185
1186	} else
1187		error = EAFNOSUPPORT;
1188
1189	return (error);
1190}
1191
/*
 * Register one more user of a multicast group on this socket, joining
 * the group first if no existing slot in vso->vxlso_mc[] matches.
 *
 * On success returns 0 and stores the index of the slot used in *idx;
 * the slot's user count has been incremented. Returns ENOSPC when no
 * free slot is available (either up front, or because the free slots
 * were gone by the time the lock was re-taken), or the error from
 * vxlan_socket_mc_join_group().
 */
1192static int
1193vxlan_socket_mc_add_group(struct vxlan_socket *vso,
1194    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1195    int ifidx, int *idx)
1196{
1197	union vxlan_sockaddr source;
1198	struct vxlan_socket_mc_info *mc;
1199	int i, empty, error;
1200
1201	/*
1202	 * Within a socket, the same multicast group may be used by multiple
1203	 * interfaces, each with a different network identifier. But a socket
1204	 * may only join a multicast group once, so keep track of the users
1205	 * here.
1206	 */
1207
	/*
	 * Pass 1: count the free slots (group family AF_UNSPEC) and look
	 * for a slot that already describes this membership. A match jumps
	 * to "out" with the write lock still held and with mc/i naming the
	 * matching slot.
	 */
1208	VXLAN_SO_WLOCK(vso);
1209	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1210		mc = &vso->vxlso_mc[i];
1211
1212		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1213			empty++;
1214			continue;
1215		}
1216
1217		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
1218			goto out;
1219	}
1220	VXLAN_SO_WUNLOCK(vso);
1221
1222	if (empty == 0)
1223		return (ENOSPC);
1224
	/* The join itself is issued with the socket lock dropped. */
1225	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
1226	if (error)
1227		return (error);
1228
	/*
	 * Pass 2: claim the first free slot and record the membership in
	 * it. The lock was released in between, so a free slot is no
	 * longer guaranteed to exist.
	 */
1229	VXLAN_SO_WLOCK(vso);
1230	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1231		mc = &vso->vxlso_mc[i];
1232
1233		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1234			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
1235			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
1236			mc->vxlsomc_ifidx = ifidx;
1237			goto out;
1238		}
1239	}
1240	VXLAN_SO_WUNLOCK(vso);
1241
	/* No slot left to record the membership: undo the join. */
1242	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
1243	MPASS(error == 0);
1244
1245	return (ENOSPC);
1246
	/* Reached with the write lock held and mc/i naming the slot. */
1247out:
1248	mc->vxlsomc_users++;
1249	VXLAN_SO_WUNLOCK(vso);
1250
1251	*idx = i;
1252
1253	return (0);
1254}
1255
1256static void
1257vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
1258{
1259	union vxlan_sockaddr group, source;
1260	struct vxlan_socket_mc_info *mc;
1261	int ifidx, leave;
1262
1263	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
1264	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1265
1266	leave = 0;
1267	mc = &vso->vxlso_mc[idx];
1268
1269	VXLAN_SO_WLOCK(vso);
1270	mc->vxlsomc_users--;
1271	if (mc->vxlsomc_users == 0) {
1272		group = mc->vxlsomc_gaddr;
1273		source = mc->vxlsomc_saddr;
1274		ifidx = mc->vxlsomc_ifidx;
1275		bzero(mc, sizeof(*mc));
1276		leave = 1;
1277	}
1278	VXLAN_SO_WUNLOCK(vso);
1279
1280	if (leave != 0) {
1281		/*
1282		 * Our socket's membership in this group may have already
1283		 * been removed if we joined through an interface that's
1284		 * been detached.
1285		 */
1286		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
1287	}
1288}
1289
1290static struct vxlan_softc *
1291vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
1292{
1293	struct vxlan_softc *sc;
1294	uint32_t hash;
1295
1296	VXLAN_SO_LOCK_ASSERT(vso);
1297	hash = VXLAN_SO_VNI_HASH(vni);
1298
1299	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
1300		if (sc->vxl_vni == vni) {
1301			VXLAN_ACQUIRE(sc);
1302			break;
1303		}
1304	}
1305
1306	return (sc);
1307}
1308
1309static struct vxlan_softc *
1310vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
1311{
1312	struct rm_priotracker tracker;
1313	struct vxlan_softc *sc;
1314
1315	VXLAN_SO_RLOCK(vso, &tracker);
1316	sc = vxlan_socket_lookup_softc_locked(vso, vni);
1317	VXLAN_SO_RUNLOCK(vso, &tracker);
1318
1319	return (sc);
1320}
1321
1322static int
1323vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1324{
1325	struct vxlan_softc *tsc;
1326	uint32_t vni, hash;
1327
1328	vni = sc->vxl_vni;
1329	hash = VXLAN_SO_VNI_HASH(vni);
1330
1331	VXLAN_SO_WLOCK(vso);
1332	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
1333	if (tsc != NULL) {
1334		VXLAN_SO_WUNLOCK(vso);
1335		vxlan_release(tsc);
1336		return (EEXIST);
1337	}
1338
1339	VXLAN_ACQUIRE(sc);
1340	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
1341	VXLAN_SO_WUNLOCK(vso);
1342
1343	return (0);
1344}
1345
1346static void
1347vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1348{
1349
1350	VXLAN_SO_WLOCK(vso);
1351	LIST_REMOVE(sc, vxl_entry);
1352	VXLAN_SO_WUNLOCK(vso);
1353
1354	vxlan_release(sc);
1355}
1356
1357static struct ifnet *
1358vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
1359{
1360	struct ifnet *ifp;
1361
1362	VXLAN_LOCK_ASSERT(sc);
1363
1364	if (ipv4 && sc->vxl_im4o != NULL)
1365		ifp = sc->vxl_im4o->imo_multicast_ifp;
1366	else if (!ipv4 && sc->vxl_im6o != NULL)
1367		ifp = sc->vxl_im6o->im6o_multicast_ifp;
1368	else
1369		ifp = NULL;
1370
1371	if (ifp != NULL)
1372		if_ref(ifp);
1373
1374	return (ifp);
1375}
1376
1377static void
1378vxlan_free_multicast(struct vxlan_softc *sc)
1379{
1380
1381	if (sc->vxl_mc_ifp != NULL) {
1382		if_rele(sc->vxl_mc_ifp);
1383		sc->vxl_mc_ifp = NULL;
1384		sc->vxl_mc_ifindex = 0;
1385	}
1386
1387	if (sc->vxl_im4o != NULL) {
1388		free(sc->vxl_im4o, M_VXLAN);
1389		sc->vxl_im4o = NULL;
1390	}
1391
1392	if (sc->vxl_im6o != NULL) {
1393		free(sc->vxl_im6o, M_VXLAN);
1394		sc->vxl_im6o = NULL;
1395	}
1396}
1397
1398static int
1399vxlan_setup_multicast_interface(struct vxlan_softc *sc)
1400{
1401	struct ifnet *ifp;
1402
1403	ifp = ifunit_ref(sc->vxl_mc_ifname);
1404	if (ifp == NULL) {
1405		if_printf(sc->vxl_ifp, "multicast interfaces %s does "
1406		    "not exist\n", sc->vxl_mc_ifname);
1407		return (ENOENT);
1408	}
1409
1410	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1411		if_printf(sc->vxl_ifp, "interface %s does not support "
1412		     "multicast\n", sc->vxl_mc_ifname);
1413		if_rele(ifp);
1414		return (ENOTSUP);
1415	}
1416
1417	sc->vxl_mc_ifp = ifp;
1418	sc->vxl_mc_ifindex = ifp->if_index;
1419
1420	return (0);
1421}
1422
1423static int
1424vxlan_setup_multicast(struct vxlan_softc *sc)
1425{
1426	const union vxlan_sockaddr *group;
1427	int error;
1428
1429	group = &sc->vxl_dst_addr;
1430	error = 0;
1431
1432	if (sc->vxl_mc_ifname[0] != '\0') {
1433		error = vxlan_setup_multicast_interface(sc);
1434		if (error)
1435			return (error);
1436	}
1437
1438	/*
1439	 * Initialize an multicast options structure that is sufficiently
1440	 * populated for use in the respective IP output routine. This
1441	 * structure is typically stored in the socket, but our sockets
1442	 * may be shared among multiple interfaces.
1443	 */
1444	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1445		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
1446		    M_ZERO | M_WAITOK);
1447		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
1448		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
1449		sc->vxl_im4o->imo_multicast_vif = -1;
1450	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1451		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
1452		    M_ZERO | M_WAITOK);
1453		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
1454		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
1455	}
1456
1457	return (error);
1458}
1459
/*
 * Attach the softc to a vxlan socket for its configured local address.
 *
 * A new socket is created if possible; otherwise an existing socket for
 * the same address is looked up and shared. For a multicast destination
 * the per-softc multicast state is set up and the group is added on the
 * socket. Finally the softc is inserted into the socket's VNI hash.
 *
 * Returns 0 on success with sc->vxl_sock set. On failure everything set
 * up so far is undone at "out" and the error is returned.
 */
1460static int
1461vxlan_setup_socket(struct vxlan_softc *sc)
1462{
1463	struct vxlan_socket *vso;
1464	struct ifnet *ifp;
1465	union vxlan_sockaddr *saddr, *daddr;
1466	int multicast, error;
1467
1468	vso = NULL;
1469	ifp = sc->vxl_ifp;
1470	saddr = &sc->vxl_src_addr;
1471	daddr = &sc->vxl_dst_addr;
1472
1473	multicast = vxlan_sockaddr_in_multicast(daddr);
1474	MPASS(multicast != -1);
	/* -1 marks "no multicast slot held"; the cleanup path keys off it. */
1475	sc->vxl_vso_mc_index = -1;
1476
1477	/*
1478	 * Try to create the socket. If that fails, attempt to use an
1479	 * existing socket.
1480	 */
1481	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
1482	if (error) {
1483		if (multicast != 0)
1484			vso = vxlan_socket_mc_lookup(saddr);
1485		else
1486			vso = vxlan_socket_lookup(saddr);
1487
		/* vso is still NULL here, so "out" has nothing to undo. */
1488		if (vso == NULL) {
1489			if_printf(ifp, "cannot create socket (error: %d), "
1490			    "and no existing socket found\n", error);
1491			goto out;
1492		}
1493	}
1494
1495	if (multicast != 0) {
1496		error = vxlan_setup_multicast(sc);
1497		if (error)
1498			goto out;
1499
1500		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
1501		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
1502		if (error)
1503			goto out;
1504	}
1505
1506	sc->vxl_sock = vso;
1507	error = vxlan_socket_insert_softc(vso, sc);
1508	if (error) {
1509		sc->vxl_sock = NULL;
1510		if_printf(ifp, "network identifier %d already exists in "
1511		    "this socket\n", sc->vxl_vni);
1512		goto out;
1513	}
1514
1515	return (0);
1516
	/*
	 * Failure path: give back the multicast slot if one was obtained,
	 * free the per-softc multicast state, and release the socket
	 * (presumably dropping the reference obtained by the create or
	 * lookup above -- confirm against vxlan_socket_release()).
	 */
1517out:
1518	if (vso != NULL) {
1519		if (sc->vxl_vso_mc_index != -1) {
1520			vxlan_socket_mc_release_group_by_idx(vso,
1521			    sc->vxl_vso_mc_index);
1522			sc->vxl_vso_mc_index = -1;
1523		}
1524		if (multicast != 0)
1525			vxlan_free_multicast(sc);
1526		vxlan_socket_release(vso);
1527	}
1528
1529	return (error);
1530}
1531
1532static void
1533vxlan_setup_interface(struct vxlan_softc *sc)
1534{
1535	struct ifnet *ifp;
1536
1537	ifp = sc->vxl_ifp;
1538	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
1539
1540	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
1541		ifp->if_hdrlen += sizeof(struct ip);
1542	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
1543		ifp->if_hdrlen += sizeof(struct ip6_hdr);
1544}
1545
1546static int
1547vxlan_valid_init_config(struct vxlan_softc *sc)
1548{
1549	const char *reason;
1550
1551	if (vxlan_check_vni(sc->vxl_vni) != 0) {
1552		reason = "invalid virtual network identifier specified";
1553		goto fail;
1554	}
1555
1556	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
1557		reason = "source address type is not supported";
1558		goto fail;
1559	}
1560
1561	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
1562		reason = "destination address type is not supported";
1563		goto fail;
1564	}
1565
1566	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
1567		reason = "no valid destination address specified";
1568		goto fail;
1569	}
1570
1571	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
1572	    sc->vxl_mc_ifname[0] != '\0') {
1573		reason = "can only specify interface with a group address";
1574		goto fail;
1575	}
1576
1577	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
1578		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
1579		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
1580			reason = "source and destination address must both "
1581			    "be either IPv4 or IPv6";
1582			goto fail;
1583		}
1584	}
1585
1586	if (sc->vxl_src_addr.in4.sin_port == 0) {
1587		reason = "local port not specified";
1588		goto fail;
1589	}
1590
1591	if (sc->vxl_dst_addr.in4.sin_port == 0) {
1592		reason = "remote port not specified";
1593		goto fail;
1594	}
1595
1596	return (0);
1597
1598fail:
1599	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
1600	return (EINVAL);
1601}
1602
1603static void
1604vxlan_init_wait(struct vxlan_softc *sc)
1605{
1606
1607	VXLAN_LOCK_WASSERT(sc);
1608	while (sc->vxl_flags & VXLAN_FLAG_INIT)
1609		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
1610}
1611
1612static void
1613vxlan_init_complete(struct vxlan_softc *sc)
1614{
1615
1616	VXLAN_WLOCK(sc);
1617	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
1618	wakeup(sc);
1619	VXLAN_WUNLOCK(sc);
1620}
1621
1622static void
1623vxlan_init(void *xsc)
1624{
1625	static const uint8_t empty_mac[ETHER_ADDR_LEN];
1626	struct vxlan_softc *sc;
1627	struct ifnet *ifp;
1628
1629	sc = xsc;
1630	ifp = sc->vxl_ifp;
1631
1632	VXLAN_WLOCK(sc);
1633	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1634		VXLAN_WUNLOCK(sc);
1635		return;
1636	}
1637	sc->vxl_flags |= VXLAN_FLAG_INIT;
1638	VXLAN_WUNLOCK(sc);
1639
1640	if (vxlan_valid_init_config(sc) != 0)
1641		goto out;
1642
1643	vxlan_setup_interface(sc);
1644
1645	if (vxlan_setup_socket(sc) != 0)
1646		goto out;
1647
1648	/* Initialize the default forwarding entry. */
1649	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
1650	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
1651
1652	VXLAN_WLOCK(sc);
1653	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1654	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
1655	    vxlan_timer, sc);
1656	VXLAN_WUNLOCK(sc);
1657
1658out:
1659	vxlan_init_complete(sc);
1660}
1661
/*
 * Drop a reference on the softc and, when VXLAN_RELEASE() returns
 * non-zero, wake up any thread sleeping on it.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * Once our reference is gone the softc may be destroyed at any
	 * time, so the wakeup cannot be serialized with the softc lock.
	 * The sleepers use a timeout, which makes a missed wakeup
	 * unfortunate rather than fatal.
	 */
	if (VXLAN_RELEASE(sc) == 0)
		return;

	wakeup(sc);
}
1675
1676static void
1677vxlan_teardown_wait(struct vxlan_softc *sc)
1678{
1679
1680	VXLAN_LOCK_WASSERT(sc);
1681	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1682		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
1683}
1684
1685static void
1686vxlan_teardown_complete(struct vxlan_softc *sc)
1687{
1688
1689	VXLAN_WLOCK(sc);
1690	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
1691	wakeup(sc);
1692	VXLAN_WUNLOCK(sc);
1693}
1694
/*
 * Take the interface down and detach it from its socket.
 *
 * Entered with the softc write lock held and VXLAN_FLAG_TEARDOWN already
 * set by the caller (both asserted). The lock is dropped inside and is
 * NOT held on return. The teardown flag is cleared, and waiters woken,
 * by vxlan_teardown_complete() at the end.
 */
1695static void
1696vxlan_teardown_locked(struct vxlan_softc *sc)
1697{
1698	struct ifnet *ifp;
1699	struct vxlan_socket *vso;
1700
1701	ifp = sc->vxl_ifp;
1702
1703	VXLAN_LOCK_WASSERT(sc);
1704	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
1705
	/*
	 * Under the lock: mark the interface down, stop the timer and
	 * take the socket pointer out of the softc.
	 */
1706	ifp->if_flags &= ~IFF_UP;
1707	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1708	callout_stop(&sc->vxl_callout);
1709	vso = sc->vxl_sock;
1710	sc->vxl_sock = NULL;
1711
1712	VXLAN_WUNLOCK(sc);
1713
	/*
	 * With the softc lock dropped: unhash the softc from the socket
	 * and give up its multicast group slot, if it holds one.
	 */
1714	if (vso != NULL) {
1715		vxlan_socket_remove_softc(vso, sc);
1716
1717		if (sc->vxl_vso_mc_index != -1) {
1718			vxlan_socket_mc_release_group_by_idx(vso,
1719			    sc->vxl_vso_mc_index);
1720			sc->vxl_vso_mc_index = -1;
1721		}
1722	}
1723
	/*
	 * Wait for the softc reference count to reach zero. The sleep
	 * times out after hz ticks and the count is checked again.
	 */
1724	VXLAN_WLOCK(sc);
1725	while (sc->vxl_refcnt != 0)
1726		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
1727	VXLAN_WUNLOCK(sc);
1728
1729	callout_drain(&sc->vxl_callout);
1730
	/* Only now free the multicast state and release the socket. */
1731	vxlan_free_multicast(sc);
1732	if (vso != NULL)
1733		vxlan_socket_release(vso);
1734
1735	vxlan_teardown_complete(sc);
1736}
1737
1738static void
1739vxlan_teardown(struct vxlan_softc *sc)
1740{
1741
1742	VXLAN_WLOCK(sc);
1743	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
1744		vxlan_teardown_wait(sc);
1745		VXLAN_WUNLOCK(sc);
1746		return;
1747	}
1748
1749	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1750	vxlan_teardown_locked(sc);
1751}
1752
1753static void
1754vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
1755    struct vxlan_softc_head *list)
1756{
1757
1758	VXLAN_WLOCK(sc);
1759
1760	if (sc->vxl_mc_ifp != ifp)
1761		goto out;
1762	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1763		goto out;
1764
1765	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1766	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
1767
1768out:
1769	VXLAN_WUNLOCK(sc);
1770}
1771
1772static void
1773vxlan_timer(void *xsc)
1774{
1775	struct vxlan_softc *sc;
1776
1777	sc = xsc;
1778	VXLAN_LOCK_WASSERT(sc);
1779
1780	vxlan_ftable_expire(sc);
1781	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
1782}
1783
1784static int
1785vxlan_ioctl_ifflags(struct vxlan_softc *sc)
1786{
1787	struct ifnet *ifp;
1788
1789	ifp = sc->vxl_ifp;
1790
1791	if (ifp->if_flags & IFF_UP) {
1792		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1793			vxlan_init(sc);
1794	} else {
1795		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1796			vxlan_teardown(sc);
1797	}
1798
1799	return (0);
1800}
1801
1802static int
1803vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
1804{
1805	struct rm_priotracker tracker;
1806	struct ifvxlancfg *cfg;
1807
1808	cfg = arg;
1809	bzero(cfg, sizeof(*cfg));
1810
1811	VXLAN_RLOCK(sc, &tracker);
1812	cfg->vxlc_vni = sc->vxl_vni;
1813	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
1814	    sizeof(union vxlan_sockaddr));
1815	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
1816	    sizeof(union vxlan_sockaddr));
1817	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
1818	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
1819	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
1820	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
1821	cfg->vxlc_port_min = sc->vxl_min_port;
1822	cfg->vxlc_port_max = sc->vxl_max_port;
1823	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
1824	cfg->vxlc_ttl = sc->vxl_ttl;
1825	VXLAN_RUNLOCK(sc, &tracker);
1826
1827	return (0);
1828}
1829
1830static int
1831vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
1832{
1833	struct ifvxlancmd *cmd;
1834	int error;
1835
1836	cmd = arg;
1837
1838	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
1839		return (EINVAL);
1840
1841	VXLAN_WLOCK(sc);
1842	if (vxlan_can_change_config(sc)) {
1843		sc->vxl_vni = cmd->vxlcmd_vni;
1844		error = 0;
1845	} else
1846		error = EBUSY;
1847	VXLAN_WUNLOCK(sc);
1848
1849	return (error);
1850}
1851
1852static int
1853vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
1854{
1855	struct ifvxlancmd *cmd;
1856	union vxlan_sockaddr *vxlsa;
1857	int error;
1858
1859	cmd = arg;
1860	vxlsa = &cmd->vxlcmd_sa;
1861
1862	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1863		return (EINVAL);
1864	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
1865		return (EINVAL);
1866
1867	VXLAN_WLOCK(sc);
1868	if (vxlan_can_change_config(sc)) {
1869		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
1870		error = 0;
1871	} else
1872		error = EBUSY;
1873	VXLAN_WUNLOCK(sc);
1874
1875	return (error);
1876}
1877
1878static int
1879vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
1880{
1881	struct ifvxlancmd *cmd;
1882	union vxlan_sockaddr *vxlsa;
1883	int error;
1884
1885	cmd = arg;
1886	vxlsa = &cmd->vxlcmd_sa;
1887
1888	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1889		return (EINVAL);
1890
1891	VXLAN_WLOCK(sc);
1892	if (vxlan_can_change_config(sc)) {
1893		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
1894		error = 0;
1895	} else
1896		error = EBUSY;
1897	VXLAN_WUNLOCK(sc);
1898
1899	return (error);
1900}
1901
1902static int
1903vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
1904{
1905	struct ifvxlancmd *cmd;
1906	int error;
1907
1908	cmd = arg;
1909
1910	if (cmd->vxlcmd_port == 0)
1911		return (EINVAL);
1912
1913	VXLAN_WLOCK(sc);
1914	if (vxlan_can_change_config(sc)) {
1915		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1916		error = 0;
1917	} else
1918		error = EBUSY;
1919	VXLAN_WUNLOCK(sc);
1920
1921	return (error);
1922}
1923
1924static int
1925vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
1926{
1927	struct ifvxlancmd *cmd;
1928	int error;
1929
1930	cmd = arg;
1931
1932	if (cmd->vxlcmd_port == 0)
1933		return (EINVAL);
1934
1935	VXLAN_WLOCK(sc);
1936	if (vxlan_can_change_config(sc)) {
1937		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1938		error = 0;
1939	} else
1940		error = EBUSY;
1941	VXLAN_WUNLOCK(sc);
1942
1943	return (error);
1944}
1945
1946static int
1947vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
1948{
1949	struct ifvxlancmd *cmd;
1950	uint16_t min, max;
1951	int error;
1952
1953	cmd = arg;
1954	min = cmd->vxlcmd_port_min;
1955	max = cmd->vxlcmd_port_max;
1956
1957	if (max < min)
1958		return (EINVAL);
1959
1960	VXLAN_WLOCK(sc);
1961	if (vxlan_can_change_config(sc)) {
1962		sc->vxl_min_port = min;
1963		sc->vxl_max_port = max;
1964		error = 0;
1965	} else
1966		error = EBUSY;
1967	VXLAN_WUNLOCK(sc);
1968
1969	return (error);
1970}
1971
1972static int
1973vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
1974{
1975	struct ifvxlancmd *cmd;
1976	int error;
1977
1978	cmd = arg;
1979
1980	VXLAN_WLOCK(sc);
1981	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
1982		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
1983		error = 0;
1984	} else
1985		error = EINVAL;
1986	VXLAN_WUNLOCK(sc);
1987
1988	return (error);
1989}
1990
1991static int
1992vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
1993{
1994	struct ifvxlancmd *cmd;
1995	int error;
1996
1997	cmd = arg;
1998
1999	VXLAN_WLOCK(sc);
2000	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
2001		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
2002		error = 0;
2003	} else
2004		error = EINVAL;
2005	VXLAN_WUNLOCK(sc);
2006
2007	return (error);
2008}
2009
2010static int
2011vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
2012{
2013	struct ifvxlancmd *cmd;
2014	int error;
2015
2016	cmd = arg;
2017
2018	VXLAN_WLOCK(sc);
2019	if (vxlan_can_change_config(sc)) {
2020		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
2021		error = 0;
2022	} else
2023		error = EBUSY;
2024	VXLAN_WUNLOCK(sc);
2025
2026	return (error);
2027}
2028
2029static int
2030vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
2031{
2032	struct ifvxlancmd *cmd;
2033	int error;
2034
2035	cmd = arg;
2036
2037	VXLAN_WLOCK(sc);
2038	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
2039		sc->vxl_ttl = cmd->vxlcmd_ttl;
2040		if (sc->vxl_im4o != NULL)
2041			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
2042		if (sc->vxl_im6o != NULL)
2043			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
2044		error = 0;
2045	} else
2046		error = EINVAL;
2047	VXLAN_WUNLOCK(sc);
2048
2049	return (error);
2050}
2051
2052static int
2053vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
2054{
2055	struct ifvxlancmd *cmd;
2056
2057	cmd = arg;
2058
2059	VXLAN_WLOCK(sc);
2060	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
2061		sc->vxl_flags |= VXLAN_FLAG_LEARN;
2062	else
2063		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2064	VXLAN_WUNLOCK(sc);
2065
2066	return (0);
2067}
2068
2069static int
2070vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
2071{
2072	union vxlan_sockaddr vxlsa;
2073	struct ifvxlancmd *cmd;
2074	struct vxlan_ftable_entry *fe;
2075	int error;
2076
2077	cmd = arg;
2078	vxlsa = cmd->vxlcmd_sa;
2079
2080	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
2081		return (EINVAL);
2082	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
2083		return (EINVAL);
2084	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2085		return (EINVAL);
2086	/* BMV: We could support both IPv4 and IPv6 later. */
2087	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
2088		return (EAFNOSUPPORT);
2089
2090	fe = vxlan_ftable_entry_alloc();
2091	if (fe == NULL)
2092		return (ENOMEM);
2093
2094	if (vxlsa.in4.sin_port == 0)
2095		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
2096
2097	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
2098	    VXLAN_FE_FLAG_STATIC);
2099
2100	VXLAN_WLOCK(sc);
2101	error = vxlan_ftable_entry_insert(sc, fe);
2102	VXLAN_WUNLOCK(sc);
2103
2104	if (error)
2105		vxlan_ftable_entry_free(fe);
2106
2107	return (error);
2108}
2109
2110static int
2111vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
2112{
2113	struct ifvxlancmd *cmd;
2114	struct vxlan_ftable_entry *fe;
2115	int error;
2116
2117	cmd = arg;
2118
2119	VXLAN_WLOCK(sc);
2120	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
2121	if (fe != NULL) {
2122		vxlan_ftable_entry_destroy(sc, fe);
2123		error = 0;
2124	} else
2125		error = ENOENT;
2126	VXLAN_WUNLOCK(sc);
2127
2128	return (error);
2129}
2130
2131static int
2132vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
2133{
2134	struct ifvxlancmd *cmd;
2135	int all;
2136
2137	cmd = arg;
2138	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
2139
2140	VXLAN_WLOCK(sc);
2141	vxlan_ftable_flush(sc, all);
2142	VXLAN_WUNLOCK(sc);
2143
2144	return (0);
2145}
2146
2147static int
2148vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
2149{
2150	const struct vxlan_control *vc;
2151	union {
2152		struct ifvxlancfg	cfg;
2153		struct ifvxlancmd	cmd;
2154	} args;
2155	int out, error;
2156
2157	if (ifd->ifd_cmd >= vxlan_control_table_size)
2158		return (EINVAL);
2159
2160	bzero(&args, sizeof(args));
2161	vc = &vxlan_control_table[ifd->ifd_cmd];
2162	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
2163
2164	if ((get != 0 && out == 0) || (get == 0 && out != 0))
2165		return (EINVAL);
2166
2167	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
2168		error = priv_check(curthread, PRIV_NET_VXLAN);
2169		if (error)
2170			return (error);
2171	}
2172
2173	if (ifd->ifd_len != vc->vxlc_argsize ||
2174	    ifd->ifd_len > sizeof(args))
2175		return (EINVAL);
2176
2177	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
2178		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
2179		if (error)
2180			return (error);
2181	}
2182
2183	error = vc->vxlc_func(sc, &args);
2184	if (error)
2185		return (error);
2186
2187	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
2188		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
2189		if (error)
2190			return (error);
2191	}
2192
2193	return (0);
2194}
2195
2196static int
2197vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2198{
2199	struct vxlan_softc *sc;
2200	struct ifreq *ifr;
2201	struct ifdrv *ifd;
2202	int error;
2203
2204	sc = ifp->if_softc;
2205	ifr = (struct ifreq *) data;
2206	ifd = (struct ifdrv *) data;
2207
2208	switch (cmd) {
2209	case SIOCADDMULTI:
2210	case SIOCDELMULTI:
2211		error = 0;
2212		break;
2213
2214	case SIOCGDRVSPEC:
2215	case SIOCSDRVSPEC:
2216		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
2217		break;
2218
2219	case SIOCSIFFLAGS:
2220		error = vxlan_ioctl_ifflags(sc);
2221		break;
2222	default:
2223		error = ether_ioctl(ifp, cmd, data);
2224		break;
2225	}
2226
2227	return (error);
2228}
2229
2230#if defined(INET) || defined(INET6)
2231static uint16_t
2232vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
2233{
2234	int range;
2235	uint32_t hash;
2236
2237	range = sc->vxl_max_port - sc->vxl_min_port + 1;
2238
2239	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE &&
2240	    M_HASHTYPE_GET(m) != M_HASHTYPE_OPAQUE)
2241		hash = m->m_pkthdr.flowid;
2242	else
2243		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
2244		    sc->vxl_port_hash_key);
2245
2246	return (sc->vxl_min_port + (hash % range));
2247}
2248
2249static void
2250vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
2251    uint16_t srcport, uint16_t dstport)
2252{
2253	struct vxlanudphdr *hdr;
2254	struct udphdr *udph;
2255	struct vxlan_header *vxh;
2256	int len;
2257
2258	len = m->m_pkthdr.len - ipoff;
2259	MPASS(len >= sizeof(struct vxlanudphdr));
2260	hdr = mtodo(m, ipoff);
2261
2262	udph = &hdr->vxlh_udp;
2263	udph->uh_sport = srcport;
2264	udph->uh_dport = dstport;
2265	udph->uh_ulen = htons(len);
2266	udph->uh_sum = 0;
2267
2268	vxh = &hdr->vxlh_hdr;
2269	vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
2270	vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
2271}
2272#endif
2273
2274static int
2275vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2276    struct mbuf *m)
2277{
2278#ifdef INET
2279	struct ifnet *ifp;
2280	struct ip *ip;
2281	struct in_addr srcaddr, dstaddr;
2282	uint16_t srcport, dstport;
2283	int len, mcast, error;
2284
2285	ifp = sc->vxl_ifp;
2286	srcaddr = sc->vxl_src_addr.in4.sin_addr;
2287	srcport = vxlan_pick_source_port(sc, m);
2288	dstaddr = fvxlsa->in4.sin_addr;
2289	dstport = fvxlsa->in4.sin_port;
2290
2291	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
2292	    M_NOWAIT);
2293	if (m == NULL) {
2294		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2295		return (ENOBUFS);
2296	}
2297
2298	len = m->m_pkthdr.len;
2299
2300	ip = mtod(m, struct ip *);
2301	ip->ip_tos = 0;
2302	ip->ip_len = htons(len);
2303	ip->ip_off = 0;
2304	ip->ip_ttl = sc->vxl_ttl;
2305	ip->ip_p = IPPROTO_UDP;
2306	ip->ip_sum = 0;
2307	ip->ip_src = srcaddr;
2308	ip->ip_dst = dstaddr;
2309
2310	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
2311
2312	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2313	m->m_flags &= ~(M_MCAST | M_BCAST);
2314
2315	error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
2316	if (error == 0) {
2317		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2318		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
2319		if (mcast != 0)
2320			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2321	} else
2322		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2323
2324	return (error);
2325#else
2326	m_freem(m);
2327	return (ENOTSUP);
2328#endif
2329}
2330
2331static int
2332vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2333    struct mbuf *m)
2334{
2335#ifdef INET6
2336	struct ifnet *ifp;
2337	struct ip6_hdr *ip6;
2338	const struct in6_addr *srcaddr, *dstaddr;
2339	uint16_t srcport, dstport;
2340	int len, mcast, error;
2341
2342	ifp = sc->vxl_ifp;
2343	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
2344	srcport = vxlan_pick_source_port(sc, m);
2345	dstaddr = &fvxlsa->in6.sin6_addr;
2346	dstport = fvxlsa->in6.sin6_port;
2347
2348	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
2349	    M_NOWAIT);
2350	if (m == NULL) {
2351		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2352		return (ENOBUFS);
2353	}
2354
2355	len = m->m_pkthdr.len;
2356
2357	ip6 = mtod(m, struct ip6_hdr *);
2358	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
2359	ip6->ip6_vfc = IPV6_VERSION;
2360	ip6->ip6_plen = 0;
2361	ip6->ip6_nxt = IPPROTO_UDP;
2362	ip6->ip6_hlim = sc->vxl_ttl;
2363	ip6->ip6_src = *srcaddr;
2364	ip6->ip6_dst = *dstaddr;
2365
2366	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
2367
2368	/*
2369	 * XXX BMV We need support for RFC6935 before we can send and
2370	 * receive IPv6 UDP packets with a zero checksum.
2371	 */
2372	{
2373		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
2374		hdr->uh_sum = in6_cksum_pseudo(ip6,
2375		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
2376		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
2377		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
2378	}
2379
2380	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2381	m->m_flags &= ~(M_MCAST | M_BCAST);
2382
2383	error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
2384	if (error == 0) {
2385		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2386		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
2387		if (mcast != 0)
2388			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2389	} else
2390		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2391
2392	return (error);
2393#else
2394	m_freem(m);
2395	return (ENOTSUP);
2396#endif
2397}
2398
2399static int
2400vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
2401{
2402	struct rm_priotracker tracker;
2403	union vxlan_sockaddr vxlsa;
2404	struct vxlan_softc *sc;
2405	struct vxlan_ftable_entry *fe;
2406	struct ifnet *mcifp;
2407	struct ether_header *eh;
2408	int ipv4, error;
2409
2410	sc = ifp->if_softc;
2411	eh = mtod(m, struct ether_header *);
2412	fe = NULL;
2413	mcifp = NULL;
2414
2415	ETHER_BPF_MTAP(ifp, m);
2416
2417	VXLAN_RLOCK(sc, &tracker);
2418	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2419		VXLAN_RUNLOCK(sc, &tracker);
2420		m_freem(m);
2421		return (ENETDOWN);
2422	}
2423
2424	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
2425		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
2426	if (fe == NULL)
2427		fe = &sc->vxl_default_fe;
2428	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
2429
2430	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
2431	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2432		mcifp = vxlan_multicast_if_ref(sc, ipv4);
2433
2434	VXLAN_ACQUIRE(sc);
2435	VXLAN_RUNLOCK(sc, &tracker);
2436
2437	if (ipv4 != 0)
2438		error = vxlan_encap4(sc, &vxlsa, m);
2439	else
2440		error = vxlan_encap6(sc, &vxlsa, m);
2441
2442	vxlan_release(sc);
2443	if (mcifp != NULL)
2444		if_rele(mcifp);
2445
2446	return (error);
2447}
2448
/*
 * if_qflush method.  Intentionally empty: this function performs no work.
 */
static void
vxlan_qflush(struct ifnet *ifp __unused)
{
}
2453
2454static void
2455vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
2456    const struct sockaddr *srcsa, void *xvso)
2457{
2458	struct vxlan_socket *vso;
2459	struct vxlan_header *vxh, vxlanhdr;
2460	uint32_t vni;
2461	int error;
2462
2463	M_ASSERTPKTHDR(m);
2464	vso = xvso;
2465	offset += sizeof(struct udphdr);
2466
2467	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
2468		goto out;
2469
2470	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
2471		m_copydata(m, offset, sizeof(struct vxlan_header),
2472		    (caddr_t) &vxlanhdr);
2473		vxh = &vxlanhdr;
2474	} else
2475		vxh = mtodo(m, offset);
2476
2477	/*
2478	 * Drop if there is a reserved bit set in either the flags or VNI
2479	 * fields of the header. This goes against the specification, but
2480	 * a bit set may indicate an unsupported new feature. This matches
2481	 * the behavior of the Linux implementation.
2482	 */
2483	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
2484	    vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK))
2485		goto out;
2486
2487	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
2488	/* Adjust to the start of the inner Ethernet frame. */
2489	m_adj(m, offset + sizeof(struct vxlan_header));
2490
2491	error = vxlan_input(vso, vni, &m, srcsa);
2492	MPASS(error != 0 || m == NULL);
2493
2494out:
2495	if (m != NULL)
2496		m_freem(m);
2497}
2498
2499static int
2500vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
2501    const struct sockaddr *sa)
2502{
2503	struct vxlan_softc *sc;
2504	struct ifnet *ifp;
2505	struct mbuf *m;
2506	struct ether_header *eh;
2507	int error;
2508
2509	sc = vxlan_socket_lookup_softc(vso, vni);
2510	if (sc == NULL)
2511		return (ENOENT);
2512
2513	ifp = sc->vxl_ifp;
2514	m = *m0;
2515	eh = mtod(m, struct ether_header *);
2516
2517	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2518		error = ENETDOWN;
2519		goto out;
2520	} else if (ifp == m->m_pkthdr.rcvif) {
2521		/* XXX Does not catch more complex loops. */
2522		error = EDEADLK;
2523		goto out;
2524	}
2525
2526	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
2527		vxlan_ftable_update(sc, sa, eh->ether_shost);
2528
2529	m_clrprotoflags(m);
2530	m->m_pkthdr.rcvif = ifp;
2531	M_SETFIB(m, ifp->if_fib);
2532
2533	error = netisr_queue_src(NETISR_ETHER, 0, m);
2534	*m0 = NULL;
2535
2536out:
2537	vxlan_release(sc);
2538	return (error);
2539}
2540
2541static void
2542vxlan_set_default_config(struct vxlan_softc *sc)
2543{
2544
2545	sc->vxl_flags |= VXLAN_FLAG_LEARN;
2546
2547	sc->vxl_vni = VXLAN_VNI_MAX;
2548	sc->vxl_ttl = IPDEFTTL;
2549
2550	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
2551		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
2552		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
2553	} else {
2554		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2555		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2556	}
2557
2558	sc->vxl_min_port = V_ipport_firstauto;
2559	sc->vxl_max_port = V_ipport_lastauto;
2560
2561	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
2562	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
2563}
2564
2565static int
2566vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
2567{
2568
2569#ifndef INET
2570	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
2571	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
2572		return (EAFNOSUPPORT);
2573#endif
2574
2575#ifndef INET6
2576	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
2577	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
2578		return (EAFNOSUPPORT);
2579#endif
2580
2581	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
2582		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
2583			sc->vxl_vni = vxlp->vxlp_vni;
2584	}
2585
2586	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
2587		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
2588		sc->vxl_src_addr.in4.sin_family = AF_INET;
2589		sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_in4;
2590	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
2591		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2592		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
2593		sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_in6;
2594	}
2595
2596	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
2597		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
2598		sc->vxl_dst_addr.in4.sin_family = AF_INET;
2599		sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_in4;
2600	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
2601		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2602		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
2603		sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_in6;
2604	}
2605
2606	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
2607		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
2608	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
2609		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
2610
2611	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
2612		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
2613			sc->vxl_min_port = vxlp->vxlp_min_port;
2614			sc->vxl_max_port = vxlp->vxlp_max_port;
2615		}
2616	}
2617
2618	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
2619		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
2620
2621	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
2622		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
2623			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
2624	}
2625
2626	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
2627		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
2628			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
2629	}
2630
2631	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
2632		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
2633			sc->vxl_ttl = vxlp->vxlp_ttl;
2634	}
2635
2636	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
2637		if (vxlp->vxlp_learn == 0)
2638			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2639	}
2640
2641	return (0);
2642}
2643
2644static int
2645vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
2646{
2647	struct vxlan_softc *sc;
2648	struct ifnet *ifp;
2649	struct ifvxlanparam vxlp;
2650	int error;
2651
2652	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
2653	sc->vxl_unit = unit;
2654	vxlan_set_default_config(sc);
2655
2656	if (params != 0) {
2657		error = copyin(params, &vxlp, sizeof(vxlp));
2658		if (error)
2659			goto fail;
2660
2661		error = vxlan_set_user_config(sc, &vxlp);
2662		if (error)
2663			goto fail;
2664	}
2665
2666	ifp = if_alloc(IFT_ETHER);
2667	if (ifp == NULL) {
2668		error = ENOSPC;
2669		goto fail;
2670	}
2671
2672	sc->vxl_ifp = ifp;
2673	rm_init(&sc->vxl_lock, "vxlanrm");
2674	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
2675	sc->vxl_port_hash_key = arc4random();
2676	vxlan_ftable_init(sc);
2677
2678	vxlan_sysctl_setup(sc);
2679
2680	ifp->if_softc = sc;
2681	if_initname(ifp, vxlan_name, unit);
2682	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2683	ifp->if_init = vxlan_init;
2684	ifp->if_ioctl = vxlan_ioctl;
2685	ifp->if_transmit = vxlan_transmit;
2686	ifp->if_qflush = vxlan_qflush;
2687
2688	vxlan_fakeaddr(sc);
2689	ether_ifattach(ifp, sc->vxl_hwaddr);
2690
2691	ifp->if_baudrate = 0;
2692	ifp->if_hdrlen = 0;
2693
2694	return (0);
2695
2696fail:
2697	free(sc, M_VXLAN);
2698	return (error);
2699}
2700
2701static void
2702vxlan_clone_destroy(struct ifnet *ifp)
2703{
2704	struct vxlan_softc *sc;
2705
2706	sc = ifp->if_softc;
2707
2708	vxlan_teardown(sc);
2709
2710	vxlan_ftable_flush(sc, 1);
2711
2712	ether_ifdetach(ifp);
2713	if_free(ifp);
2714
2715	vxlan_ftable_fini(sc);
2716
2717	vxlan_sysctl_destroy(sc);
2718	rm_destroy(&sc->vxl_lock);
2719	free(sc, M_VXLAN);
2720}
2721
2722/* BMV: Taken from if_bridge. */
2723static uint32_t
2724vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
2725{
2726	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
2727
2728	b += addr[5] << 8;
2729	b += addr[4];
2730	a += addr[3] << 24;
2731	a += addr[2] << 16;
2732	a += addr[1] << 8;
2733	a += addr[0];
2734
2735/*
2736 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2737 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2738 */
2739#define	mix(a, b, c)							\
2740do {									\
2741	a -= b; a -= c; a ^= (c >> 13);					\
2742	b -= c; b -= a; b ^= (a << 8);					\
2743	c -= a; c -= b; c ^= (b >> 13);					\
2744	a -= b; a -= c; a ^= (c >> 12);					\
2745	b -= c; b -= a; b ^= (a << 16);					\
2746	c -= a; c -= b; c ^= (b >> 5);					\
2747	a -= b; a -= c; a ^= (c >> 3);					\
2748	b -= c; b -= a; b ^= (a << 10);					\
2749	c -= a; c -= b; c ^= (b >> 15);					\
2750} while (0)
2751
2752	mix(a, b, c);
2753
2754#undef mix
2755
2756	return (c);
2757}
2758
2759static void
2760vxlan_fakeaddr(struct vxlan_softc *sc)
2761{
2762
2763	/*
2764	 * Generate a non-multicast, locally administered address.
2765	 *
2766	 * BMV: Should we use the FreeBSD OUI range instead?
2767	 */
2768	arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1);
2769	sc->vxl_hwaddr[0] &= ~1;
2770	sc->vxl_hwaddr[0] |= 2;
2771}
2772
2773static int
2774vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
2775    const struct sockaddr *sa)
2776{
2777
2778	return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
2779}
2780
2781static void
2782vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
2783    const struct sockaddr *sa)
2784{
2785
2786	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2787	bzero(vxladdr, sizeof(*vxladdr));
2788
2789	if (sa->sa_family == AF_INET) {
2790		vxladdr->in4 = *satoconstsin(sa);
2791		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2792	} else if (sa->sa_family == AF_INET6) {
2793		vxladdr->in6 = *satoconstsin6(sa);
2794		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2795	}
2796}
2797
2798static int
2799vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
2800    const struct sockaddr *sa)
2801{
2802	int equal;
2803
2804	if (sa->sa_family == AF_INET) {
2805		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2806		equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
2807	} else if (sa->sa_family == AF_INET6) {
2808		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2809		equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
2810	} else
2811		equal = 0;
2812
2813	return (equal);
2814}
2815
2816static void
2817vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
2818    const struct sockaddr *sa)
2819{
2820
2821	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2822
2823	if (sa->sa_family == AF_INET) {
2824		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2825		vxladdr->in4.sin_family = AF_INET;
2826		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2827		vxladdr->in4.sin_addr = *in4;
2828	} else if (sa->sa_family == AF_INET6) {
2829		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2830		vxladdr->in6.sin6_family = AF_INET6;
2831		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2832		vxladdr->in6.sin6_addr = *in6;
2833	}
2834}
2835
2836static int
2837vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
2838{
2839	const struct sockaddr *sa;
2840	int supported;
2841
2842	sa = &vxladdr->sa;
2843	supported = 0;
2844
2845	if (sa->sa_family == AF_UNSPEC && unspec != 0) {
2846		supported = 1;
2847	} else if (sa->sa_family == AF_INET) {
2848#ifdef INET
2849		supported = 1;
2850#endif
2851	} else if (sa->sa_family == AF_INET6) {
2852#ifdef INET6
2853		supported = 1;
2854#endif
2855	}
2856
2857	return (supported);
2858}
2859
2860static int
2861vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
2862{
2863	const struct sockaddr *sa;
2864	int any;
2865
2866	sa = &vxladdr->sa;
2867
2868	if (sa->sa_family == AF_INET) {
2869		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2870		any = in4->s_addr == INADDR_ANY;
2871	} else if (sa->sa_family == AF_INET6) {
2872		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2873		any = IN6_IS_ADDR_UNSPECIFIED(in6);
2874	} else
2875		any = -1;
2876
2877	return (any);
2878}
2879
2880static int
2881vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
2882{
2883	const struct sockaddr *sa;
2884	int mc;
2885
2886	sa = &vxladdr->sa;
2887
2888	if (sa->sa_family == AF_INET) {
2889		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2890		mc = IN_MULTICAST(ntohl(in4->s_addr));
2891	} else if (sa->sa_family == AF_INET6) {
2892		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2893		mc = IN6_IS_ADDR_MULTICAST(in6);
2894	} else
2895		mc = -1;
2896
2897	return (mc);
2898}
2899
2900static int
2901vxlan_can_change_config(struct vxlan_softc *sc)
2902{
2903	struct ifnet *ifp;
2904
2905	ifp = sc->vxl_ifp;
2906	VXLAN_LOCK_ASSERT(sc);
2907
2908	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2909		return (0);
2910	if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
2911		return (0);
2912
2913	return (1);
2914}
2915
2916static int
2917vxlan_check_vni(uint32_t vni)
2918{
2919
2920	return (vni >= VXLAN_VNI_MAX);
2921}
2922
2923static int
2924vxlan_check_ttl(int ttl)
2925{
2926
2927	return (ttl > MAXTTL);
2928}
2929
2930static int
2931vxlan_check_ftable_timeout(uint32_t timeout)
2932{
2933
2934	return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
2935}
2936
2937static int
2938vxlan_check_ftable_max(uint32_t max)
2939{
2940
2941	return (max > VXLAN_FTABLE_MAX);
2942}
2943
2944static void
2945vxlan_sysctl_setup(struct vxlan_softc *sc)
2946{
2947	struct sysctl_ctx_list *ctx;
2948	struct sysctl_oid *node;
2949	struct vxlan_statistics *stats;
2950	char namebuf[8];
2951
2952	ctx = &sc->vxl_sysctl_ctx;
2953	stats = &sc->vxl_stats;
2954	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
2955
2956	sysctl_ctx_init(ctx);
2957	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
2958	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
2959	    CTLFLAG_RD, NULL, "");
2960
2961	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
2962	    OID_AUTO, "ftable", CTLFLAG_RD, NULL, "");
2963	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
2964	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
2965	    "Number of entries in fowarding table");
2966	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
2967	     CTLFLAG_RD, &sc->vxl_ftable_max, 0,
2968	    "Maximum number of entries allowed in fowarding table");
2969	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
2970	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
2971	    "Number of seconds between prunes of the forwarding table");
2972	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
2973	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
2974	    sc, 0, vxlan_ftable_sysctl_dump, "A",
2975	    "Dump the forwarding table entries");
2976
2977	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
2978	    OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
2979	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
2980	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
2981	    "Fowarding table reached maximum entries");
2982	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
2983	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
2984	    &stats->ftable_lock_upgrade_failed, 0,
2985	    "Forwarding table update required lock upgrade");
2986}
2987
2988static void
2989vxlan_sysctl_destroy(struct vxlan_softc *sc)
2990{
2991
2992	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
2993	sc->vxl_sysctl_node = NULL;
2994}
2995
2996static int
2997vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
2998{
2999	char path[64];
3000
3001	snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
3002	    sc->vxl_unit, knob);
3003	TUNABLE_INT_FETCH(path, &def);
3004
3005	return (def);
3006}
3007
3008static void
3009vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
3010{
3011	struct vxlan_softc_head list;
3012	struct vxlan_socket *vso;
3013	struct vxlan_softc *sc, *tsc;
3014
3015	LIST_INIT(&list);
3016
3017	if (ifp->if_flags & IFF_RENAMING)
3018		return;
3019	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3020		return;
3021
3022	mtx_lock(&vxlan_list_mtx);
3023	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
3024		vxlan_socket_ifdetach(vso, ifp, &list);
3025	mtx_unlock(&vxlan_list_mtx);
3026
3027	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
3028		LIST_REMOVE(sc, vxl_ifdetach_list);
3029
3030		VXLAN_WLOCK(sc);
3031		if (sc->vxl_flags & VXLAN_FLAG_INIT)
3032			vxlan_init_wait(sc);
3033		vxlan_teardown_locked(sc);
3034	}
3035}
3036
/*
 * Module load: initialise the global socket list and its mutex, register
 * for ifnet departure events, and register the interface cloner.
 */
static void
vxlan_load(void)
{

	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
	LIST_INIT(&vxlan_socket_list);
	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
	vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
	    vxlan_clone_destroy, 0);
}
3048
/*
 * Module unload: deregister the ifnet departure handler, detach the
 * cloner, and destroy the list mutex.  The socket list is asserted to be
 * empty at the end.
 */
static void
vxlan_unload(void)
{

	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event_tag);
	if_clone_detach(vxlan_cloner);
	mtx_destroy(&vxlan_list_mtx);
	MPASS(LIST_EMPTY(&vxlan_socket_list));
}
3059
3060static int
3061vxlan_modevent(module_t mod, int type, void *unused)
3062{
3063	int error;
3064
3065	error = 0;
3066
3067	switch (type) {
3068	case MOD_LOAD:
3069		vxlan_load();
3070		break;
3071	case MOD_UNLOAD:
3072		vxlan_unload();
3073		break;
3074	default:
3075		error = ENOTSUP;
3076		break;
3077	}
3078
3079	return (error);
3080}
3081
3082static moduledata_t vxlan_mod = {
3083	"if_vxlan",
3084	vxlan_modevent,
3085	0
3086};
3087
3088DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3089MODULE_VERSION(if_vxlan, 1);
3090