1/*	$OpenBSD: if_gre.c,v 1.178 2023/12/23 10:52:54 bluhm Exp $ */
2/*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3
4/*
5 * Copyright (c) 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Heiko W.Rupp <hwr@pilhuhn.de>
10 *
11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35/*
36 * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37 * See gre(4) for more details.
38 * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39 */
40
41#include "bpfilter.h"
42#include "pf.h"
43
44#include <sys/param.h>
45#include <sys/mbuf.h>
46#include <sys/socket.h>
47#include <sys/sockio.h>
48#include <sys/kernel.h>
49#include <sys/systm.h>
50#include <sys/errno.h>
51#include <sys/timeout.h>
52#include <sys/queue.h>
53#include <sys/tree.h>
54#include <sys/pool.h>
55#include <sys/rwlock.h>
56
57#include <crypto/siphash.h>
58
59#include <net/if.h>
60#include <net/if_var.h>
61#include <net/if_types.h>
62#include <net/if_media.h>
63#include <net/route.h>
64
65#include <netinet/in.h>
66#include <netinet/in_var.h>
67#include <netinet/if_ether.h>
68#include <netinet/ip.h>
69#include <netinet/ip_var.h>
70#include <netinet/ip_ecn.h>
71
72#ifdef INET6
73#include <netinet/ip6.h>
74#include <netinet6/ip6_var.h>
75#include <netinet6/in6_var.h>
76#endif
77
78#ifdef PIPEX
79#include <net/pipex.h>
80#endif
81
82#ifdef MPLS
83#include <netmpls/mpls.h>
84#endif /* MPLS */
85
86#if NBPFILTER > 0
87#include <net/bpf.h>
88#endif
89
90#if NPF > 0
91#include <net/pfvar.h>
92#endif
93
94#include <net/if_gre.h>
95
96#include <netinet/ip_gre.h>
97#include <sys/sysctl.h>
98
99/* for nvgre bridge shizz */
100#include <net/if_bridge.h>
101#include <net/if_etherbridge.h>
102
103/*
104 * packet formats
105 */
/*
 * GRE base header (RFC 1701): a flags/version word followed by the
 * EtherType of the encapsulated payload.  Optional fields (checksum,
 * key, sequence) follow when the corresponding flag bit is set.
 */
struct gre_header {
	uint16_t		gre_flags;
#define GRE_CP				0x8000  /* Checksum Present */
#define GRE_KP				0x2000  /* Key Present */
#define GRE_SP				0x1000  /* Sequence Present */

#define GRE_VERS_MASK			0x0007
#define GRE_VERS_0			0x0000	/* standard GRE */
#define GRE_VERS_1			0x0001	/* "enhanced" GRE (PPTP/EoIP) */

	uint16_t		gre_proto;	/* payload EtherType */
} __packed __aligned(4);

/* optional checksum field, present when GRE_CP is set */
struct gre_h_cksum {
	uint16_t		gre_cksum;
	uint16_t		gre_reserved1;
} __packed __aligned(4);

/* optional key field, present when GRE_KP is set */
struct gre_h_key {
	uint32_t		gre_key;
} __packed __aligned(4);

/* GRE protocol value used by MikroTik Ethernet over IP */
#define GRE_EOIP		0x6400

/* EoIP reuses the key field for a payload length and a tunnel id */
struct gre_h_key_eoip {
	uint16_t		eoip_len;	/* network order */
	uint16_t		eoip_tunnel_id;	/* little endian */
} __packed __aligned(4);

#define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
#define NVGRE_VSID_RES_MAX	0x000fff
#define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */

/* optional sequence number field, present when GRE_SP is set */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);

/* WCCP redirect header that may follow the GRE header (WCCPv2) */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);

/* EtherType used by WCCP/GRE encapsulation */
#define GRE_WCCP 0x883e

/* outer IPv4 header plus base GRE header; used to size if_hdrlen */
#define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
153
/*
 * GRE tunnel metadata
 */

/* keepalive state machine states (shared by gre(4) and eoip(4)) */
#define GRE_KA_NONE		0	/* keepalives disabled */
#define GRE_KA_DOWN		1
#define GRE_KA_HOLD		2
#define GRE_KA_UP		3

/* an outer endpoint address; gre_tunnel.t_af says which member is valid */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

static inline int
		gre_ip_cmp(int, const union gre_addr *,
		    const union gre_addr *);

#define GRE_KEY_MIN		0x00000000U
#define GRE_KEY_MAX		0xffffffffU
#define GRE_KEY_SHIFT		0

/* with entropy keying only the top 24 bits of the key select the tunnel */
#define GRE_KEY_ENTROPY_MIN	0x00000000U
#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
#define GRE_KEY_ENTROPY_SHIFT	8

/*
 * Shared tunnel configuration.  It is embedded at the front of every
 * gre/mgre/egre/nvgre/eoip softc ("must be first") so lookup code can
 * treat any softc pointer as a struct gre_tunnel pointer.
 */
struct gre_tunnel {
	uint32_t		t_key_mask;	/* which key bits to match */
#define GRE_KEY_NONE			htonl(0x00000000U)
#define GRE_KEY_ENTROPY			htonl(0xffffff00U)
#define GRE_KEY_MASK			htonl(0xffffffffU)
	uint32_t		t_key;		/* GRE key, network order */

	u_int			t_rtableid;	/* rtable of the outer IP */
	union gre_addr		t_src;		/* local outer address */
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	union gre_addr		t_dst;		/* remote outer address */
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;		/* outer TTL / hop limit */
	int			t_txhprio;	/* IF_HDRPRIO_* or fixed prio */
	int			t_rxhprio;	/* IF_HDRPRIO_* or fixed prio */
	int			t_ecn;		/* ECN_ALLOWED etc. policy */
	uint16_t		t_df;		/* htons(IP_DF) or htons(0) */
	sa_family_t		t_af;		/* AF of t_src/t_dst */
};
201
202static int
203		gre_cmp_src(const struct gre_tunnel *,
204		    const struct gre_tunnel *);
205static int
206		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
207
208static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
209static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
210static int	gre_del_tunnel(struct gre_tunnel *);
211
212static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
213static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
214static int	gre_del_vnetid(struct gre_tunnel *);
215
216static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
217static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);
218
219static struct mbuf *
220		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
221		    struct mbuf *, uint16_t, uint8_t, uint8_t);
222#define gre_encap(_t, _m, _p, _ttl, _tos) \
223		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))
224
225static struct mbuf *
226		gre_encap_dst_ip(const struct gre_tunnel *,
227		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
228#define gre_encap_ip(_t, _m, _ttl, _tos) \
229		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))
230
231static int
232		gre_ip_output(const struct gre_tunnel *, struct mbuf *);
233
234static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
235		    u_long, void *);
236
237static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
238static uint8_t	gre_l3_tos(const struct gre_tunnel *,
239		    const struct mbuf *, uint8_t);
240
241/*
242 * layer 3 GRE tunnels
243 */
244
/* gre(4) interface state; sc_tunnel must stay first for tunnel lookups */
struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;	/* entry on gre_list */

	struct ifnet		sc_if;

	struct timeout		sc_ka_send;	/* keepalive transmit timer */
	struct timeout		sc_ka_hold;	/* keepalive hold timer */

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	/* keepalive packets carry a SipHash digest (see gre_keepalive) */
	SIPHASH_KEY		sc_ka_key;
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/* on-the-wire payload of a gre(4) keepalive packet */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);
273
274static int	gre_clone_create(struct if_clone *, int);
275static int	gre_clone_destroy(struct ifnet *);
276
277struct if_clone gre_cloner =
278    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
279
280/* protected by NET_LOCK */
281struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
282
283static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
284		    struct rtentry *);
285static void	gre_start(struct ifnet *);
286static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
287
288static int	gre_up(struct gre_softc *);
289static int	gre_down(struct gre_softc *);
290static void	gre_link_state(struct ifnet *, unsigned int);
291
292static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
293		    struct gre_tunnel *);
294
295static struct mbuf *
296		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
297		    uint8_t *, uint8_t);
298#ifdef INET6
299static struct mbuf *
300		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
301		    uint8_t *, uint8_t);
302#endif
303#ifdef MPLS
304static struct mbuf *
305		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
306		    uint8_t *, uint8_t);
307#endif
308static void	gre_keepalive_send(void *);
309static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
310static void	gre_keepalive_hold(void *);
311
312static struct mbuf *
313		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
314		    struct mbuf *m, sa_family_t);
315
316#define gre_l3_encap(_t, _m, _af) \
317		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
318
/* mgre(4): point-to-multipoint layer 3 tunnel interface state */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;	/* entry in mgre_tree */

	struct ifnet		sc_if;
};

RBT_HEAD(mgre_tree, mgre_softc);
327
328static inline int
329		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);
330
331RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
332
333static int	mgre_clone_create(struct if_clone *, int);
334static int	mgre_clone_destroy(struct ifnet *);
335
336struct if_clone mgre_cloner =
337    IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);
338
339static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
340static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
341		    struct rtentry *);
342static void	mgre_start(struct ifnet *);
343static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);
344
345static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
346static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
347static int	mgre_up(struct mgre_softc *);
348static int	mgre_down(struct mgre_softc *);
349
350/* protected by NET_LOCK */
351struct mgre_tree mgre_tree = RBT_INITIALIZER();
352
353/*
354 * Ethernet GRE tunnels
355 */
356
357static struct mbuf *
358		gre_ether_align(struct mbuf *, int);
359
/* egre(4): Ethernet-over-GRE interface state */
struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;	/* entry in egre_tree */

	struct arpcom		sc_ac;		/* Ethernet interface glue */
	struct ifmedia		sc_media;
};

RBT_HEAD(egre_tree, egre_softc);
369
370static inline int
371		egre_cmp(const struct egre_softc *, const struct egre_softc *);
372
373RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);
374
375static int	egre_clone_create(struct if_clone *, int);
376static int	egre_clone_destroy(struct ifnet *);
377
378static void	egre_start(struct ifnet *);
379static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
380static int	egre_media_change(struct ifnet *);
381static void	egre_media_status(struct ifnet *, struct ifmediareq *);
382
383static int	egre_up(struct egre_softc *);
384static int	egre_down(struct egre_softc *);
385
386static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
387		    uint8_t);
388struct if_clone egre_cloner =
389    IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);
390
391/* protected by NET_LOCK */
392struct egre_tree egre_tree = RBT_INITIALIZER();
393
394/*
395 * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
396 */
397
/* nvgre(4) interface state */
struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;	/* parent interface (see nvgre_set_parent) */
	RBT_ENTRY(nvgre_softc)	 sc_uentry;	/* entry in nvgre_ucast_tree */
	RBT_ENTRY(nvgre_softc)	 sc_mentry;	/* entry in nvgre_mcast_tree */

	struct arpcom		 sc_ac;		/* Ethernet interface glue */
	struct ifmedia		 sc_media;

	struct mbuf_queue	 sc_send_list;	/* packets queued for nvgre_send() */
	struct task		 sc_send_task;	/* runs nvgre_send() */

	void			*sc_inm;	/* multicast membership handle? confirm in nvgre_up */
	struct task		 sc_ltask;	/* runs nvgre_link_change() */
	struct task		 sc_dtask;	/* runs nvgre_detach() */

	struct etherbridge	 sc_eb;		/* learned MAC -> endpoint table */
};

RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
RBT_HEAD(nvgre_mcast_tree, nvgre_softc);
419
420static inline int
421		nvgre_cmp_ucast(const struct nvgre_softc *,
422		    const struct nvgre_softc *);
423static int
424		nvgre_cmp_mcast(const struct gre_tunnel *,
425		    const union gre_addr *, unsigned int,
426		    const struct gre_tunnel *, const union gre_addr *,
427		    unsigned int);
428static inline int
429		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
430		    const struct nvgre_softc *);
431
432RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
433RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
434
435static int	nvgre_clone_create(struct if_clone *, int);
436static int	nvgre_clone_destroy(struct ifnet *);
437
438static void	nvgre_start(struct ifnet *);
439static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);
440
441static int	nvgre_up(struct nvgre_softc *);
442static int	nvgre_down(struct nvgre_softc *);
443static int	nvgre_set_parent(struct nvgre_softc *, const char *);
444static void	nvgre_link_change(void *);
445static void	nvgre_detach(void *);
446
447static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
448		    uint8_t);
449static void	nvgre_send(void *);
450
451static int	nvgre_add_addr(struct nvgre_softc *, const struct ifbareq *);
452static int	nvgre_del_addr(struct nvgre_softc *, const struct ifbareq *);
453
454static int	 nvgre_eb_port_eq(void *, void *, void *);
455static void	*nvgre_eb_port_take(void *, void *);
456static void	 nvgre_eb_port_rele(void *, void *);
457static size_t	 nvgre_eb_port_ifname(void *, char *, size_t, void *);
458static void	 nvgre_eb_port_sa(void *, struct sockaddr_storage *, void *);
459
/* glue between the generic etherbridge learning code and nvgre endpoints */
static const struct etherbridge_ops nvgre_etherbridge_ops = {
	nvgre_eb_port_eq,
	nvgre_eb_port_take,
	nvgre_eb_port_rele,
	nvgre_eb_port_ifname,
	nvgre_eb_port_sa,
};

struct if_clone nvgre_cloner =
    IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);

/* endpoint addresses referenced from the etherbridge table, shared pool */
struct pool nvgre_endpoint_pool;

/* protected by NET_LOCK */
struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
476
477/*
478 * MikroTik Ethernet over IP protocol (eoip)
479 */
480
/* eoip(4): MikroTik Ethernet over IP interface state */
struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;	/* EoIP tunnel id (little endian on the wire) */
	RBT_ENTRY(eoip_softc)	sc_entry;	/* entry in eoip_tree */

	struct arpcom		sc_ac;		/* Ethernet interface glue */
	struct ifmedia		sc_media;

	struct timeout		sc_ka_send;	/* keepalive transmit timer */
	struct timeout		sc_ka_hold;	/* keepalive hold timer */

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};

RBT_HEAD(eoip_tree, eoip_softc);
501
502static inline int
503		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);
504
505RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
506
507static int	eoip_clone_create(struct if_clone *, int);
508static int	eoip_clone_destroy(struct ifnet *);
509
510static void	eoip_start(struct ifnet *);
511static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);
512
513static void	eoip_keepalive_send(void *);
514static void	eoip_keepalive_recv(struct eoip_softc *);
515static void	eoip_keepalive_hold(void *);
516
517static int	eoip_up(struct eoip_softc *);
518static int	eoip_down(struct eoip_softc *);
519
520static struct mbuf *
521		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);
522
523static struct mbuf *
524		eoip_input(struct gre_tunnel *, struct mbuf *,
525		    const struct gre_header *, uint8_t, int);
526struct if_clone eoip_cloner =
527    IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);
528
529/* protected by NET_LOCK */
530struct eoip_tree eoip_tree = RBT_INITIALIZER();
531
/*
 * It is not easy to calculate the right value for a GRE MTU.
 * We leave this task to the admin and use the same default that
 * other vendors use.
 */
#define GREMTU 1476

/*
 * We can control the acceptance of GRE and MobileIP packets by
 * altering the sysctl net.inet.gre.allow values
 * respectively. Zero means drop them, all else is acceptance.  We can also
 * control acceptance of WCCPv1-style GRE packets through the
 * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
 * allowed as well.
 *
 */
int gre_allow = 0;	/* net.inet.gre.allow */
int gre_wccp = 0;	/* net.inet.gre.wccp */
550
551void
552greattach(int n)
553{
554	if_clone_attach(&gre_cloner);
555	if_clone_attach(&mgre_cloner);
556	if_clone_attach(&egre_cloner);
557	if_clone_attach(&nvgre_cloner);
558	if_clone_attach(&eoip_cloner);
559}
560
/*
 * Clone creation for gre(4).  Allocate the softc, hook up the
 * point-to-point interface handlers, set the tunnel defaults,
 * attach the interface and put it on gre_list.  The allocation
 * waits, so this always returns 0.
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_bpf_mtap = p2p_bpf_mtap;
	ifp->if_input = p2p_input;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	/* tunnel defaults: system TTL, payload prio out, no DF, ECN allowed */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* keepalives start disabled */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	/* gre_list is protected by NET_LOCK */
	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}
611
/*
 * Clone destruction for gre(4): bring the interface down if it is
 * running, unlink it from gre_list under NET_LOCK, then detach and
 * free the softc.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
630
/*
 * Clone creation for mgre(4), the point-to-multipoint variant.
 * Unlike gre(4) it uses its own output/rtrequest handlers so the
 * tunnel destination can be resolved per route, and it is inserted
 * into mgre_tree only when brought up (not here).
 */
static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_bpf_mtap = p2p_bpf_mtap;
	ifp->if_input = p2p_input;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	/* same tunnel defaults as gre(4) */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}
672
/*
 * Clone destruction for mgre(4): bring the interface down (which
 * also removes it from mgre_tree) before detaching and freeing.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		mgre_down(sc);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
689
/*
 * Clone creation for egre(4), Ethernet over GRE.  Set up a fake
 * Ethernet interface with autoselect media and gre-style tunnel
 * defaults (no ECN policy: the payload is Ethernet, not IP).
 */
static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
725
/*
 * Clone destruction for egre(4): take the interface down, then tear
 * down media state and the Ethernet attachment before freeing.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		egre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
744
745static int
746nvgre_clone_create(struct if_clone *ifc, int unit)
747{
748	struct nvgre_softc *sc;
749	struct ifnet *ifp;
750	struct gre_tunnel *tunnel;
751	int error;
752
753	if (nvgre_endpoint_pool.pr_size == 0) {
754		pool_init(&nvgre_endpoint_pool, sizeof(union gre_addr),
755		    0, IPL_SOFTNET, 0, "nvgreep", NULL);
756	}
757
758	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
759	ifp = &sc->sc_ac.ac_if;
760
761	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
762	    ifc->ifc_name, unit);
763
764	error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
765	    &nvgre_etherbridge_ops, sc);
766	if (error != 0) {
767		free(sc, M_DEVBUF, sizeof(*sc));
768		return (error);
769	}
770
771	ifp->if_softc = sc;
772	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
773	ifp->if_ioctl = nvgre_ioctl;
774	ifp->if_start = nvgre_start;
775	ifp->if_xflags = IFXF_CLONED;
776	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
777	ether_fakeaddr(ifp);
778
779	tunnel = &sc->sc_tunnel;
780	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
781	tunnel->t_txhprio = 0;
782	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
783	tunnel->t_df = htons(IP_DF);
784	tunnel->t_key_mask = GRE_KEY_ENTROPY;
785	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
786	    GRE_KEY_ENTROPY_SHIFT);
787
788	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
789	task_set(&sc->sc_send_task, nvgre_send, sc);
790	task_set(&sc->sc_ltask, nvgre_link_change, sc);
791	task_set(&sc->sc_dtask, nvgre_detach, sc);
792
793	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
794	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
795	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
796
797	if_counters_alloc(ifp);
798	if_attach(ifp);
799	ether_ifattach(ifp);
800
801	return (0);
802}
803
/*
 * Clone destruction for nvgre(4): bring the interface down, destroy
 * the etherbridge table, and tear down media/Ethernet state before
 * freeing the softc.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		nvgre_down(sc);
	NET_UNLOCK();

	etherbridge_destroy(&sc->sc_eb);

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
824
/*
 * Clone creation for eoip(4), MikroTik Ethernet over IP.  Like
 * egre(4) but with EoIP keepalives: the keepalive machinery starts
 * enabled (state GRE_KA_DOWN) with a 10 second period and count.
 */
static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	/* keepalive defaults */
	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
867
/*
 * Clone destruction for eoip(4): take the interface down, then tear
 * down media state and the Ethernet attachment before freeing.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		eoip_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
886
/*
 * Input handler for GRE over IPv4.  Build a tunnel lookup key from
 * the outer header -- src/dst swapped so the key matches the local
 * tunnel configuration -- and hand the packet to gre_input_key().
 * Packets no tunnel claims fall through to raw IP input.
 */
int
gre_input(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip *ip;

	ip = mtod(m, struct ip *);

	/* XXX check if ip_src is sane for nvgre? */

	key.t_af = AF_INET;
	key.t_src4 = ip->ip_dst;	/* swapped: their dst is our src */
	key.t_dst4 = ip->ip_src;

	/* gre_input_key() fills in t_rtableid and t_key_mask/t_key */
	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
		return (rip_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
907
908#ifdef INET6
/*
 * Input handler for GRE over IPv6.  Same as gre_input() but the
 * outer traffic class is extracted from bits 27..20 of the flow
 * word to serve as the outer TOS.
 */
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip6_hdr *ip6;
	uint32_t flow;

	ip6 = mtod(m, struct ip6_hdr *);

	/* XXX check if ip6_src is sane for nvgre? */

	key.t_af = AF_INET6;
	key.t_src6 = ip6->ip6_dst;	/* swapped: their dst is our src */
	key.t_dst6 = ip6->ip6_src;

	flow = bemtoh32(&ip6->ip6_flow);

	/* flow >> 20 is the 8-bit traffic class */
	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
		return (rip6_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
932#endif /* INET6 */
933
934static inline struct ifnet *
935gre_find(const struct gre_tunnel *key)
936{
937	struct gre_softc *sc;
938
939	TAILQ_FOREACH(sc, &gre_list, sc_entry) {
940		if (gre_cmp(key, &sc->sc_tunnel) != 0)
941			continue;
942
943		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
944			continue;
945
946		return (&sc->sc_if);
947	}
948
949	return (NULL);
950}
951
952static inline struct ifnet *
953mgre_find(const struct gre_tunnel *key)
954{
955	struct mgre_softc *sc;
956
957	NET_ASSERT_LOCKED();
958	sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
959	if (sc != NULL)
960		return (&sc->sc_if);
961
962	return (NULL);
963}
964
965static struct mbuf *
966gre_input_1(struct gre_tunnel *key, struct mbuf *m,
967    const struct gre_header *gh, uint8_t otos, int iphlen)
968{
969	switch (gh->gre_proto) {
970	case htons(ETHERTYPE_PPP):
971#ifdef PIPEX
972		if (pipex_enable) {
973			struct pipex_session *session;
974
975			session = pipex_pptp_lookup_session(m);
976			if (session != NULL) {
977				struct mbuf *m0;
978
979				m0 = pipex_pptp_input(m, session);
980				pipex_rele_session(session);
981
982				if (m0 == NULL)
983					return (NULL);
984			}
985		}
986#endif
987		break;
988	case htons(GRE_EOIP):
989		return (eoip_input(key, m, gh, otos, iphlen));
990		break;
991	}
992
993	return (m);
994}
995
/*
 * Common GRE input processing for IPv4 and IPv6 outer headers.
 * "key" arrives with the (swapped) outer addresses and address
 * family set; the rtable id and the optional GRE key are filled in
 * here.  The packet is offered to egre/nvgre for transparent
 * Ethernet bridging, or to a matching gre/mgre interface, where the
 * inner header's ECN/TOS is reconciled with the outer TOS before
 * delivery.  Returns IPPROTO_DONE when the packet was consumed, or
 * -1 with *mp still valid so the caller can fall back to raw input.
 */
static int
gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
    struct gre_tunnel *key)
{
	struct mbuf *m = *mp;
	int iphlen = *offp, hlen, rxprio;
	struct ifnet *ifp;
	const struct gre_tunnel *tunnel;
	caddr_t buf;
	struct gre_header *gh;
	struct gre_h_key *gkh;
	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
	    uint8_t *, uint8_t);
	int mcast = 0;
	uint8_t itos;

	/* net.inet.gre.allow gates all GRE input */
	if (!gre_allow)
		goto decline;

	/* complete the lookup key */
	key->t_rtableid = m->m_pkthdr.ph_rtableid;

	hlen = iphlen + sizeof(*gh);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (IPPROTO_DONE);

	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);

	/* check the version */
	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
	case htons(GRE_VERS_0):
		break;

	case htons(GRE_VERS_1):
		/* PPTP/EoIP; NULL means the packet was consumed */
		m = gre_input_1(key, m, gh, otos, iphlen);
		if (m == NULL)
			return (IPPROTO_DONE);
		/* FALLTHROUGH */
	default:
		goto decline;
	}

	/* the only optional bit in the header is K flag */
	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
		goto decline;

	if (gh->gre_flags & htons(GRE_KP)) {
		/* pull up and record the key so tunnel matching can use it */
		hlen += sizeof(*gkh);
		if (m->m_pkthdr.len < hlen)
			goto decline;

		m = m_pullup(m, hlen);
		if (m == NULL)
			return (IPPROTO_DONE);

		buf = mtod(m, caddr_t);
		gh = (struct gre_header *)(buf + iphlen);
		gkh = (struct gre_h_key *)(gh + 1);

		key->t_key_mask = GRE_KEY_MASK;
		key->t_key = gkh->gre_key;
	} else
		key->t_key_mask = GRE_KEY_NONE;

	/* transparent Ethernet bridging goes to egre(4) or nvgre(4) */
	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
		if (egre_input(key, m, hlen, otos) == -1 &&
		    nvgre_input(key, m, hlen, otos) == -1)
			goto decline;

		return (IPPROTO_DONE);
	}

	/* layer 3: try point-to-point first, then point-to-multipoint */
	ifp = gre_find(key);
	if (ifp == NULL) {
		ifp = mgre_find(key);
		if (ifp == NULL)
			goto decline;
	}

	switch (gh->gre_proto) {
	case htons(GRE_WCCP): {
		struct mbuf *n;
		int off;

		/* WCCP/GRE:
		 *   So far as I can see (and test) it seems that Cisco's WCCP
		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
		 *   only in its protocol number.  At least, it works for me.
		 *
		 *   The Internet Drafts can be found if you look for
		 *   the following:
		 *     draft-forster-wrec-wccp-v1-00.txt
		 *     draft-wilson-wrec-wccp-v2-01.txt
		 */

		/* accept only if enabled globally or per-interface (link0) */
		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
			goto decline;

		/*
		 * If the first nibble of the payload does not look like
		 * IPv4, assume it is WCCP v2.
		 */
		n = m_getptr(m, hlen, &off);
		if (n == NULL)
			goto decline;
		if (n->m_data[off] >> 4 != IPVERSION)
			hlen += 4;  /* four-octet Redirect header */

		/* FALLTHROUGH */
	}
	case htons(ETHERTYPE_IP):
		m->m_pkthdr.ph_family = AF_INET;
		patch = gre_ipv4_patch;
		break;
#ifdef INET6
	case htons(ETHERTYPE_IPV6):
		m->m_pkthdr.ph_family = AF_INET6;
		patch = gre_ipv6_patch;
		break;
#endif
#ifdef MPLS
	case htons(ETHERTYPE_MPLS_MCAST):
		mcast = M_MCAST|M_BCAST;
		/* fallthrough */
	case htons(ETHERTYPE_MPLS):
		m->m_pkthdr.ph_family = AF_MPLS;
		patch = gre_mpls_patch;
		break;
#endif
	case htons(0):
		if (ifp->if_type != IFT_TUNNEL) {
			/* keepalives dont make sense for mgre */
			goto decline;
		}

		m_adj(m, hlen);
		gre_keepalive_recv(ifp, m);
		return (IPPROTO_DONE);

	default:
		goto decline;
	}

	/* it's ours now */

	/* strip the outer and GRE headers */
	m_adj(m, hlen);

	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */

	/* reconcile inner TOS/ECN with the outer header's TOS */
	m = (*patch)(tunnel, m, &itos, otos);
	if (m == NULL)
		return (IPPROTO_DONE);

	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		m->m_pkthdr.ph_flowid =
		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	}

	/* set the pf priority according to the rxhprio configuration */
	rxprio = tunnel->t_rxhprio;
	switch (rxprio) {
	case IF_HDRPRIO_PACKET:
		/* nop */
		break;
	case IF_HDRPRIO_OUTER:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
		break;
	case IF_HDRPRIO_PAYLOAD:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
		break;
	default:
		m->m_pkthdr.pf.prio = rxprio;
		break;
	}

	m->m_flags &= ~(M_MCAST|M_BCAST);
	m->m_flags |= mcast;

	if_vinput(ifp, m);
	return (IPPROTO_DONE);
decline:
	*mp = m;
	return (-1);
}
1184
1185static struct mbuf *
1186gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1187    uint8_t *itosp, uint8_t otos)
1188{
1189	struct ip *ip;
1190	uint8_t itos;
1191
1192	m = m_pullup(m, sizeof(*ip));
1193	if (m == NULL)
1194		return (NULL);
1195
1196	ip = mtod(m, struct ip *);
1197
1198	itos = ip->ip_tos;
1199	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1200		m_freem(m);
1201		return (NULL);
1202	}
1203	if (itos != ip->ip_tos)
1204		ip_tos_patch(ip, itos);
1205
1206	*itosp = itos;
1207
1208	return (m);
1209}
1210
#ifdef INET6
static struct mbuf *
gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	struct ip6_hdr *ip6;
	uint32_t flow;
	uint8_t tc;

	/* make sure the inner IPv6 header is contiguous */
	if ((m = m_pullup(m, sizeof(*ip6))) == NULL)
		return (NULL);

	ip6 = mtod(m, struct ip6_hdr *);

	/* the traffic class occupies bits 27..20 of the flow word */
	flow = bemtoh32(&ip6->ip6_flow);
	tc = flow >> 20;

	/* merge the outer ECN bits into the payload; 0 means drop */
	if (ip_ecn_egress(tunnel->t_ecn, &otos, &tc) == 0) {
		m_freem(m);
		return (NULL);
	}

	/* write the (possibly updated) traffic class back */
	flow = (flow & ~(0xffU << 20)) | ((uint32_t)tc << 20);
	htobem32(&ip6->ip6_flow, flow);

	*itosp = tc;

	return (m);
}
#endif
1242
#ifdef MPLS
static struct mbuf *
gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	uint32_t shim;
	uint8_t tos;

	/* make sure the MPLS shim is contiguous */
	if ((m = m_pullup(m, sizeof(shim))) == NULL)
		return (NULL);

	/* scale the 3-bit EXP/TC field up to the top of a TOS byte */
	shim = *mtod(m, uint32_t *);
	tos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;

	/* merge the outer ECN bits into the payload; 0 means drop */
	if (ip_ecn_egress(tunnel->t_ecn, &otos, &tos) == 0) {
		m_freem(m);
		return (NULL);
	}

	*itosp = tos;

	return (m);
}
#endif
1268
/*
 * Set the pf priority of a received L2 packet according to the
 * tunnel's rxhprio setting: keep the packet's own prio
 * (IF_HDRPRIO_PACKET), derive it from the outer IP TOS
 * (IF_HDRPRIO_OUTER), or force the configured value.  A macro so
 * egre_input() and nvgre_input() can share it.
 */
#define gre_l2_prio(_t, _m, _otos) do {					\
	int rxprio = (_t)->t_rxhprio;					\
	switch (rxprio) {						\
	case IF_HDRPRIO_PACKET:						\
		/* nop */						\
		break;							\
	case IF_HDRPRIO_OUTER:						\
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
		break;							\
	default:							\
		(_m)->m_pkthdr.pf.prio = rxprio;			\
		break;							\
	}								\
} while (0)
1283
/*
 * Input handler for egre(4): look up the Ethernet-over-GRE interface
 * matching the tunnel addresses/key and inject the inner Ethernet
 * frame into the stack.
 *
 * Returns 0 once the packet has been consumed, or -1 to let other
 * GRE consumers look at it.
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		/* use the GRE key (sans the entropy bits) as the flow id */
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		m->m_pkthdr.ph_flowid =
		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	}

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1313
/*
 * Find the nvgre(4) interface subscribed to a multicast group on the
 * given underlay interface.  Returns NULL when nothing matches.
 */
static inline struct nvgre_softc *
nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
{
	struct nvgre_softc *sc;
	int rv;

	/*
	 * building an nvgre_softc to use with RBT_FIND is expensive, and
	 * would need to swap the src and dst addresses in the key. so do the
	 * find by hand.
	 */

	NET_ASSERT_LOCKED();
	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
	while (sc != NULL) {
		/* the received src is compared against the local dst */
		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
		if (rv == 0)
			return (sc);
		if (rv < 0)
			sc = RBT_LEFT(nvgre_mcast_tree, sc);
		else
			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
	}

	return (NULL);
}
1341
1342static inline struct nvgre_softc *
1343nvgre_ucast_find(const struct gre_tunnel *key)
1344{
1345	NET_ASSERT_LOCKED();
1346	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
1347	    (struct nvgre_softc *)key));
1348}
1349
/*
 * Input handler for nvgre(4).  Multicast/broadcast packets are matched
 * by (group, underlay interface), unicast packets by the tunnel
 * addresses and key.  The inner Ethernet source address is recorded in
 * the etherbridge table against the tunnel endpoint.
 *
 * Returns 0 once the packet has been consumed, -1 if it is not for
 * nvgre.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;
	struct ether_header *eh;

	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
	else
		sc = nvgre_ucast_find(key);

	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* learn the inner source MAC for this tunnel endpoint */
	eh = mtod(m, struct ether_header *);
	etherbridge_map_ea(&sc->sc_eb, (void *)&key->t_dst,
	    (struct ether_addr *)eh->ether_shost);

	/* nvgre always carries entropy in the key; use it as flow id */
	SET(m->m_pkthdr.csum_flags, M_FLOWID);
	m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1385
/*
 * Strip hlen bytes of encapsulation and make sure the inner Ethernet
 * header is present, contiguous and that the payload behind it is
 * 32-bit aligned (strict-alignment architectures fault otherwise).
 * May replace the mbuf chain; returns NULL (chain freed) on failure.
 */
static struct mbuf *
gre_ether_align(struct mbuf *m, int hlen)
{
	struct mbuf *n;
	int off;

	m_adj(m, hlen);

	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
		m_freem(m);
		return (NULL);
	}

	m = m_pullup(m, sizeof(struct ether_header));
	if (m == NULL)
		return (NULL);

	n = m_getptr(m, sizeof(struct ether_header), &off);
	if (n == NULL) {
		m_freem(m);
		return (NULL);
	}

	/* realign by copying if the payload is not 32-bit aligned */
	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			return (NULL);
		m = n;
	}

	return (m);
}
1419
/*
 * Process a keepalive packet reflected back to us by the peer.  The
 * payload carries an uptime stamp and a SipHash digest keyed with
 * sc_ka_key; both must check out before the packet may advance the
 * keepalive state machine (DOWN -> HOLD -> UP).  Always consumes the
 * mbuf.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;

	/* keepalives must be enabled and terminate in our rdomain */
	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;

	/* reject stamps from the future or older than ten seconds */
	gk = mtod(m, struct gre_keepalive *);
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10) /* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/* first proof of life: hold out for more keepalives */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* decay the hold threshold while the link stays up */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	/* (re)arm the timeout that declares the link dead */
	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

drop:
	m_freem(m);
}
1489
1490static int
1491gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1492    struct rtentry *rt)
1493{
1494	struct m_tag *mtag;
1495	int error = 0;
1496
1497	if (!gre_allow) {
1498		error = EACCES;
1499		goto drop;
1500	}
1501
1502	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
1503		error = ENETDOWN;
1504		goto drop;
1505	}
1506
1507	switch (dst->sa_family) {
1508	case AF_INET:
1509#ifdef INET6
1510	case AF_INET6:
1511#endif
1512#ifdef MPLS
1513	case AF_MPLS:
1514#endif
1515		break;
1516	default:
1517		error = EAFNOSUPPORT;
1518		goto drop;
1519	}
1520
1521	/* Try to limit infinite recursion through misconfiguration. */
1522	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
1523	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
1524		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
1525		    sizeof(ifp->if_index)) == 0) {
1526			m_freem(m);
1527			error = EIO;
1528			goto end;
1529		}
1530	}
1531
1532	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
1533	if (mtag == NULL) {
1534		m_freem(m);
1535		error = ENOBUFS;
1536		goto end;
1537	}
1538	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
1539	m_tag_prepend(m, mtag);
1540
1541	m->m_pkthdr.ph_family = dst->sa_family;
1542
1543	error = if_enqueue(ifp, m);
1544end:
1545	if (error)
1546		ifp->if_oerrors++;
1547	return (error);
1548
1549drop:
1550	m_freem(m);
1551	return (error);
1552}
1553
1554void
1555gre_start(struct ifnet *ifp)
1556{
1557	struct gre_softc *sc = ifp->if_softc;
1558	struct mbuf *m;
1559	int af;
1560#if NBPFILTER > 0
1561	caddr_t if_bpf;
1562#endif
1563
1564	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1565		af = m->m_pkthdr.ph_family;
1566
1567#if NBPFILTER > 0
1568		if_bpf = ifp->if_bpf;
1569		if (if_bpf)
1570			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
1571#endif
1572
1573		m = gre_l3_encap(&sc->sc_tunnel, m, af);
1574		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1575			ifp->if_oerrors++;
1576			continue;
1577		}
1578	}
1579}
1580
/*
 * Route request hook for mgre(4).  When the stack adds the RTF_LOCAL
 * route for one of the interface's own addresses, and lo0 in the same
 * rdomain has an address of that family, clear RTF_LLINFO on the
 * route.
 *
 * NOTE(review): presumably this keeps locally-owned addresses from
 * being resolved through the tunnel like a neighbour entry — confirm.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* is the route for one of our own addresses? */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* does lo0 in this rdomain carry the same family? */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1623
1624static int
1625mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
1626    struct rtentry *rt0)
1627{
1628	struct mgre_softc *sc = ifp->if_softc;
1629	struct sockaddr *gate;
1630	struct rtentry *rt;
1631	struct m_tag *mtag;
1632	int error = 0;
1633	sa_family_t af;
1634	const void *addr;
1635
1636	if (!gre_allow) {
1637		error = EACCES;
1638		goto drop;
1639	}
1640
1641	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
1642		error = ENETDOWN;
1643		goto drop;
1644	}
1645
1646	switch (dest->sa_family) {
1647	case AF_INET:
1648#ifdef INET6
1649	case AF_INET6:
1650#endif
1651#ifdef MPLS
1652	case AF_MPLS:
1653#endif
1654		break;
1655	default:
1656		error = EAFNOSUPPORT;
1657		goto drop;
1658	}
1659
1660	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
1661		error = ENETUNREACH;
1662		goto drop;
1663	}
1664
1665	rt = rt_getll(rt0);
1666
1667	/* check rt_expire? */
1668	if (ISSET(rt->rt_flags, RTF_REJECT)) {
1669		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
1670		goto drop;
1671	}
1672	if (!ISSET(rt->rt_flags, RTF_HOST)) {
1673		error = EHOSTUNREACH;
1674		goto drop;
1675	}
1676	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1677		error = EINVAL;
1678		goto drop;
1679	}
1680
1681	gate = rt->rt_gateway;
1682	af = gate->sa_family;
1683	if (af != sc->sc_tunnel.t_af) {
1684		error = EAGAIN;
1685		goto drop;
1686	}
1687
1688	/* Try to limit infinite recursion through misconfiguration. */
1689	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
1690	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
1691		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
1692		    sizeof(ifp->if_index)) == 0) {
1693			error = EIO;
1694			goto drop;
1695		}
1696	}
1697
1698	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
1699	if (mtag == NULL) {
1700		error = ENOBUFS;
1701		goto drop;
1702	}
1703	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
1704	m_tag_prepend(m, mtag);
1705
1706	switch (af) {
1707	case AF_INET: {
1708		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
1709		addr = &sin->sin_addr;
1710		break;
1711	}
1712#ifdef INET6
1713	case AF_INET6: {
1714		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
1715		addr = &sin6->sin6_addr;
1716		break;
1717	}
1718 #endif
1719	default:
1720		unhandled_af(af);
1721		/* NOTREACHED */
1722	}
1723
1724	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
1725	if (m == NULL) {
1726		ifp->if_oerrors++;
1727		return (ENOBUFS);
1728	}
1729
1730	m->m_pkthdr.ph_family = dest->sa_family;
1731
1732	error = if_enqueue(ifp, m);
1733	if (error)
1734		ifp->if_oerrors++;
1735	return (error);
1736
1737drop:
1738	m_freem(m);
1739	return (error);
1740}
1741
/*
 * Transmit handler for mgre(4).  Packets on the send queue were
 * already encapsulated by mgre_output(), so they only need to be
 * tapped and pushed into the IP stack.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			/*
			 * Build a throwaway mbuf header that starts
			 * if_hdrlen bytes in, so bpf captures the inner
			 * packet rather than the encapsulation.
			 */
			struct m_hdr mh;
			struct mbuf *n;
			int off;

			n = m_getptr(m, ifp->if_hdrlen, &off);
			KASSERT(n != NULL);

			mh.mh_flags = 0;
			mh.mh_next = n->m_next;
			mh.mh_len = n->m_len - off;
			mh.mh_data = n->m_data + off;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1778
/*
 * Transmit handler for egre(4): encapsulate each queued Ethernet
 * frame in GRE and hand it to the IP stack.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		/* move the pkthdr to the new, empty front mbuf */
		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* place the (empty) data region so headers fit in front */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1821
1822static struct mbuf *
1823gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
1824    struct mbuf *m, sa_family_t af)
1825{
1826	uint16_t proto;
1827	uint8_t ttl, itos, otos;
1828	int tttl = tunnel->t_ttl;
1829	int ttloff;
1830
1831	switch (af) {
1832	case AF_INET: {
1833		struct ip *ip;
1834
1835		m = m_pullup(m, sizeof(*ip));
1836		if (m == NULL)
1837			return (NULL);
1838
1839		ip = mtod(m, struct ip *);
1840		itos = ip->ip_tos;
1841
1842		ttloff = offsetof(struct ip, ip_ttl);
1843		proto = htons(ETHERTYPE_IP);
1844		break;
1845	}
1846#ifdef INET6
1847	case AF_INET6: {
1848		struct ip6_hdr *ip6;
1849
1850		m = m_pullup(m, sizeof(*ip6));
1851		if (m == NULL)
1852			return (NULL);
1853
1854		ip6 = mtod(m, struct ip6_hdr *);
1855		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;
1856
1857		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
1858		proto = htons(ETHERTYPE_IPV6);
1859		break;
1860	}
1861 #endif
1862#ifdef MPLS
1863	case AF_MPLS: {
1864		uint32_t shim;
1865
1866		m = m_pullup(m, sizeof(shim));
1867		if (m == NULL)
1868			return (NULL);
1869
1870		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
1871		itos = (shim >> MPLS_EXP_OFFSET) << 5;
1872
1873		ttloff = 3;
1874
1875		if (m->m_flags & (M_BCAST | M_MCAST))
1876			proto = htons(ETHERTYPE_MPLS_MCAST);
1877		else
1878			proto = htons(ETHERTYPE_MPLS);
1879		break;
1880	}
1881#endif
1882	default:
1883		unhandled_af(af);
1884	}
1885
1886	if (tttl == -1) {
1887		KASSERT(m->m_len > ttloff); /* m_pullup has happened */
1888
1889		ttl = *(m->m_data + ttloff);
1890	} else
1891		ttl = tttl;
1892
1893	itos = gre_l3_tos(tunnel, m, itos);
1894	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);
1895
1896	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
1897}
1898
/*
 * Prepend the GRE header (plus the optional key field) to m, then hand
 * the packet to gre_encap_dst_ip() for the outer IP header.  Returns
 * NULL (chain freed by m_prepend) on allocation failure.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	hlen = sizeof(*gh);
	if (tunnel->t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(*gkh);

	m = m_prepend(m, hlen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = GRE_VERS_0;
	gh->gre_proto = proto;
	if (tunnel->t_key_mask != GRE_KEY_NONE) {
		gh->gre_flags |= htons(GRE_KP);

		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;

		/* fold the packet's flow id into the entropy bits */
		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    m->m_pkthdr.ph_flowid);
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}
1933
/*
 * Prepend the outer IPv4 or IPv6 header according to the tunnel's
 * address family.  AF_UNSPEC means the endpoints are not configured
 * yet; such packets are dropped.  Returns NULL on failure.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* packets may arrive before tunnel is set up */
		m_freem(m);
		return (NULL);
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_off = tunnel->t_df;	/* htons(IP_DF) or 0 */
		ip->ip_tos = tos;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		/* carry the flow id in the IPv6 flow label if we have one */
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
		    htonl(m->m_pkthdr.ph_flowid) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	return (m);
}
1994
1995static int
1996gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
1997{
1998	m->m_flags &= ~(M_BCAST|M_MCAST);
1999	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
2000
2001#if NPF > 0
2002	pf_pkt_addr_changed(m);
2003#endif
2004
2005	switch (tunnel->t_af) {
2006	case AF_INET:
2007		ip_send(m);
2008		break;
2009#ifdef INET6
2010	case AF_INET6:
2011		ip6_send(m);
2012		break;
2013#endif
2014	default:
2015		unhandled_af(tunnel->t_af);
2016	}
2017
2018	return (0);
2019}
2020
/*
 * ioctl handling shared by the gre-based interfaces: MTU, vnetid (the
 * GRE key), flow id in the key, tunnel endpoint addresses, routing
 * table and DF bit.  Returns ENOTTY for requests it does not know so
 * callers can fall through to more generic handlers.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* 576 is the historical minimum IPv4 datagram size */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
2097
2098static uint8_t
2099gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2100{
2101	uint8_t prio;
2102
2103	switch (t->t_txhprio) {
2104	case IF_HDRPRIO_PACKET:
2105		prio = m->m_pkthdr.pf.prio;
2106		break;
2107	default:
2108		prio = t->t_txhprio;
2109		break;
2110	}
2111
2112	return (IFQ_PRIO2TOS(prio));
2113}
2114
2115static uint8_t
2116gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2117{
2118	uint8_t prio;
2119
2120	switch (t->t_txhprio) {
2121	case IF_HDRPRIO_PAYLOAD:
2122		return (tos);
2123	case IF_HDRPRIO_PACKET:
2124		prio = m->m_pkthdr.pf.prio;
2125		break;
2126	default:
2127		prio = t->t_txhprio;
2128		break;
2129	}
2130
2131	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2132}
2133
/*
 * ioctl handler for gre(4) point-to-point interfaces: interface
 * up/down, keepalive configuration and the ttl/ecn/priority knobs.
 * Everything else is punted to gre_tunnel_ioctl().
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/* timeout 0..86400s, count 0..256, both or neither zero */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* zero disables keepalives entirely */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh key and bias for the keepalive digests */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			/* pretend the last keepalive was a second ago */
			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload's TTL" (see gre_l3_encap_dst) */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2245
/*
 * ioctl handler for mgre(4) point-to-multipoint interfaces.  Endpoint
 * and rdomain changes are refused while the interface is running;
 * everything not handled here falls through to gre_tunnel_ioctl().
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload's TTL" (see gre_l3_encap_dst) */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these change the tunnel identity; not while running */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2341
/*
 * Configure the local endpoint of an mgre(4) interface.  Only the
 * source address may be set (dstaddr must be AF_UNSPEC); destinations
 * are resolved per packet via the routing table in mgre_output().
 * Returns 0 on success or an errno.
 */
static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	int error;
#endif

	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		/* reject the unspecified address and multicast groups */
		addr4 = (struct sockaddr_in *)addr;
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		error = in6_embedscope(&tunnel->t_src6, addr6, NULL, NULL);
		if (error != 0)
			return (error);

		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}
2399
2400static int
2401mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
2402{
2403	struct gre_tunnel *tunnel = &sc->sc_tunnel;
2404	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
2405	struct sockaddr_in *sin;
2406#ifdef INET6
2407	struct sockaddr_in6 *sin6;
2408#endif
2409
2410	switch (tunnel->t_af) {
2411	case AF_UNSPEC:
2412		return (EADDRNOTAVAIL);
2413	case AF_INET:
2414		sin = (struct sockaddr_in *)&req->addr;
2415		memset(sin, 0, sizeof(*sin));
2416		sin->sin_family = AF_INET;
2417		sin->sin_len = sizeof(*sin);
2418		sin->sin_addr = tunnel->t_src4;
2419		break;
2420
2421#ifdef INET6
2422	case AF_INET6:
2423		sin6 = (struct sockaddr_in6 *)&req->addr;
2424		memset(sin6, 0, sizeof(*sin6));
2425		sin6->sin6_family = AF_INET6;
2426		sin6->sin6_len = sizeof(*sin6);
2427		in6_recoverscope(sin6, &tunnel->t_src6);
2428		break;
2429#endif
2430	default:
2431		unhandled_af(tunnel->t_af);
2432	}
2433
2434	dstaddr->sa_len = 2;
2435	dstaddr->sa_family = AF_UNSPEC;
2436
2437	return (0);
2438}
2439
/*
 * ioctl handler for egre(4) Ethernet-over-GRE interfaces.  Tunnel
 * identity changes are refused while running; unknown requests fall
 * through to gre_tunnel_ioctl() and then ether_ioctl().
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* no -1/inherit here: an L2 payload has no TTL to copy */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these change the tunnel identity; not while running */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2524
/*
 * Handle ioctl requests on an nvgre(4) interface.
 *
 * Tunnel configuration (endpoints, parent interface, vnetid, rdomain)
 * may only be changed while the interface is not running.  Any change
 * to the tunnel configuration also flushes the learned ethernet
 * address cache, since its entries map addresses to endpoints under
 * the old configuration.  Bridge-style requests (SIOCBRDG*) operate
 * on the etherbridge learning table.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	/* the request is interpreted per-command from the same buffer */
	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* ucast == 0: the destination must be a multicast group */
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* the NVGRE VSID is restricted to the 24 bit key range */
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	/* learning table administration, shared with bridge(4) ioctls */
	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = nvgre_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = nvgre_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2726
/*
 * Handle ioctl requests on an eoip(4) interface.
 *
 * Covers interface state, keepalive configuration, the 16 bit tunnel
 * id, tunnel endpoints, and the usual per-tunnel knobs.  Anything
 * unhandled is passed to ether_ioctl().
 *
 * NOTE(review): the keepalive and vnetid range checks return EINVAL
 * directly instead of setting error and breaking like the rest of
 * the switch; behavior is the same, only the style differs.
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* period of at most a day, at most 256 missed probes */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* a zero in either field disables keepalives */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* the EoIP tunnel id is a 16 bit value */
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		/* the id is kept in little-endian wire order (htole16) */
		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* ucast == 1: the destination must be a unicast address */
		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2891
2892static int
2893gre_up(struct gre_softc *sc)
2894{
2895	NET_ASSERT_LOCKED();
2896	SET(sc->sc_if.if_flags, IFF_RUNNING);
2897
2898	if (sc->sc_ka_state != GRE_KA_NONE)
2899		gre_keepalive_send(sc);
2900
2901	return (0);
2902}
2903
2904static int
2905gre_down(struct gre_softc *sc)
2906{
2907	NET_ASSERT_LOCKED();
2908	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2909
2910	if (sc->sc_ka_state != GRE_KA_NONE) {
2911		timeout_del_barrier(&sc->sc_ka_hold);
2912		timeout_del_barrier(&sc->sc_ka_send);
2913
2914		sc->sc_ka_state = GRE_KA_DOWN;
2915		gre_link_state(&sc->sc_if, sc->sc_ka_state);
2916	}
2917
2918	return (0);
2919}
2920
2921static void
2922gre_link_state(struct ifnet *ifp, unsigned int state)
2923{
2924	int link_state = LINK_STATE_UNKNOWN;
2925
2926	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2927		switch (state) {
2928		case GRE_KA_NONE:
2929			/* maybe up? or down? it's unknown, really */
2930			break;
2931		case GRE_KA_UP:
2932			link_state = LINK_STATE_UP;
2933			break;
2934		default:
2935			link_state = LINK_STATE_KALIVE_DOWN;
2936			break;
2937		}
2938	}
2939
2940	if (ifp->if_link_state != link_state) {
2941		ifp->if_link_state = link_state;
2942		if_link_state_change(ifp);
2943	}
2944}
2945
/*
 * Timeout handler that transmits a gre(4) keepalive probe.
 *
 * The probe is a small packet (uptime + random value, protected by a
 * SipHash digest keyed with sc_ka_key) that is GRE-encapsulated twice:
 * the inner encapsulation uses the tunnel addresses swapped, so when
 * the peer decapsulates the outer packet the inner one is addressed
 * straight back at us.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuration
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/* only send on a running, fully configured tunnel */
	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		/* the headers don't fit in a plain mbuf, get a cluster */
		MCLGETL(m, M_DONTWAIT, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/* reserve room for the encapsulation headers up front */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* sign the uptime/random pair so we can trust it when it returns */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* inner encapsulation: same tunnel but with src/dst swapped */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		in_hdr_cksum_out(m, NULL);

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}
3055
3056static void
3057gre_keepalive_hold(void *arg)
3058{
3059	struct gre_softc *sc = arg;
3060	struct ifnet *ifp = &sc->sc_if;
3061
3062	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
3063	    sc->sc_ka_state == GRE_KA_NONE)
3064		return;
3065
3066	NET_LOCK();
3067	sc->sc_ka_state = GRE_KA_DOWN;
3068	gre_link_state(ifp, sc->sc_ka_state);
3069	NET_UNLOCK();
3070}
3071
/*
 * Validate and commit new tunnel endpoint addresses.
 *
 * Source and destination must share the same address family and
 * length.  The source must always be a unicast address; when ucast
 * is set the destination must be unicast too, otherwise it must be a
 * multicast group (used by nvgre).  The family is only committed
 * once both addresses validate.
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* dst must be multicast iff ucast is not requested */
		dst4 = (struct sockaddr_in *)dst;
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		/* both ends must live in the same scope zone */
		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		/* addresses are stored with the scope id embedded */
		error = in6_embedscope(&tunnel->t_src6, src6, NULL, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}
3144
/*
 * Copy the configured tunnel endpoint addresses out to an
 * if_laddrreq.  Returns EADDRNOTAVAIL if no tunnel is configured.
 */
static int
gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6 /* ifconfig already embeds the scopeid */
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)src;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;

		sin = (struct sockaddr_in *)dst;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_dst4;

		break;

#ifdef INET6
	case AF_INET6:
		/* stored addresses have the scope embedded; recover it */
		sin6 = (struct sockaddr_in6 *)src;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_src6);

		sin6 = (struct sockaddr_in6 *)dst;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_dst6);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	return (0);
}
3195
/*
 * Clear the tunnel endpoints.  Marking the address family as
 * unspecified is enough; the addresses are ignored until a family is
 * set again.
 */
static int
gre_del_tunnel(struct gre_tunnel *tunnel)
{
	/* commit */
	tunnel->t_af = AF_UNSPEC;

	return (0);
}
3204
/*
 * Validate and commit a new virtual network id (GRE key).
 *
 * When the key also carries flow entropy in its low bits
 * (t_key_mask == GRE_KEY_ENTROPY), only the high 24 bits are
 * available for the vnetid, so a narrower range and a shift apply.
 * The key is stored in network byte order, pre-shifted.
 */
static int
gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	uint32_t key;
	uint32_t min = GRE_KEY_MIN;
	uint32_t max = GRE_KEY_MAX;
	unsigned int shift = GRE_KEY_SHIFT;
	uint32_t mask = GRE_KEY_MASK;

	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
		min = GRE_KEY_ENTROPY_MIN;
		max = GRE_KEY_ENTROPY_MAX;
		shift = GRE_KEY_ENTROPY_SHIFT;
		mask = GRE_KEY_ENTROPY;
	}

	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
		return (EINVAL);

	key = htonl(ifr->ifr_vnetid << shift);

	/* commit */
	tunnel->t_key_mask = mask;
	tunnel->t_key = key;

	return (0);
}
3232
3233static int
3234gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3235{
3236	int shift;
3237
3238	switch (tunnel->t_key_mask) {
3239	case GRE_KEY_NONE:
3240		return (EADDRNOTAVAIL);
3241	case GRE_KEY_ENTROPY:
3242		shift = GRE_KEY_ENTROPY_SHIFT;
3243		break;
3244	case GRE_KEY_MASK:
3245		shift = GRE_KEY_SHIFT;
3246		break;
3247	}
3248
3249	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
3250
3251	return (0);
3252}
3253
/*
 * Remove the virtual network id: with no key mask set, no key field
 * is included in the GRE header.
 */
static int
gre_del_vnetid(struct gre_tunnel *tunnel)
{
	tunnel->t_key_mask = GRE_KEY_NONE;

	return (0);
}
3261
/*
 * Enable or disable carrying flow entropy in the low bits of the GRE
 * key.  ifr_vnetid is used as a boolean here.  An existing key is
 * re-shifted to keep the same vnetid under the new layout; enabling
 * entropy fails with ERANGE if the current key doesn't fit in the
 * remaining 24 bits.
 */
static int
gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	uint32_t mask, key;

	if (tunnel->t_key_mask == GRE_KEY_NONE)
		return (EADDRNOTAVAIL);

	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
	if (tunnel->t_key_mask == mask) {
		/* nop */
		return (0);
	}

	key = ntohl(tunnel->t_key);
	if (mask == GRE_KEY_ENTROPY) {
		if (key > GRE_KEY_ENTROPY_MAX)
			return (ERANGE);

		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
	} else
		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);

	/* commit */
	tunnel->t_key_mask = mask;
	tunnel->t_key = key;

	return (0);
}
3291
3292static int
3293gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3294{
3295	if (tunnel->t_key_mask == GRE_KEY_NONE)
3296		return (EADDRNOTAVAIL);
3297
3298	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;
3299
3300	return (0);
3301}
3302
/*
 * Bring an mgre(4) interface up.
 *
 * Computes the encapsulation header length for the configured
 * address family (plus the optional key word) and inserts the softc
 * into the global mgre tree, which doubles as the uniqueness check
 * for the tunnel address.
 */
static int
mgre_up(struct mgre_softc *sc)
{
	unsigned int hlen;

	switch (sc->sc_tunnel.t_af) {
	case AF_UNSPEC:
		/* a tunnel address must be configured first */
		return (EDESTADDRREQ);
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}

	hlen += sizeof(struct gre_header);
	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(struct gre_h_key);

	NET_ASSERT_LOCKED();

	/* insertion fails if another mgre uses the same address */
	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
		return (EADDRINUSE);

	sc->sc_if.if_hdrlen = hlen;
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	return (0);
}
3337
3338static int
3339mgre_down(struct mgre_softc *sc)
3340{
3341	NET_ASSERT_LOCKED();
3342
3343	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3344	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */
3345
3346	RBT_REMOVE(mgre_tree, &mgre_tree, sc);
3347
3348	/* barrier? */
3349
3350	return (0);
3351}
3352
3353static int
3354egre_up(struct egre_softc *sc)
3355{
3356	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3357		return (EDESTADDRREQ);
3358
3359	NET_ASSERT_LOCKED();
3360
3361	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
3362		return (EADDRINUSE);
3363
3364	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3365
3366	return (0);
3367}
3368
3369static int
3370egre_down(struct egre_softc *sc)
3371{
3372	NET_ASSERT_LOCKED();
3373
3374	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3375
3376	RBT_REMOVE(egre_tree, &egre_tree, sc);
3377
3378	/* barrier? */
3379
3380	return (0);
3381}
3382
/*
 * egre(4) has fixed virtual media; changing it is not supported.
 */
static int
egre_media_change(struct ifnet *ifp)
{
	return (ENOTTY);
}
3388
3389static void
3390egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
3391{
3392	imr->ifm_active = IFM_ETHER | IFM_AUTO;
3393	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
3394}
3395
/*
 * Bring an nvgre(4) interface up.
 *
 * Requires a configured tunnel and a multicast-capable parent
 * interface.  Inserts the softc into both the multicast and unicast
 * lookup trees (each enforces uniqueness), joins the tunnel's
 * multicast group on the parent, and hooks parent link-state and
 * detach events.  Failures unwind via gotos in reverse order.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	/* join the tunnel destination group on the parent interface */
	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	if_linkstatehook_add(ifp0, &sc->sc_ltask);
	if_detachhook_add(ifp0, &sc->sc_dtask);

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);

remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}
3465
/*
 * Take an nvgre(4) interface down.
 *
 * Clears IFF_RUNNING first, then briefly drops the net lock to wait
 * for the send queue and the softnet send task to drain before
 * purging pending packets, unhooking from the parent, leaving the
 * multicast group, and removing the softc from the lookup trees.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	/* barriers may sleep, so they can't be done under the net lock */
	NET_UNLOCK();
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	mq_purge(&sc->sc_send_list);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		if_detachhook_del(ifp0, &sc->sc_dtask);
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
	}
	if_put(ifp0);

	/* leave the multicast group joined in nvgre_up() */
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}
3512
/*
 * Parent interface link-state hook.  Nothing to do, but the hook must
 * exist so it can be registered in nvgre_up().
 */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}
3518
/*
 * Parent interface detach hook: the parent is going away, so stop
 * the nvgre interface if it was running and forget the parent.
 */
static void
nvgre_detach(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		nvgre_down(sc);
		if_down(ifp);
	}

	sc->sc_ifp0 = 0;
}
3532
3533static int
3534nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
3535{
3536	struct ifnet *ifp0;
3537
3538	ifp0 = if_unit(parent);
3539	if (ifp0 == NULL)
3540		return (EINVAL);
3541
3542	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
3543		if_put(ifp0);
3544		return (EPROTONOSUPPORT);
3545	}
3546
3547	ifsetlro(ifp0, 0);
3548
3549	/* commit */
3550	sc->sc_ifp0 = ifp0->if_index;
3551	if_put(ifp0);
3552
3553	return (0);
3554}
3555
/*
 * Add a static (or dynamic) entry to the nvgre learning table,
 * mapping an ethernet address to a tunnel endpoint.
 *
 * The endpoint must match the tunnel's address family, be a unicast
 * host address, and (for IPv6) live in the same scope zone as the
 * tunnel source.
 */
static int
nvgre_add_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union gre_addr endpoint;
	unsigned int type;

	/* ignore ifba_ifsname */

	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_tunnel.t_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* the endpoint must share the tunnel source's scope zone */
		in6_recoverscope(&src6, &sc->sc_tunnel.t_src6);

		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}
3625
/*
 * Remove an entry from the nvgre learning table by ethernet address.
 */
static int
nvgre_del_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}
3631
/*
 * nvgre(4) transmit start routine.
 *
 * For each queued ethernet frame, resolve the destination MAC to a
 * tunnel endpoint via the learning table (broadcast and unknown
 * destinations are flooded to the tunnel's multicast group), GRE
 * encapsulate it, and batch the results for the softnet send task.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		eh = mtod(m0, struct ether_header *);
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			const union gre_addr *endpoint;

			/* the learning table is read under SMR */
			smr_read_enter();
			endpoint = etherbridge_resolve_ea(&sc->sc_eb,
			    (struct ether_addr *)eh->ether_dhost);
			if (endpoint == NULL) {
				/* "flood" to unknown hosts */
				endpoint = &tunnel->t_dst;
			}
			/* copy before leaving the SMR section */
			gateway = *endpoint;
			smr_read_leave();
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	if (!ml_empty(&ml)) {
		/* hand the batch to the softnet task for ip output */
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}
3709
3710static uint64_t
3711nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
3712{
3713	struct ip_moptions imo;
3714	struct mbuf *m;
3715	uint64_t oerrors = 0;
3716
3717	imo.imo_ifidx = sc->sc_ifp0;
3718	imo.imo_ttl = sc->sc_tunnel.t_ttl;
3719	imo.imo_loop = 0;
3720
3721	NET_LOCK();
3722	while ((m = ml_dequeue(ml)) != NULL) {
3723		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
3724			oerrors++;
3725	}
3726	NET_UNLOCK();
3727
3728	return (oerrors);
3729}
3730
#ifdef INET6
/*
 * Transmit a list of encapsulated nvgre packets over IPv6 multicast
 * on the parent interface.  Returns the number of failed sends.
 */
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o = {
		.im6o_ifidx = sc->sc_ifp0,
		.im6o_hlim = sc->sc_tunnel.t_ttl,
		.im6o_loop = 0,
	};
	struct mbuf *m;
	uint64_t drops = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL))
			drops++;
	}
	NET_UNLOCK();

	return (drops);
}
#endif /* INET6 */
3753
/*
 * Softnet task that flushes the batched nvgre send list through the
 * address-family specific output path.
 */
static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;
	uint64_t oerrors;

	/* the interface may have been brought down since the task ran */
	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	switch (af) {
	case AF_INET:
		oerrors = nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		oerrors = nvgre_send6(sc, &ml);
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
}
3786
/*
 * Bring an eoip(4) interface up.
 *
 * Requires a configured tunnel; the softc is inserted into the
 * global eoip tree, which also enforces tunnel uniqueness.  If
 * keepalives are configured, the probe cycle is started.
 */
static int
eoip_up(struct eoip_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		sc->sc_ka_holdmax = sc->sc_ka_count;
		eoip_keepalive_send(sc);
	}

	return (0);
}
3807
/*
 * Take an eoip(4) interface down: stop keepalives (waiting for any
 * pending timeouts to drain), report the link as down, and remove
 * the softc from the lookup tree.
 */
static int
eoip_down(struct eoip_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
	}

	RBT_REMOVE(eoip_tree, &eoip_tree, sc);

	return (0);
}
3826
/*
 * eoip_start: ifq start routine for an EoIP interface.
 *
 * Dequeues Ethernet frames from the send queue, wraps each one in an
 * EoIP/GRE header and hands it to the IP layer.  All queued frames
 * are discarded when GRE is administratively disabled (gre_allow).
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* net.inet.gre.allow=0: drop everything queued for us */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		/* move the pkthdr onto the empty leader, chain the frame */
		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* leave all the space in the leader for header prepends */
		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
3868
/*
 * eoip_encap: prepend the GRE and EoIP headers to a frame.
 *
 * The EoIP "key" header carries the payload length and the configured
 * tunnel id.  Returns the mbuf ready for IP encapsulation, or NULL if
 * allocation failed (the chain is freed by m_prepend in that case).
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len;	/* payload length before prepends */

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	/* EoIP is GRE version 1 with the key-present flag set */
	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	/* the key field holds the payload length and the tunnel id */
	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	htobem16(&eoiph->eoip_len, len);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}
3890
/*
 * eoip_keepalive_send: timeout handler that emits an EoIP keepalive.
 *
 * An EoIP keepalive is a packet with a zero-length payload: an empty
 * mbuf with room for the link, IP and GRE/EoIP headers is built,
 * encapsulated and sent, then the send timeout is rearmed.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETL(m, M_DONTWAIT, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	/*
	 * set the length to the header space and trim it straight back
	 * off: this yields an empty packet whose leading space can take
	 * all the prepended headers without further allocation.
	 */
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	/* rearm for the next keepalive */
	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}
3934
/*
 * eoip_keepalive_hold: the hold timer expired, meaning no keepalive
 * reply arrived within the hold period, so mark the link state down.
 */
static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}
3949
3950static void
3951eoip_keepalive_recv(struct eoip_softc *sc)
3952{
3953	switch (sc->sc_ka_state) {
3954	case GRE_KA_NONE:
3955		return;
3956	case GRE_KA_DOWN:
3957		sc->sc_ka_state = GRE_KA_HOLD;
3958		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
3959		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
3960		    16 * sc->sc_ka_count);
3961		break;
3962	case GRE_KA_HOLD:
3963		if (--sc->sc_ka_holdcnt > 0)
3964			break;
3965
3966		sc->sc_ka_state = GRE_KA_UP;
3967		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
3968		break;
3969
3970	case GRE_KA_UP:
3971		sc->sc_ka_holdmax--;
3972		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
3973		break;
3974	}
3975
3976	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
3977}
3978
/*
 * eoip_input: demux a GRE version 1 packet to an EoIP interface.
 *
 * Validates the GRE flags, looks the interface up by tunnel id in
 * eoip_tree, and either feeds the inner Ethernet frame to the stack
 * or treats a zero-length payload as a keepalive.  Returns the mbuf
 * unchanged if the packet is not ours ("decline"), or NULL once it
 * has been consumed or freed.
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	/* EoIP packets carry exactly key-present and version 1 */
	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	/* m_pullup may have moved the data; refetch the header pointers */
	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		/* a zero-length payload is a keepalive probe */
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	/* the frame must cover the advertised payload length */
	if (m->m_pkthdr.len < len)
		goto drop;
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len); /* trim trailing junk */

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}
4040
/* bounds for the net.inet.gre sysctl knobs (both are booleans) */
const struct sysctl_bounded_args gre_vars[] = {
	{ GRECTL_ALLOW, &gre_allow, 0, 1 },
	{ GRECTL_WCCP, &gre_wccp, 0, 1 },
};
4045
4046int
4047gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
4048    size_t newlen)
4049{
4050	int error;
4051
4052	NET_LOCK();
4053	error = sysctl_bounded_arr(gre_vars, nitems(gre_vars), name,
4054	    namelen, oldp, oldlenp, newp, newlen);
4055	NET_UNLOCK();
4056	return error;
4057}
4058
4059static inline int
4060gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
4061{
4062	switch (af) {
4063#ifdef INET6
4064	case AF_INET6:
4065		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
4066#endif /* INET6 */
4067	case AF_INET:
4068		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
4069	default:
4070		unhandled_af(af);
4071	}
4072
4073	return (0);
4074}
4075
4076static int
4077gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
4078{
4079	uint32_t ka, kb;
4080	uint32_t mask;
4081	int rv;
4082
4083	/* is K set at all? */
4084	ka = a->t_key_mask & GRE_KEY_ENTROPY;
4085	kb = b->t_key_mask & GRE_KEY_ENTROPY;
4086
4087	/* sort by whether K is set */
4088	if (ka > kb)
4089		return (1);
4090	if (ka < kb)
4091		return (-1);
4092
4093	/* is K set on both? */
4094	if (ka != GRE_KEY_NONE) {
4095		/* get common prefix */
4096		mask = a->t_key_mask & b->t_key_mask;
4097
4098		ka = a->t_key & mask;
4099		kb = b->t_key & mask;
4100
4101		/* sort by common prefix */
4102		if (ka > kb)
4103			return (1);
4104		if (ka < kb)
4105			return (-1);
4106	}
4107
4108	/* sort by routing table */
4109	if (a->t_rtableid > b->t_rtableid)
4110		return (1);
4111	if (a->t_rtableid < b->t_rtableid)
4112		return (-1);
4113
4114	/* sort by address */
4115	if (a->t_af > b->t_af)
4116		return (1);
4117	if (a->t_af < b->t_af)
4118		return (-1);
4119
4120	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4121	if (rv != 0)
4122		return (rv);
4123
4124	return (0);
4125}
4126
4127static int
4128gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
4129{
4130	int rv;
4131
4132	rv = gre_cmp_src(a, b);
4133	if (rv != 0)
4134		return (rv);
4135
4136	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
4137}
4138
/* mgre interfaces are keyed on the local (source) tunnel half only */
static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
4146
/* egre interfaces are keyed on the full tunnel (both endpoints) */
static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);
4154
4155static int
4156nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
4157{
4158	uint32_t ka, kb;
4159
4160	ka = a->t_key & GRE_KEY_ENTROPY;
4161	kb = b->t_key & GRE_KEY_ENTROPY;
4162
4163	/* sort by common prefix */
4164	if (ka > kb)
4165		return (1);
4166	if (ka < kb)
4167		return (-1);
4168
4169	/* sort by routing table */
4170	if (a->t_rtableid > b->t_rtableid)
4171		return (1);
4172	if (a->t_rtableid < b->t_rtableid)
4173		return (-1);
4174
4175	/* sort by address */
4176	if (a->t_af > b->t_af)
4177		return (1);
4178	if (a->t_af < b->t_af)
4179		return (-1);
4180
4181	return (0);
4182}
4183
4184static inline int
4185nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4186{
4187	const struct gre_tunnel *a = &na->sc_tunnel;
4188	const struct gre_tunnel *b = &nb->sc_tunnel;
4189	int rv;
4190
4191	rv = nvgre_cmp_tunnel(a, b);
4192	if (rv != 0)
4193		return (rv);
4194
4195	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4196	if (rv != 0)
4197		return (rv);
4198
4199	return (0);
4200}
4201
4202static int
4203nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
4204    unsigned int if0idxa, const struct gre_tunnel *b,
4205    const union gre_addr *ab,unsigned int if0idxb)
4206{
4207	int rv;
4208
4209	rv = nvgre_cmp_tunnel(a, b);
4210	if (rv != 0)
4211		return (rv);
4212
4213	rv = gre_ip_cmp(a->t_af, aa, ab);
4214	if (rv != 0)
4215		return (rv);
4216
4217	if (if0idxa > if0idxb)
4218		return (1);
4219	if (if0idxa < if0idxb)
4220		return (-1);
4221
4222	return (0);
4223}
4224
/* RBT wrapper: compare two nvgre softcs by tunnel, group and parent */
static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;

	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
	    b, &b->t_dst, nb->sc_ifp0));
}

RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
4237
4238static inline int
4239eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
4240{
4241	const struct gre_tunnel *a = &ea->sc_tunnel;
4242	const struct gre_tunnel *b = &eb->sc_tunnel;
4243	int rv;
4244
4245	if (a->t_key > b->t_key)
4246		return (1);
4247	if (a->t_key < b->t_key)
4248		return (-1);
4249
4250	/* sort by routing table */
4251	if (a->t_rtableid > b->t_rtableid)
4252		return (1);
4253	if (a->t_rtableid < b->t_rtableid)
4254		return (-1);
4255
4256	/* sort by address */
4257	if (a->t_af > b->t_af)
4258		return (1);
4259	if (a->t_af < b->t_af)
4260		return (-1);
4261
4262	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4263	if (rv != 0)
4264		return (rv);
4265
4266	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
4267	if (rv != 0)
4268		return (rv);
4269
4270	return (0);
4271}
4272
4273RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
4274
/* etherbridge callback: are two nvgre endpoint addresses equal? */
static int
nvgre_eb_port_eq(void *arg, void *a, void *b)
{
	struct nvgre_softc *sc = arg;

	return (gre_ip_cmp(sc->sc_tunnel.t_af, a, b) == 0);
}
4282
4283static void *
4284nvgre_eb_port_take(void *arg, void *port)
4285{
4286	union gre_addr *ea = port;
4287	union gre_addr *endpoint;
4288
4289	endpoint = pool_get(&nvgre_endpoint_pool, PR_NOWAIT);
4290	if (endpoint == NULL)
4291		return (NULL);
4292
4293	*endpoint = *ea;
4294
4295	return (endpoint);
4296}
4297
/* etherbridge callback: release an endpoint address back to the pool */
static void
nvgre_eb_port_rele(void *arg, void *port)
{
	union gre_addr *endpoint = port;

	pool_put(&nvgre_endpoint_pool, endpoint);
}
4305
/* etherbridge callback: every endpoint reports the nvgre interface name */
static size_t
nvgre_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct nvgre_softc *sc = arg;

	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
}
4313
/*
 * etherbridge callback: render an endpoint address as a sockaddr for
 * userland consumers of the learned-address table.
 */
static void
nvgre_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct nvgre_softc *sc = arg;
	union gre_addr *endpoint = port;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* restore the embedded scope id, if any */
		in6_recoverscope(sin6, &endpoint->in6);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}
}
4344