1/*	$NetBSD: if_mpls.c,v 1.8 2011/07/03 18:46:12 kefren Exp $ */
2
3/*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren@NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.8 2011/07/03 18:46:12 kefren Exp $");
34
35#include "opt_inet.h"
36#include "opt_mpls.h"
37
38#include <sys/param.h>
39
40#include <sys/errno.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/sysctl.h>
44
45#include <net/bpf.h>
46#include <net/if.h>
47#include <net/if_types.h>
48#include <net/netisr.h>
49#include <net/route.h>
50
51#ifdef INET
52#include <netinet/in.h>
53#include <netinet/in_systm.h>
54#include <netinet/in_var.h>
55#include <netinet/ip.h>
56#endif
57
58#ifdef INET6
59#include <netinet/ip6.h>
60#include <netinet6/in6_var.h>
61#include <netinet6/ip6_var.h>
62#endif
63
64#include <netmpls/mpls.h>
65#include <netmpls/mpls_var.h>
66
67#include "if_mpls.h"
68
69void ifmplsattach(int);
70
71static int mpls_clone_create(struct if_clone *, int);
72static int mpls_clone_destroy(struct ifnet *);
73
74static struct if_clone mpls_if_cloner =
75	IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
76
77
78static void mpls_input(struct ifnet *, struct mbuf *);
79static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
80	struct rtentry *);
81static int mpls_ioctl(struct ifnet *, u_long, void *);
82static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
83static int mpls_lse(struct mbuf *);
84
85#ifdef INET
86static int mpls_unlabel_inet(struct mbuf *);
87static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
88#endif
89
90#ifdef INET6
91static int mpls_unlabel_inet6(struct mbuf *);
92static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
93#endif
94
95static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
96
97extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
98	mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6;
99
100/* ARGSUSED */
101void
102ifmplsattach(int count)
103{
104	if_clone_attach(&mpls_if_cloner);
105}
106
107static int
108mpls_clone_create(struct if_clone *ifc, int unit)
109{
110	struct mpls_softc *sc;
111
112	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
113
114	if_initname(&sc->sc_if, ifc->ifc_name, unit);
115	sc->sc_if.if_softc = sc;
116	sc->sc_if.if_type = IFT_MPLS;
117	sc->sc_if.if_addrlen = 0;
118	sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
119	sc->sc_if.if_dlt = DLT_NULL;
120	sc->sc_if.if_mtu = 1500;
121	sc->sc_if.if_flags = 0;
122	sc->sc_if.if_input = mpls_input;
123	sc->sc_if.if_output = mpls_output;
124	sc->sc_if.if_ioctl = mpls_ioctl;
125
126	if_attach(&sc->sc_if);
127	if_alloc_sadl(&sc->sc_if);
128	bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
129	return 0;
130}
131
132static int
133mpls_clone_destroy(struct ifnet *ifp)
134{
135	int s;
136
137	bpf_detach(ifp);
138
139	s = splnet();
140	if_detach(ifp);
141	splx(s);
142
143	free(ifp->if_softc, M_DEVBUF);
144	return 0;
145}
146
/*
 * Interface input routine: hands the frame to the label switch engine.
 * The result of mpls_lse() is not reported to the caller.
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{
#if 0
	/*
	 * TODO - kefren
	 * The packet should be unshimmed and its family guessed
	 * before it is passed to bpf.
	 */
	bpf_mtap_af(ifp, AF_MPLS, m);
#endif

	(void)mpls_lse(m);
}
161
162void
163mplsintr(void)
164{
165	struct mbuf *m;
166	int s;
167
168	while (!IF_IS_EMPTY(&mplsintrq)) {
169		s = splnet();
170		IF_DEQUEUE(&mplsintrq, m);
171		splx(s);
172
173		if (!m)
174			return;
175
176		if (((m->m_flags & M_PKTHDR) == 0) ||
177		    (m->m_pkthdr.rcvif == 0))
178			panic("mplsintr(): no pkthdr or rcvif");
179
180#ifdef MBUFTRACE
181		m_claimm(m, &mpls_owner);
182#endif
183		mpls_input(m->m_pkthdr.rcvif, m);
184	}
185}
186
187/*
188 * prepend shim and deliver
189 */
190static int
191mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
192{
193	union mpls_shim mh, *pms;
194	struct rtentry *rt1;
195	int err;
196	uint psize = sizeof(struct sockaddr_mpls);
197
198	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
199		m_freem(m);
200		return ENETDOWN;
201	}
202
203	if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
204		m_freem(m);
205		return EINVAL;
206	}
207
208	bpf_mtap_af(ifp, dst->sa_family, m);
209
210	memset(&mh, 0, sizeof(mh));
211	mh.s_addr = MPLS_GETSADDR(rt);
212	mh.shim.bos = 1;
213	mh.shim.exp = 0;
214	mh.shim.ttl = mpls_defttl;
215
216	pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
217
218	while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
219		pms++;
220		if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
221		    ((m = mpls_prepend_shim(m, &mh)) == NULL))
222			return ENOBUFS;
223		memset(&mh, 0, sizeof(mh));
224		mh.s_addr = ntohl(pms->s_addr);
225		mh.shim.bos = mh.shim.exp = 0;
226		mh.shim.ttl = mpls_defttl;
227		psize += sizeof(mh);
228	}
229
230	switch(dst->sa_family) {
231#ifdef INET
232	case AF_INET:
233		m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
234		break;
235#endif
236#ifdef INET6
237	case AF_INET6:
238		m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
239		break;
240#endif
241	default:
242		m = mpls_prepend_shim(m, &mh);
243		break;
244	}
245
246	if (m == NULL) {
247		IF_DROP(&ifp->if_snd);
248		ifp->if_oerrors++;
249		return ENOBUFS;
250	}
251
252	ifp->if_opackets++;
253	ifp->if_obytes += m->m_pkthdr.len;
254
255	if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
256		m_freem(m);
257		return EHOSTUNREACH;
258	}
259
260	err = mpls_send_frame(m, rt1->rt_ifp, rt);
261	RTFREE(rt1);
262	return err;
263}
264
265static int
266mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
267{
268	int error = 0, s = splnet();
269	struct ifreq *ifr = data;
270
271	switch(cmd) {
272	case SIOCINITIFADDR:
273		ifp->if_flags |= IFF_UP | IFF_RUNNING;
274		break;
275	case SIOCSIFMTU:
276		if (ifr != NULL && ifr->ifr_mtu < 576) {
277			error = EINVAL;
278			break;
279		}
280		/* FALLTHROUGH */
281	case SIOCGIFMTU:
282		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
283			error = 0;
284		break;
285	case SIOCSIFFLAGS:
286		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
287			break;
288		if (ifp->if_flags & IFF_UP)
289			ifp->if_flags |= IFF_RUNNING;
290		break;
291	default:
292		error = ifioctl_common(ifp, cmd, data);
293		break;
294	}
295	splx(s);
296	return error;
297}
298
299/*
300 * MPLS Label Switch Engine
301 */
302static int
303mpls_lse(struct mbuf *m)
304{
305	struct sockaddr_mpls dst;
306	union mpls_shim tshim, *htag;
307	struct rtentry *rt = NULL;
308	int error = ENOBUFS;
309	uint psize = sizeof(struct sockaddr_mpls);
310
311	if (m->m_len < sizeof(union mpls_shim) &&
312	    (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
313		goto done;
314
315	dst.smpls_len = sizeof(struct sockaddr_mpls);
316	dst.smpls_family = AF_MPLS;
317	dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
318
319	/* Check if we're accepting MPLS Frames */
320	error = EINVAL;
321	if (!mpls_accept)
322		goto done;
323
324	/* TTL decrement */
325	if ((m = mpls_ttl_dec(m)) == NULL)
326		goto done;
327
328	if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
329		/* Don't swap reserved labels */
330		switch (dst.smpls_addr.shim.label) {
331#ifdef INET
332		case MPLS_LABEL_IPV4NULL:
333			/* Pop shim and push mbuf to IP stack */
334			if (dst.smpls_addr.shim.bos)
335				error = mpls_unlabel_inet(m);
336			break;
337#endif
338#ifdef INET6
339		case MPLS_LABEL_IPV6NULL:
340			/* Pop shim and push mbuf to IPv6 stack */
341			if (dst.smpls_addr.shim.bos)
342				error = mpls_unlabel_inet6(m);
343			break;
344#endif
345		case MPLS_LABEL_RTALERT:	/* Yeah, I'm all alerted */
346		case MPLS_LABEL_IMPLNULL:	/* This is logical only */
347		default:			/* Rest are not allowed */
348			break;
349		}
350		goto done;
351	}
352
353	/* Check if we should do MPLS forwarding */
354	error = EHOSTUNREACH;
355	if (!mpls_forwarding)
356		goto done;
357
358	/* Get a route to dst */
359	dst.smpls_addr.shim.ttl =
360	    dst.smpls_addr.shim.bos =
361	    dst.smpls_addr.shim.exp = 0;
362	dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
363	if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
364		goto done;
365
366	/* MPLS packet with no MPLS tagged route ? */
367	if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
368	     rt_gettag(rt) == NULL ||
369	     rt_gettag(rt)->sa_family != AF_MPLS)
370		goto done;
371
372	tshim.s_addr = MPLS_GETSADDR(rt);
373
374	/* Swap labels */
375	if ((m->m_len < sizeof(union mpls_shim)) &&
376	    (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
377		error = ENOBUFS;
378		goto done;
379	}
380
381	/* Replace only the label */
382	htag = mtod(m, union mpls_shim *);
383	htag->s_addr = ntohl(htag->s_addr);
384	htag->shim.label = tshim.shim.label;
385	htag->s_addr = htonl(htag->s_addr);
386
387	/* check if there is anything more to prepend */
388	htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
389	while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
390		htag++;
391		memset(&tshim, 0, sizeof(tshim));
392		tshim.s_addr = ntohl(htag->s_addr);
393		tshim.shim.bos = tshim.shim.exp = 0;
394		tshim.shim.ttl = mpls_defttl;
395		if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
396		    ((m = mpls_prepend_shim(m, &tshim)) == NULL))
397			return ENOBUFS;
398		psize += sizeof(tshim);
399	}
400
401	error = mpls_send_frame(m, rt->rt_ifp, rt);
402
403done:
404	if (error != 0 && m != NULL)
405		m_freem(m);
406	if (rt != NULL)
407		RTFREE(rt);
408
409	return error;
410}
411
412static int
413mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
414{
415	union mpls_shim msh;
416
417	if ((rt->rt_flags & RTF_GATEWAY) == 0)
418		return EHOSTUNREACH;
419
420	rt->rt_use++;
421
422	msh.s_addr = MPLS_GETSADDR(rt);
423	if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
424	    (m->m_flags & (M_MCAST | M_BCAST))) {
425		m_adj(m, sizeof(union mpls_shim));
426		m->m_pkthdr.csum_flags = 0;
427	}
428
429	switch(ifp->if_type) {
430	/* only these are supported for now */
431	case IFT_ETHER:
432	case IFT_TUNNEL:
433	case IFT_LOOP:
434		return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
435		break;
436	default:
437		return ENETUNREACH;
438	}
439	return 0;
440}
441
442
443
444#ifdef INET
445static int
446mpls_unlabel_inet(struct mbuf *m)
447{
448	int s, iphlen;
449	struct ip *iph;
450	union mpls_shim *ms;
451	struct ifqueue *inq;
452
453	if (mpls_mapttl_inet || mpls_mapprec_inet) {
454
455		/* get shim info */
456		ms = mtod(m, union mpls_shim *);
457		ms->s_addr = ntohl(ms->s_addr);
458
459		/* and get rid of it */
460		m_adj(m, sizeof(union mpls_shim));
461
462		/* get ip header */
463		if (m->m_len < sizeof (struct ip) &&
464		    (m = m_pullup(m, sizeof(struct ip))) == NULL)
465			return ENOBUFS;
466		iph = mtod(m, struct ip *);
467		iphlen = iph->ip_hl << 2;
468
469		/* get it all */
470		if (m->m_len < iphlen) {
471			if ((m = m_pullup(m, iphlen)) == NULL)
472				return ENOBUFS;
473			iph = mtod(m, struct ip *);
474		}
475
476		/* check ipsum */
477		if (in_cksum(m, iphlen) != 0) {
478			m_freem(m);
479			return EINVAL;
480		}
481
482		/* set IP ttl from MPLS ttl */
483		if (mpls_mapttl_inet)
484			iph->ip_ttl = ms->shim.ttl;
485
486		/* set IP Precedence from MPLS Exp */
487		if (mpls_mapprec_inet) {
488			iph->ip_tos = (iph->ip_tos << 3) >> 3;
489			iph->ip_tos |= ms->shim.exp << 5;
490		}
491
492		/* reset ipsum because we modified TTL and TOS */
493		iph->ip_sum = 0;
494		iph->ip_sum = in_cksum(m, iphlen);
495	} else
496		m_adj(m, sizeof(union mpls_shim));
497
498	/* Put it on IP queue */
499	inq = &ipintrq;
500	s = splnet();
501	if (IF_QFULL(inq)) {
502		IF_DROP(inq);
503		splx(s);
504		m_freem(m);
505		return ENOBUFS;
506	}
507	IF_ENQUEUE(inq, m);
508	splx(s);
509	schednetisr(NETISR_IP);
510
511	return 0;
512}
513
514/*
515 * Prepend MPLS label
516 */
517static struct mbuf *
518mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
519{
520	struct ip iphdr;
521
522	if (mpls_mapttl_inet || mpls_mapprec_inet) {
523		if ((m->m_len < sizeof(struct ip)) &&
524		    (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
525			return NULL; /* XXX */
526		m_copydata(m, offset, sizeof(struct ip), &iphdr);
527
528		/* Map TTL */
529		if (mpls_mapttl_inet)
530			ms->shim.ttl = iphdr.ip_ttl;
531
532		/* Copy IP precedence to EXP */
533		if (mpls_mapprec_inet)
534			ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
535	}
536
537	if ((m = mpls_prepend_shim(m, ms)) == NULL)
538		return NULL;
539
540	return m;
541}
542
543#endif	/* INET */
544
545#ifdef INET6
546
547static int
548mpls_unlabel_inet6(struct mbuf *m)
549{
550	struct ip6_hdr *ip6hdr;
551	union mpls_shim ms;
552	struct ifqueue *inq;
553	int s;
554
555	/* TODO: mapclass */
556	if (mpls_mapttl_inet6) {
557		ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
558		m_adj(m, sizeof(union mpls_shim));
559
560		if (m->m_len < sizeof (struct ip6_hdr) &&
561		    (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
562			return ENOBUFS;
563		ip6hdr = mtod(m, struct ip6_hdr *);
564
565		/* Because we just decremented this in mpls_lse */
566		ip6hdr->ip6_hlim = ms.shim.ttl + 1;
567	} else
568		m_adj(m, sizeof(union mpls_shim));
569
570	/* Put it back on IPv6 stack */
571	schednetisr(NETISR_IPV6);
572	inq = &ip6intrq;
573	s = splnet();
574	if (IF_QFULL(inq)) {
575		IF_DROP(inq);
576		splx(s);
577		m_freem(m);
578		return ENOBUFS;
579	}
580
581	IF_ENQUEUE(inq, m);
582	splx(s);
583
584	return 0;
585}
586
587static struct mbuf *
588mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
589{
590	struct ip6_hdr ip6h;
591
592	if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
593		if (m->m_len < sizeof(struct ip6_hdr) &&
594		    (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
595			return NULL;
596		m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
597
598		if (mpls_mapttl_inet6)
599			ms->shim.ttl = ip6h.ip6_hlim;
600
601		if (mpls_mapclass_inet6)
602			ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
603	}
604
605	if ((m = mpls_prepend_shim(m, ms)) == NULL)
606		return NULL;
607
608	return m;
609}
610
611#endif	/* INET6 */
612
613static struct mbuf *
614mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
615{
616	union mpls_shim *shim;
617
618	M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
619	if (m == NULL)
620		return NULL;
621
622	if (m->m_len < sizeof(union mpls_shim) &&
623	    (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
624		return NULL;
625
626	shim = mtod(m, union mpls_shim *);
627
628	memcpy(shim, ms, sizeof(*shim));
629	shim->s_addr = htonl(shim->s_addr);
630
631	return m;
632}
633