1/*
2 * xfrm4_policy.c
3 *
4 * Changes:
5 *	Kazunori MIYAZAWA @USAGI
6 * 	YOSHIFUJI Hideaki @USAGI
7 *		Split up af-specific portion
8 *
9 */
10
11#include <linux/compiler.h>
12#include <linux/inetdevice.h>
13#include <net/xfrm.h>
14#include <net/ip.h>
15
16static struct dst_ops xfrm4_dst_ops;
17static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
18
/*
 * IPv4 route lookup hook: resolve @fl with __ip_route_output_key(),
 * treating @dst as a pointer to a struct rtable pointer, and hand the
 * callee's return value straight back to the caller.
 */
static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
{
	struct rtable **rtp = (struct rtable **)dst;

	return __ip_route_output_key(rtp, fl);
}
23
24static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
25{
26	struct rtable *rt;
27	struct flowi fl_tunnel = {
28		.nl_u = {
29			.ip4_u = {
30				.daddr = daddr->a4,
31			},
32		},
33	};
34
35	if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
36		saddr->a4 = rt->rt_src;
37		dst_release(&rt->u.dst);
38		return 0;
39	}
40	return -EHOSTUNREACH;
41}
42
43static struct dst_entry *
44__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
45{
46	struct dst_entry *dst;
47
48	read_lock_bh(&policy->lock);
49	for (dst = policy->bundles; dst; dst = dst->next) {
50		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
51		if (xdst->u.rt.fl.oif == fl->oif &&
52		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
53		    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
54		    xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
55		    xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
56			dst_clone(dst);
57			break;
58		}
59	}
60	read_unlock_bh(&policy->lock);
61	return dst;
62}
63
64/* Allocate chain of dst_entry's, attach known xfrm's, calculate
65 * all the metrics... Shortly, bundle a bundle.
66 */
67
68static int
69__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
70		      struct flowi *fl, struct dst_entry **dst_p)
71{
72	struct dst_entry *dst, *dst_prev;
73	struct rtable *rt0 = (struct rtable*)(*dst_p);
74	struct rtable *rt = rt0;
75	struct flowi fl_tunnel = {
76		.nl_u = {
77			.ip4_u = {
78				.saddr = fl->fl4_src,
79				.daddr = fl->fl4_dst,
80				.tos = fl->fl4_tos
81			}
82		}
83	};
84	int i;
85	int err;
86	int header_len = 0;
87	int trailer_len = 0;
88
89	dst = dst_prev = NULL;
90	dst_hold(&rt->u.dst);
91
92	for (i = 0; i < nx; i++) {
93		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
94		struct xfrm_dst *xdst;
95
96		if (unlikely(dst1 == NULL)) {
97			err = -ENOBUFS;
98			dst_release(&rt->u.dst);
99			goto error;
100		}
101
102		if (!dst)
103			dst = dst1;
104		else {
105			dst_prev->child = dst1;
106			dst1->flags |= DST_NOHASH;
107			dst_clone(dst1);
108		}
109
110		xdst = (struct xfrm_dst *)dst1;
111		xdst->route = &rt->u.dst;
112		xdst->genid = xfrm[i]->genid;
113
114		dst1->next = dst_prev;
115		dst_prev = dst1;
116
117		header_len += xfrm[i]->props.header_len;
118		trailer_len += xfrm[i]->props.trailer_len;
119
120		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
121			unsigned short encap_family = xfrm[i]->props.family;
122			switch (encap_family) {
123			case AF_INET:
124				fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
125				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
126				break;
127#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
128			case AF_INET6:
129				ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6);
130				ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6);
131				break;
132#endif
133			default:
134				BUG_ON(1);
135			}
136			err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
137					      &fl_tunnel, encap_family);
138			if (err)
139				goto error;
140		} else
141			dst_hold(&rt->u.dst);
142	}
143
144	dst_prev->child = &rt->u.dst;
145	dst->path = &rt->u.dst;
146
147	*dst_p = dst;
148	dst = dst_prev;
149
150	dst_prev = *dst_p;
151	i = 0;
152	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
153		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
154		struct xfrm_state_afinfo *afinfo;
155		x->u.rt.fl = *fl;
156
157		dst_prev->xfrm = xfrm[i++];
158		dst_prev->dev = rt->u.dst.dev;
159		if (rt->u.dst.dev)
160			dev_hold(rt->u.dst.dev);
161		dst_prev->obsolete	= -1;
162		dst_prev->flags	       |= DST_HOST;
163		dst_prev->lastuse	= jiffies;
164		dst_prev->header_len	= header_len;
165		dst_prev->nfheader_len	= 0;
166		dst_prev->trailer_len	= trailer_len;
167		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
168
169		/* Copy neighbout for reachability confirmation */
170		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
171		dst_prev->input		= rt->u.dst.input;
172		afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
173		if (!afinfo) {
174			dst = *dst_p;
175			err = -EAFNOSUPPORT;
176			goto error;
177		}
178		dst_prev->output = afinfo->output;
179		xfrm_state_put_afinfo(afinfo);
180		if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
181			atomic_inc(&rt->peer->refcnt);
182		x->u.rt.peer = rt->peer;
183		/* Sheit... I remember I did this right. Apparently,
184		 * it was magically lost, so this code needs audit */
185		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
186		x->u.rt.rt_type = rt->rt_type;
187		x->u.rt.rt_src = rt0->rt_src;
188		x->u.rt.rt_dst = rt0->rt_dst;
189		x->u.rt.rt_gateway = rt->rt_gateway;
190		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
191		x->u.rt.idev = rt0->idev;
192		in_dev_hold(rt0->idev);
193		header_len -= x->u.dst.xfrm->props.header_len;
194		trailer_len -= x->u.dst.xfrm->props.trailer_len;
195	}
196
197	xfrm_init_pmtu(dst);
198	return 0;
199
200error:
201	if (dst)
202		dst_free(dst);
203	return err;
204}
205
206static void
207_decode_session4(struct sk_buff *skb, struct flowi *fl)
208{
209	struct iphdr *iph = ip_hdr(skb);
210	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
211
212	memset(fl, 0, sizeof(struct flowi));
213	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
214		switch (iph->protocol) {
215		case IPPROTO_UDP:
216		case IPPROTO_UDPLITE:
217		case IPPROTO_TCP:
218		case IPPROTO_SCTP:
219		case IPPROTO_DCCP:
220			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
221				__be16 *ports = (__be16 *)xprth;
222
223				fl->fl_ip_sport = ports[0];
224				fl->fl_ip_dport = ports[1];
225			}
226			break;
227
228		case IPPROTO_ICMP:
229			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
230				u8 *icmp = xprth;
231
232				fl->fl_icmp_type = icmp[0];
233				fl->fl_icmp_code = icmp[1];
234			}
235			break;
236
237		case IPPROTO_ESP:
238			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
239				__be32 *ehdr = (__be32 *)xprth;
240
241				fl->fl_ipsec_spi = ehdr[0];
242			}
243			break;
244
245		case IPPROTO_AH:
246			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
247				__be32 *ah_hdr = (__be32*)xprth;
248
249				fl->fl_ipsec_spi = ah_hdr[1];
250			}
251			break;
252
253		case IPPROTO_COMP:
254			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
255				__be16 *ipcomp_hdr = (__be16 *)xprth;
256
257				fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
258			}
259			break;
260		default:
261			fl->fl_ipsec_spi = 0;
262			break;
263		}
264	}
265	fl->proto = iph->protocol;
266	fl->fl4_dst = iph->daddr;
267	fl->fl4_src = iph->saddr;
268	fl->fl4_tos = iph->tos;
269}
270
271static inline int xfrm4_garbage_collect(void)
272{
273	xfrm4_policy_afinfo.garbage_collect();
274	return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
275}
276
277static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
278{
279	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
280	struct dst_entry *path = xdst->route;
281
282	path->ops->update_pmtu(path, mtu);
283}
284
285static void xfrm4_dst_destroy(struct dst_entry *dst)
286{
287	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
288
289	if (likely(xdst->u.rt.idev))
290		in_dev_put(xdst->u.rt.idev);
291	if (dst->xfrm && dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer))
292		inet_putpeer(xdst->u.rt.peer);
293	xfrm_dst_destroy(xdst);
294}
295
296static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
297			     int unregister)
298{
299	struct xfrm_dst *xdst;
300
301	if (!unregister)
302		return;
303
304	xdst = (struct xfrm_dst *)dst;
305	if (xdst->u.rt.idev->dev == dev) {
306		struct in_device *loopback_idev = in_dev_get(&loopback_dev);
307		BUG_ON(!loopback_idev);
308
309		do {
310			in_dev_put(xdst->u.rt.idev);
311			xdst->u.rt.idev = loopback_idev;
312			in_dev_hold(loopback_idev);
313			xdst = (struct xfrm_dst *)xdst->u.dst.child;
314		} while (xdst->u.dst.xfrm);
315
316		__in_dev_put(loopback_idev);
317	}
318
319	xfrm_dst_ifdown(dst, dev);
320}
321
322static struct dst_ops xfrm4_dst_ops = {
323	.family =		AF_INET,
324	.protocol =		__constant_htons(ETH_P_IP),
325	.gc =			xfrm4_garbage_collect,
326	.update_pmtu =		xfrm4_update_pmtu,
327	.destroy =		xfrm4_dst_destroy,
328	.ifdown =		xfrm4_dst_ifdown,
329	.gc_thresh =		1024,
330	.entry_size =		sizeof(struct xfrm_dst),
331};
332
333static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
334	.family = 		AF_INET,
335	.dst_ops =		&xfrm4_dst_ops,
336	.dst_lookup =		xfrm4_dst_lookup,
337	.get_saddr =		xfrm4_get_saddr,
338	.find_bundle = 		__xfrm4_find_bundle,
339	.bundle_create =	__xfrm4_bundle_create,
340	.decode_session =	_decode_session4,
341};
342
343static void __init xfrm4_policy_init(void)
344{
345	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
346}
347
348static void __exit xfrm4_policy_fini(void)
349{
350	xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
351}
352
353void __init xfrm4_init(void)
354{
355	xfrm4_state_init();
356	xfrm4_policy_init();
357}
358