1// SPDX-License-Identifier: GPL-2.0-or-later
2/* L2TPv3 IP encapsulation support
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9#include <asm/ioctls.h>
10#include <linux/icmp.h>
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/random.h>
14#include <linux/socket.h>
15#include <linux/l2tp.h>
16#include <linux/in.h>
17#include <net/sock.h>
18#include <net/ip.h>
19#include <net/icmp.h>
20#include <net/udp.h>
21#include <net/inet_common.h>
22#include <net/tcp_states.h>
23#include <net/protocol.h>
24#include <net/xfrm.h>
25
26#include "l2tp_core.h"
27
28struct l2tp_ip_sock {
29	/* inet_sock has to be the first member of l2tp_ip_sock */
30	struct inet_sock	inet;
31
32	u32			conn_id;
33	u32			peer_conn_id;
34};
35
/* l2tp_ip_lock serialises every access to the two socket lists below. */
static DEFINE_RWLOCK(l2tp_ip_lock);
/* Sockets added by l2tp_ip_hash() and removed again once they are bound. */
static HLIST_HEAD(l2tp_ip_table);
/* Bound sockets; this is the list __l2tp_ip_bind_lookup() walks. */
static HLIST_HEAD(l2tp_ip_bind_table);
39
/* Convert a generic socket pointer to the L2TP/IP socket that contains it.
 * This is a plain pointer cast; no type check is performed.
 */
static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
{
	const void *base = sk;

	return (struct l2tp_ip_sock *)base;
}
44
45static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
46					  __be32 raddr, int dif, u32 tunnel_id)
47{
48	struct sock *sk;
49
50	sk_for_each_bound(sk, &l2tp_ip_bind_table) {
51		const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
52		const struct inet_sock *inet = inet_sk(sk);
53		int bound_dev_if;
54
55		if (!net_eq(sock_net(sk), net))
56			continue;
57
58		bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
59		if (bound_dev_if && dif && bound_dev_if != dif)
60			continue;
61
62		if (inet->inet_rcv_saddr && laddr &&
63		    inet->inet_rcv_saddr != laddr)
64			continue;
65
66		if (inet->inet_daddr && raddr && inet->inet_daddr != raddr)
67			continue;
68
69		if (l2tp->conn_id != tunnel_id)
70			continue;
71
72		goto found;
73	}
74
75	sk = NULL;
76found:
77	return sk;
78}
79
80/* When processing receive frames, there are two cases to
81 * consider. Data frames consist of a non-zero session-id and an
82 * optional cookie. Control frames consist of a regular L2TP header
83 * preceded by 32-bits of zeros.
84 *
85 * L2TPv3 Session Header Over IP
86 *
87 *  0                   1                   2                   3
88 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
89 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
90 * |                           Session ID                          |
91 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
92 * |               Cookie (optional, maximum 64 bits)...
93 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
94 *                                                                 |
95 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
96 *
97 * L2TPv3 Control Message Header Over IP
98 *
99 *  0                   1                   2                   3
100 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
101 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
102 * |                      (32 bits of zeros)                       |
103 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104 * |T|L|x|x|S|x|x|x|x|x|x|x|  Ver  |             Length            |
105 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106 * |                     Control Connection ID                     |
107 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
108 * |               Ns              |               Nr              |
109 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
110 *
111 * All control frames are passed to userspace.
112 */
113static int l2tp_ip_recv(struct sk_buff *skb)
114{
115	struct net *net = dev_net(skb->dev);
116	struct sock *sk;
117	u32 session_id;
118	u32 tunnel_id;
119	unsigned char *ptr, *optr;
120	struct l2tp_session *session;
121	struct l2tp_tunnel *tunnel = NULL;
122	struct iphdr *iph;
123
124	if (!pskb_may_pull(skb, 4))
125		goto discard;
126
127	/* Point to L2TP header */
128	optr = skb->data;
129	ptr = skb->data;
130	session_id = ntohl(*((__be32 *)ptr));
131	ptr += 4;
132
133	/* RFC3931: L2TP/IP packets have the first 4 bytes containing
134	 * the session_id. If it is 0, the packet is a L2TP control
135	 * frame and the session_id value can be discarded.
136	 */
137	if (session_id == 0) {
138		__skb_pull(skb, 4);
139		goto pass_up;
140	}
141
142	/* Ok, this is a data packet. Lookup the session. */
143	session = l2tp_session_get(net, session_id);
144	if (!session)
145		goto discard;
146
147	tunnel = session->tunnel;
148	if (!tunnel)
149		goto discard_sess;
150
151	if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
152		goto discard_sess;
153
154	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
155	l2tp_session_dec_refcount(session);
156
157	return 0;
158
159pass_up:
160	/* Get the tunnel_id from the L2TP header */
161	if (!pskb_may_pull(skb, 12))
162		goto discard;
163
164	if ((skb->data[0] & 0xc0) != 0xc0)
165		goto discard;
166
167	tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
168	iph = (struct iphdr *)skb_network_header(skb);
169
170	read_lock_bh(&l2tp_ip_lock);
171	sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
172				   tunnel_id);
173	if (!sk) {
174		read_unlock_bh(&l2tp_ip_lock);
175		goto discard;
176	}
177	sock_hold(sk);
178	read_unlock_bh(&l2tp_ip_lock);
179
180	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
181		goto discard_put;
182
183	nf_reset_ct(skb);
184
185	return sk_receive_skb(sk, skb, 1);
186
187discard_sess:
188	l2tp_session_dec_refcount(session);
189	goto discard;
190
191discard_put:
192	sock_put(sk);
193
194discard:
195	kfree_skb(skb);
196	return 0;
197}
198
199static int l2tp_ip_hash(struct sock *sk)
200{
201	if (sk_unhashed(sk)) {
202		write_lock_bh(&l2tp_ip_lock);
203		sk_add_node(sk, &l2tp_ip_table);
204		write_unlock_bh(&l2tp_ip_lock);
205	}
206	return 0;
207}
208
209static void l2tp_ip_unhash(struct sock *sk)
210{
211	if (sk_unhashed(sk))
212		return;
213	write_lock_bh(&l2tp_ip_lock);
214	sk_del_node_init(sk);
215	write_unlock_bh(&l2tp_ip_lock);
216}
217
218static int l2tp_ip_open(struct sock *sk)
219{
220	/* Prevent autobind. We don't have ports. */
221	inet_sk(sk)->inet_num = IPPROTO_L2TP;
222
223	l2tp_ip_hash(sk);
224	return 0;
225}
226
227static void l2tp_ip_close(struct sock *sk, long timeout)
228{
229	write_lock_bh(&l2tp_ip_lock);
230	hlist_del_init(&sk->sk_bind_node);
231	sk_del_node_init(sk);
232	write_unlock_bh(&l2tp_ip_lock);
233	sk_common_release(sk);
234}
235
236static void l2tp_ip_destroy_sock(struct sock *sk)
237{
238	struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
239	struct sk_buff *skb;
240
241	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
242		kfree_skb(skb);
243
244	if (tunnel)
245		l2tp_tunnel_delete(tunnel);
246}
247
248static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
249{
250	struct inet_sock *inet = inet_sk(sk);
251	struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr;
252	struct net *net = sock_net(sk);
253	int ret;
254	int chk_addr_ret;
255
256	if (addr_len < sizeof(struct sockaddr_l2tpip))
257		return -EINVAL;
258	if (addr->l2tp_family != AF_INET)
259		return -EINVAL;
260
261	lock_sock(sk);
262
263	ret = -EINVAL;
264	if (!sock_flag(sk, SOCK_ZAPPED))
265		goto out;
266
267	if (sk->sk_state != TCP_CLOSE)
268		goto out;
269
270	chk_addr_ret = inet_addr_type(net, addr->l2tp_addr.s_addr);
271	ret = -EADDRNOTAVAIL;
272	if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
273	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
274		goto out;
275
276	if (addr->l2tp_addr.s_addr) {
277		inet->inet_rcv_saddr = addr->l2tp_addr.s_addr;
278		inet->inet_saddr = addr->l2tp_addr.s_addr;
279	}
280	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
281		inet->inet_saddr = 0;  /* Use device */
282
283	write_lock_bh(&l2tp_ip_lock);
284	if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
285				  sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
286		write_unlock_bh(&l2tp_ip_lock);
287		ret = -EADDRINUSE;
288		goto out;
289	}
290
291	sk_dst_reset(sk);
292	l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
293
294	sk_add_bind_node(sk, &l2tp_ip_bind_table);
295	sk_del_node_init(sk);
296	write_unlock_bh(&l2tp_ip_lock);
297
298	ret = 0;
299	sock_reset_flag(sk, SOCK_ZAPPED);
300
301out:
302	release_sock(sk);
303
304	return ret;
305}
306
307static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
308{
309	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
310	int rc;
311
312	if (addr_len < sizeof(*lsa))
313		return -EINVAL;
314
315	if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
316		return -EINVAL;
317
318	lock_sock(sk);
319
320	/* Must bind first - autobinding does not work */
321	if (sock_flag(sk, SOCK_ZAPPED)) {
322		rc = -EINVAL;
323		goto out_sk;
324	}
325
326	rc = __ip4_datagram_connect(sk, uaddr, addr_len);
327	if (rc < 0)
328		goto out_sk;
329
330	l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
331
332	write_lock_bh(&l2tp_ip_lock);
333	hlist_del_init(&sk->sk_bind_node);
334	sk_add_bind_node(sk, &l2tp_ip_bind_table);
335	write_unlock_bh(&l2tp_ip_lock);
336
337out_sk:
338	release_sock(sk);
339
340	return rc;
341}
342
343static int l2tp_ip_disconnect(struct sock *sk, int flags)
344{
345	if (sock_flag(sk, SOCK_ZAPPED))
346		return 0;
347
348	return __udp_disconnect(sk, flags);
349}
350
351static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
352			   int peer)
353{
354	struct sock *sk		= sock->sk;
355	struct inet_sock *inet	= inet_sk(sk);
356	struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
357	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
358
359	memset(lsa, 0, sizeof(*lsa));
360	lsa->l2tp_family = AF_INET;
361	if (peer) {
362		if (!inet->inet_dport)
363			return -ENOTCONN;
364		lsa->l2tp_conn_id = lsk->peer_conn_id;
365		lsa->l2tp_addr.s_addr = inet->inet_daddr;
366	} else {
367		__be32 addr = inet->inet_rcv_saddr;
368
369		if (!addr)
370			addr = inet->inet_saddr;
371		lsa->l2tp_conn_id = lsk->conn_id;
372		lsa->l2tp_addr.s_addr = addr;
373	}
374	return sizeof(*lsa);
375}
376
377static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
378{
379	int rc;
380
381	/* Charge it to the socket, dropping if the queue is full. */
382	rc = sock_queue_rcv_skb(sk, skb);
383	if (rc < 0)
384		goto drop;
385
386	return 0;
387
388drop:
389	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS);
390	kfree_skb(skb);
391	return 0;
392}
393
394/* Userspace will call sendmsg() on the tunnel socket to send L2TP
395 * control frames.
396 */
397static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
398{
399	struct sk_buff *skb;
400	int rc;
401	struct inet_sock *inet = inet_sk(sk);
402	struct rtable *rt = NULL;
403	struct flowi4 *fl4;
404	int connected = 0;
405	__be32 daddr;
406
407	lock_sock(sk);
408
409	rc = -ENOTCONN;
410	if (sock_flag(sk, SOCK_DEAD))
411		goto out;
412
413	/* Get and verify the address. */
414	if (msg->msg_name) {
415		DECLARE_SOCKADDR(struct sockaddr_l2tpip *, lip, msg->msg_name);
416
417		rc = -EINVAL;
418		if (msg->msg_namelen < sizeof(*lip))
419			goto out;
420
421		if (lip->l2tp_family != AF_INET) {
422			rc = -EAFNOSUPPORT;
423			if (lip->l2tp_family != AF_UNSPEC)
424				goto out;
425		}
426
427		daddr = lip->l2tp_addr.s_addr;
428	} else {
429		rc = -EDESTADDRREQ;
430		if (sk->sk_state != TCP_ESTABLISHED)
431			goto out;
432
433		daddr = inet->inet_daddr;
434		connected = 1;
435	}
436
437	/* Allocate a socket buffer */
438	rc = -ENOMEM;
439	skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) +
440			   4 + len, 0, GFP_KERNEL);
441	if (!skb)
442		goto error;
443
444	/* Reserve space for headers, putting IP header on 4-byte boundary. */
445	skb_reserve(skb, 2 + NET_SKB_PAD);
446	skb_reset_network_header(skb);
447	skb_reserve(skb, sizeof(struct iphdr));
448	skb_reset_transport_header(skb);
449
450	/* Insert 0 session_id */
451	*((__be32 *)skb_put(skb, 4)) = 0;
452
453	/* Copy user data into skb */
454	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
455	if (rc < 0) {
456		kfree_skb(skb);
457		goto error;
458	}
459
460	fl4 = &inet->cork.fl.u.ip4;
461	if (connected)
462		rt = (struct rtable *)__sk_dst_check(sk, 0);
463
464	rcu_read_lock();
465	if (!rt) {
466		const struct ip_options_rcu *inet_opt;
467
468		inet_opt = rcu_dereference(inet->inet_opt);
469
470		/* Use correct destination address if we have options. */
471		if (inet_opt && inet_opt->opt.srr)
472			daddr = inet_opt->opt.faddr;
473
474		/* If this fails, retransmit mechanism of transport layer will
475		 * keep trying until route appears or the connection times
476		 * itself out.
477		 */
478		rt = ip_route_output_ports(sock_net(sk), fl4, sk,
479					   daddr, inet->inet_saddr,
480					   inet->inet_dport, inet->inet_sport,
481					   sk->sk_protocol, ip_sock_rt_tos(sk),
482					   sk->sk_bound_dev_if);
483		if (IS_ERR(rt))
484			goto no_route;
485		if (connected) {
486			sk_setup_caps(sk, &rt->dst);
487		} else {
488			skb_dst_set(skb, &rt->dst);
489			goto xmit;
490		}
491	}
492
493	/* We don't need to clone dst here, it is guaranteed to not disappear.
494	 *  __dev_xmit_skb() might force a refcount if needed.
495	 */
496	skb_dst_set_noref(skb, &rt->dst);
497
498xmit:
499	/* Queue the packet to IP for output */
500	rc = ip_queue_xmit(sk, skb, &inet->cork.fl);
501	rcu_read_unlock();
502
503error:
504	if (rc >= 0)
505		rc = len;
506
507out:
508	release_sock(sk);
509	return rc;
510
511no_route:
512	rcu_read_unlock();
513	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
514	kfree_skb(skb);
515	rc = -EHOSTUNREACH;
516	goto out;
517}
518
519static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
520			   size_t len, int flags, int *addr_len)
521{
522	struct inet_sock *inet = inet_sk(sk);
523	size_t copied = 0;
524	int err = -EOPNOTSUPP;
525	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
526	struct sk_buff *skb;
527
528	if (flags & MSG_OOB)
529		goto out;
530
531	skb = skb_recv_datagram(sk, flags, &err);
532	if (!skb)
533		goto out;
534
535	copied = skb->len;
536	if (len < copied) {
537		msg->msg_flags |= MSG_TRUNC;
538		copied = len;
539	}
540
541	err = skb_copy_datagram_msg(skb, 0, msg, copied);
542	if (err)
543		goto done;
544
545	sock_recv_timestamp(msg, sk, skb);
546
547	/* Copy the address. */
548	if (sin) {
549		sin->sin_family = AF_INET;
550		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
551		sin->sin_port = 0;
552		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
553		*addr_len = sizeof(*sin);
554	}
555	if (inet_cmsg_flags(inet))
556		ip_cmsg_recv(msg, skb);
557	if (flags & MSG_TRUNC)
558		copied = skb->len;
559done:
560	skb_free_datagram(sk, skb);
561out:
562	return err ? err : copied;
563}
564
565int l2tp_ioctl(struct sock *sk, int cmd, int *karg)
566{
567	struct sk_buff *skb;
568
569	switch (cmd) {
570	case SIOCOUTQ:
571		*karg = sk_wmem_alloc_get(sk);
572		break;
573	case SIOCINQ:
574		spin_lock_bh(&sk->sk_receive_queue.lock);
575		skb = skb_peek(&sk->sk_receive_queue);
576		*karg = skb ? skb->len : 0;
577		spin_unlock_bh(&sk->sk_receive_queue.lock);
578		break;
579
580	default:
581		return -ENOIOCTLCMD;
582	}
583
584	return 0;
585}
586EXPORT_SYMBOL_GPL(l2tp_ioctl);
587
588static struct proto l2tp_ip_prot = {
589	.name		   = "L2TP/IP",
590	.owner		   = THIS_MODULE,
591	.init		   = l2tp_ip_open,
592	.close		   = l2tp_ip_close,
593	.bind		   = l2tp_ip_bind,
594	.connect	   = l2tp_ip_connect,
595	.disconnect	   = l2tp_ip_disconnect,
596	.ioctl		   = l2tp_ioctl,
597	.destroy	   = l2tp_ip_destroy_sock,
598	.setsockopt	   = ip_setsockopt,
599	.getsockopt	   = ip_getsockopt,
600	.sendmsg	   = l2tp_ip_sendmsg,
601	.recvmsg	   = l2tp_ip_recvmsg,
602	.backlog_rcv	   = l2tp_ip_backlog_recv,
603	.hash		   = l2tp_ip_hash,
604	.unhash		   = l2tp_ip_unhash,
605	.obj_size	   = sizeof(struct l2tp_ip_sock),
606};
607
608static const struct proto_ops l2tp_ip_ops = {
609	.family		   = PF_INET,
610	.owner		   = THIS_MODULE,
611	.release	   = inet_release,
612	.bind		   = inet_bind,
613	.connect	   = inet_dgram_connect,
614	.socketpair	   = sock_no_socketpair,
615	.accept		   = sock_no_accept,
616	.getname	   = l2tp_ip_getname,
617	.poll		   = datagram_poll,
618	.ioctl		   = inet_ioctl,
619	.gettstamp	   = sock_gettstamp,
620	.listen		   = sock_no_listen,
621	.shutdown	   = inet_shutdown,
622	.setsockopt	   = sock_common_setsockopt,
623	.getsockopt	   = sock_common_getsockopt,
624	.sendmsg	   = inet_sendmsg,
625	.recvmsg	   = sock_common_recvmsg,
626	.mmap		   = sock_no_mmap,
627};
628
629static struct inet_protosw l2tp_ip_protosw = {
630	.type		= SOCK_DGRAM,
631	.protocol	= IPPROTO_L2TP,
632	.prot		= &l2tp_ip_prot,
633	.ops		= &l2tp_ip_ops,
634};
635
636static struct net_protocol l2tp_ip_protocol __read_mostly = {
637	.handler	= l2tp_ip_recv,
638};
639
640static int __init l2tp_ip_init(void)
641{
642	int err;
643
644	pr_info("L2TP IP encapsulation support (L2TPv3)\n");
645
646	err = proto_register(&l2tp_ip_prot, 1);
647	if (err != 0)
648		goto out;
649
650	err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
651	if (err)
652		goto out1;
653
654	inet_register_protosw(&l2tp_ip_protosw);
655	return 0;
656
657out1:
658	proto_unregister(&l2tp_ip_prot);
659out:
660	return err;
661}
662
663static void __exit l2tp_ip_exit(void)
664{
665	inet_unregister_protosw(&l2tp_ip_protosw);
666	inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
667	proto_unregister(&l2tp_ip_prot);
668}
669
670module_init(l2tp_ip_init);
671module_exit(l2tp_ip_exit);
672
673MODULE_LICENSE("GPL");
674MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
675MODULE_DESCRIPTION("L2TP over IP");
676MODULE_VERSION("1.0");
677
678/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol,
679 * because __stringify doesn't like enums
680 */
681MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2);
682MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115);
683