// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */

#include <linux/skmsg.h>
#include <linux/bpf.h>
#include <net/sock.h>
#include <net/af_unix.h>

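/* True if there is data ready on the socket's own receive queue or on
 * either of the psock's ingress queues.
 */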
#define unix_sk_has_data(__sk, __psock)					\
		({	!skb_queue_empty(&__sk->sk_receive_queue) ||	\
			!skb_queue_empty(&__psock->ingress_skb) ||	\
			!list_empty(&__psock->ingress_msg);		\
		})

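/* Wait up to @timeo for data to show up on either the socket or the
 * psock. u->iolock is dropped while sleeping so the peer can make
 * progress, then re-acquired before returning. Returns nonzero if data
 * is available (or the receive side was shut down), zero on timeout.
 */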
static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
			      long timeo)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct unix_sock *u = unix_sk(sk);
	int ret = 0;

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		return 1;

	if (!timeo)
		return ret;

	add_wait_queue(sk_sleep(sk), &wait);
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	if (!unix_sk_has_data(sk, psock)) {
		mutex_unlock(&u->iolock);
		wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
		mutex_lock(&u->iolock);
		ret = unix_sk_has_data(sk, psock);
	}
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return ret;
}

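/* Hand the receive off to the native AF_UNIX path for this socket type. */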
static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t len, int flags)
{
	if (sk->sk_type == SOCK_DGRAM)
		return __unix_dgram_recvmsg(sk, msg, len, flags);
	else
		return __unix_stream_recvmsg(sk, msg, len, flags);
}

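/* recvmsg() used while a psock is attached: consume BPF-redirected
 * messages queued on the psock first, and fall back to the native
 * receive path when nothing has been redirected.
 */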
static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
			    size_t len, int flags, int *addr_len)
{
	struct unix_sock *u = unix_sk(sk);
	struct sk_psock *psock;
	int copied;

	if (!len)
		return 0;

	psock = sk_psock_get(sk);
	if (unlikely(!psock))
		return __unix_recvmsg(sk, msg, len, flags);

	mutex_lock(&u->iolock);
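	/* Data sits on the socket's own queue and nothing has been
	 * redirected to the psock, so the native path can serve it.
	 */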
	if (!skb_queue_empty(&sk->sk_receive_queue) &&
	    sk_psock_queue_empty(psock)) {
		mutex_unlock(&u->iolock);
		sk_psock_put(sk, psock);
		return __unix_recvmsg(sk, msg, len, flags);
	}

msg_bytes_ready:
	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
	if (!copied) {
		long timeo;
		int data;

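		/* Nothing was queued on the psock: wait for data. If it
		 * arrives on the psock, retry; if it arrived on the
		 * socket's own queue instead, fall back to the native
		 * path.
		 */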
		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
		data = unix_msg_wait_data(sk, psock, timeo);
		if (data) {
			if (!sk_psock_queue_empty(psock))
				goto msg_bytes_ready;
			mutex_unlock(&u->iolock);
			sk_psock_put(sk, psock);
			return __unix_recvmsg(sk, msg, len, flags);
		}
		copied = -EAGAIN;
	}
	mutex_unlock(&u->iolock);
	sk_psock_put(sk, psock);
	return copied;
}

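/* Copies of the stock dgram/stream protos with the sockmap callbacks
 * patched in, rebuilt lazily whenever the saved base proto changes.
 */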
static struct proto *unix_dgram_prot_saved __read_mostly;
static DEFINE_SPINLOCK(unix_dgram_prot_lock);
static struct proto unix_dgram_bpf_prot;

static struct proto *unix_stream_prot_saved __read_mostly;
static DEFINE_SPINLOCK(unix_stream_prot_lock);
static struct proto unix_stream_bpf_prot;

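/* Clone @base and override the callbacks sockmap must intercept. */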
static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
	*prot = *base;
	prot->close = sock_map_close;
	prot->recvmsg = unix_bpf_recvmsg;
	prot->sock_is_readable = sk_msg_is_readable;
}

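/* The stream variant additionally overrides ->unhash so sockmap can
 * tear down its state when the socket is unhashed.
 */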
static void unix_stream_bpf_rebuild_protos(struct proto *prot,
					   const struct proto *base)
{
	*prot = *base;
	prot->close = sock_map_close;
	prot->recvmsg = unix_bpf_recvmsg;
	prot->sock_is_readable = sk_msg_is_readable;
	prot->unhash = sock_map_unhash;
}

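/* Rebuild the BPF proto copy if the base proto has changed since the
 * last rebuild. Double-checked locking keeps the common path down to a
 * single load-acquire; the same applies to the stream variant below.
 */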
static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
{
	if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
		spin_lock_bh(&unix_dgram_prot_lock);
		if (likely(ops != unix_dgram_prot_saved)) {
			unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
			smp_store_release(&unix_dgram_prot_saved, ops);
		}
		spin_unlock_bh(&unix_dgram_prot_lock);
	}
}

static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
{
	if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
		spin_lock_bh(&unix_stream_prot_lock);
		if (likely(ops != unix_stream_prot_saved)) {
			unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
			smp_store_release(&unix_stream_prot_saved, ops);
		}
		spin_unlock_bh(&unix_stream_prot_lock);
	}
}

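/* psock_update_sk_prot() callback: install the BPF proto on a dgram
 * socket, or restore the original one.
 */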
int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	if (sk->sk_type != SOCK_DGRAM)
		return -EOPNOTSUPP;

	if (restore) {
		sk->sk_write_space = psock->saved_write_space;
		sock_replace_proto(sk, psock->sk_proto);
		return 0;
	}

	unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
	sock_replace_proto(sk, &unix_dgram_bpf_prot);
	return 0;
}

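/* As above for stream sockets, which additionally pin the peer socket
 * so that redirected traffic cannot outlive it.
 */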
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	struct sock *sk_pair;

	/* Restore does not decrement the sk_pair reference yet because we must
	 * keep a reference to the socket until after an RCU grace period
	 * and any pending sends have completed.
	 */
	if (restore) {
		sk->sk_write_space = psock->saved_write_space;
		sock_replace_proto(sk, psock->sk_proto);
		return 0;
	}

	/* psock_update_sk_prot() can be called multiple times if a psock is
	 * added to multiple maps and/or slots in the same map. There is
	 * also an edge case where replacing a psock with itself can trigger
	 * an extra psock_update_sk_prot() during the insert process, so it
	 * must be safe to call multiple times. Here we need to ensure we
	 * don't take an extra reference through sock_hold() on each call;
	 * there will only be a single matching destroy operation.
	 */
	if (!psock->sk_pair) {
		sk_pair = unix_peer(sk);
		sock_hold(sk_pair);
		psock->sk_pair = sk_pair;
	}

	unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
	sock_replace_proto(sk, &unix_stream_bpf_prot);
	return 0;
}

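/* Build both BPF proto variants once at boot from the stock protos. */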
void __init unix_bpf_build_proto(void)
{
	unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
	unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
}