1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Support for async notification of waitid
4 */
5#include <linux/kernel.h>
6#include <linux/errno.h>
7#include <linux/fs.h>
8#include <linux/file.h>
9#include <linux/compat.h>
10#include <linux/io_uring.h>
11
12#include <uapi/linux/io_uring.h>
13
14#include "io_uring.h"
15#include "cancel.h"
16#include "waitid.h"
17#include "../kernel/exit.h"
18
/*
 * Task-work handler, defined later in this file. Declared up front because
 * io_waitid_drop_issue_ref() installs it before its definition is seen.
 */
static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts);

/*
 * Layout of io_waitid->refs: bit 31 is OR'ed in by the cancelation path,
 * bits 0-30 are the reference count used to decide who owns the request.
 */
#define IO_WAITID_CANCEL_FLAG	BIT(31)
#define IO_WAITID_REF_MASK	GENMASK(30, 0)
23
/*
 * Per-request command state for waitid, obtained from a request via
 * io_kiocb_to_cmd(req, struct io_waitid).
 */
struct io_waitid {
	/*
	 * Not referenced in this file.
	 * NOTE(review): presumably must stay the first member for
	 * io_kiocb_to_cmd() - confirm before reordering.
	 */
	struct file *file;
	/* read from sqe->len in io_waitid_prep() */
	int which;
	/* read from sqe->fd in io_waitid_prep() */
	pid_t upid;
	/* read from sqe->file_index in io_waitid_prep() */
	int options;
	/* ownership count plus cancel bit, see IO_WAITID_* above */
	atomic_t refs;
	/* waitqueue head the request is currently armed on */
	struct wait_queue_head *head;
	/* optional user buffer for the result, from sqe->addr2 (may be NULL) */
	struct siginfo __user *infop;
	/* result storage handed to kernel_waitid_prepare() */
	struct waitid_info info;
};
34
35static void io_waitid_free(struct io_kiocb *req)
36{
37	struct io_waitid_async *iwa = req->async_data;
38
39	put_pid(iwa->wo.wo_pid);
40	kfree(req->async_data);
41	req->async_data = NULL;
42	req->flags &= ~REQ_F_ASYNC_DATA;
43}
44
45#ifdef CONFIG_COMPAT
46static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
47{
48	struct compat_siginfo __user *infop;
49	bool ret;
50
51	infop = (struct compat_siginfo __user *) iw->infop;
52
53	if (!user_write_access_begin(infop, sizeof(*infop)))
54		return false;
55
56	unsafe_put_user(signo, &infop->si_signo, Efault);
57	unsafe_put_user(0, &infop->si_errno, Efault);
58	unsafe_put_user(iw->info.cause, &infop->si_code, Efault);
59	unsafe_put_user(iw->info.pid, &infop->si_pid, Efault);
60	unsafe_put_user(iw->info.uid, &infop->si_uid, Efault);
61	unsafe_put_user(iw->info.status, &infop->si_status, Efault);
62	ret = true;
63done:
64	user_write_access_end();
65	return ret;
66Efault:
67	ret = false;
68	goto done;
69}
70#endif
71
72static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
73{
74	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
75	bool ret;
76
77	if (!iw->infop)
78		return true;
79
80#ifdef CONFIG_COMPAT
81	if (req->ctx->compat)
82		return io_waitid_compat_copy_si(iw, signo);
83#endif
84
85	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
86		return false;
87
88	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
89	unsafe_put_user(0, &iw->infop->si_errno, Efault);
90	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
91	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
92	unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault);
93	unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault);
94	ret = true;
95done:
96	user_write_access_end();
97	return ret;
98Efault:
99	ret = false;
100	goto done;
101}
102
103static int io_waitid_finish(struct io_kiocb *req, int ret)
104{
105	int signo = 0;
106
107	if (ret > 0) {
108		signo = SIGCHLD;
109		ret = 0;
110	}
111
112	if (!io_waitid_copy_si(req, signo))
113		ret = -EFAULT;
114	io_waitid_free(req);
115	return ret;
116}
117
118static void io_waitid_complete(struct io_kiocb *req, int ret)
119{
120	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
121	struct io_tw_state ts = { .locked = true };
122
123	/* anyone completing better be holding a reference */
124	WARN_ON_ONCE(!(atomic_read(&iw->refs) & IO_WAITID_REF_MASK));
125
126	lockdep_assert_held(&req->ctx->uring_lock);
127
128	hlist_del_init(&req->hash_node);
129
130	ret = io_waitid_finish(req, ret);
131	if (ret < 0)
132		req_set_fail(req);
133	io_req_set_res(req, ret, 0);
134	io_req_task_complete(req, &ts);
135}
136
137static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
138{
139	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
140	struct io_waitid_async *iwa = req->async_data;
141
142	/*
143	 * Mark us canceled regardless of ownership. This will prevent a
144	 * potential retry from a spurious wakeup.
145	 */
146	atomic_or(IO_WAITID_CANCEL_FLAG, &iw->refs);
147
148	/* claim ownership */
149	if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK)
150		return false;
151
152	spin_lock_irq(&iw->head->lock);
153	list_del_init(&iwa->wo.child_wait.entry);
154	spin_unlock_irq(&iw->head->lock);
155	io_waitid_complete(req, -ECANCELED);
156	return true;
157}
158
159int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
160		     unsigned int issue_flags)
161{
162	struct hlist_node *tmp;
163	struct io_kiocb *req;
164	int nr = 0;
165
166	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
167		return -ENOENT;
168
169	io_ring_submit_lock(ctx, issue_flags);
170	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
171		if (req->cqe.user_data != cd->data &&
172		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
173			continue;
174		if (__io_waitid_cancel(ctx, req))
175			nr++;
176		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
177			break;
178	}
179	io_ring_submit_unlock(ctx, issue_flags);
180
181	if (nr)
182		return nr;
183
184	return -ENOENT;
185}
186
187bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
188			  bool cancel_all)
189{
190	struct hlist_node *tmp;
191	struct io_kiocb *req;
192	bool found = false;
193
194	lockdep_assert_held(&ctx->uring_lock);
195
196	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
197		if (!io_match_task_safe(req, task, cancel_all))
198			continue;
199		hlist_del_init(&req->hash_node);
200		__io_waitid_cancel(ctx, req);
201		found = true;
202	}
203
204	return found;
205}
206
207static inline bool io_waitid_drop_issue_ref(struct io_kiocb *req)
208{
209	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
210	struct io_waitid_async *iwa = req->async_data;
211
212	if (!atomic_sub_return(1, &iw->refs))
213		return false;
214
215	/*
216	 * Wakeup triggered, racing with us. It was prevented from
217	 * completing because of that, queue up the tw to do that.
218	 */
219	req->io_task_work.func = io_waitid_cb;
220	io_req_task_work_add(req);
221	remove_wait_queue(iw->head, &iwa->wo.child_wait);
222	return true;
223}
224
225static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
226{
227	struct io_waitid_async *iwa = req->async_data;
228	struct io_ring_ctx *ctx = req->ctx;
229	int ret;
230
231	io_tw_lock(ctx, ts);
232
233	ret = __do_wait(&iwa->wo);
234
235	/*
236	 * If we get -ERESTARTSYS here, we need to re-arm and check again
237	 * to ensure we get another callback. If the retry works, then we can
238	 * just remove ourselves from the waitqueue again and finish the
239	 * request.
240	 */
241	if (unlikely(ret == -ERESTARTSYS)) {
242		struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
243
244		/* Don't retry if cancel found it meanwhile */
245		ret = -ECANCELED;
246		if (!(atomic_read(&iw->refs) & IO_WAITID_CANCEL_FLAG)) {
247			iw->head = &current->signal->wait_chldexit;
248			add_wait_queue(iw->head, &iwa->wo.child_wait);
249			ret = __do_wait(&iwa->wo);
250			if (ret == -ERESTARTSYS) {
251				/* retry armed, drop our ref */
252				io_waitid_drop_issue_ref(req);
253				return;
254			}
255
256			remove_wait_queue(iw->head, &iwa->wo.child_wait);
257		}
258	}
259
260	io_waitid_complete(req, ret);
261}
262
263static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
264			  int sync, void *key)
265{
266	struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait);
267	struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo);
268	struct io_kiocb *req = iwa->req;
269	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
270	struct task_struct *p = key;
271
272	if (!pid_child_should_wake(wo, p))
273		return 0;
274
275	/* cancel is in progress */
276	if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK)
277		return 1;
278
279	req->io_task_work.func = io_waitid_cb;
280	io_req_task_work_add(req);
281	list_del_init(&wait->entry);
282	return 1;
283}
284
285int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
286{
287	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
288
289	if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags)
290		return -EINVAL;
291
292	iw->which = READ_ONCE(sqe->len);
293	iw->upid = READ_ONCE(sqe->fd);
294	iw->options = READ_ONCE(sqe->file_index);
295	iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2));
296	return 0;
297}
298
299int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
300{
301	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
302	struct io_ring_ctx *ctx = req->ctx;
303	struct io_waitid_async *iwa;
304	int ret;
305
306	if (io_alloc_async_data(req))
307		return -ENOMEM;
308
309	iwa = req->async_data;
310	iwa->req = req;
311
312	ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info,
313					iw->options, NULL);
314	if (ret)
315		goto done;
316
317	/*
318	 * Mark the request as busy upfront, in case we're racing with the
319	 * wakeup. If we are, then we'll notice when we drop this initial
320	 * reference again after arming.
321	 */
322	atomic_set(&iw->refs, 1);
323
324	/*
325	 * Cancel must hold the ctx lock, so there's no risk of cancelation
326	 * finding us until a) we remain on the list, and b) the lock is
327	 * dropped. We only need to worry about racing with the wakeup
328	 * callback.
329	 */
330	io_ring_submit_lock(ctx, issue_flags);
331	hlist_add_head(&req->hash_node, &ctx->waitid_list);
332
333	init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
334	iwa->wo.child_wait.private = req->task;
335	iw->head = &current->signal->wait_chldexit;
336	add_wait_queue(iw->head, &iwa->wo.child_wait);
337
338	ret = __do_wait(&iwa->wo);
339	if (ret == -ERESTARTSYS) {
340		/*
341		 * Nobody else grabbed a reference, it'll complete when we get
342		 * a waitqueue callback, or if someone cancels it.
343		 */
344		if (!io_waitid_drop_issue_ref(req)) {
345			io_ring_submit_unlock(ctx, issue_flags);
346			return IOU_ISSUE_SKIP_COMPLETE;
347		}
348
349		/*
350		 * Wakeup triggered, racing with us. It was prevented from
351		 * completing because of that, queue up the tw to do that.
352		 */
353		io_ring_submit_unlock(ctx, issue_flags);
354		return IOU_ISSUE_SKIP_COMPLETE;
355	}
356
357	hlist_del_init(&req->hash_node);
358	remove_wait_queue(iw->head, &iwa->wo.child_wait);
359	ret = io_waitid_finish(req, ret);
360
361	io_ring_submit_unlock(ctx, issue_flags);
362done:
363	if (ret < 0)
364		req_set_fail(req);
365	io_req_set_res(req, ret, 0);
366	return IOU_OK;
367}
368