Lines Matching refs:ctx

143 static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
177 static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
179 if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
180 ctx->submit_state.cqes_count)
181 __io_submit_flush_completions(ctx);
184 static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
186 return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
189 static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
191 return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
220 struct io_ring_ctx *ctx = head->ctx;
223 spin_lock_irq(&ctx->timeout_lock);
225 spin_unlock_irq(&ctx->timeout_lock);
238 static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx)
240 wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
245 struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
247 complete(&ctx->ref_comp);
252 struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
254 struct llist_node *node = llist_del_all(&ctx->fallback_llist);
258 percpu_ref_get(&ctx->refs);
259 mutex_lock(&ctx->uring_lock);
264 io_submit_flush_completions(ctx);
265 mutex_unlock(&ctx->uring_lock);
266 percpu_ref_put(&ctx->refs);
285 struct io_ring_ctx *ctx;
288 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
289 if (!ctx)
292 xa_init(&ctx->io_bl_xa);
301 if (io_alloc_hash_table(&ctx->cancel_table, hash_bits))
303 if (io_alloc_hash_table(&ctx->cancel_table_locked, hash_bits))
305 if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
309 ctx->flags = p->flags;
310 atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
311 init_waitqueue_head(&ctx->sqo_sq_wait);
312 INIT_LIST_HEAD(&ctx->sqd_list);
313 INIT_LIST_HEAD(&ctx->cq_overflow_list);
314 INIT_LIST_HEAD(&ctx->io_buffers_cache);
315 INIT_HLIST_HEAD(&ctx->io_buf_list);
316 io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
318 io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
320 io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
322 io_futex_cache_init(ctx);
323 init_completion(&ctx->ref_comp);
324 xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
325 mutex_init(&ctx->uring_lock);
326 init_waitqueue_head(&ctx->cq_wait);
327 init_waitqueue_head(&ctx->poll_wq);
328 init_waitqueue_head(&ctx->rsrc_quiesce_wq);
329 spin_lock_init(&ctx->completion_lock);
330 spin_lock_init(&ctx->timeout_lock);
331 INIT_WQ_LIST(&ctx->iopoll_list);
332 INIT_LIST_HEAD(&ctx->io_buffers_comp);
333 INIT_LIST_HEAD(&ctx->defer_list);
334 INIT_LIST_HEAD(&ctx->timeout_list);
335 INIT_LIST_HEAD(&ctx->ltimeout_list);
336 INIT_LIST_HEAD(&ctx->rsrc_ref_list);
337 init_llist_head(&ctx->work_llist);
338 INIT_LIST_HEAD(&ctx->tctx_list);
339 ctx->submit_state.free_list.next = NULL;
340 INIT_WQ_LIST(&ctx->locked_free_list);
341 INIT_HLIST_HEAD(&ctx->waitid_list);
343 INIT_HLIST_HEAD(&ctx->futex_list);
345 INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
346 INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
347 INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd);
348 io_napi_init(ctx);
350 return ctx;
352 kfree(ctx->cancel_table.hbs);
353 kfree(ctx->cancel_table_locked.hbs);
354 xa_destroy(&ctx->io_bl_xa);
355 kfree(ctx);
359 static void io_account_cq_overflow(struct io_ring_ctx *ctx)
361 struct io_rings *r = ctx->rings;
364 ctx->cq_extra--;
370 struct io_ring_ctx *ctx = req->ctx;
372 return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
381 spin_lock(&req->ctx->completion_lock);
383 spin_unlock(&req->ctx->completion_lock);
454 struct io_ring_ctx *ctx = req->ctx;
476 if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL))
489 struct io_ring_ctx *ctx = req->ctx;
491 spin_lock_irq(&ctx->timeout_lock);
494 spin_unlock_irq(&ctx->timeout_lock);
528 static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
530 while (!list_empty(&ctx->defer_list)) {
531 struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
560 static void io_eventfd_signal(struct io_ring_ctx *ctx)
566 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
569 ev_fd = rcu_dereference(ctx->io_ev_fd);
573 * completed between the NULL check of ctx->io_ev_fd at the start of
578 if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
597 static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
601 spin_lock(&ctx->completion_lock);
611 skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
612 ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
613 spin_unlock(&ctx->completion_lock);
617 io_eventfd_signal(ctx);
620 void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
622 if (ctx->poll_activated)
623 io_poll_wq_wake(ctx);
624 if (ctx->off_timeout_used)
625 io_flush_timeouts(ctx);
626 if (ctx->drain_active) {
627 spin_lock(&ctx->completion_lock);
628 io_queue_deferred(ctx);
629 spin_unlock(&ctx->completion_lock);
631 if (ctx->has_evfd)
632 io_eventfd_flush_signal(ctx);
635 static inline void __io_cq_lock(struct io_ring_ctx *ctx)
637 if (!ctx->lockless_cq)
638 spin_lock(&ctx->completion_lock);
641 static inline void io_cq_lock(struct io_ring_ctx *ctx)
642 __acquires(ctx->completion_lock)
644 spin_lock(&ctx->completion_lock);
647 static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
649 io_commit_cqring(ctx);
650 if (!ctx->task_complete) {
651 if (!ctx->lockless_cq)
652 spin_unlock(&ctx->completion_lock);
654 if (!ctx->syscall_iopoll)
655 io_cqring_wake(ctx);
657 io_commit_cqring_flush(ctx);
660 static void io_cq_unlock_post(struct io_ring_ctx *ctx)
661 __releases(ctx->completion_lock)
663 io_commit_cqring(ctx);
664 spin_unlock(&ctx->completion_lock);
665 io_cqring_wake(ctx);
666 io_commit_cqring_flush(ctx);
669 static void io_cqring_overflow_kill(struct io_ring_ctx *ctx)
674 spin_lock(&ctx->completion_lock);
675 list_splice_init(&ctx->cq_overflow_list, &list);
676 clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
677 spin_unlock(&ctx->completion_lock);
686 static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx)
690 if (__io_cqring_events(ctx) == ctx->cq_entries)
693 if (ctx->flags & IORING_SETUP_CQE32)
696 io_cq_lock(ctx);
697 while (!list_empty(&ctx->cq_overflow_list)) {
701 if (!io_get_cqe_overflow(ctx, &cqe, true))
703 ocqe = list_first_entry(&ctx->cq_overflow_list,
710 if (list_empty(&ctx->cq_overflow_list)) {
711 clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
712 atomic_andnot(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
714 io_cq_unlock_post(ctx);
717 static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx)
720 if (ctx->flags & IORING_SETUP_IOPOLL)
721 mutex_lock(&ctx->uring_lock);
722 __io_cqring_overflow_flush(ctx);
723 if (ctx->flags & IORING_SETUP_IOPOLL)
724 mutex_unlock(&ctx->uring_lock);
727 static void io_cqring_overflow_flush(struct io_ring_ctx *ctx)
729 if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
730 io_cqring_do_overflow_flush(ctx);
780 static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
785 bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
787 lockdep_assert_held(&ctx->completion_lock);
793 trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
800 io_account_cq_overflow(ctx);
801 set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq);
804 if (list_empty(&ctx->cq_overflow_list)) {
805 set_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
806 atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
816 list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
822 io_cqring_event_overflow(req->ctx, req->cqe.user_data,
833 bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow)
835 struct io_rings *rings = ctx->rings;
836 unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
844 if (!overflow && (ctx->check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT)))
848 queued = min(__io_cqring_events(ctx), ctx->cq_entries);
849 free = ctx->cq_entries - queued;
851 len = min(free, ctx->cq_entries - off);
855 if (ctx->flags & IORING_SETUP_CQE32) {
860 ctx->cqe_cached = &rings->cqes[off];
861 ctx->cqe_sentinel = ctx->cqe_cached + len;
865 static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
870 ctx->cq_extra++;
877 if (likely(io_get_cqe(ctx, &cqe))) {
878 trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
884 if (ctx->flags & IORING_SETUP_CQE32) {
893 static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
894 __must_hold(&ctx->uring_lock)
896 struct io_submit_state *state = &ctx->submit_state;
899 lockdep_assert_held(&ctx->uring_lock);
901 struct io_uring_cqe *cqe = &ctx->completion_cqes[i];
903 if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
904 if (ctx->lockless_cq) {
905 spin_lock(&ctx->completion_lock);
906 io_cqring_event_overflow(ctx, cqe->user_data,
908 spin_unlock(&ctx->completion_lock);
910 io_cqring_event_overflow(ctx, cqe->user_data,
918 static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
923 io_cq_lock(ctx);
924 filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
926 filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
928 io_cq_unlock_post(ctx);
932 bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
934 return __io_post_aux_cqe(ctx, user_data, res, cflags, true);
943 struct io_ring_ctx *ctx = req->ctx;
950 return __io_post_aux_cqe(ctx, user_data, res, cflags, false);
952 lockdep_assert_held(&ctx->uring_lock);
954 if (ctx->submit_state.cqes_count == ARRAY_SIZE(ctx->completion_cqes)) {
955 __io_cq_lock(ctx);
956 __io_flush_post_cqes(ctx);
958 __io_cq_unlock_post(ctx);
965 if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
968 cqe = &ctx->completion_cqes[ctx->submit_state.cqes_count++];
977 struct io_ring_ctx *ctx = req->ctx;
980 io_cq_lock(ctx);
982 if (!io_fill_cqe_req(ctx, req))
1011 wq_list_add_head(&req->comp_list, &ctx->locked_free_list);
1012 ctx->locked_free_nr++;
1014 io_cq_unlock_post(ctx);
1017 io_ring_submit_lock(ctx, issue_flags);
1018 io_put_rsrc_node(ctx, rsrc_node);
1019 io_ring_submit_unlock(ctx, issue_flags);
1025 struct io_ring_ctx *ctx = req->ctx;
1027 if (ctx->task_complete && ctx->submitter_task != current) {
1031 !(ctx->flags & IORING_SETUP_IOPOLL)) {
1034 mutex_lock(&ctx->uring_lock);
1036 mutex_unlock(&ctx->uring_lock);
1041 __must_hold(&ctx->uring_lock)
1045 lockdep_assert_held(&req->ctx->uring_lock);
1058 static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
1060 req->ctx = ctx;
1068 static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
1071 spin_lock(&ctx->completion_lock);
1072 wq_list_splice(&ctx->locked_free_list, &state->free_list);
1073 ctx->locked_free_nr = 0;
1074 spin_unlock(&ctx->completion_lock);
1083 __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
1084 __must_hold(&ctx->uring_lock)
1095 if (data_race(ctx->locked_free_nr) > IO_COMPL_BATCH) {
1096 io_flush_cached_locked_reqs(ctx, &ctx->submit_state);
1097 if (!io_req_cache_empty(ctx))
1114 percpu_ref_get_many(&ctx->refs, ret);
1118 io_preinit_req(req, ctx);
1119 io_req_add_to_cache(req, ctx);
1136 struct io_ring_ctx *ctx = req->ctx;
1138 spin_lock(&ctx->completion_lock);
1140 spin_unlock(&ctx->completion_lock);
1160 static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
1162 if (!ctx)
1164 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
1165 atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
1167 io_submit_flush_completions(ctx);
1168 mutex_unlock(&ctx->uring_lock);
1171 percpu_ref_put(&ctx->refs);
1183 struct io_ring_ctx *ctx = NULL;
1191 if (req->ctx != ctx) {
1192 ctx_flush_and_put(ctx, &ts);
1193 ctx = req->ctx;
1195 ts.locked = mutex_trylock(&ctx->uring_lock);
1196 percpu_ref_get(&ctx->refs);
1204 ctx_flush_and_put(ctx, &ts);
1205 ctx = NULL;
1210 ctx_flush_and_put(ctx, &ts);
1237 if (sync && last_ctx != req->ctx) {
1242 last_ctx = req->ctx;
1246 &req->ctx->fallback_llist))
1247 schedule_delayed_work(&req->ctx->fallback_work, 1);
1295 struct io_ring_ctx *ctx = req->ctx;
1309 head = READ_ONCE(ctx->work_llist.first);
1333 } while (!try_cmpxchg(&ctx->work_llist.first, &head,
1345 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
1346 atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
1347 if (ctx->has_evfd)
1348 io_eventfd_signal(ctx);
1351 nr_wait = atomic_read(&ctx->cq_wait_nr);
1358 wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
1364 struct io_ring_ctx *ctx = req->ctx;
1370 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
1371 atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
1374 if (ctx->flags & IORING_SETUP_SQPOLL) {
1375 struct io_sq_data *sqd = ctx->sq_data;
1382 if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
1390 if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
1399 static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
1403 node = llist_del_all(&ctx->work_llist);
1413 static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
1416 if (llist_empty(&ctx->work_llist))
1420 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
1421 atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
1425 static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
1432 if (WARN_ON_ONCE(ctx->submitter_task != current))
1434 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
1435 atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
1441 node = llist_reverse_order(io_llist_xchg(&ctx->work_llist, NULL));
1454 if (io_run_local_work_continue(ctx, ret, min_events))
1457 io_submit_flush_completions(ctx);
1458 if (io_run_local_work_continue(ctx, ret, min_events))
1462 trace_io_uring_local_work_run(ctx, ret, loops);
1466 static inline int io_run_local_work_locked(struct io_ring_ctx *ctx,
1472 if (llist_empty(&ctx->work_llist))
1475 ret = __io_run_local_work(ctx, &ts, min_events);
1478 mutex_lock(&ctx->uring_lock);
1482 static int io_run_local_work(struct io_ring_ctx *ctx, int min_events)
1487 ts.locked = mutex_trylock(&ctx->uring_lock);
1488 ret = __io_run_local_work(ctx, &ts, min_events);
1490 mutex_unlock(&ctx->uring_lock);
1497 io_tw_lock(req->ctx, ts);
1503 io_tw_lock(req->ctx, ts);
1534 static void io_free_batch_list(struct io_ring_ctx *ctx,
1536 __must_hold(&ctx->uring_lock)
1553 if (!io_alloc_cache_put(&ctx->apoll_cache, &apoll->cache))
1564 io_req_put_rsrc_locked(req, ctx);
1568 io_req_add_to_cache(req, ctx);
1572 void __io_submit_flush_completions(struct io_ring_ctx *ctx)
1573 __must_hold(&ctx->uring_lock)
1575 struct io_submit_state *state = &ctx->submit_state;
1578 __io_cq_lock(ctx);
1581 __io_flush_post_cqes(ctx);
1587 unlikely(!io_fill_cqe_req(ctx, req))) {
1588 if (ctx->lockless_cq) {
1589 spin_lock(&ctx->completion_lock);
1591 spin_unlock(&ctx->completion_lock);
1597 __io_cq_unlock_post(ctx);
1599 if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
1600 io_free_batch_list(ctx, state->compl_reqs.first);
1605 static unsigned io_cqring_events(struct io_ring_ctx *ctx)
1609 return __io_cqring_events(ctx);
1616 static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
1618 if (!(ctx->flags & IORING_SETUP_IOPOLL))
1621 mutex_lock(&ctx->uring_lock);
1622 while (!wq_list_empty(&ctx->iopoll_list)) {
1624 if (io_do_iopoll(ctx, true) == 0)
1632 mutex_unlock(&ctx->uring_lock);
1634 mutex_lock(&ctx->uring_lock);
1637 mutex_unlock(&ctx->uring_lock);
1640 static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
1645 if (!io_allowed_run_tw(ctx))
1648 check_cq = READ_ONCE(ctx->check_cq);
1651 __io_cqring_overflow_flush(ctx);
1664 if (io_cqring_events(ctx))
1680 if (wq_list_empty(&ctx->iopoll_list) ||
1681 io_task_work_pending(ctx)) {
1682 u32 tail = ctx->cached_cq_tail;
1684 (void) io_run_local_work_locked(ctx, min);
1687 wq_list_empty(&ctx->iopoll_list)) {
1688 mutex_unlock(&ctx->uring_lock);
1690 mutex_lock(&ctx->uring_lock);
1693 if (tail != ctx->cached_cq_tail ||
1694 wq_list_empty(&ctx->iopoll_list))
1697 ret = io_do_iopoll(ctx, !min);
1728 struct io_ring_ctx *ctx = req->ctx;
1733 mutex_lock(&ctx->uring_lock);
1740 if (wq_list_empty(&ctx->iopoll_list)) {
1741 ctx->poll_multi_queue = false;
1742 } else if (!ctx->poll_multi_queue) {
1745 list_req = container_of(ctx->iopoll_list.first, struct io_kiocb,
1748 ctx->poll_multi_queue = true;
1756 wq_list_add_head(&req->comp_list, &ctx->iopoll_list);
1758 wq_list_add_tail(&req->comp_list, &ctx->iopoll_list);
1767 if ((ctx->flags & IORING_SETUP_SQPOLL) &&
1768 wq_has_sleeper(&ctx->sq_data->wait))
1769 wake_up(&ctx->sq_data->wait);
1771 mutex_unlock(&ctx->uring_lock);
1818 u32 seq = req->ctx->cached_sq_head;
1828 __must_hold(&ctx->uring_lock)
1830 struct io_ring_ctx *ctx = req->ctx;
1836 spin_lock(&ctx->completion_lock);
1837 if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) {
1838 spin_unlock(&ctx->completion_lock);
1840 ctx->drain_active = false;
1844 spin_unlock(&ctx->completion_lock);
1854 spin_lock(&ctx->completion_lock);
1855 if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
1856 spin_unlock(&ctx->completion_lock);
1864 list_add_tail(&de->list, &ctx->defer_list);
1865 spin_unlock(&ctx->completion_lock);
1919 if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
1927 io_tw_lock(req->ctx, ts);
2023 if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
2046 struct io_ring_ctx *ctx = req->ctx;
2050 io_ring_submit_lock(ctx, issue_flags);
2052 if (unlikely((unsigned int)fd >= ctx->nr_user_files))
2054 fd = array_index_nospec(fd, ctx->nr_user_files);
2055 slot = io_fixed_file_slot(&ctx->file_table, fd);
2057 __io_req_set_rsrc_node(req, ctx);
2061 io_ring_submit_unlock(ctx, issue_flags);
2078 __must_hold(&req->ctx->uring_lock)
2107 __must_hold(&req->ctx->uring_lock)
2122 __must_hold(&req->ctx->uring_lock)
2140 if (unlikely(req->ctx->drain_active))
2152 static inline bool io_check_restriction(struct io_ring_ctx *ctx,
2156 if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
2159 if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
2160 ctx->restrictions.sqe_flags_required)
2163 if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
2164 ctx->restrictions.sqe_flags_required))
2172 struct io_ring_ctx *ctx = req->ctx;
2173 struct io_kiocb *head = ctx->submit_state.link.head;
2175 ctx->drain_active = true;
2185 ctx->drain_next = true;
2196 static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
2198 __must_hold(&ctx->uring_lock)
2230 ctx->drain_disabled = true;
2232 if (ctx->drain_disabled)
2237 if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) {
2238 if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags))
2241 if (ctx->drain_active)
2244 if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) {
2245 ctx->drain_next = false;
2246 ctx->drain_active = true;
2253 if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
2257 struct io_submit_state *state = &ctx->submit_state;
2276 req->creds = xa_load(&ctx->personalities, personality);
2294 struct io_ring_ctx *ctx = req->ctx;
2295 struct io_submit_link *link = &ctx->submit_state.link;
2328 static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
2330 __must_hold(&ctx->uring_lock)
2332 struct io_submit_link *link = &ctx->submit_state.link;
2335 ret = io_init_req(ctx, req, sqe);
2384 static void io_submit_state_end(struct io_ring_ctx *ctx)
2386 struct io_submit_state *state = &ctx->submit_state;
2391 io_submit_flush_completions(ctx);
2409 static void io_commit_sqring(struct io_ring_ctx *ctx)
2411 struct io_rings *rings = ctx->rings;
2418 smp_store_release(&rings->sq.head, ctx->cached_sq_head);
2429 static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
2431 unsigned mask = ctx->sq_entries - 1;
2432 unsigned head = ctx->cached_sq_head++ & mask;
2434 if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) {
2435 head = READ_ONCE(ctx->sq_array[head]);
2436 if (unlikely(head >= ctx->sq_entries)) {
2438 spin_lock(&ctx->completion_lock);
2439 ctx->cq_extra--;
2440 spin_unlock(&ctx->completion_lock);
2441 WRITE_ONCE(ctx->rings->sq_dropped,
2442 READ_ONCE(ctx->rings->sq_dropped) + 1);
2457 if (ctx->flags & IORING_SETUP_SQE128)
2459 *sqe = &ctx->sq_sqes[head];
2463 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
2464 __must_hold(&ctx->uring_lock)
2466 unsigned int entries = io_sqring_entries(ctx);
2475 io_submit_state_start(&ctx->submit_state, left);
2481 if (unlikely(!io_alloc_req(ctx, &req)))
2483 if (unlikely(!io_get_sqe(ctx, &sqe))) {
2484 io_req_add_to_cache(req, ctx);
2492 if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
2493 !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
2502 if (!ret && io_req_cache_empty(ctx))
2507 io_submit_state_end(ctx);
2509 io_commit_sqring(ctx);
2522 if (io_should_wake(iowq) || io_has_work(iowq->ctx))
2527 int io_run_task_work_sig(struct io_ring_ctx *ctx)
2529 if (!llist_empty(&ctx->work_llist)) {
2531 if (io_run_local_work(ctx, INT_MAX) > 0)
2551 static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
2556 if (unlikely(READ_ONCE(ctx->check_cq)))
2558 if (unlikely(!llist_empty(&ctx->work_llist)))
2587 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
2592 struct io_rings *rings = ctx->rings;
2595 if (!io_allowed_run_tw(ctx))
2597 if (!llist_empty(&ctx->work_llist))
2598 io_run_local_work(ctx, min_events);
2600 io_cqring_overflow_flush(ctx);
2602 if (__io_cqring_events_user(ctx) >= min_events)
2608 iowq.ctx = ctx;
2609 iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
2610 iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
2620 io_napi_adjust_timeout(ctx, &iowq, &ts);
2636 io_napi_busy_loop(ctx, &iowq);
2638 trace_io_uring_cqring_wait(ctx, min_events);
2640 int nr_wait = (int) iowq.cq_tail - READ_ONCE(ctx->rings->cq.tail);
2643 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
2644 atomic_set(&ctx->cq_wait_nr, nr_wait);
2647 prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
2651 ret = io_cqring_wait_schedule(ctx, &iowq);
2653 atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
2661 if (!llist_empty(&ctx->work_llist))
2662 io_run_local_work(ctx, nr_wait);
2676 check_cq = READ_ONCE(ctx->check_cq);
2680 io_cqring_do_overflow_flush(ctx);
2694 if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
2695 finish_wait(&ctx->cq_wait, &iowq.wq);
2784 static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
2787 return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
2791 static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
2794 return __io_uaddr_map(&ctx->sqe_pages, &ctx->n_sqe_pages, uaddr,
2798 static void io_rings_free(struct io_ring_ctx *ctx)
2800 if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
2801 io_mem_free(ctx->rings);
2802 io_mem_free(ctx->sq_sqes);
2804 io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
2805 ctx->n_ring_pages = 0;
2806 io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages);
2807 ctx->n_sqe_pages = 0;
2810 ctx->rings = NULL;
2811 ctx->sq_sqes = NULL;
2825 static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
2834 if (ctx->flags & IORING_SETUP_CQE32) {
2845 if (ctx->flags & IORING_SETUP_NO_SQARRAY) {
2864 static void io_req_caches_free(struct io_ring_ctx *ctx)
2869 mutex_lock(&ctx->uring_lock);
2870 io_flush_cached_locked_reqs(ctx, &ctx->submit_state);
2872 while (!io_req_cache_empty(ctx)) {
2873 req = io_extract_req(ctx);
2878 percpu_ref_put_many(&ctx->refs, nr);
2879 mutex_unlock(&ctx->uring_lock);
2887 static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
2889 io_sq_thread_finish(ctx);
2891 if (WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)))
2894 mutex_lock(&ctx->uring_lock);
2895 if (ctx->buf_data)
2896 __io_sqe_buffers_unregister(ctx);
2897 if (ctx->file_data)
2898 __io_sqe_files_unregister(ctx);
2899 io_cqring_overflow_kill(ctx);
2900 io_eventfd_unregister(ctx);
2901 io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free);
2902 io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
2903 io_futex_cache_free(ctx);
2904 io_destroy_buffers(ctx);
2905 mutex_unlock(&ctx->uring_lock);
2906 if (ctx->sq_creds)
2907 put_cred(ctx->sq_creds);
2908 if (ctx->submitter_task)
2909 put_task_struct(ctx->submitter_task);
2912 if (ctx->rsrc_node)
2913 io_rsrc_node_destroy(ctx, ctx->rsrc_node);
2915 WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
2916 WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
2918 io_alloc_cache_free(&ctx->rsrc_node_cache, io_rsrc_node_cache_free);
2919 if (ctx->mm_account) {
2920 mmdrop(ctx->mm_account);
2921 ctx->mm_account = NULL;
2923 io_rings_free(ctx);
2924 io_kbuf_mmap_list_free(ctx);
2926 percpu_ref_exit(&ctx->refs);
2927 free_uid(ctx->user);
2928 io_req_caches_free(ctx);
2929 if (ctx->hash_map)
2930 io_wq_put_hash(ctx->hash_map);
2931 io_napi_free(ctx);
2932 kfree(ctx->cancel_table.hbs);
2933 kfree(ctx->cancel_table_locked.hbs);
2934 xa_destroy(&ctx->io_bl_xa);
2935 kfree(ctx);
2940 struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
2943 mutex_lock(&ctx->uring_lock);
2944 ctx->poll_activated = true;
2945 mutex_unlock(&ctx->uring_lock);
2951 wake_up_all(&ctx->poll_wq);
2952 percpu_ref_put(&ctx->refs);
2955 __cold void io_activate_pollwq(struct io_ring_ctx *ctx)
2957 spin_lock(&ctx->completion_lock);
2959 if (ctx->poll_activated || ctx->poll_wq_task_work.func)
2961 if (WARN_ON_ONCE(!ctx->task_complete))
2963 if (!ctx->submitter_task)
2969 init_task_work(&ctx->poll_wq_task_work, io_activate_pollwq_cb);
2970 percpu_ref_get(&ctx->refs);
2971 if (task_work_add(ctx->submitter_task, &ctx->poll_wq_task_work, TWA_SIGNAL))
2972 percpu_ref_put(&ctx->refs);
2974 spin_unlock(&ctx->completion_lock);
2979 struct io_ring_ctx *ctx = file->private_data;
2982 if (unlikely(!ctx->poll_activated))
2983 io_activate_pollwq(ctx);
2985 poll_wait(file, &ctx->poll_wq, wait);
2991 if (!io_sqring_full(ctx))
2999 * lock(&ctx->uring_lock);
3001 * lock(&ctx->uring_lock);
3008 if (__io_cqring_events_user(ctx) || io_has_work(ctx))
3017 struct io_ring_ctx *ctx;
3033 io_uring_del_tctx_node((unsigned long)work->ctx);
3041 return req->ctx == data;
3046 struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
3060 if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) {
3061 mutex_lock(&ctx->uring_lock);
3062 io_cqring_overflow_kill(ctx);
3063 mutex_unlock(&ctx->uring_lock);
3066 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
3067 io_move_task_work_from_local(ctx);
3069 while (io_uring_try_cancel_requests(ctx, NULL, true))
3072 if (ctx->sq_data) {
3073 struct io_sq_data *sqd = ctx->sq_data;
3080 io_cancel_ctx_cb, ctx, true);
3084 io_req_caches_free(ctx);
3101 } while (!wait_for_completion_interruptible_timeout(&ctx->ref_comp, interval));
3105 exit.ctx = ctx;
3107 mutex_lock(&ctx->uring_lock);
3108 while (!list_empty(&ctx->tctx_list)) {
3111 node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
3114 list_rotate_left(&ctx->tctx_list);
3119 mutex_unlock(&ctx->uring_lock);
3126 mutex_lock(&ctx->uring_lock);
3128 mutex_unlock(&ctx->uring_lock);
3129 spin_lock(&ctx->completion_lock);
3130 spin_unlock(&ctx->completion_lock);
3133 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
3136 io_ring_ctx_free(ctx);
3139 static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
3144 mutex_lock(&ctx->uring_lock);
3145 percpu_ref_kill(&ctx->refs);
3146 xa_for_each(&ctx->personalities, index, creds)
3147 io_unregister_personality(ctx, index);
3148 if (ctx->rings)
3149 io_poll_remove_all(ctx, NULL, true);
3150 mutex_unlock(&ctx->uring_lock);
3153 * If we failed setting up the ctx, we might not have any rings
3156 if (ctx->rings)
3157 io_kill_timeouts(ctx, NULL, true);
3159 flush_delayed_work(&ctx->fallback_work);
3161 INIT_WORK(&ctx->exit_work, io_ring_exit_work);
3168 queue_work(iou_wq, &ctx->exit_work);
3173 struct io_ring_ctx *ctx = file->private_data;
3176 io_ring_ctx_wait_and_kill(ctx);
3193 static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
3200 spin_lock(&ctx->completion_lock);
3201 list_for_each_entry_reverse(de, &ctx->defer_list, list) {
3203 list_cut_position(&list, &ctx->defer_list, &de->list);
3207 spin_unlock(&ctx->completion_lock);
3220 static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
3226 mutex_lock(&ctx->uring_lock);
3227 list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
3232 * killed after ctx nodes, which requires to take the lock.
3236 cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
3239 mutex_unlock(&ctx->uring_lock);
3244 static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
3251 lockdep_assert_held(&ctx->uring_lock);
3253 hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
3270 io_submit_flush_completions(ctx);
3275 static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
3285 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
3286 atomic_set(&ctx->cq_wait_nr, 1);
3291 if (!ctx->rings)
3295 ret |= io_uring_try_cancel_iowq(ctx);
3298 * Cancels requests of all rings, not only @ctx, but
3307 if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
3308 (ctx->sq_data && ctx->sq_data->thread == current)) {
3309 while (!wq_list_empty(&ctx->iopoll_list)) {
3310 io_iopoll_try_reap_events(ctx);
3316 if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
3317 io_allowed_defer_tw_run(ctx))
3318 ret |= io_run_local_work(ctx, INT_MAX) > 0;
3319 ret |= io_cancel_defer_files(ctx, task, cancel_all);
3320 mutex_lock(&ctx->uring_lock);
3321 ret |= io_poll_remove_all(ctx, task, cancel_all);
3322 ret |= io_waitid_remove_all(ctx, task, cancel_all);
3323 ret |= io_futex_remove_all(ctx, task, cancel_all);
3324 ret |= io_uring_try_cancel_uring_cmd(ctx, task, cancel_all);
3325 mutex_unlock(&ctx->uring_lock);
3326 ret |= io_kill_timeouts(ctx, task, cancel_all);
3340 * Find any io_uring ctx that this task has registered or done IO on, and cancel
3346 struct io_ring_ctx *ctx;
3372 if (node->ctx->sq_data)
3374 loop |= io_uring_try_cancel_requests(node->ctx,
3378 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
3379 loop |= io_uring_try_cancel_requests(ctx,
3393 if (!llist_empty(&node->ctx->work_llist)) {
3394 WARN_ON_ONCE(node->ctx->submitter_task &&
3395 node->ctx->submitter_task != current);
3430 struct io_ring_ctx *ctx = file->private_data;
3439 if (ctx->flags & IORING_SETUP_NO_MMAP)
3441 ptr = ctx->rings;
3445 if (ctx->flags & IORING_SETUP_NO_MMAP)
3447 ptr = ctx->sq_sqes;
3454 bl = io_pbuf_get_bl(ctx, bgid);
3458 io_put_bl(ctx, bl);
3607 struct io_ring_ctx *ctx;
3638 ctx = file->private_data;
3640 if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED))
3649 if (ctx->flags & IORING_SETUP_SQPOLL) {
3650 io_cqring_overflow_flush(ctx);
3652 if (unlikely(ctx->sq_data->thread == NULL)) {
3657 wake_up(&ctx->sq_data->wait);
3659 io_sqpoll_wait_sq(ctx);
3663 ret = io_uring_add_tctx_node(ctx);
3667 mutex_lock(&ctx->uring_lock);
3668 ret = io_submit_sqes(ctx, to_submit);
3670 mutex_unlock(&ctx->uring_lock);
3674 if (ctx->syscall_iopoll)
3680 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
3681 (void)io_run_local_work_locked(ctx, min_complete);
3683 mutex_unlock(&ctx->uring_lock);
3689 if (ctx->syscall_iopoll) {
3696 mutex_lock(&ctx->uring_lock);
3701 ctx->cq_entries);
3702 ret2 = io_iopoll_check(ctx, min_complete);
3704 mutex_unlock(&ctx->uring_lock);
3712 ctx->cq_entries);
3713 ret2 = io_cqring_wait(ctx, min_complete, sig,
3728 &ctx->check_cq);
3757 static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
3765 ctx->sq_entries = p->sq_entries;
3766 ctx->cq_entries = p->cq_entries;
3768 size = rings_size(ctx, p->sq_entries, p->cq_entries, &sq_array_offset);
3772 if (!(ctx->flags & IORING_SETUP_NO_MMAP))
3775 rings = io_rings_map(ctx, p->cq_off.user_addr, size);
3780 ctx->rings = rings;
3781 if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
3782 ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
3793 io_rings_free(ctx);
3797 if (!(ctx->flags & IORING_SETUP_NO_MMAP))
3800 ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
3803 io_rings_free(ctx);
3807 ctx->sq_sqes = ptr;
3827 static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
3830 return anon_inode_create_getfile("[io_uring]", &io_uring_fops, ctx,
3837 struct io_ring_ctx *ctx;
3883 ctx = io_ring_ctx_alloc(p);
3884 if (!ctx)
3887 if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
3888 !(ctx->flags & IORING_SETUP_IOPOLL) &&
3889 !(ctx->flags & IORING_SETUP_SQPOLL))
3890 ctx->task_complete = true;
3892 if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
3893 ctx->lockless_cq = true;
3899 if (!ctx->task_complete)
3900 ctx->poll_activated = true;
3908 if (ctx->flags & IORING_SETUP_IOPOLL &&
3909 !(ctx->flags & IORING_SETUP_SQPOLL))
3910 ctx->syscall_iopoll = 1;
3912 ctx->compat = in_compat_syscall();
3914 ctx->user = get_uid(current_user());
3921 if (ctx->flags & IORING_SETUP_SQPOLL) {
3923 if (ctx->flags & (IORING_SETUP_COOP_TASKRUN |
3927 ctx->notify_method = TWA_SIGNAL_NO_IPI;
3928 } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) {
3929 ctx->notify_method = TWA_SIGNAL_NO_IPI;
3931 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG &&
3932 !(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
3934 ctx->notify_method = TWA_SIGNAL;
3942 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
3943 !(ctx->flags & IORING_SETUP_SINGLE_ISSUER)) {
3954 ctx->mm_account = current->mm;
3956 ret = io_allocate_scq_urings(ctx, p);
3960 ret = io_sq_offload_create(ctx, p);
3964 ret = io_rsrc_init(ctx);
3974 if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
3975 p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
3977 if (!(ctx->flags & IORING_SETUP_NO_MMAP))
3988 if (!(ctx->flags & IORING_SETUP_NO_MMAP))
4004 if (ctx->flags & IORING_SETUP_SINGLE_ISSUER
4005 && !(ctx->flags & IORING_SETUP_R_DISABLED))
4006 WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
4008 file = io_uring_get_file(ctx);
4014 ret = __io_uring_add_tctx_node(ctx);
4030 trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
4033 io_ring_ctx_wait_and_kill(ctx);
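
The entry points indexed above are reached from user space through the io_uring_enter() syscall: io_submit_sqes() consumes SQEs on the submit side and io_cqring_wait() blocks on the wait side, with io_ring_ctx_wait_and_kill()/io_ring_ctx_free() running on final close. As a rough, hedged illustration layered on top of this listing (none of it comes from io_uring.c itself), the short liburing program below drives both paths with a single NOP request; the file name and the build line in the comment are arbitrary.

/*
 * Illustrative sketch only -- not part of io_uring.c or of the listing
 * above. A minimal liburing program exercising the submit path
 * (io_uring_enter() -> io_submit_sqes() -> io_init_req()) and the wait
 * path (io_cqring_wait()) indexed in this cross-reference. Assumes
 * liburing is installed; build with: cc nop.c -o nop -luring
 */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	/* io_uring_setup() under the hood: allocates the io_ring_ctx and rings */
	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init failed: %d\n", ret);
		return 1;
	}

	/* queue one NOP SQE; the kernel picks it up in io_submit_sqes() */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_sqe_set_data(sqe, (void *)0x1);
	io_uring_submit(&ring);

	/* reap the CQE; waits in io_cqring_wait() if none is posted yet */
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret == 0) {
		printf("cqe: res=%d user_data=%p\n", cqe->res,
		       io_uring_cqe_get_data(cqe));
		io_uring_cqe_seen(&ring, cqe);
	}

	/* io_ring_ctx_wait_and_kill() / io_ring_ctx_free() run on final close */
	io_uring_queue_exit(&ring);
	return 0;
}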