1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * VFIO generic eventfd code for IRQFD support.
4 * Derived from drivers/vfio/pci/vfio_pci_intrs.c
5 *
6 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
7 *     Author: Alex Williamson <alex.williamson@redhat.com>
8 */
9
10#include <linux/vfio.h>
11#include <linux/eventfd.h>
12#include <linux/file.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include "vfio.h"
16
/*
 * Workqueue on which virqfd shutdown and flush_inject work items are
 * queued.  Created in vfio_virqfd_init(), destroyed in vfio_virqfd_exit().
 */
static struct workqueue_struct *vfio_irqfd_cleanup_wq;
/* Held around every read and update of a caller's virqfd pointer slot. */
static DEFINE_SPINLOCK(virqfd_lock);
19
20int __init vfio_virqfd_init(void)
21{
22	vfio_irqfd_cleanup_wq =
23		create_singlethread_workqueue("vfio-irqfd-cleanup");
24	if (!vfio_irqfd_cleanup_wq)
25		return -ENOMEM;
26
27	return 0;
28}
29
/* Destroy the cleanup workqueue created by vfio_virqfd_init(). */
void vfio_virqfd_exit(void)
{
	destroy_workqueue(vfio_irqfd_cleanup_wq);
}
34
/*
 * Queue the virqfd's shutdown work on the cleanup workqueue.  The teardown
 * itself runs later in virqfd_shutdown().  Both callers in this file invoke
 * this with virqfd_lock held.
 */
static void virqfd_deactivate(struct virqfd *virqfd)
{
	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
}
39
40static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
41{
42	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
43	__poll_t flags = key_to_poll(key);
44
45	if (flags & EPOLLIN) {
46		u64 cnt;
47		eventfd_ctx_do_read(virqfd->eventfd, &cnt);
48
49		/* An event has been signaled, call function */
50		if ((!virqfd->handler ||
51		     virqfd->handler(virqfd->opaque, virqfd->data)) &&
52		    virqfd->thread)
53			schedule_work(&virqfd->inject);
54	}
55
56	if (flags & EPOLLHUP) {
57		unsigned long flags;
58		spin_lock_irqsave(&virqfd_lock, flags);
59
60		/*
61		 * The eventfd is closing, if the virqfd has not yet been
62		 * queued for release, as determined by testing whether the
63		 * virqfd pointer to it is still valid, queue it now.  As
64		 * with kvm irqfds, we know we won't race against the virqfd
65		 * going away because we hold the lock to get here.
66		 */
67		if (*(virqfd->pvirqfd) == virqfd) {
68			*(virqfd->pvirqfd) = NULL;
69			virqfd_deactivate(virqfd);
70		}
71
72		spin_unlock_irqrestore(&virqfd_lock, flags);
73	}
74
75	return 0;
76}
77
78static void virqfd_ptable_queue_proc(struct file *file,
79				     wait_queue_head_t *wqh, poll_table *pt)
80{
81	struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
82	add_wait_queue(wqh, &virqfd->wait);
83}
84
85static void virqfd_shutdown(struct work_struct *work)
86{
87	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
88	u64 cnt;
89
90	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
91	flush_work(&virqfd->inject);
92	eventfd_ctx_put(virqfd->eventfd);
93
94	kfree(virqfd);
95}
96
97static void virqfd_inject(struct work_struct *work)
98{
99	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
100	if (virqfd->thread)
101		virqfd->thread(virqfd->opaque, virqfd->data);
102}
103
104static void virqfd_flush_inject(struct work_struct *work)
105{
106	struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject);
107
108	flush_work(&virqfd->inject);
109}
110
111int vfio_virqfd_enable(void *opaque,
112		       int (*handler)(void *, void *),
113		       void (*thread)(void *, void *),
114		       void *data, struct virqfd **pvirqfd, int fd)
115{
116	struct fd irqfd;
117	struct eventfd_ctx *ctx;
118	struct virqfd *virqfd;
119	int ret = 0;
120	__poll_t events;
121
122	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT);
123	if (!virqfd)
124		return -ENOMEM;
125
126	virqfd->pvirqfd = pvirqfd;
127	virqfd->opaque = opaque;
128	virqfd->handler = handler;
129	virqfd->thread = thread;
130	virqfd->data = data;
131
132	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
133	INIT_WORK(&virqfd->inject, virqfd_inject);
134	INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject);
135
136	irqfd = fdget(fd);
137	if (!irqfd.file) {
138		ret = -EBADF;
139		goto err_fd;
140	}
141
142	ctx = eventfd_ctx_fileget(irqfd.file);
143	if (IS_ERR(ctx)) {
144		ret = PTR_ERR(ctx);
145		goto err_ctx;
146	}
147
148	virqfd->eventfd = ctx;
149
150	/*
151	 * virqfds can be released by closing the eventfd or directly
152	 * through ioctl.  These are both done through a workqueue, so
153	 * we update the pointer to the virqfd under lock to avoid
154	 * pushing multiple jobs to release the same virqfd.
155	 */
156	spin_lock_irq(&virqfd_lock);
157
158	if (*pvirqfd) {
159		spin_unlock_irq(&virqfd_lock);
160		ret = -EBUSY;
161		goto err_busy;
162	}
163	*pvirqfd = virqfd;
164
165	spin_unlock_irq(&virqfd_lock);
166
167	/*
168	 * Install our own custom wake-up handling so we are notified via
169	 * a callback whenever someone signals the underlying eventfd.
170	 */
171	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
172	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
173
174	events = vfs_poll(irqfd.file, &virqfd->pt);
175
176	/*
177	 * Check if there was an event already pending on the eventfd
178	 * before we registered and trigger it as if we didn't miss it.
179	 */
180	if (events & EPOLLIN) {
181		if ((!handler || handler(opaque, data)) && thread)
182			schedule_work(&virqfd->inject);
183	}
184
185	/*
186	 * Do not drop the file until the irqfd is fully initialized,
187	 * otherwise we might race against the EPOLLHUP.
188	 */
189	fdput(irqfd);
190
191	return 0;
192err_busy:
193	eventfd_ctx_put(ctx);
194err_ctx:
195	fdput(irqfd);
196err_fd:
197	kfree(virqfd);
198
199	return ret;
200}
201EXPORT_SYMBOL_GPL(vfio_virqfd_enable);
202
203void vfio_virqfd_disable(struct virqfd **pvirqfd)
204{
205	unsigned long flags;
206
207	spin_lock_irqsave(&virqfd_lock, flags);
208
209	if (*pvirqfd) {
210		virqfd_deactivate(*pvirqfd);
211		*pvirqfd = NULL;
212	}
213
214	spin_unlock_irqrestore(&virqfd_lock, flags);
215
216	/*
217	 * Block until we know all outstanding shutdown jobs have completed.
218	 * Even if we don't queue the job, flush the wq to be sure it's
219	 * been released.
220	 */
221	flush_workqueue(vfio_irqfd_cleanup_wq);
222}
223EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
224
225void vfio_virqfd_flush_thread(struct virqfd **pvirqfd)
226{
227	unsigned long flags;
228
229	spin_lock_irqsave(&virqfd_lock, flags);
230	if (*pvirqfd && (*pvirqfd)->thread)
231		queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject);
232	spin_unlock_irqrestore(&virqfd_lock, flags);
233
234	flush_workqueue(vfio_irqfd_cleanup_wq);
235}
236EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread);
237