1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
4 *
5 * VFIO container (/dev/vfio/vfio)
6 */
7#include <linux/file.h>
8#include <linux/slab.h>
9#include <linux/fs.h>
10#include <linux/capability.h>
11#include <linux/iommu.h>
12#include <linux/miscdevice.h>
13#include <linux/vfio.h>
14#include <uapi/linux/vfio.h>
15
16#include "vfio.h"
17
/*
 * State behind one open of /dev/vfio/vfio.  Allocated in vfio_fops_open()
 * and freed by vfio_container_release() when the last kref goes away.
 */
struct vfio_container {
	/* Initialised at open; one more reference is taken per attached group */
	struct kref			kref;
	/* Attached vfio_groups, linked through vfio_group::container_next */
	struct list_head		group_list;
	/* Write-held for attach/detach/SET_IOMMU, read-held for CHECK_EXTENSION */
	struct rw_semaphore		group_lock;
	/* Backend picked by VFIO_SET_IOMMU; NULL before and after last detach */
	struct vfio_iommu_driver	*iommu_driver;
	/* Cookie returned by iommu_driver->ops->open(), handed back to its ops */
	void				*iommu_data;
	/* Recorded on group attach: true if the group type is VFIO_NO_IOMMU */
	bool				noiommu;
};
26
/*
 * File-wide registry of IOMMU backend drivers.  Both members are set up in
 * vfio_container_init().
 */
static struct vfio {
	/* Registered struct vfio_iommu_driver entries, linked via vfio_next */
	struct list_head		iommu_drivers_list;
	/* Held around every walk or modification of iommu_drivers_list */
	struct mutex			iommu_drivers_lock;
} vfio;
31
32static void *vfio_noiommu_open(unsigned long arg)
33{
34	if (arg != VFIO_NOIOMMU_IOMMU)
35		return ERR_PTR(-EINVAL);
36	if (!capable(CAP_SYS_RAWIO))
37		return ERR_PTR(-EPERM);
38
39	return NULL;
40}
41
/*
 * .release callback of the vfio-noiommu backend.  vfio_noiommu_open() hands
 * out no state, so @iommu_data is ignored and nothing needs freeing.
 */
static void vfio_noiommu_release(void *iommu_data)
{
	/* intentionally empty */
}
45
46static long vfio_noiommu_ioctl(void *iommu_data,
47			       unsigned int cmd, unsigned long arg)
48{
49	if (cmd == VFIO_CHECK_EXTENSION)
50		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
51
52	return -ENOTTY;
53}
54
55static int vfio_noiommu_attach_group(void *iommu_data,
56		struct iommu_group *iommu_group, enum vfio_group_type type)
57{
58	return 0;
59}
60
/*
 * .detach_group callback of the vfio-noiommu backend.  Counterpart of the
 * no-op attach: there is nothing to undo.
 */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* intentionally empty */
}
65
66static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
67	.name = "vfio-noiommu",
68	.owner = THIS_MODULE,
69	.open = vfio_noiommu_open,
70	.release = vfio_noiommu_release,
71	.ioctl = vfio_noiommu_ioctl,
72	.attach_group = vfio_noiommu_attach_group,
73	.detach_group = vfio_noiommu_detach_group,
74};
75
76/*
77 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
78 * use vfio-noiommu.
79 */
80static bool vfio_iommu_driver_allowed(struct vfio_container *container,
81				      const struct vfio_iommu_driver *driver)
82{
83	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
84		return true;
85	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
86}
87
88/*
89 * IOMMU driver registration
90 */
91int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
92{
93	struct vfio_iommu_driver *driver, *tmp;
94
95	if (WARN_ON(!ops->register_device != !ops->unregister_device))
96		return -EINVAL;
97
98	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
99	if (!driver)
100		return -ENOMEM;
101
102	driver->ops = ops;
103
104	mutex_lock(&vfio.iommu_drivers_lock);
105
106	/* Check for duplicates */
107	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
108		if (tmp->ops == ops) {
109			mutex_unlock(&vfio.iommu_drivers_lock);
110			kfree(driver);
111			return -EINVAL;
112		}
113	}
114
115	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
116
117	mutex_unlock(&vfio.iommu_drivers_lock);
118
119	return 0;
120}
121EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
122
123void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
124{
125	struct vfio_iommu_driver *driver;
126
127	mutex_lock(&vfio.iommu_drivers_lock);
128	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
129		if (driver->ops == ops) {
130			list_del(&driver->vfio_next);
131			mutex_unlock(&vfio.iommu_drivers_lock);
132			kfree(driver);
133			return;
134		}
135	}
136	mutex_unlock(&vfio.iommu_drivers_lock);
137}
138EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
139
/*
 * Container objects - a container is created when /dev/vfio/vfio is
 * opened, but its lifecycle extends until the last user is done, so
 * it is freed via kref.  Must support container/group/device being
 * closed in any order.
 */
146static void vfio_container_release(struct kref *kref)
147{
148	struct vfio_container *container;
149	container = container_of(kref, struct vfio_container, kref);
150
151	kfree(container);
152}
153
154static void vfio_container_get(struct vfio_container *container)
155{
156	kref_get(&container->kref);
157}
158
159static void vfio_container_put(struct vfio_container *container)
160{
161	kref_put(&container->kref, vfio_container_release);
162}
163
164void vfio_device_container_register(struct vfio_device *device)
165{
166	struct vfio_iommu_driver *iommu_driver =
167		device->group->container->iommu_driver;
168
169	if (iommu_driver && iommu_driver->ops->register_device)
170		iommu_driver->ops->register_device(
171			device->group->container->iommu_data, device);
172}
173
174void vfio_device_container_unregister(struct vfio_device *device)
175{
176	struct vfio_iommu_driver *iommu_driver =
177		device->group->container->iommu_driver;
178
179	if (iommu_driver && iommu_driver->ops->unregister_device)
180		iommu_driver->ops->unregister_device(
181			device->group->container->iommu_data, device);
182}
183
184static long
185vfio_container_ioctl_check_extension(struct vfio_container *container,
186				     unsigned long arg)
187{
188	struct vfio_iommu_driver *driver;
189	long ret = 0;
190
191	down_read(&container->group_lock);
192
193	driver = container->iommu_driver;
194
195	switch (arg) {
196		/* No base extensions yet */
197	default:
198		/*
199		 * If no driver is set, poll all registered drivers for
200		 * extensions and return the first positive result.  If
201		 * a driver is already set, further queries will be passed
202		 * only to that driver.
203		 */
204		if (!driver) {
205			mutex_lock(&vfio.iommu_drivers_lock);
206			list_for_each_entry(driver, &vfio.iommu_drivers_list,
207					    vfio_next) {
208
209				if (!list_empty(&container->group_list) &&
210				    !vfio_iommu_driver_allowed(container,
211							       driver))
212					continue;
213				if (!try_module_get(driver->ops->owner))
214					continue;
215
216				ret = driver->ops->ioctl(NULL,
217							 VFIO_CHECK_EXTENSION,
218							 arg);
219				module_put(driver->ops->owner);
220				if (ret > 0)
221					break;
222			}
223			mutex_unlock(&vfio.iommu_drivers_lock);
224		} else
225			ret = driver->ops->ioctl(container->iommu_data,
226						 VFIO_CHECK_EXTENSION, arg);
227	}
228
229	up_read(&container->group_lock);
230
231	return ret;
232}
233
234/* hold write lock on container->group_lock */
235static int __vfio_container_attach_groups(struct vfio_container *container,
236					  struct vfio_iommu_driver *driver,
237					  void *data)
238{
239	struct vfio_group *group;
240	int ret = -ENODEV;
241
242	list_for_each_entry(group, &container->group_list, container_next) {
243		ret = driver->ops->attach_group(data, group->iommu_group,
244						group->type);
245		if (ret)
246			goto unwind;
247	}
248
249	return ret;
250
251unwind:
252	list_for_each_entry_continue_reverse(group, &container->group_list,
253					     container_next) {
254		driver->ops->detach_group(data, group->iommu_group);
255	}
256
257	return ret;
258}
259
260static long vfio_ioctl_set_iommu(struct vfio_container *container,
261				 unsigned long arg)
262{
263	struct vfio_iommu_driver *driver;
264	long ret = -ENODEV;
265
266	down_write(&container->group_lock);
267
268	/*
269	 * The container is designed to be an unprivileged interface while
270	 * the group can be assigned to specific users.  Therefore, only by
271	 * adding a group to a container does the user get the privilege of
272	 * enabling the iommu, which may allocate finite resources.  There
273	 * is no unset_iommu, but by removing all the groups from a container,
274	 * the container is deprivileged and returns to an unset state.
275	 */
276	if (list_empty(&container->group_list) || container->iommu_driver) {
277		up_write(&container->group_lock);
278		return -EINVAL;
279	}
280
281	mutex_lock(&vfio.iommu_drivers_lock);
282	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
283		void *data;
284
285		if (!vfio_iommu_driver_allowed(container, driver))
286			continue;
287		if (!try_module_get(driver->ops->owner))
288			continue;
289
290		/*
291		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
292		 * so test which iommu driver reported support for this
293		 * extension and call open on them.  We also pass them the
294		 * magic, allowing a single driver to support multiple
295		 * interfaces if they'd like.
296		 */
297		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
298			module_put(driver->ops->owner);
299			continue;
300		}
301
302		data = driver->ops->open(arg);
303		if (IS_ERR(data)) {
304			ret = PTR_ERR(data);
305			module_put(driver->ops->owner);
306			continue;
307		}
308
309		ret = __vfio_container_attach_groups(container, driver, data);
310		if (ret) {
311			driver->ops->release(data);
312			module_put(driver->ops->owner);
313			continue;
314		}
315
316		container->iommu_driver = driver;
317		container->iommu_data = data;
318		break;
319	}
320
321	mutex_unlock(&vfio.iommu_drivers_lock);
322	up_write(&container->group_lock);
323
324	return ret;
325}
326
327static long vfio_fops_unl_ioctl(struct file *filep,
328				unsigned int cmd, unsigned long arg)
329{
330	struct vfio_container *container = filep->private_data;
331	struct vfio_iommu_driver *driver;
332	void *data;
333	long ret = -EINVAL;
334
335	if (!container)
336		return ret;
337
338	switch (cmd) {
339	case VFIO_GET_API_VERSION:
340		ret = VFIO_API_VERSION;
341		break;
342	case VFIO_CHECK_EXTENSION:
343		ret = vfio_container_ioctl_check_extension(container, arg);
344		break;
345	case VFIO_SET_IOMMU:
346		ret = vfio_ioctl_set_iommu(container, arg);
347		break;
348	default:
349		driver = container->iommu_driver;
350		data = container->iommu_data;
351
352		if (driver) /* passthrough all unrecognized ioctls */
353			ret = driver->ops->ioctl(data, cmd, arg);
354	}
355
356	return ret;
357}
358
359static int vfio_fops_open(struct inode *inode, struct file *filep)
360{
361	struct vfio_container *container;
362
363	container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
364	if (!container)
365		return -ENOMEM;
366
367	INIT_LIST_HEAD(&container->group_list);
368	init_rwsem(&container->group_lock);
369	kref_init(&container->kref);
370
371	filep->private_data = container;
372
373	return 0;
374}
375
376static int vfio_fops_release(struct inode *inode, struct file *filep)
377{
378	struct vfio_container *container = filep->private_data;
379
380	filep->private_data = NULL;
381
382	vfio_container_put(container);
383
384	return 0;
385}
386
387static const struct file_operations vfio_fops = {
388	.owner		= THIS_MODULE,
389	.open		= vfio_fops_open,
390	.release	= vfio_fops_release,
391	.unlocked_ioctl	= vfio_fops_unl_ioctl,
392	.compat_ioctl	= compat_ptr_ioctl,
393};
394
395struct vfio_container *vfio_container_from_file(struct file *file)
396{
397	struct vfio_container *container;
398
399	/* Sanity check, is this really our fd? */
400	if (file->f_op != &vfio_fops)
401		return NULL;
402
403	container = file->private_data;
404	WARN_ON(!container); /* fget ensures we don't race vfio_release */
405	return container;
406}
407
408static struct miscdevice vfio_dev = {
409	.minor = VFIO_MINOR,
410	.name = "vfio",
411	.fops = &vfio_fops,
412	.nodename = "vfio/vfio",
413	.mode = S_IRUGO | S_IWUGO,
414};
415
416int vfio_container_attach_group(struct vfio_container *container,
417				struct vfio_group *group)
418{
419	struct vfio_iommu_driver *driver;
420	int ret = 0;
421
422	lockdep_assert_held(&group->group_lock);
423
424	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
425		return -EPERM;
426
427	down_write(&container->group_lock);
428
429	/* Real groups and fake groups cannot mix */
430	if (!list_empty(&container->group_list) &&
431	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
432		ret = -EPERM;
433		goto out_unlock_container;
434	}
435
436	if (group->type == VFIO_IOMMU) {
437		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
438		if (ret)
439			goto out_unlock_container;
440	}
441
442	driver = container->iommu_driver;
443	if (driver) {
444		ret = driver->ops->attach_group(container->iommu_data,
445						group->iommu_group,
446						group->type);
447		if (ret) {
448			if (group->type == VFIO_IOMMU)
449				iommu_group_release_dma_owner(
450					group->iommu_group);
451			goto out_unlock_container;
452		}
453	}
454
455	group->container = container;
456	group->container_users = 1;
457	container->noiommu = (group->type == VFIO_NO_IOMMU);
458	list_add(&group->container_next, &container->group_list);
459
460	/* Get a reference on the container and mark a user within the group */
461	vfio_container_get(container);
462
463out_unlock_container:
464	up_write(&container->group_lock);
465	return ret;
466}
467
468void vfio_group_detach_container(struct vfio_group *group)
469{
470	struct vfio_container *container = group->container;
471	struct vfio_iommu_driver *driver;
472
473	lockdep_assert_held(&group->group_lock);
474	WARN_ON(group->container_users != 1);
475
476	down_write(&container->group_lock);
477
478	driver = container->iommu_driver;
479	if (driver)
480		driver->ops->detach_group(container->iommu_data,
481					  group->iommu_group);
482
483	if (group->type == VFIO_IOMMU)
484		iommu_group_release_dma_owner(group->iommu_group);
485
486	group->container = NULL;
487	group->container_users = 0;
488	list_del(&group->container_next);
489
490	/* Detaching the last group deprivileges a container, remove iommu */
491	if (driver && list_empty(&container->group_list)) {
492		driver->ops->release(container->iommu_data);
493		module_put(driver->ops->owner);
494		container->iommu_driver = NULL;
495		container->iommu_data = NULL;
496	}
497
498	up_write(&container->group_lock);
499
500	vfio_container_put(container);
501}
502
503int vfio_group_use_container(struct vfio_group *group)
504{
505	lockdep_assert_held(&group->group_lock);
506
507	/*
508	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
509	 * VFIO_SET_IOMMU hasn't been done yet.
510	 */
511	if (!group->container->iommu_driver)
512		return -EINVAL;
513
514	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
515		return -EPERM;
516
517	get_file(group->opened_file);
518	group->container_users++;
519	return 0;
520}
521
522void vfio_group_unuse_container(struct vfio_group *group)
523{
524	lockdep_assert_held(&group->group_lock);
525
526	WARN_ON(group->container_users <= 1);
527	group->container_users--;
528	fput(group->opened_file);
529}
530
531int vfio_device_container_pin_pages(struct vfio_device *device,
532				    dma_addr_t iova, int npage,
533				    int prot, struct page **pages)
534{
535	struct vfio_container *container = device->group->container;
536	struct iommu_group *iommu_group = device->group->iommu_group;
537	struct vfio_iommu_driver *driver = container->iommu_driver;
538
539	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
540		return -E2BIG;
541
542	if (unlikely(!driver || !driver->ops->pin_pages))
543		return -ENOTTY;
544	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
545				      npage, prot, pages);
546}
547
548void vfio_device_container_unpin_pages(struct vfio_device *device,
549				       dma_addr_t iova, int npage)
550{
551	struct vfio_container *container = device->group->container;
552
553	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
554		return;
555
556	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
557						  npage);
558}
559
560int vfio_device_container_dma_rw(struct vfio_device *device,
561				 dma_addr_t iova, void *data,
562				 size_t len, bool write)
563{
564	struct vfio_container *container = device->group->container;
565	struct vfio_iommu_driver *driver = container->iommu_driver;
566
567	if (unlikely(!driver || !driver->ops->dma_rw))
568		return -ENOTTY;
569	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
570				   write);
571}
572
573int __init vfio_container_init(void)
574{
575	int ret;
576
577	mutex_init(&vfio.iommu_drivers_lock);
578	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
579
580	ret = misc_register(&vfio_dev);
581	if (ret) {
582		pr_err("vfio: misc device register failed\n");
583		return ret;
584	}
585
586	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
587		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
588		if (ret)
589			goto err_misc;
590	}
591	return 0;
592
593err_misc:
594	misc_deregister(&vfio_dev);
595	return ret;
596}
597
598void vfio_container_cleanup(void)
599{
600	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
601		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
602	misc_deregister(&vfio_dev);
603	mutex_destroy(&vfio.iommu_drivers_lock);
604}
605
/*
 * Module aliases for the container device: one keyed on the VFIO_MINOR misc
 * minor, one on the "vfio/vfio" node name used by vfio_dev.
 * NOTE(review): presumably these allow on-demand module loading when the
 * node is opened - confirm.
 */
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
608