/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
4#ifndef __IOMMUFD_PRIVATE_H
5#define __IOMMUFD_PRIVATE_H
6
7#include <linux/rwsem.h>
8#include <linux/xarray.h>
9#include <linux/refcount.h>
10#include <linux/uaccess.h>
11#include <linux/iommu.h>
12#include <linux/iova_bitmap.h>
13#include <uapi/linux/iommufd.h>
14
15struct iommu_domain;
16struct iommu_group;
17struct iommu_option;
18struct iommufd_device;
19
20struct iommufd_ctx {
21	struct file *file;
22	struct xarray objects;
23	struct xarray groups;
24	wait_queue_head_t destroy_wait;
25
26	u8 account_mode;
27	/* Compatibility with VFIO no iommu */
28	u8 no_iommu_mode;
29	struct iommufd_ioas *vfio_ioas;
30};
31
32/*
33 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
34 * domains and permits sharing of PFNs between io_pagetable instances. This
35 * supports both a design where IOAS's are 1:1 with a domain (eg because the
36 * domain is HW customized), or where the IOAS is 1:N with multiple generic
37 * domains.  The io_pagetable holds an interval tree of iopt_areas which point
38 * to shared iopt_pages which hold the pfns mapped to the page table.
39 *
40 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
41 */
42struct io_pagetable {
43	struct rw_semaphore domains_rwsem;
44	struct xarray domains;
45	struct xarray access_list;
46	unsigned int next_domain_id;
47
48	struct rw_semaphore iova_rwsem;
49	struct rb_root_cached area_itree;
50	/* IOVA that cannot become reserved, struct iopt_allowed */
51	struct rb_root_cached allowed_itree;
52	/* IOVA that cannot be allocated, struct iopt_reserved */
53	struct rb_root_cached reserved_itree;
54	u8 disable_large_pages;
55	unsigned long iova_alignment;
56};
57
58void iopt_init_table(struct io_pagetable *iopt);
59void iopt_destroy_table(struct io_pagetable *iopt);
60int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
61		   unsigned long length, struct list_head *pages_list);
62void iopt_free_pages_list(struct list_head *pages_list);
63enum {
64	IOPT_ALLOC_IOVA = 1 << 0,
65};
66int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
67			unsigned long *iova, void __user *uptr,
68			unsigned long length, int iommu_prot,
69			unsigned int flags);
70int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
71		   unsigned long length, unsigned long *dst_iova,
72		   int iommu_prot, unsigned int flags);
73int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
74		    unsigned long length, unsigned long *unmapped);
75int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
76
/*
 * Dirty tracking on one @domain of an io_pagetable.
 * NOTE(review): semantics of @flags and of the @bitmap contents are defined
 * by the uAPI struct iommu_hwpt_get_dirty_bitmap, not visible here.
 */
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);
83
84void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
85				 unsigned long length);
86int iopt_table_add_domain(struct io_pagetable *iopt,
87			  struct iommu_domain *domain);
88void iopt_table_remove_domain(struct io_pagetable *iopt,
89			      struct iommu_domain *domain);
90int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
91					struct device *dev,
92					phys_addr_t *sw_msi_start);
93int iopt_set_allow_iova(struct io_pagetable *iopt,
94			struct rb_root_cached *allowed_iova);
95int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
96		      unsigned long last, void *owner);
97void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
98int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
99		  size_t num_iovas);
100void iopt_enable_large_pages(struct io_pagetable *iopt);
101int iopt_disable_large_pages(struct io_pagetable *iopt);
102
103struct iommufd_ucmd {
104	struct iommufd_ctx *ictx;
105	void __user *ubuffer;
106	u32 user_size;
107	void *cmd;
108};
109
/*
 * Entry point taking a raw ioctl @cmd number and @arg rather than a
 * struct iommufd_ucmd. NOTE(review): presumably the VFIO compatibility
 * ioctl path - confirm.
 */
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);
112
113/* Copy the response in ucmd->cmd back to userspace. */
114static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
115				       size_t cmd_len)
116{
117	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
118			 min_t(size_t, ucmd->user_size, cmd_len)))
119		return -EFAULT;
120	return 0;
121}
122
/*
 * Type tag stored in iommufd_object::type and passed to
 * iommufd_get_object() / _iommufd_object_alloc().
 */
enum iommufd_object_type {
	IOMMUFD_OBJ_NONE,
	/* Alias of IOMMUFD_OBJ_NONE (same value) */
	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
	/* struct iommufd_device */
	IOMMUFD_OBJ_DEVICE,
	/* struct iommufd_hwpt_paging */
	IOMMUFD_OBJ_HWPT_PAGING,
	/* struct iommufd_hwpt_nested */
	IOMMUFD_OBJ_HWPT_NESTED,
	/* struct iommufd_ioas */
	IOMMUFD_OBJ_IOAS,
	/* struct iommufd_access */
	IOMMUFD_OBJ_ACCESS,
#ifdef CONFIG_IOMMUFD_TEST
	/* Only exists when the selftest is built in */
	IOMMUFD_OBJ_SELFTEST,
#endif
	/* Number of types; keep last */
	IOMMUFD_OBJ_MAX,
};
136
137/* Base struct for all objects with a userspace ID handle. */
138struct iommufd_object {
139	refcount_t shortterm_users;
140	refcount_t users;
141	enum iommufd_object_type type;
142	unsigned int id;
143};
144
145static inline bool iommufd_lock_obj(struct iommufd_object *obj)
146{
147	if (!refcount_inc_not_zero(&obj->users))
148		return false;
149	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
150		/*
151		 * If the caller doesn't already have a ref on obj this must be
152		 * called under the xa_lock. Otherwise the caller is holding a
153		 * ref on users. Thus it cannot be one before this decrement.
154		 */
155		refcount_dec(&obj->users);
156		return false;
157	}
158	return true;
159}
160
161struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
162					  enum iommufd_object_type type);
163static inline void iommufd_put_object(struct iommufd_ctx *ictx,
164				      struct iommufd_object *obj)
165{
166	/*
167	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
168	 * a spurious !0 users with a 0 shortterm_users.
169	 */
170	refcount_dec(&obj->users);
171	if (refcount_dec_and_test(&obj->shortterm_users))
172		wake_up_interruptible_all(&ictx->destroy_wait);
173}
174
/*
 * End-of-creation helpers for an object.
 * NOTE(review): presumably iommufd_object_finalize() publishes a fully
 * constructed object while the abort variants undo an allocation that failed
 * part-way (the _and_destroy form also running the type's destroy handler) -
 * confirm against the implementation.
 */
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);
180
181enum {
182	REMOVE_WAIT_SHORTTERM = 1,
183};
184int iommufd_object_remove(struct iommufd_ctx *ictx,
185			  struct iommufd_object *to_destroy, u32 id,
186			  unsigned int flags);
187
188/*
189 * The caller holds a users refcount and wants to destroy the object. At this
190 * point the caller has no shortterm_users reference and at least the xarray
191 * will be holding one.
192 */
193static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
194					       struct iommufd_object *obj)
195{
196	int ret;
197
198	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
199
200	/*
201	 * If there is a bug and we couldn't destroy the object then we did put
202	 * back the caller's users refcount and will eventually try to free it
203	 * again during close.
204	 */
205	WARN_ON(ret);
206}
207
208/*
209 * The HWPT allocated by autodomains is used in possibly many devices and
210 * is automatically destroyed when its refcount reaches zero.
211 *
212 * If userspace uses the HWPT manually, even for a short term, then it will
213 * disrupt this refcounting and the auto-free in the kernel will not work.
214 * Userspace that tries to use the automatically allocated HWPT must be careful
215 * to ensure that it is consistently destroyed, eg by not racing accesses
216 * and by not attaching an automatic HWPT to a device manually.
217 */
218static inline void
219iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
220				   struct iommufd_object *obj)
221{
222	iommufd_object_remove(ictx, obj, obj->id, 0);
223}
224
/*
 * Allocate @size bytes for an object of @type in @ictx. Use through the
 * iommufd_object_alloc() macro rather than directly.
 */
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type);

/*
 * Allocate sizeof(*(ptr)) bytes and return a pointer of ptr's type.
 *
 * @obj names the struct iommufd_object member inside *(ptr). The
 * BUILD_BUG_ON_ZERO() breaks the build unless that member sits at offset 0,
 * which means container_of() leaves the returned pointer value unchanged.
 * NOTE(review): presumably this is what lets error-encoded pointers from
 * _iommufd_object_alloc() pass through intact - confirm.
 */
#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

/* Common case: the embedded struct iommufd_object member is called "obj". */
#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)
240
241/*
242 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
243 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
244 * mapping is copied into all of the associated domains and made available to
245 * in-kernel users.
246 *
247 * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable
248 * object. When we go to attach a device to an IOAS we need to get an
249 * iommu_domain and wrapping iommufd_hw_pagetable for it.
250 *
251 * An iommu_domain & iommfd_hw_pagetable will be automatically selected
252 * for a device based on the hwpt_list. If no suitable iommu_domain
253 * is found a new iommu_domain will be created.
254 */
255struct iommufd_ioas {
256	struct iommufd_object obj;
257	struct io_pagetable iopt;
258	struct mutex mutex;
259	struct list_head hwpt_list;
260};
261
262static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
263						    u32 id)
264{
265	return container_of(iommufd_get_object(ictx, id,
266					       IOMMUFD_OBJ_IOAS),
267			    struct iommufd_ioas, obj);
268}
269
/* In-kernel IOAS allocation and the IOMMUFD_OBJ_IOAS destroy handler. */
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
void iommufd_ioas_destroy(struct iommufd_object *obj);

/*
 * IOAS command handlers. Each receives the decoded command in @ucmd.
 * NOTE(review): presumably each returns 0 or a negative errno - confirm.
 */
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);

/*
 * NOTE(review): presumably validates the IOVA range described by @bitmap
 * against @iopt before a dirty-bitmap read - confirm.
 */
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
285
286/*
287 * A HW pagetable is called an iommu_domain inside the kernel. This user object
288 * allows directly creating and inspecting the domains. Domains that have kernel
289 * owned page tables will be associated with an iommufd_ioas that provides the
290 * IOVA to PFN map.
291 */
292struct iommufd_hw_pagetable {
293	struct iommufd_object obj;
294	struct iommu_domain *domain;
295};
296
297struct iommufd_hwpt_paging {
298	struct iommufd_hw_pagetable common;
299	struct iommufd_ioas *ioas;
300	bool auto_domain : 1;
301	bool enforce_cache_coherency : 1;
302	bool msi_cookie : 1;
303	bool nest_parent : 1;
304	/* Head at iommufd_ioas::hwpt_list */
305	struct list_head hwpt_item;
306};
307
308struct iommufd_hwpt_nested {
309	struct iommufd_hw_pagetable common;
310	struct iommufd_hwpt_paging *parent;
311};
312
313static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
314{
315	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
316}
317
318static inline struct iommufd_hwpt_paging *
319to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
320{
321	return container_of(hwpt, struct iommufd_hwpt_paging, common);
322}
323
324static inline struct iommufd_hwpt_paging *
325iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
326{
327	return container_of(iommufd_get_object(ucmd->ictx, id,
328					       IOMMUFD_OBJ_HWPT_PAGING),
329			    struct iommufd_hwpt_paging, common.obj);
330}
331
332static inline struct iommufd_hw_pagetable *
333iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
334{
335	return container_of(iommufd_get_object(ucmd->ictx, id,
336					       IOMMUFD_OBJ_HWPT_NESTED),
337			    struct iommufd_hw_pagetable, obj);
338}
339
340int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
341int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
342
343struct iommufd_hwpt_paging *
344iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
345			  struct iommufd_device *idev, u32 flags,
346			  bool immediate_attach,
347			  const struct iommu_user_data *user_data);
348int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
349				struct iommufd_device *idev);
350struct iommufd_hw_pagetable *
351iommufd_hw_pagetable_detach(struct iommufd_device *idev);
352void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
353void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
354void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
355void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
356int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
357int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
358
359static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
360					    struct iommufd_hw_pagetable *hwpt)
361{
362	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
363		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
364
365		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
366
367		if (hwpt_paging->auto_domain) {
368			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
369			return;
370		}
371	}
372	refcount_dec(&hwpt->obj.users);
373}
374
375struct iommufd_group {
376	struct kref ref;
377	struct mutex lock;
378	struct iommufd_ctx *ictx;
379	struct iommu_group *group;
380	struct iommufd_hw_pagetable *hwpt;
381	struct list_head device_list;
382	phys_addr_t sw_msi_start;
383};
384
385/*
386 * A iommufd_device object represents the binding relationship between a
387 * consuming driver and the iommufd. These objects are created/destroyed by
388 * external drivers, not by userspace.
389 */
390struct iommufd_device {
391	struct iommufd_object obj;
392	struct iommufd_ctx *ictx;
393	struct iommufd_group *igroup;
394	struct list_head group_item;
395	/* always the physical device */
396	struct device *dev;
397	bool enforce_cache_coherency;
398};
399
400static inline struct iommufd_device *
401iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
402{
403	return container_of(iommufd_get_object(ucmd->ictx, id,
404					       IOMMUFD_OBJ_DEVICE),
405			    struct iommufd_device, obj);
406}
407
/* Destroy handler for IOMMUFD_OBJ_DEVICE objects. */
void iommufd_device_destroy(struct iommufd_object *obj);
/* Command handler. NOTE(review): presumably reports HW info for a device. */
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
410
411struct iommufd_access {
412	struct iommufd_object obj;
413	struct iommufd_ctx *ictx;
414	struct iommufd_ioas *ioas;
415	struct iommufd_ioas *ioas_unpin;
416	struct mutex ioas_lock;
417	const struct iommufd_access_ops *ops;
418	void *data;
419	unsigned long iova_alignment;
420	u32 iopt_access_list_id;
421};
422
423int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
424void iopt_remove_access(struct io_pagetable *iopt,
425			struct iommufd_access *access,
426			u32 iopt_access_list_id);
427void iommufd_access_destroy_object(struct iommufd_object *obj);
428
429#ifdef CONFIG_IOMMUFD_TEST
430int iommufd_test(struct iommufd_ucmd *ucmd);
431void iommufd_selftest_destroy(struct iommufd_object *obj);
432extern size_t iommufd_test_memory_limit;
433void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
434				   unsigned int ioas_id, u64 *iova, u32 *flags);
435bool iommufd_should_fail(void);
436int __init iommufd_test_init(void);
437void iommufd_test_exit(void);
438bool iommufd_selftest_is_mock_dev(struct device *dev);
439#else
440static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
441						 unsigned int ioas_id,
442						 u64 *iova, u32 *flags)
443{
444}
445static inline bool iommufd_should_fail(void)
446{
447	return false;
448}
449static inline int __init iommufd_test_init(void)
450{
451	return 0;
452}
453static inline void iommufd_test_exit(void)
454{
455}
456static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
457{
458	return false;
459}
460#endif
461#endif
462