// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of the data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and the xarray.
 */
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/iommu.h>
#include <linux/sched/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <uapi/linux/iommufd.h>

#include "io_pagetable.h"
#include "double_span.h"

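/*
 * A temporary element used while assembling a mapping. Each entry describes a
 * slice of an iopt_pages (start_byte/length) and carries the iopt_area that
 * will cover that slice once the map succeeds.
 */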
struct iopt_pages_list {
	struct iopt_pages *pages;
	struct iopt_area *area;
	struct list_head next;
	unsigned long start_byte;
	unsigned long length;
};

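/*
 * Begin iterating the areas covering [iova, last_iova]. Returns NULL if the
 * first area is missing or not yet fully initialized (NULL pages).
 */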
struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

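/*
 * Advance to the next area. The iteration stops (iter->area is set to NULL)
 * if the next area does not exist, is not contiguous with the previous one,
 * or is not fully initialized.
 */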
struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

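/*
 * Check whether an aligned allocation of the given length fits in this span.
 * On success the span start is moved up to the first suitable IOVA, which
 * preserves the page_offset of the user VA.
 */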
static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_used || span->last_hole - span->start_hole < length - 1)
		return false;

	span->start_hole = ALIGN(span->start_hole, iova_alignment) |
			   page_offset;
	if (span->start_hole > span->last_hole ||
	    span->last_hole - span->start_hole < length - 1)
		return false;
	return true;
}

static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_hole || span->last_used - span->start_used < length - 1)
		return false;

	span->start_used = ALIGN(span->start_used, iova_alignment) |
			   page_offset;
	if (span->start_used > span->last_used ||
	    span->last_used - span->start_used < length - 1)
		return false;
	return true;
}

/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long uptr, unsigned long length)
{
	unsigned long page_offset = uptr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep the alignment present in uptr when building the IOVA; this
	 * increases the chance we can map a THP.
	 */
	if (!uptr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(uptr));

	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}

static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
			   unsigned long length)
{
	unsigned long last;

	lockdep_assert_held(&iopt->iova_rwsem);

	if ((iova & (iopt->iova_alignment - 1)))
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &last))
		return -EOVERFLOW;

	/* No reserved IOVA intersects the range */
	if (iopt_reserved_iter_first(iopt, iova, last))
		return -EINVAL;

	/* Check that there is not already a mapping in the range */
	if (iopt_area_iter_first(iopt, iova, last))
		return -EEXIST;
	return 0;
}

/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}

static struct iopt_area *iopt_area_alloc(void)
{
	struct iopt_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!area)
		return NULL;
	RB_CLEAR_NODE(&area->node.rb);
	RB_CLEAR_NODE(&area->pages_node.rb);
	return area;
}

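/*
 * Allocate an iopt_area for every entry on the pages list and insert it into
 * the area_itree so the IOVA range is reserved. The areas are created with
 * NULL pages; the caller completes them once the domains are filled.
 */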
static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long iova;
	int rc = 0;

	list_for_each_entry(elm, pages_list, next) {
		elm->area = iopt_area_alloc();
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		rc = iopt_alloc_iova(
			iopt, dst_iova,
			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
		if (rc)
			goto out_unlock;
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}

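/*
 * Free an area that never had its pages installed. If it was inserted into
 * the area_itree it is removed from there first.
 */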
static void iopt_abort_area(struct iopt_area *area)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(area->pages);
	if (area->iopt) {
		down_write(&area->iopt->iova_rwsem);
		interval_tree_remove(&area->node, &area->iopt->area_itree);
		up_write(&area->iopt->iova_rwsem);
	}
	kfree(area);
}

void iopt_free_pages_list(struct list_head *pages_list)
{
	struct iopt_pages_list *elm;

	while ((elm = list_first_entry_or_null(pages_list,
					       struct iopt_pages_list, next))) {
		if (elm->area)
			iopt_abort_area(elm->area);
		if (elm->pages)
			iopt_put_pages(elm->pages);
		list_del(&elm->next);
		kfree(elm);
	}
}

static int iopt_fill_domains_pages(struct list_head *pages_list)
{
	struct iopt_pages_list *undo_elm;
	struct iopt_pages_list *elm;
	int rc;

	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_area_fill_domains(elm->area, elm->pages);
		if (rc)
			goto err_undo;
	}
	return 0;

err_undo:
	list_for_each_entry(undo_elm, pages_list, next) {
		if (undo_elm == elm)
			break;
		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
	}
	return rc;
}

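/*
 * Establish a mapping from a list of pages: allocate or validate the IOVA,
 * create the areas, fill every attached domain, and finally publish
 * area->pages so the areas become live.
 */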
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}

/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains the
 *        chosen iova on output. Otherwise it is the iova to map to on input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only setup a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(elm.pages))
		return PTR_ERR(elm.pages);
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	elm.start_byte = uptr - elm.pages->uptr;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}

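/* Context passed through iova_bitmap_for_each() to the dirty read callback */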
struct iova_bitmap_fn_arg {
	unsigned long flags;
	struct io_pagetable *iopt;
	struct iommu_domain *domain;
	struct iommu_dirty_bitmap *dirty;
};

static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iopt_area *area;
	struct iopt_area_contig_iter iter;
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	struct iommu_dirty_bitmap *dirty = arg->dirty;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long last_iova = iova + length - 1;
	unsigned long flags = arg->flags;
	int ret;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
						last - iter.cur_iova + 1, flags,
						dirty);
		if (ret)
			return ret;
	}

	if (!iopt_area_contig_done(&iter))
		return -EINVAL;
	return 0;
}

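/*
 * Read (and optionally clear) the dirty bits for every area intersecting the
 * user supplied bitmap range. The IOTLB is only synced when the dirty bits
 * are actually being cleared.
 */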
static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
			   struct io_pagetable *iopt, unsigned long flags,
			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iova_bitmap_fn_arg arg;
	struct iova_bitmap *iter;
	int ret = 0;

	if (!ops || !ops->read_and_clear_dirty)
		return -EOPNOTSUPP;

	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
				 bitmap->page_size,
				 u64_to_user_ptr(bitmap->data));
	if (IS_ERR(iter))
		return -ENOMEM;

	iommu_dirty_bitmap_init(&dirty, iter, &gather);

	arg.flags = flags;
	arg.iopt = iopt;
	arg.domain = domain;
	arg.dirty = &dirty;
	iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

	if (!(flags & IOMMU_DIRTY_NO_CLEAR))
		iommu_iotlb_sync(domain, &gather);

	iova_bitmap_free(iter);

	return ret;
}

int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	size_t iommu_pgsize = iopt->iova_alignment;
	u64 last_iova;

	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
		return -EOVERFLOW;

	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
		return -EOVERFLOW;

	if ((bitmap->iova & (iommu_pgsize - 1)) ||
	    ((last_iova + 1) & (iommu_pgsize - 1)))
		return -EINVAL;

	if (!bitmap->page_size)
		return -EINVAL;

	if ((bitmap->iova & (bitmap->page_size - 1)) ||
	    ((last_iova + 1) & (bitmap->page_size - 1)))
		return -EINVAL;

	return 0;
}

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int ret;

	ret = iommufd_check_iova_range(iopt, bitmap);
	if (ret)
		return ret;

	down_read(&iopt->iova_rwsem);
	ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
	up_read(&iopt->iova_rwsem);

	return ret;
}

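/*
 * Clear the dirty bits of every mapped area so that dirty tracking starts
 * from a clean snapshot. The bitmap is NULL, so the read results are
 * discarded.
 */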
static int iopt_clear_dirty_data(struct io_pagetable *iopt,
				 struct iommu_domain *domain)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iopt_area *area;
	int ret = 0;

	lockdep_assert_held_read(&iopt->iova_rwsem);

	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		if (!area->pages)
			continue;

		ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
						iopt_area_length(area), 0,
						&dirty);
		if (ret)
			break;
	}

	iommu_iotlb_sync(domain, &gather);
	return ret;
}

int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	int ret = 0;

	if (!ops)
		return -EOPNOTSUPP;

	down_read(&iopt->iova_rwsem);

	/* Clear dirty bits from PTEs to ensure a clean snapshot */
	if (enable) {
		ret = iopt_clear_dirty_data(iopt, domain);
		if (ret)
			goto out_unlock;
	}

	ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
	up_read(&iopt->iova_rwsem);
	return ret;
}

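/*
 * Build a pages_list describing [iova, iova + length - 1]. Each element takes
 * a reference on the underlying iopt_pages. Fails if the range is not fully
 * covered by areas.
 */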
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}

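/*
 * Unmap every area within [start, last]; each area must be fully contained in
 * the range. If an area is in use by an in-kernel access the locks are
 * dropped, the access is notified, and the walk restarts until the access
 * stops using the pages.
 */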
static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmaps of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100))
				return -EDEADLOCK;
			goto again;
		}

		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}

/**
 * iopt_unmap_iova() - Remove a range of iova
 * @iopt: io_pagetable to act on
 * @iova: Starting iova to unmap
 * @length: Number of bytes to unmap
 * @unmapped: Return number of bytes unmapped
 *
 * The requested range must be a superset of existing ranges.
 * Splitting/truncating IOVA mappings is not allowed.
 */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped)
{
	unsigned long iova_last;

	if (!length)
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &iova_last))
		return -EOVERFLOW;

	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}

int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
	int rc;

	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
	/* If the IOVAs are empty then unmap all succeeds */
	if (rc == -ENOENT)
		return 0;
	return rc;
}

/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}

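/*
 * Reserve [start, last] on behalf of @owner so that it cannot be used by new
 * areas or allowed ranges. Fails if the range already overlaps an area or an
 * allowed range. Undone by iopt_remove_reserved_iova().
 */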
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}

static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}

void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}

void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * io_pagetables start as SW tables that can use the entire size_t IOVA
	 * space due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}

void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from
 * the domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still, rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}

/* All existing areas must conform to an increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
		struct iommufd_access *access;
		unsigned long index;

		xa_for_each(&iopt->access_list, index, access)
			if (WARN_ON(access->iova_alignment >
				    new_iova_alignment))
				return -EADDRINUSE;
	}
	return 0;
}

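/*
 * Attach an iommu_domain to the io_pagetable: reserve the IOVA outside the
 * domain's aperture, map every existing area into it, and store it in the
 * domains xarray at the next free index.
 */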
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * An iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

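/*
 * Recompute the required IOVA alignment from all attached domains and
 * registered accesses (and PAGE_SIZE when large pages are disabled).
 * Increasing the alignment is only allowed if no existing area violates it.
 */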
static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
	unsigned long new_iova_alignment;
	struct iommufd_access *access;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held_write(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* See batch_iommu_map_small() */
	if (iopt->disable_large_pages)
		new_iova_alignment = PAGE_SIZE;
	else
		new_iova_alignment = 1;

	xa_for_each(&iopt->domains, index, domain)
		new_iova_alignment = max_t(unsigned long,
					   1UL << __ffs(domain->pgsize_bitmap),
					   new_iova_alignment);
	xa_for_each(&iopt->access_list, index, access)
		new_iova_alignment = max_t(unsigned long,
					   access->iova_alignment,
					   new_iova_alignment);

	if (new_iova_alignment > iopt->iova_alignment) {
		int rc;

		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			return rc;
	}
	iopt->iova_alignment = new_iova_alignment;
	return 0;
}

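/*
 * Detach an iommu_domain: remove it from the domains xarray (compressing the
 * array), unmap every area from it, and drop its reserved aperture ranges.
 */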
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last iova of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = iopt_area_alloc();
	if (!lhs)
		return -ENOMEM;

	rhs = iopt_area_alloc();
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists; we don't track
	 * enough information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	/*
	 * If the original area has filled a domain, domains_itree has to be
	 * updated.
	 */
	if (area->storage_domain) {
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
	}

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the underlying iopt_pages
	 * has not been changed.
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}

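/*
 * Split any area containing one of the given IOVAs so that the IOVA becomes
 * an area boundary. This supports the VFIO compatibility behaviour of
 * unmapping a sub-range of an existing mapping.
 */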
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas)
{
	int rc = 0;
	int i;

	down_write(&iopt->iova_rwsem);
	for (i = 0; i < num_iovas; i++) {
		struct iopt_area *area;

		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
		if (!area)
			continue;
		rc = iopt_area_split(area, iovas[i]);
		if (rc)
			break;
	}
	up_write(&iopt->iova_rwsem);
	return rc;
}

void iopt_enable_large_pages(struct io_pagetable *iopt)
{
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WRITE_ONCE(iopt->disable_large_pages, false);
	rc = iopt_calculate_iova_alignment(iopt);
	WARN_ON(rc);
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

int iopt_disable_large_pages(struct io_pagetable *iopt)
{
	int rc = 0;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	if (iopt->disable_large_pages)
		goto out_unlock;

	/* Won't do it if domains already have pages mapped in them */
	if (!xa_empty(&iopt->domains) &&
	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	WRITE_ONCE(iopt->disable_large_pages, true);
	rc = iopt_calculate_iova_alignment(iopt);
	if (rc)
		WRITE_ONCE(iopt->disable_large_pages, false);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

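/*
 * Register an in-kernel access with the io_pagetable and recompute the IOVA
 * alignment to account for the access's requirement.
 */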
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
	u32 new_id;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
		      GFP_KERNEL_ACCOUNT);

	if (rc)
		goto out_unlock;

	rc = iopt_calculate_iova_alignment(iopt);
	if (rc) {
		xa_erase(&iopt->access_list, new_id);
		goto out_unlock;
	}
	access->iopt_access_list_id = new_id;

out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id)
{
	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
	WARN_ON(iopt_calculate_iova_alignment(iopt));
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/* Narrow the valid_iova_itree to include reserved ranges from a device. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}