1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2022, Oracle and/or its affiliates.
4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
5 */
6#include <linux/iova_bitmap.h>
7#include <linux/mm.h>
8#include <linux/slab.h>
9#include <linux/highmem.h>
10
/* Number of bitmap bits held by one PAGE_SIZE page of bitmap data. */
#define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
12
/*
 * struct iova_bitmap_map - A bitmap representing an IOVA range
 *
 * Main data structure for tracking mapped user pages of bitmap data.
 *
 * For example, for something recording dirty IOVAs, it will be provided a
 * struct iova_bitmap structure, as a general structure for iterating the
 * total IOVA range. The struct iova_bitmap_map, though, represents the
 * subset of said IOVA space that is pinned by its parent structure (struct
 * iova_bitmap).
 *
 * The user does not need to know the exact location of the bits in the
 * bitmap. From the user's perspective the only API available is
 * iova_bitmap_set(), which records the IOVA *range* in the bitmap by
 * setting the corresponding bits.
 *
 * The bitmap is passed in as an array of u64 where each bit represents an
 * IOVA range of (1 << pgshift). Thus, with @iova taken relative to the base
 * IOVA of the bitmap, the formula for the bitmap data to be set is:
 *
 *   data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 */
struct iova_bitmap_map {
	/* base IOVA representing bit 0 of the first page */
	unsigned long iova;

	/* page size order (log2 of the page size) that each bit represents */
	unsigned long pgshift;

	/* byte offset, within the first pinned page, where bit 0 is located */
	unsigned long pgoff;

	/* number of pages pinned */
	unsigned long npages;

	/* pinned pages representing the bitmap data */
	struct page **pages;
};
50
/*
 * struct iova_bitmap - The IOVA bitmap object
 *
 * Main data structure for iterating over the bitmap data.
 *
 * Abstracts the pinning work and iterates in IOVA ranges.
 * It uses a windowing scheme and pins the bitmap in relatively
 * big ranges.
 *
 * The bitmap object uses one base page to store all the pinned pages
 * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
 * 512 struct page pointers which, if the base page size is 4K, means that
 * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
 * also 4K then the range window to iterate is 64G.
 *
 * For example iterating on a total IOVA range of 4G..128G, it will walk
 * through this set of ranges:
 *
 *    4G  -  68G-1 (64G)
 *    68G - 128G-1 (60G)
 *
 * An example of the APIs on how to use/iterate over the IOVA bitmap:
 *
 *   bitmap = iova_bitmap_alloc(iova, length, page_size, data);
 *   if (IS_ERR(bitmap))
 *       return PTR_ERR(bitmap);
 *
 *   ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
 *
 *   iova_bitmap_free(bitmap);
 *
 * Each iteration of the @dirty_reporter_fn is called with a unique @iova
 * and @length argument, indicating the current range available through the
 * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
 * areas (@iova_length) within that provided range, as follows:
 *
 *   iova_bitmap_set(bitmap, iova, iova_length);
 *
 * The internals of the object use an index @mapped_base_index that indexes
 * which element of the @bitmap array is mapped, up to @mapped_total_index.
 * The index keeps being incremented until @mapped_total_index is reached,
 * while mapping up to a maximum of PAGE_SIZE / sizeof(struct page*) pages
 * at a time.
 *
 * The IOVA bitmap is usually located on what tracks DMA mapped ranges or
 * some form of IOVA range tracking that correlates to the user passed
 * bitmap.
 */
struct iova_bitmap {
	/* IOVA range representing the currently mapped bitmap data */
	struct iova_bitmap_map mapped;

	/* userspace address of the bitmap, walked in u8 units */
	u8 __user *bitmap;

	/* index into @bitmap of the first element that @mapped points to */
	unsigned long mapped_base_index;

	/* how many elements of @bitmap can we walk in total */
	unsigned long mapped_total_index;

	/* base IOVA of the whole bitmap */
	unsigned long iova;

	/* length of the IOVA range for the whole bitmap */
	size_t length;

	/*
	 * length of the IOVA range that iova_bitmap_set() could not record
	 * because it lies beyond the pinned pages; consumed when advancing
	 */
	unsigned long set_ahead_length;
};
120
121/*
122 * Converts a relative IOVA to a bitmap index.
123 * This function provides the index into the u64 array (bitmap::bitmap)
124 * for a given IOVA offset.
125 * Relative IOVA means relative to the bitmap::mapped base IOVA
126 * (stored in mapped::iova). All computations in this file are done using
127 * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
128 * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
129 */
130static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
131						 unsigned long iova)
132{
133	unsigned long pgsize = 1 << bitmap->mapped.pgshift;
134
135	return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
136}
137
138/*
139 * Converts a bitmap index to a *relative* IOVA.
140 */
141static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
142						 unsigned long index)
143{
144	unsigned long pgshift = bitmap->mapped.pgshift;
145
146	return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
147}
148
149/*
150 * Returns the base IOVA of the mapped range.
151 */
152static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
153{
154	unsigned long skip = bitmap->mapped_base_index;
155
156	return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
157}
158
159/*
160 * Pins the bitmap user pages for the current range window.
161 * This is internal to IOVA bitmap and called when advancing the
162 * index (@mapped_base_index) or allocating the bitmap.
163 */
164static int iova_bitmap_get(struct iova_bitmap *bitmap)
165{
166	struct iova_bitmap_map *mapped = &bitmap->mapped;
167	unsigned long npages;
168	u8 __user *addr;
169	long ret;
170
171	/*
172	 * @mapped_base_index is the index of the currently mapped u64 words
173	 * that we have access. Anything before @mapped_base_index is not
174	 * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
175	 * mapped but capped at a maximum number of pages.
176	 */
177	npages = DIV_ROUND_UP((bitmap->mapped_total_index -
178			       bitmap->mapped_base_index) *
179			       sizeof(*bitmap->bitmap), PAGE_SIZE);
180
181	/*
182	 * Bitmap address to be pinned is calculated via pointer arithmetic
183	 * with bitmap u64 word index.
184	 */
185	addr = bitmap->bitmap + bitmap->mapped_base_index;
186
187	/*
188	 * We always cap at max number of 'struct page' a base page can fit.
189	 * This is, for example, on x86 means 2M of bitmap data max.
190	 */
191	npages = min(npages + !!offset_in_page(addr),
192		     PAGE_SIZE / sizeof(struct page *));
193
194	ret = pin_user_pages_fast((unsigned long)addr, npages,
195				  FOLL_WRITE, mapped->pages);
196	if (ret <= 0)
197		return -EFAULT;
198
199	mapped->npages = (unsigned long)ret;
200	/* Base IOVA where @pages point to i.e. bit 0 of the first page */
201	mapped->iova = iova_bitmap_mapped_iova(bitmap);
202
203	/*
204	 * offset of the page where pinned pages bit 0 is located.
205	 * This handles the case where the bitmap is not PAGE_SIZE
206	 * aligned.
207	 */
208	mapped->pgoff = offset_in_page(addr);
209	return 0;
210}
211
212/*
213 * Unpins the bitmap user pages and clears @npages
214 * (un)pinning is abstracted from API user and it's done when advancing
215 * the index or freeing the bitmap.
216 */
217static void iova_bitmap_put(struct iova_bitmap *bitmap)
218{
219	struct iova_bitmap_map *mapped = &bitmap->mapped;
220
221	if (mapped->npages) {
222		unpin_user_pages(mapped->pages, mapped->npages);
223		mapped->npages = 0;
224	}
225}
226
227/**
228 * iova_bitmap_alloc() - Allocates an IOVA bitmap object
229 * @iova: Start address of the IOVA range
230 * @length: Length of the IOVA range
231 * @page_size: Page size of the IOVA bitmap. It defines what each bit
232 *             granularity represents
233 * @data: Userspace address of the bitmap
234 *
235 * Allocates an IOVA object and initializes all its fields including the
236 * first user pages of @data.
237 *
238 * Return: A pointer to a newly allocated struct iova_bitmap
239 * or ERR_PTR() on error.
240 */
241struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
242				      unsigned long page_size, u64 __user *data)
243{
244	struct iova_bitmap_map *mapped;
245	struct iova_bitmap *bitmap;
246	int rc;
247
248	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
249	if (!bitmap)
250		return ERR_PTR(-ENOMEM);
251
252	mapped = &bitmap->mapped;
253	mapped->pgshift = __ffs(page_size);
254	bitmap->bitmap = (u8 __user *)data;
255	bitmap->mapped_total_index =
256		iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
257	bitmap->iova = iova;
258	bitmap->length = length;
259	mapped->iova = iova;
260	mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
261	if (!mapped->pages) {
262		rc = -ENOMEM;
263		goto err;
264	}
265
266	rc = iova_bitmap_get(bitmap);
267	if (rc)
268		goto err;
269	return bitmap;
270
271err:
272	iova_bitmap_free(bitmap);
273	return ERR_PTR(rc);
274}
275EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, IOMMUFD);
276
277/**
278 * iova_bitmap_free() - Frees an IOVA bitmap object
279 * @bitmap: IOVA bitmap to free
280 *
281 * It unpins and releases pages array memory and clears any leftover
282 * state.
283 */
284void iova_bitmap_free(struct iova_bitmap *bitmap)
285{
286	struct iova_bitmap_map *mapped = &bitmap->mapped;
287
288	iova_bitmap_put(bitmap);
289
290	if (mapped->pages) {
291		free_page((unsigned long)mapped->pages);
292		mapped->pages = NULL;
293	}
294
295	kfree(bitmap);
296}
297EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, IOMMUFD);
298
299/*
300 * Returns the remaining bitmap indexes from mapped_total_index to process for
301 * the currently pinned bitmap pages.
302 */
303static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
304{
305	unsigned long remaining, bytes;
306
307	bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff;
308
309	remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
310	remaining = min_t(unsigned long, remaining,
311			  DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap)));
312
313	return remaining;
314}
315
316/*
317 * Returns the length of the mapped IOVA range.
318 */
319static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
320{
321	unsigned long max_iova = bitmap->iova + bitmap->length - 1;
322	unsigned long iova = iova_bitmap_mapped_iova(bitmap);
323	unsigned long remaining;
324
325	/*
326	 * iova_bitmap_mapped_remaining() returns a number of indexes which
327	 * when converted to IOVA gives us a max length that the bitmap
328	 * pinned data can cover. Afterwards, that is capped to
329	 * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
330	 */
331	remaining = iova_bitmap_index_to_offset(bitmap,
332			iova_bitmap_mapped_remaining(bitmap));
333
334	if (iova + remaining - 1 > max_iova)
335		remaining -= ((iova + remaining - 1) - max_iova);
336
337	return remaining;
338}
339
340/*
341 * Returns true if there's not more data to iterate.
342 */
343static bool iova_bitmap_done(struct iova_bitmap *bitmap)
344{
345	return bitmap->mapped_base_index >= bitmap->mapped_total_index;
346}
347
348static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap,
349				 size_t set_ahead_length)
350{
351	int ret = 0;
352
353	while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) {
354		unsigned long length = iova_bitmap_mapped_length(bitmap);
355		unsigned long iova = iova_bitmap_mapped_iova(bitmap);
356
357		ret = iova_bitmap_get(bitmap);
358		if (ret)
359			break;
360
361		length = min(length, set_ahead_length);
362		iova_bitmap_set(bitmap, iova, length);
363
364		set_ahead_length -= length;
365		bitmap->mapped_base_index +=
366			iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
367		iova_bitmap_put(bitmap);
368	}
369
370	bitmap->set_ahead_length = 0;
371	return ret;
372}
373
374/*
375 * Advances to the next range, releases the current pinned
376 * pages and pins the next set of bitmap pages.
377 * Returns 0 on success or otherwise errno.
378 */
379static int iova_bitmap_advance(struct iova_bitmap *bitmap)
380{
381	unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1;
382	unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
383
384	bitmap->mapped_base_index += count;
385
386	iova_bitmap_put(bitmap);
387	if (iova_bitmap_done(bitmap))
388		return 0;
389
390	/* Iterate, set and skip any bits requested for next iteration */
391	if (bitmap->set_ahead_length) {
392		int ret;
393
394		ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length);
395		if (ret)
396			return ret;
397	}
398
399	/* When advancing the index we pin the next set of bitmap pages */
400	return iova_bitmap_get(bitmap);
401}
402
403/**
404 * iova_bitmap_for_each() - Iterates over the bitmap
405 * @bitmap: IOVA bitmap to iterate
406 * @opaque: Additional argument to pass to the callback
407 * @fn: Function that gets called for each IOVA range
408 *
409 * Helper function to iterate over bitmap data representing a portion of IOVA
410 * space. It hides the complexity of iterating bitmaps and translating the
411 * mapped bitmap user pages into IOVA ranges to process.
412 *
413 * Return: 0 on success, and an error on failure either upon
414 * iteration or when the callback returns an error.
415 */
416int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
417			 iova_bitmap_fn_t fn)
418{
419	int ret = 0;
420
421	for (; !iova_bitmap_done(bitmap) && !ret;
422	     ret = iova_bitmap_advance(bitmap)) {
423		ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
424			 iova_bitmap_mapped_length(bitmap), opaque);
425		if (ret)
426			break;
427	}
428
429	return ret;
430}
431EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD);
432
433/**
434 * iova_bitmap_set() - Records an IOVA range in bitmap
435 * @bitmap: IOVA bitmap
436 * @iova: IOVA to start
437 * @length: IOVA range length
438 *
439 * Set the bits corresponding to the range [iova .. iova+length-1] in
440 * the user bitmap.
441 *
442 */
443void iova_bitmap_set(struct iova_bitmap *bitmap,
444		     unsigned long iova, size_t length)
445{
446	struct iova_bitmap_map *mapped = &bitmap->mapped;
447	unsigned long cur_bit = ((iova - mapped->iova) >>
448			mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
449	unsigned long last_bit = (((iova + length - 1) - mapped->iova) >>
450			mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
451	unsigned long last_page_idx = mapped->npages - 1;
452
453	do {
454		unsigned int page_idx = cur_bit / BITS_PER_PAGE;
455		unsigned int offset = cur_bit % BITS_PER_PAGE;
456		unsigned int nbits = min(BITS_PER_PAGE - offset,
457					 last_bit - cur_bit + 1);
458		void *kaddr;
459
460		if (unlikely(page_idx > last_page_idx))
461			break;
462
463		kaddr = kmap_local_page(mapped->pages[page_idx]);
464		bitmap_set(kaddr, offset, nbits);
465		kunmap_local(kaddr);
466		cur_bit += nbits;
467	} while (cur_bit <= last_bit);
468
469	if (unlikely(cur_bit <= last_bit)) {
470		bitmap->set_ahead_length =
471			((last_bit - cur_bit + 1) << bitmap->mapped.pgshift);
472	}
473}
474EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD);
475