// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

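/*
 * Add a file-backed mapping for [start, last] to the domain's IOTLB.
 * A reference to @file is stored in the per-mapping vdpa_map_file so
 * the backing file stays alive for as long as the mapping exists.
 */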
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *map_file;
	int ret;

	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
	if (!map_file)
		return -ENOMEM;

	map_file->file = get_file(file);
	map_file->offset = offset;

	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, map_file);
	if (ret) {
		fput(map_file->file);
		kfree(map_file);
		return ret;
	}
	return 0;
}

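/*
 * Remove all mappings overlapping [start, last] from the domain's IOTLB
 * and drop the file reference taken by vduse_iotlb_add_range().
 */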
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;

	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(domain->iotlb, map);
	}
}

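/*
 * Replace the domain's IOTLB contents with the mappings described by
 * @iotlb. On failure everything added so far is removed again, leaving
 * the domain with an empty IOTLB.
 */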
int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;
	int ret;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, start, last);

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		map_file = (struct vdpa_map_file *)map->opaque;
		ret = vduse_iotlb_add_range(domain, map->start, map->last,
					    map->addr, map->perm,
					    map_file->file,
					    map_file->offset);
		if (ret)
			goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	return 0;
err:
	vduse_iotlb_del_range(domain, start, last);
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;

	spin_lock(&domain->iotlb_lock);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		vduse_iotlb_del_range(domain, map->start, map->last);
	}
	spin_unlock(&domain->iotlb_lock);
}

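/*
 * Record, for each bounce page covering [iova, iova + size - 1], the
 * physical address of the original buffer it shadows. Kernel bounce
 * pages are allocated on demand.
 */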
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					 u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				return -ENOMEM;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
	}
}

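/*
 * Copy between the original buffer at physical address @orig and the
 * kernel-mapped bounce buffer at @addr. DMA_TO_DEVICE copies from the
 * original pages into the bounce buffer; other directions copy back.
 */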
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);
	struct page *page;
	unsigned int sz = 0;

	while (size) {
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		page = pfn_to_page(pfn);
		if (dir == DMA_TO_DEVICE)
			memcpy_from_page(addr, page, offset, sz);
		else
			memcpy_to_page(page, offset, addr, sz);

		size -= sz;
		pfn++;
		addr += sz;
		offset = 0;
	}
}

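/*
 * Bounce the data for [iova, iova + size - 1] between the original
 * buffer and the bounce pages, one page at a time, in the given
 * direction. Requests starting outside the bounce region are ignored.
 */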
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	struct vduse_bounce_map *map;
	unsigned int offset;
	void *addr;
	size_t sz;

	if (iova >= domain->bounce_size)
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
			return;

		addr = kmap_local_page(map->bounce_page);
		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
		kunmap_local(addr);
		size -= sz;
		iova += sz;
	}
}

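/*
 * Look up the page backing a coherent allocation at @iova via the
 * domain's IOTLB and return it with an extra reference, or NULL if the
 * IOVA is not mapped. Used by the mmap fault handler.
 */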
static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 start = iova & PAGE_MASK;
	u64 last = start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
	if (!map)
		goto out;

	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}

static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *map;
	struct page *page = NULL;

	read_lock(&domain->bounce_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (domain->user_bounce_pages || !map->bounce_page)
		goto out;

	page = map->bounce_page;
	get_page(page);
out:
	read_unlock(&domain->bounce_lock);

	return page;
}

static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (!map->bounce_page)
			continue;

		__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
}

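/*
 * Replace the kernel bounce pages with pages supplied by userspace.
 * Data in any kernel bounce page that is still in use is copied to the
 * corresponding user page before the kernel page is freed. Only a
 * full-size mapping of the bounce region is supported.
 */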
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
{
	struct vduse_bounce_map *map;
	int i, ret;

	/* Partial mapping is not supported for now */
	if (count != (domain->bounce_size >> PAGE_SHIFT))
		return -EINVAL;

	write_lock(&domain->bounce_lock);
	ret = -EEXIST;
	if (domain->user_bounce_pages)
		goto out;

	for (i = 0; i < count; i++) {
		map = &domain->bounce_maps[i];
		if (map->bounce_page) {
			/* Copy the kernel page to the user page if it's in use */
			if (map->orig_phys != INVALID_PHYS_ADDR)
				memcpy_to_page(pages[i], 0,
					       page_address(map->bounce_page),
					       PAGE_SIZE);
			__free_page(map->bounce_page);
		}
		map->bounce_page = pages[i];
		get_page(pages[i]);
	}
	domain->user_bounce_pages = true;
	ret = 0;
out:
	write_unlock(&domain->bounce_lock);

	return ret;
}

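/*
 * Detach the user-provided bounce pages. Pages still in use have their
 * contents copied back into freshly allocated kernel bounce pages so
 * that in-flight DMA keeps working after userspace goes away.
 */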
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long i, count;

	write_lock(&domain->bounce_lock);
	if (!domain->user_bounce_pages)
		goto out;

	count = domain->bounce_size >> PAGE_SHIFT;
	for (i = 0; i < count; i++) {
		struct page *page = NULL;

		map = &domain->bounce_maps[i];
		if (WARN_ON(!map->bounce_page))
			continue;

		/* Copy the user page to a kernel page if it's in use */
		if (map->orig_phys != INVALID_PHYS_ADDR) {
			page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
			memcpy_from_page(page_address(page),
					 map->bounce_page, 0, PAGE_SIZE);
		}
		put_page(map->bounce_page);
		map->bounce_page = page;
	}
	domain->user_bounce_pages = false;
out:
	write_unlock(&domain->bounce_lock);
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map)
		goto unlock;

	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
	domain->bounce_map = 0;
unlock:
	spin_unlock(&domain->iotlb_lock);
}

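/*
 * Lazily install a single file-backed IOTLB entry covering the whole
 * bounce region the first time a streaming DMA mapping is created.
 */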
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int ret = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map)
		goto unlock;

	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
				    0, VHOST_MAP_RW, domain->file, 0);
	if (ret)
		goto unlock;

	domain->bounce_map = 1;
unlock:
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
			unsigned long size, unsigned long limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long iova_pfn;

	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

	return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;

	free_iova_fast(iovad, iova >> shift, iova_len);
}

void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain,
				      dma_addr_t dma_addr, size_t size,
				      enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE);
	read_unlock(&domain->bounce_lock);
}

void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain,
				      dma_addr_t dma_addr, size_t size,
				      enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
	read_unlock(&domain->bounce_lock);
}

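/*
 * Create a streaming DMA mapping for @page + @offset through the bounce
 * region: allocate an IOVA, attach the original buffer to the bounce
 * pages and, unless DMA_ATTR_SKIP_CPU_SYNC is set, copy the data out to
 * the bounce pages for transfers toward the device.
 */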
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	unsigned long limit = domain->bounce_size - 1;
	phys_addr_t pa = page_to_phys(page) + offset;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto err;

	read_lock(&domain->bounce_lock);
	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
		goto err_unlock;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

	read_unlock(&domain->bounce_lock);

	return iova;
err_unlock:
	read_unlock(&domain->bounce_lock);
err:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;

	read_lock(&domain->bounce_lock);
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	read_unlock(&domain->bounce_lock);
	vduse_domain_free_iova(iovad, dma_addr, size);
}

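/*
 * Allocate a coherent buffer and an IOVA for it, and publish the
 * mapping through the domain's IOTLB so it can be mmapped via the
 * domain file at offset @iova.
 */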
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	unsigned long limit = domain->iova_limit;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
	void *orig = alloc_pages_exact(size, flag);

	if (!iova || !orig)
		goto err;

	spin_lock(&domain->iotlb_lock);
	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				  virt_to_phys(orig), VHOST_MAP_RW,
				  domain->file, (u64)iova)) {
		spin_unlock(&domain->iotlb_lock);
		goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	*dma_addr = iova;

	return orig;
err:
	*dma_addr = DMA_MAPPING_ERROR;
	if (orig)
		free_pages_exact(orig, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}

void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;
	phys_addr_t pa;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
				      (u64)dma_addr + size - 1);
	if (WARN_ON(!map)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}
	map_file = (struct vdpa_map_file *)map->opaque;
	fput(map_file->file);
	kfree(map_file);
	pa = map->addr;
	vhost_iotlb_map_free(domain->iotlb, map);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(pa), size);
}

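/*
 * Fault handler for userspace mappings of the domain file: IOVAs below
 * bounce_size resolve to bounce pages, higher IOVAs resolve to coherent
 * allocations tracked in the IOTLB.
 */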
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	if (iova < domain->bounce_size)
		page = vduse_domain_get_bounce_page(domain, iova);
	else
		page = vduse_domain_get_coherent_page(domain, iova);

	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vduse_iova_domain *domain = file->private_data;

	vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
	vma->vm_private_data = domain;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	vduse_domain_remove_user_bounce_pages(domain);
	vduse_domain_free_kernel_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	kfree(domain);

	return 0;
}

static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}

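/*
 * Create an IOVA domain with a page-aligned bounce region of
 * @bounce_size bytes at the bottom of the IOVA space below @iova_limit:
 * the stream_iovad hands out IOVAs inside the bounce region and the
 * consistent_iovad hands out IOVAs above it.
 */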
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;
	int ret;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;

	domain->file = file;
	rwlock_init(&domain->bounce_lock);
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			PAGE_SIZE, IOVA_START_PFN);
	ret = iova_domain_init_rcaches(&domain->stream_iovad);
	if (ret)
		goto err_iovad_stream;
	init_iova_domain(&domain->consistent_iovad,
			PAGE_SIZE, bounce_pfns);
	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
	if (ret)
		goto err_iovad_consistent;

	return domain;
err_iovad_consistent:
	put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
	fput(file);
err_file:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}

int vduse_domain_init(void)
{
	return iova_cache_get();
}

void vduse_domain_exit(void)
{
	iova_cache_put();
}