1// SPDX-License-Identifier: MIT
2/*
3 * Copyright 2014-2018 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include <linux/dma-buf.h>
24#include <linux/list.h>
25#include <linux/pagemap.h>
26#include <linux/sched/mm.h>
27#include <linux/sched/task.h>
28#include <drm/ttm/ttm_tt.h>
29
30#include <drm/drm_exec.h>
31
32#include "amdgpu_object.h"
33#include "amdgpu_gem.h"
34#include "amdgpu_vm.h"
35#include "amdgpu_hmm.h"
36#include "amdgpu_amdkfd.h"
37#include "amdgpu_dma_buf.h"
38#include <uapi/linux/kfd_ioctl.h>
39#include "amdgpu_xgmi.h"
40#include "kfd_priv.h"
41#include "kfd_smi_events.h"
42
/* Userptr restore delay, just long enough to allow consecutive VM
 * changes to accumulate (presumably in milliseconds, per the _MS suffix)
 */
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1

/*
 * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
 * BO chunk. The value is a byte count: 1 << 21 == 2 MiB.
 */
#define VRAM_AVAILABLITY_ALIGN (1 << 21)
53
/* Impose limit on how much memory KFD can use.
 *
 * The limits are filled in by amdgpu_amdkfd_gpuvm_init_mem_limits(). The
 * "used" counters are signed; amdgpu_amdkfd_unreserve_mem_limit() warns
 * when one of them drops below zero.
 */
static struct {
	/* Ceiling for system_mem_used, in bytes */
	uint64_t max_system_mem_limit;
	/* Ceiling for ttm_mem_used, in bytes */
	uint64_t max_ttm_mem_limit;
	/* System memory currently accounted, in bytes */
	int64_t system_mem_used;
	/* TTM memory currently accounted, in bytes */
	int64_t ttm_mem_used;
	/* Held by the reserve/unreserve helpers while the counters are
	 * checked and updated
	 */
	spinlock_t mem_limit_lock;
} kfd_mem_limit;
62
/* Printable names for memory domains, indexed by the bit position of the
 * domain flag (bit 0 -> "CPU", bit 1 -> "GTT", and so on).
 */
static const char * const domain_bit_to_string[] = {
		"CPU",
		"GTT",
		"VRAM",
		"GDS",
		"GWS",
		"OA"
};

/* Name of the lowest bit set in @domain. ffs() counts bits from 1, hence
 * the -1.
 * NOTE(review): a @domain of 0 (ffs() returns 0) or one whose lowest set
 * bit lies beyond the table indexes outside domain_bit_to_string; callers
 * have to pass a valid, non-zero domain mask.
 */
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
73
74static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
75
76static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
77		struct kgd_mem *mem)
78{
79	struct kfd_mem_attachment *entry;
80
81	list_for_each_entry(entry, &mem->attachments, list)
82		if (entry->bo_va->base.vm == avm)
83			return true;
84
85	return false;
86}
87
88/**
89 * reuse_dmamap() - Check whether adev can share the original
90 * userptr BO
91 *
92 * If both adev and bo_adev are in direct mapping or
93 * in the same iommu group, they can share the original BO.
94 *
95 * @adev: Device to which can or cannot share the original BO
96 * @bo_adev: Device to which allocated BO belongs to
97 *
98 * Return: returns true if adev can share original userptr BO,
99 * false otherwise.
100 */
101static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
102{
103	return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
104			(adev->dev->iommu_group == bo_adev->dev->iommu_group);
105}
106
107/* Set memory usage limits. Current, limits are
108 *  System (TTM + userptr) memory - 15/16th System RAM
109 *  TTM memory - 3/8th System RAM
110 */
111void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
112{
113	struct sysinfo si;
114	uint64_t mem;
115
116	if (kfd_mem_limit.max_system_mem_limit)
117		return;
118
119	si_meminfo(&si);
120	mem = si.freeram - si.freehigh;
121	mem *= si.mem_unit;
122
123	mtx_init(&kfd_mem_limit.mem_limit_lock, IPL_TTY);
124	kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
125	kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
126	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
127		(kfd_mem_limit.max_system_mem_limit >> 20),
128		(kfd_mem_limit.max_ttm_mem_limit >> 20));
129}
130
131void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
132{
133	kfd_mem_limit.system_mem_used += size;
134}
135
/* Estimate page table size needed to represent a given memory size
 *
 * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
 * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
 * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
 * for 2MB pages for TLB efficiency. However, small allocations and
 * fragmented system memory still need some 4KB pages. We choose a
 * compromise that should work in most cases without reserving too
 * much memory for page tables unnecessarily (factor 16K, >> 14).
 *
 * The estimate is never smaller than AMDGPU_VM_RESERVED_VRAM.
 */

#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
148
149/**
150 * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
151 * of buffer.
152 *
153 * @adev: Device to which allocated BO belongs to
154 * @size: Size of buffer, in bytes, encapsulated by B0. This should be
155 * equivalent to amdgpu_bo_size(BO)
156 * @alloc_flag: Flag used in allocating a BO as noted above
157 * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is
158 * managed as one compute node in driver for app
159 *
160 * Return:
161 *	returns -ENOMEM in case of error, ZERO otherwise
162 */
163int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
164		uint64_t size, u32 alloc_flag, int8_t xcp_id)
165{
166	uint64_t reserved_for_pt =
167		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
168	size_t system_mem_needed, ttm_mem_needed, vram_needed;
169	int ret = 0;
170	uint64_t vram_size = 0;
171
172	system_mem_needed = 0;
173	ttm_mem_needed = 0;
174	vram_needed = 0;
175	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
176		system_mem_needed = size;
177		ttm_mem_needed = size;
178	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
179		/*
180		 * Conservatively round up the allocation requirement to 2 MB
181		 * to avoid fragmentation caused by 4K allocations in the tail
182		 * 2M BO chunk.
183		 */
184		vram_needed = size;
185		/*
186		 * For GFX 9.4.3, get the VRAM size from XCP structs
187		 */
188		if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
189			return -EINVAL;
190
191		vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
192		if (adev->gmc.is_app_apu) {
193			system_mem_needed = size;
194			ttm_mem_needed = size;
195		}
196	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
197		system_mem_needed = size;
198	} else if (!(alloc_flag &
199				(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
200				 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
201		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
202		return -ENOMEM;
203	}
204
205	spin_lock(&kfd_mem_limit.mem_limit_lock);
206
207	if (kfd_mem_limit.system_mem_used + system_mem_needed >
208	    kfd_mem_limit.max_system_mem_limit)
209		pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
210
211	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
212	     kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
213	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
214	     kfd_mem_limit.max_ttm_mem_limit) ||
215	    (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
216	     vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) {
217		ret = -ENOMEM;
218		goto release;
219	}
220
221	/* Update memory accounting by decreasing available system
222	 * memory, TTM memory and GPU memory as computed above
223	 */
224	WARN_ONCE(vram_needed && !adev,
225		  "adev reference can't be null when vram is used");
226	if (adev && xcp_id >= 0) {
227		adev->kfd.vram_used[xcp_id] += vram_needed;
228		adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
229				vram_needed :
230				ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
231	}
232	kfd_mem_limit.system_mem_used += system_mem_needed;
233	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
234
235release:
236	spin_unlock(&kfd_mem_limit.mem_limit_lock);
237	return ret;
238}
239
240void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
241		uint64_t size, u32 alloc_flag, int8_t xcp_id)
242{
243	spin_lock(&kfd_mem_limit.mem_limit_lock);
244
245	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
246		kfd_mem_limit.system_mem_used -= size;
247		kfd_mem_limit.ttm_mem_used -= size;
248	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
249		WARN_ONCE(!adev,
250			  "adev reference can't be null when alloc mem flags vram is set");
251		if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
252			goto release;
253
254		if (adev) {
255			adev->kfd.vram_used[xcp_id] -= size;
256			if (adev->gmc.is_app_apu) {
257				adev->kfd.vram_used_aligned[xcp_id] -= size;
258				kfd_mem_limit.system_mem_used -= size;
259				kfd_mem_limit.ttm_mem_used -= size;
260			} else {
261				adev->kfd.vram_used_aligned[xcp_id] -=
262					ALIGN(size, VRAM_AVAILABLITY_ALIGN);
263			}
264		}
265	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
266		kfd_mem_limit.system_mem_used -= size;
267	} else if (!(alloc_flag &
268				(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
269				 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
270		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
271		goto release;
272	}
273	WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
274		  "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
275	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
276		  "KFD TTM memory accounting unbalanced");
277	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
278		  "KFD system memory accounting unbalanced");
279
280release:
281	spin_unlock(&kfd_mem_limit.mem_limit_lock);
282}
283
284void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
285{
286	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
287	u32 alloc_flags = bo->kfd_bo->alloc_flags;
288	u64 size = amdgpu_bo_size(bo);
289
290	amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
291					  bo->xcp_id);
292
293	kfree(bo->kfd_bo);
294}
295
296/**
297 * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information
298 * about USERPTR or DOOREBELL or MMIO BO.
299 *
300 * @adev: Device for which dmamap BO is being created
301 * @mem: BO of peer device that is being DMA mapped. Provides parameters
302 *	 in building the dmamap BO
303 * @bo_out: Output parameter updated with handle of dmamap BO
304 */
305static int
306create_dmamap_sg_bo(struct amdgpu_device *adev,
307		 struct kgd_mem *mem, struct amdgpu_bo **bo_out)
308{
309	struct drm_gem_object *gem_obj;
310	int ret;
311	uint64_t flags = 0;
312
313	ret = amdgpu_bo_reserve(mem->bo, false);
314	if (ret)
315		return ret;
316
317	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
318		flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
319					AMDGPU_GEM_CREATE_UNCACHED);
320
321	ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
322			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
323			ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
324
325	amdgpu_bo_unreserve(mem->bo);
326
327	if (ret) {
328		pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
329		return -EINVAL;
330	}
331
332	*bo_out = gem_to_amdgpu_bo(gem_obj);
333	(*bo_out)->parent = amdgpu_bo_ref(mem->bo);
334	return ret;
335}
336
337/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
338 *  reservation object.
339 *
340 * @bo: [IN] Remove eviction fence(s) from this BO
341 * @ef: [IN] This eviction fence is removed if it
342 *  is present in the shared list.
343 *
344 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
345 */
346static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
347					struct amdgpu_amdkfd_fence *ef)
348{
349	struct dma_fence *replacement;
350
351	if (!ef)
352		return -EINVAL;
353
354	/* TODO: Instead of block before we should use the fence of the page
355	 * table update and TLB flush here directly.
356	 */
357	replacement = dma_fence_get_stub();
358	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
359				replacement, DMA_RESV_USAGE_BOOKKEEP);
360	dma_fence_put(replacement);
361	return 0;
362}
363
364int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
365{
366	struct amdgpu_bo *root = bo;
367	struct amdgpu_vm_bo_base *vm_bo;
368	struct amdgpu_vm *vm;
369	struct amdkfd_process_info *info;
370	struct amdgpu_amdkfd_fence *ef;
371	int ret;
372
373	/* we can always get vm_bo from root PD bo.*/
374	while (root->parent)
375		root = root->parent;
376
377	vm_bo = root->vm_bo;
378	if (!vm_bo)
379		return 0;
380
381	vm = vm_bo->vm;
382	if (!vm)
383		return 0;
384
385	info = vm->process_info;
386	if (!info || !info->eviction_fence)
387		return 0;
388
389	ef = container_of(dma_fence_get(&info->eviction_fence->base),
390			struct amdgpu_amdkfd_fence, base);
391
392	BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
393	ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
394	dma_resv_unlock(bo->tbo.base.resv);
395
396	dma_fence_put(&ef->base);
397	return ret;
398}
399
400static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
401				     bool wait)
402{
403	struct ttm_operation_ctx ctx = { false, false };
404	int ret;
405
406	if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
407		 "Called with userptr BO"))
408		return -EINVAL;
409
410	amdgpu_bo_placement_from_domain(bo, domain);
411
412	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
413	if (ret)
414		goto validate_fail;
415	if (wait)
416		amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
417
418validate_fail:
419	return ret;
420}
421
422static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
423{
424	return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
425}
426
427/* vm_validate_pt_pd_bos - Validate page table and directory BOs
428 *
429 * Page directories are not updated here because huge page handling
430 * during page table updates can invalidate page directory entries
431 * again. Page directories are only updated after updating page
432 * tables.
433 */
434static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
435{
436	struct amdgpu_bo *pd = vm->root.bo;
437	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
438	int ret;
439
440	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
441	if (ret) {
442		pr_err("failed to validate PT BOs\n");
443		return ret;
444	}
445
446	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
447
448	return 0;
449}
450
451static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
452{
453	struct amdgpu_bo *pd = vm->root.bo;
454	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
455	int ret;
456
457	ret = amdgpu_vm_update_pdes(adev, vm, false);
458	if (ret)
459		return ret;
460
461	return amdgpu_sync_fence(sync, vm->last_update);
462}
463
464static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
465{
466	uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
467				 AMDGPU_VM_MTYPE_DEFAULT;
468
469	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
470		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
471	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
472		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
473
474	return amdgpu_gem_va_map_flags(adev, mapping_flags);
475}
476
477/**
478 * create_sg_table() - Create an sg_table for a contiguous DMA addr range
479 * @addr: The starting address to point to
480 * @size: Size of memory area in bytes being pointed to
481 *
482 * Allocates an instance of sg_table and initializes it to point to memory
483 * area specified by input parameters. The address used to build is assumed
484 * to be DMA mapped, if needed.
485 *
486 * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
487 * because they are physically contiguous.
488 *
489 * Return: Initialized instance of SG Table or NULL
490 */
491static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
492{
493	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
494
495	if (!sg)
496		return NULL;
497	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
498		kfree(sg);
499		return NULL;
500	}
501	sg_dma_address(sg->sgl) = addr;
502	sg->sgl->length = size;
503#ifdef CONFIG_NEED_SG_DMA_LENGTH
504	sg->sgl->dma_length = size;
505#endif
506	return sg;
507}
508
/* DMA-map the pages of a userptr BO for the device of @attachment.
 *
 * Builds a new sg_table over the page array of the original BO
 * (@mem->bo), maps it for DMA on the attachment's device and then
 * validates the attachment's BO into the GTT domain. A failing step undoes
 * the steps taken before it and leaves ttm->sg NULL.
 *
 * Returns 0 on success, -EINVAL if the page counts of the two BOs differ,
 * -ENOMEM if the sg_table cannot be allocated, or the error of the step
 * that failed.
 */
static int
kfd_mem_dmamap_userptr(struct kgd_mem *mem,
		       struct kfd_mem_attachment *attachment)
{
	/* writable BOs are mapped for both directions, others device-read only */
	enum dma_data_direction direction =
		mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
	struct ttm_operation_ctx ctx = {.interruptible = true};
	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
	struct amdgpu_device *adev = attachment->adev;
	struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
	struct ttm_tt *ttm = bo->tbo.ttm;
	int ret;

	if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
		return -EINVAL;

	ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
	if (unlikely(!ttm->sg))
		return -ENOMEM;

	/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
	ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
					ttm->num_pages, 0,
					(u64)ttm->num_pages << PAGE_SHIFT,
					GFP_KERNEL);
	if (unlikely(ret))
		goto free_sg;

	ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
	if (unlikely(ret))
		goto release_sg;

	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		goto unmap_sg;

	return 0;

	/* Error unwinding: each label undoes one step and falls through to
	 * the labels for the earlier steps.
	 */
unmap_sg:
	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
release_sg:
	pr_err("DMA map userptr failed: %d\n", ret);
	sg_free_table(ttm->sg);
free_sg:
	kfree(ttm->sg);
	ttm->sg = NULL;
	return ret;
}
559
560static int
561kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
562{
563	struct ttm_operation_ctx ctx = {.interruptible = true};
564	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
565	int ret;
566
567	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
568	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
569	if (ret)
570		return ret;
571
572	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
573	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
574}
575
576/**
577 * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
578 * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
579 * @attachment: Virtual address attachment of the BO on accessing device
580 *
581 * An access request from the device that owns DOORBELL does not require DMA mapping.
582 * This is because the request doesn't go through PCIe root complex i.e. it instead
583 * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
584 *
585 * In contrast, all access requests for MMIO need to be DMA mapped without regard to
586 * device ownership. This is because access requests for MMIO go through PCIe root
587 * complex.
588 *
589 * This is accomplished in two steps:
590 *   - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
591 *         in updating requesting device's page table
592 *   - Signal TTM to mark memory pointed to by requesting device's BO as GPU
593 *         accessible. This allows an update of requesting device's page table
594 *         with entries associated with DOOREBELL or MMIO memory
595 *
596 * This method is invoked in the following contexts:
597 *   - Mapping of DOORBELL or MMIO BO of same or peer device
598 *   - Validating an evicted DOOREBELL or MMIO BO on device seeking access
599 *
600 * Return: ZERO if successful, NON-ZERO otherwise
601 */
602static int
603kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
604		     struct kfd_mem_attachment *attachment)
605{
606	struct ttm_operation_ctx ctx = {.interruptible = true};
607	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
608	struct amdgpu_device *adev = attachment->adev;
609	struct ttm_tt *ttm = bo->tbo.ttm;
610	enum dma_data_direction dir;
611	dma_addr_t dma_addr;
612	bool mmio;
613	int ret;
614
615	/* Expect SG Table of dmapmap BO to be NULL */
616	mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
617	if (unlikely(ttm->sg)) {
618		pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
619		return -EINVAL;
620	}
621
622	dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
623			DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
624	dma_addr = mem->bo->tbo.sg->sgl->dma_address;
625	pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
626	pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
627	dma_addr = dma_map_resource(adev->dev, dma_addr,
628			mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
629	ret = dma_mapping_error(adev->dev, dma_addr);
630	if (unlikely(ret))
631		return ret;
632	pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
633
634	ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
635	if (unlikely(!ttm->sg)) {
636		ret = -ENOMEM;
637		goto unmap_sg;
638	}
639
640	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
641	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
642	if (unlikely(ret))
643		goto free_sg;
644
645	return ret;
646
647free_sg:
648	sg_free_table(ttm->sg);
649	kfree(ttm->sg);
650	ttm->sg = NULL;
651unmap_sg:
652	dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
653			   dir, DMA_ATTR_SKIP_CPU_SYNC);
654	return ret;
655}
656
657static int
658kfd_mem_dmamap_attachment(struct kgd_mem *mem,
659			  struct kfd_mem_attachment *attachment)
660{
661	switch (attachment->type) {
662	case KFD_MEM_ATT_SHARED:
663		return 0;
664	case KFD_MEM_ATT_USERPTR:
665		return kfd_mem_dmamap_userptr(mem, attachment);
666	case KFD_MEM_ATT_DMABUF:
667		return kfd_mem_dmamap_dmabuf(attachment);
668	case KFD_MEM_ATT_SG:
669		return kfd_mem_dmamap_sg_bo(mem, attachment);
670	default:
671		WARN_ON_ONCE(1);
672	}
673	return -EINVAL;
674}
675
676static void
677kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
678			 struct kfd_mem_attachment *attachment)
679{
680	enum dma_data_direction direction =
681		mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
682		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
683	struct ttm_operation_ctx ctx = {.interruptible = false};
684	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
685	struct amdgpu_device *adev = attachment->adev;
686	struct ttm_tt *ttm = bo->tbo.ttm;
687
688	if (unlikely(!ttm->sg))
689		return;
690
691	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
692	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
693
694	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
695	sg_free_table(ttm->sg);
696	kfree(ttm->sg);
697	ttm->sg = NULL;
698}
699
/* Counterpart of kfd_mem_dmamap_dmabuf() for the unmap path; it is
 * intentionally empty and only exists so that every attachment type has
 * an unmap handler.
 */
static void
kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
{
	/* This is a no-op. We don't want to trigger eviction fences when
	 * unmapping DMABufs. Therefore the invalidation (moving to system
	 * domain) is done in kfd_mem_dmamap_dmabuf.
	 */
}
708
709/**
710 * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
711 * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
712 * @attachment: Virtual address attachment of the BO on accessing device
713 *
714 * The method performs following steps:
715 *   - Signal TTM to mark memory pointed to by BO as GPU inaccessible
716 *   - Free SG Table that is used to encapsulate DMA mapped memory of
717 *          peer device's DOORBELL or MMIO memory
718 *
719 * This method is invoked in the following contexts:
720 *     UNMapping of DOORBELL or MMIO BO on a device having access to its memory
721 *     Eviction of DOOREBELL or MMIO BO on device having access to its memory
722 *
723 * Return: void
724 */
725static void
726kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
727		       struct kfd_mem_attachment *attachment)
728{
729	struct ttm_operation_ctx ctx = {.interruptible = true};
730	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
731	struct amdgpu_device *adev = attachment->adev;
732	struct ttm_tt *ttm = bo->tbo.ttm;
733	enum dma_data_direction dir;
734
735	if (unlikely(!ttm->sg)) {
736		pr_err("SG Table of BO is UNEXPECTEDLY NULL");
737		return;
738	}
739
740	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
741	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
742
743	dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
744				DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
745	dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
746			ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
747	sg_free_table(ttm->sg);
748	kfree(ttm->sg);
749	ttm->sg = NULL;
750	bo->tbo.sg = NULL;
751}
752
753static void
754kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
755			    struct kfd_mem_attachment *attachment)
756{
757	switch (attachment->type) {
758	case KFD_MEM_ATT_SHARED:
759		break;
760	case KFD_MEM_ATT_USERPTR:
761		kfd_mem_dmaunmap_userptr(mem, attachment);
762		break;
763	case KFD_MEM_ATT_DMABUF:
764		kfd_mem_dmaunmap_dmabuf(attachment);
765		break;
766	case KFD_MEM_ATT_SG:
767		kfd_mem_dmaunmap_sg_bo(mem, attachment);
768		break;
769	default:
770		WARN_ON_ONCE(1);
771	}
772}
773
774static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
775{
776	if (!mem->dmabuf) {
777		struct dma_buf *ret = amdgpu_gem_prime_export(
778			&mem->bo->tbo.base,
779			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
780				DRM_RDWR : 0);
781		if (IS_ERR(ret))
782			return PTR_ERR(ret);
783		mem->dmabuf = ret;
784	}
785
786	return 0;
787}
788
789static int
790kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
791		      struct amdgpu_bo **bo)
792{
793	struct drm_gem_object *gobj;
794	int ret;
795
796	ret = kfd_mem_export_dmabuf(mem);
797	if (ret)
798		return ret;
799
800	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
801	if (IS_ERR(gobj))
802		return PTR_ERR(gobj);
803
804	*bo = gem_to_amdgpu_bo(gobj);
805	(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
806
807	return 0;
808}
809
810/* kfd_mem_attach - Add a BO to a VM
811 *
812 * Everything that needs to bo done only once when a BO is first added
813 * to a VM. It can later be mapped and unmapped many times without
814 * repeating these steps.
815 *
816 * 0. Create BO for DMA mapping, if needed
817 * 1. Allocate and initialize BO VA entry data structure
818 * 2. Add BO to the VM
819 * 3. Determine ASIC-specific PTE flags
820 * 4. Alloc page tables and directories if needed
821 * 4a.  Validate new page tables and directories
822 */
823static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
824		struct amdgpu_vm *vm, bool is_aql)
825{
826	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
827	unsigned long bo_size = mem->bo->tbo.base.size;
828	uint64_t va = mem->va;
829	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
830	struct amdgpu_bo *bo[2] = {NULL, NULL};
831	bool same_hive = false;
832	int i, ret;
833
834	if (!va) {
835		pr_err("Invalid VA when adding BO to VM\n");
836		return -EINVAL;
837	}
838
839	/* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
840	 *
841	 * The access path of MMIO and DOORBELL BOs of is always over PCIe.
842	 * In contrast the access path of VRAM BOs depens upon the type of
843	 * link that connects the peer device. Access over PCIe is allowed
844	 * if peer device has large BAR. In contrast, access over xGMI is
845	 * allowed for both small and large BAR configurations of peer device
846	 */
847	if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
848	    ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
849	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
850	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
851		if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
852			same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
853		if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
854			return -EINVAL;
855	}
856
857	for (i = 0; i <= is_aql; i++) {
858		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
859		if (unlikely(!attachment[i])) {
860			ret = -ENOMEM;
861			goto unwind;
862		}
863
864		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
865			 va + bo_size, vm);
866
867		if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
868		    (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
869			same_hive) {
870			/* Mappings on the local GPU, or VRAM mappings in the
871			 * local hive, or userptr mapping can reuse dma map
872			 * address space share the original BO
873			 */
874			attachment[i]->type = KFD_MEM_ATT_SHARED;
875			bo[i] = mem->bo;
876			drm_gem_object_get(&bo[i]->tbo.base);
877		} else if (i > 0) {
878			/* Multiple mappings on the same GPU share the BO */
879			attachment[i]->type = KFD_MEM_ATT_SHARED;
880			bo[i] = bo[0];
881			drm_gem_object_get(&bo[i]->tbo.base);
882		} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
883			/* Create an SG BO to DMA-map userptrs on other GPUs */
884			attachment[i]->type = KFD_MEM_ATT_USERPTR;
885			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
886			if (ret)
887				goto unwind;
888		/* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
889		} else if (mem->bo->tbo.type == ttm_bo_type_sg) {
890			WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
891				    mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
892				  "Handing invalid SG BO in ATTACH request");
893			attachment[i]->type = KFD_MEM_ATT_SG;
894			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
895			if (ret)
896				goto unwind;
897		/* Enable acces to GTT and VRAM BOs of peer devices */
898		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
899			   mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
900			attachment[i]->type = KFD_MEM_ATT_DMABUF;
901			ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
902			if (ret)
903				goto unwind;
904			pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
905		} else {
906			WARN_ONCE(true, "Handling invalid ATTACH request");
907			ret = -EINVAL;
908			goto unwind;
909		}
910
911		/* Add BO to VM internal data structures */
912		ret = amdgpu_bo_reserve(bo[i], false);
913		if (ret) {
914			pr_debug("Unable to reserve BO during memory attach");
915			goto unwind;
916		}
917		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
918		amdgpu_bo_unreserve(bo[i]);
919		if (unlikely(!attachment[i]->bo_va)) {
920			ret = -ENOMEM;
921			pr_err("Failed to add BO object to VM. ret == %d\n",
922			       ret);
923			goto unwind;
924		}
925		attachment[i]->va = va;
926		attachment[i]->pte_flags = get_pte_flags(adev, mem);
927		attachment[i]->adev = adev;
928		list_add(&attachment[i]->list, &mem->attachments);
929
930		va += bo_size;
931	}
932
933	return 0;
934
935unwind:
936	for (; i >= 0; i--) {
937		if (!attachment[i])
938			continue;
939		if (attachment[i]->bo_va) {
940			amdgpu_bo_reserve(bo[i], true);
941			amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
942			amdgpu_bo_unreserve(bo[i]);
943			list_del(&attachment[i]->list);
944		}
945		if (bo[i])
946			drm_gem_object_put(&bo[i]->tbo.base);
947		kfree(attachment[i]);
948	}
949	return ret;
950}
951
952static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
953{
954	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
955
956	pr_debug("\t remove VA 0x%llx in entry %p\n",
957			attachment->va, attachment);
958	amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
959	drm_gem_object_put(&bo->tbo.base);
960	list_del(&attachment->list);
961	kfree(attachment);
962}
963
964static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
965				struct amdkfd_process_info *process_info,
966				bool userptr)
967{
968	mutex_lock(&process_info->lock);
969	if (userptr)
970		list_add_tail(&mem->validate_list,
971			      &process_info->userptr_valid_list);
972	else
973		list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
974	mutex_unlock(&process_info->lock);
975}
976
977static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
978		struct amdkfd_process_info *process_info)
979{
980	mutex_lock(&process_info->lock);
981	list_del(&mem->validate_list);
982	mutex_unlock(&process_info->lock);
983}
984
/* Initializes user pages. It sets the userptr address on the BO,
 * registers the MMU notifier and, unless this is a CRIU resume, fetches
 * the user pages and validates the BO in mem->domain.
 *
 * The BO must already be on the userptr_valid_list. Otherwise an
 * eviction and restore may happen that leaves the new BO unmapped
 * with the user mode queues running.
 *
 * Takes the process_info->lock to protect against concurrent restore
 * workers.
 *
 * @mem: KFD BO structure; mem->bo and mem->process_info must be set.
 * @user_addr: user virtual address backing the BO.
 * @criu_resume: when true, only mark the BO invalid (mem->invalid++) and
 *               return; no pages are fetched and no validation happens here.
 *
 * Returns 0 for success, negative errno for errors. On any error after
 * the notifier was registered, the notifier is unregistered again.
 */
static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
			   bool criu_resume)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	struct amdgpu_bo *bo = mem->bo;
	/* NOTE(review): positional init; presumably interruptible = true,
	 * no_wait_gpu = false — confirm against struct ttm_operation_ctx.
	 */
	struct ttm_operation_ctx ctx = { true, false };
	struct hmm_range *range;
	int ret = 0;

	mutex_lock(&process_info->lock);

	ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
	if (ret) {
		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
		goto out;
	}

	ret = amdgpu_hmm_register(bo, user_addr);
	if (ret) {
		pr_err("%s: Failed to register MMU notifier: %d\n",
		       __func__, ret);
		goto out;
	}

	if (criu_resume) {
		/*
		 * During a CRIU restore operation, the userptr buffer objects
		 * will be validated in the restore_userptr_work worker at a
		 * later stage when it is scheduled by another ioctl called by
		 * CRIU master process for the target pid for restore.
		 */
		/* mem->invalid is modified under the notifier lock */
		mutex_lock(&process_info->notifier_lock);
		mem->invalid++;
		mutex_unlock(&process_info->notifier_lock);
		/* Early success exit: the notifier stays registered */
		mutex_unlock(&process_info->lock);
		return 0;
	}

	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
	if (ret) {
		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
		goto unregister_out;
	}

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("%s: Failed to reserve BO\n", __func__);
		goto release_out;
	}
	amdgpu_bo_placement_from_domain(bo, mem->domain);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		pr_err("%s: failed to validate BO\n", __func__);
	amdgpu_bo_unreserve(bo);

	/* Reached on success as well as on failure: the range obtained by
	 * amdgpu_ttm_tt_get_user_pages() is always handed back.
	 */
release_out:
	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
unregister_out:
	/* Only undo the notifier registration when something failed */
	if (ret)
		amdgpu_hmm_unregister(bo);
out:
	mutex_unlock(&process_info->lock);
	return ret;
}
1061
/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
 * object can track VM updates.
 *
 * The context is filled in by reserve_bo_and_vm() or
 * reserve_bo_and_cond_vms() and torn down by unreserve_bo_and_vms().
 */
struct bo_vm_reservation_context {
	/* DRM execution context for the reservation */
	struct drm_exec exec;
	/* Number of VMs reserved (page directories locked by the helper) */
	unsigned int n_vms;
	/* Pointer to sync object; the reserve helpers point it at
	 * &mem->sync and unreserve_bo_and_vms() resets it to NULL.
	 */
	struct amdgpu_sync *sync;
};
1075
/* Filter used by reserve_bo_and_cond_vms() to select which of a BO's
 * attachments get their VM reserved. The value is compared directly
 * against the attachment's is_mapped flag, so BO_VM_NOT_MAPPED must stay
 * 0 and BO_VM_MAPPED must stay 1.
 */
enum bo_vm_match {
	BO_VM_NOT_MAPPED = 0,	/* Match VMs where a BO is not mapped */
	BO_VM_MAPPED,		/* Match VMs where a BO is mapped     */
	BO_VM_ALL,		/* Match all VMs a BO was added to    */
};
1081
/**
 * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
 * @mem: KFD BO structure.
 * @vm: the VM to reserve.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
 *
 * Locks the page directory of @vm and then the BO of @mem inside one
 * drm_exec transaction. On success the caller owns the reservation and
 * must release it with unreserve_bo_and_vms(). On failure the drm_exec
 * context has already been finalized here.
 *
 * Returns 0 for success, negative errno for failure.
 */
static int reserve_bo_and_vm(struct kgd_mem *mem,
			      struct amdgpu_vm *vm,
			      struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	int ret;

	/* Only warns; a NULL vm is still passed on to amdgpu_vm_lock_pd() */
	WARN_ON(!vm);

	ctx->n_vms = 1;
	ctx->sync = &mem->sync;
	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
	/* NOTE(review): the body presumably restarts from the top whenever
	 * drm_exec_retry_on_contention() detects contention — see the
	 * drm_exec documentation.
	 */
	drm_exec_until_all_locked(&ctx->exec) {
		/* NOTE(review): the trailing 2 / 1 look like the number of
		 * fence slots to reserve — confirm.
		 */
		ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
		drm_exec_retry_on_contention(&ctx->exec);
		if (unlikely(ret))
			goto error;

		ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
		drm_exec_retry_on_contention(&ctx->exec);
		if (unlikely(ret))
			goto error;
	}
	return 0;

error:
	pr_err("Failed to reserve buffers in ttm.\n");
	drm_exec_fini(&ctx->exec);
	return ret;
}
1118
/**
 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
 * @mem: KFD BO structure.
 * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
 * are considered. Otherwise, only the given VM is considered.
 * @map_type: the mapping status that will be used to filter the VMs.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
 *
 * Walks mem->attachments and locks the page directory of every
 * attachment that passes the @vm and @map_type filters, then locks the
 * BO itself. ctx->n_vms holds the number of page directories locked; it
 * can be 0 on success when no attachment matched.
 *
 * On failure the drm_exec context has already been finalized here.
 *
 * Returns 0 for success, negative for failure.
 */
static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
				struct amdgpu_vm *vm, enum bo_vm_match map_type,
				struct bo_vm_reservation_context *ctx)
{
	struct kfd_mem_attachment *entry;
	struct amdgpu_bo *bo = mem->bo;
	int ret;

	ctx->sync = &mem->sync;
	/* NOTE(review): IGNORE_DUPLICATES presumably tolerates the same
	 * lock being requested by several attachments — confirm.
	 */
	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES);
	drm_exec_until_all_locked(&ctx->exec) {
		/* Reset inside the block so the count starts over on every
		 * pass through it.
		 */
		ctx->n_vms = 0;
		list_for_each_entry(entry, &mem->attachments, list) {
			/* Skip attachments of other VMs, and attachments
			 * whose mapped state does not equal map_type
			 * (unless every attachment was requested).
			 */
			if ((vm && vm != entry->bo_va->base.vm) ||
				(entry->is_mapped != map_type
				&& map_type != BO_VM_ALL))
				continue;

			ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
						&ctx->exec, 2);
			drm_exec_retry_on_contention(&ctx->exec);
			if (unlikely(ret))
				goto error;
			++ctx->n_vms;
		}

		ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
		drm_exec_retry_on_contention(&ctx->exec);
		if (unlikely(ret))
			goto error;
	}
	return 0;

error:
	pr_err("Failed to reserve buffers in ttm.\n");
	drm_exec_fini(&ctx->exec);
	return ret;
}
1168
1169/**
1170 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
1171 * @ctx: Reservation context to unreserve
1172 * @wait: Optionally wait for a sync object representing pending VM updates
1173 * @intr: Whether the wait is interruptible
1174 *
1175 * Also frees any resources allocated in
1176 * reserve_bo_and_(cond_)vm(s). Returns the status from
1177 * amdgpu_sync_wait.
1178 */
1179static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
1180				 bool wait, bool intr)
1181{
1182	int ret = 0;
1183
1184	if (wait)
1185		ret = amdgpu_sync_wait(ctx->sync, intr);
1186
1187	drm_exec_fini(&ctx->exec);
1188	ctx->sync = NULL;
1189	return ret;
1190}
1191
1192static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
1193				struct kfd_mem_attachment *entry,
1194				struct amdgpu_sync *sync)
1195{
1196	struct amdgpu_bo_va *bo_va = entry->bo_va;
1197	struct amdgpu_device *adev = entry->adev;
1198	struct amdgpu_vm *vm = bo_va->base.vm;
1199
1200	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
1201
1202	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
1203
1204	amdgpu_sync_fence(sync, bo_va->last_pt_update);
1205
1206	kfd_mem_dmaunmap_attachment(mem, entry);
1207}
1208
1209static int update_gpuvm_pte(struct kgd_mem *mem,
1210			    struct kfd_mem_attachment *entry,
1211			    struct amdgpu_sync *sync)
1212{
1213	struct amdgpu_bo_va *bo_va = entry->bo_va;
1214	struct amdgpu_device *adev = entry->adev;
1215	int ret;
1216
1217	ret = kfd_mem_dmamap_attachment(mem, entry);
1218	if (ret)
1219		return ret;
1220
1221	/* Update the page tables  */
1222	ret = amdgpu_vm_bo_update(adev, bo_va, false);
1223	if (ret) {
1224		pr_err("amdgpu_vm_bo_update failed\n");
1225		return ret;
1226	}
1227
1228	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
1229}
1230
1231static int map_bo_to_gpuvm(struct kgd_mem *mem,
1232			   struct kfd_mem_attachment *entry,
1233			   struct amdgpu_sync *sync,
1234			   bool no_update_pte)
1235{
1236	int ret;
1237
1238	/* Set virtual address for the allocation */
1239	ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
1240			       amdgpu_bo_size(entry->bo_va->base.bo),
1241			       entry->pte_flags);
1242	if (ret) {
1243		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
1244				entry->va, ret);
1245		return ret;
1246	}
1247
1248	if (no_update_pte)
1249		return 0;
1250
1251	ret = update_gpuvm_pte(mem, entry, sync);
1252	if (ret) {
1253		pr_err("update_gpuvm_pte() failed\n");
1254		goto update_gpuvm_pte_failed;
1255	}
1256
1257	return 0;
1258
1259update_gpuvm_pte_failed:
1260	unmap_bo_from_gpuvm(mem, entry, sync);
1261	return ret;
1262}
1263
1264static int process_validate_vms(struct amdkfd_process_info *process_info)
1265{
1266	struct amdgpu_vm *peer_vm;
1267	int ret;
1268
1269	list_for_each_entry(peer_vm, &process_info->vm_list_head,
1270			    vm_list_node) {
1271		ret = vm_validate_pt_pd_bos(peer_vm);
1272		if (ret)
1273			return ret;
1274	}
1275
1276	return 0;
1277}
1278
1279static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
1280				 struct amdgpu_sync *sync)
1281{
1282	struct amdgpu_vm *peer_vm;
1283	int ret;
1284
1285	list_for_each_entry(peer_vm, &process_info->vm_list_head,
1286			    vm_list_node) {
1287		struct amdgpu_bo *pd = peer_vm->root.bo;
1288
1289		ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
1290				       AMDGPU_SYNC_NE_OWNER,
1291				       AMDGPU_FENCE_OWNER_KFD);
1292		if (ret)
1293			return ret;
1294	}
1295
1296	return 0;
1297}
1298
1299static int process_update_pds(struct amdkfd_process_info *process_info,
1300			      struct amdgpu_sync *sync)
1301{
1302	struct amdgpu_vm *peer_vm;
1303	int ret;
1304
1305	list_for_each_entry(peer_vm, &process_info->vm_list_head,
1306			    vm_list_node) {
1307		ret = vm_update_pds(peer_vm, sync);
1308		if (ret)
1309			return ret;
1310	}
1311
1312	return 0;
1313}
1314
/* Set up the KFD side of a VM.
 *
 * @vm: VM to initialize.
 * @process_info: in/out. When *process_info is NULL a new
 *                amdkfd_process_info (locks, lists, eviction fence, pid,
 *                restore worker) is created and stored there; otherwise
 *                the existing one is attached to @vm.
 * @ef: out. Receives an extra reference to the eviction fence, but only
 *      when a new process_info was created by this call.
 *
 * After that the root page directory is reserved, validated, waited on
 * and given the process' eviction fence, and the VM is linked into
 * process_info->vm_list_head.
 *
 * Returns 0 on success. On failure vm->process_info is reset to NULL and
 * a process_info created by this call is destroyed again (with
 * *process_info and *ef cleared).
 */
static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
		       struct dma_fence **ef)
{
	/* Non-NULL only when this call allocated the process info */
	struct amdkfd_process_info *info = NULL;
	int ret;

	if (!*process_info) {
		info = kzalloc(sizeof(*info), GFP_KERNEL);
		if (!info)
			return -ENOMEM;

		rw_init(&info->lock, "aginfo");
		rw_init(&info->notifier_lock, "aginfn");
		INIT_LIST_HEAD(&info->vm_list_head);
		INIT_LIST_HEAD(&info->kfd_bo_list);
		INIT_LIST_HEAD(&info->userptr_valid_list);
		INIT_LIST_HEAD(&info->userptr_inval_list);

		info->eviction_fence =
			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
						   current->mm,
						   NULL);
		if (!info->eviction_fence) {
			pr_err("Failed to create eviction fence\n");
			ret = -ENOMEM;
			goto create_evict_fence_fail;
		}

		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
		INIT_DELAYED_WORK(&info->restore_userptr_work,
				  amdgpu_amdkfd_restore_userptr_worker);

		*process_info = info;
		*ef = dma_fence_get(&info->eviction_fence->base);
	}

	vm->process_info = *process_info;

	/* Validate page directory and attach eviction fence */
	ret = amdgpu_bo_reserve(vm->root.bo, true);
	if (ret)
		goto reserve_pd_fail;
	ret = vm_validate_pt_pd_bos(vm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto validate_pd_fail;
	}
	ret = amdgpu_bo_sync_wait(vm->root.bo,
				  AMDGPU_FENCE_OWNER_KFD, false);
	if (ret)
		goto wait_pd_fail;
	/* Make room for one fence before adding the eviction fence */
	ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
	if (ret)
		goto reserve_shared_fail;
	dma_resv_add_fence(vm->root.bo->tbo.base.resv,
			   &vm->process_info->eviction_fence->base,
			   DMA_RESV_USAGE_BOOKKEEP);
	amdgpu_bo_unreserve(vm->root.bo);

	/* Update process info */
	mutex_lock(&vm->process_info->lock);
	list_add_tail(&vm->vm_list_node,
			&(vm->process_info->vm_list_head));
	vm->process_info->n_vms++;
	mutex_unlock(&vm->process_info->lock);

	return 0;

	/* All three failures happen with the root PD reserved */
reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
	amdgpu_bo_unreserve(vm->root.bo);
reserve_pd_fail:
	vm->process_info = NULL;
	if (info) {
		/* Two fence references: one in info and one in *ef */
		dma_fence_put(&info->eviction_fence->base);
		dma_fence_put(*ef);
		*ef = NULL;
		*process_info = NULL;
		put_pid(info->pid);
		/* Jump target inside the if-body: reached directly when the
		 * fence could not be created, where info is non-NULL and
		 * neither fence references nor a pid have been taken yet.
		 */
create_evict_fence_fail:
		mutex_destroy(&info->lock);
		mutex_destroy(&info->notifier_lock);
		kfree(info);
	}
	return ret;
}
1403
1404/**
1405 * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria
1406 * @bo: Handle of buffer object being pinned
1407 * @domain: Domain into which BO should be pinned
1408 *
1409 *   - USERPTR BOs are UNPINNABLE and will return error
1410 *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
1411 *     PIN count incremented. It is valid to PIN a BO multiple times
1412 *
1413 * Return: ZERO if successful in pinning, Non-Zero in case of error.
1414 */
1415static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
1416{
1417	int ret = 0;
1418
1419	ret = amdgpu_bo_reserve(bo, false);
1420	if (unlikely(ret))
1421		return ret;
1422
1423	ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
1424	if (ret)
1425		pr_err("Error in Pinning BO to domain: %d\n", domain);
1426
1427	amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
1428	amdgpu_bo_unreserve(bo);
1429
1430	return ret;
1431}
1432
1433/**
1434 * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria
1435 * @bo: Handle of buffer object being unpinned
1436 *
1437 *   - Is a illegal request for USERPTR BOs and is ignored
1438 *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
1439 *     PIN count decremented. Calls to UNPIN must balance calls to PIN
1440 */
1441static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
1442{
1443	int ret = 0;
1444
1445	ret = amdgpu_bo_reserve(bo, false);
1446	if (unlikely(ret))
1447		return;
1448
1449	amdgpu_bo_unpin(bo);
1450	amdgpu_bo_unreserve(bo);
1451}
1452
1453int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
1454				     struct amdgpu_vm *avm, u32 pasid)
1455
1456{
1457	int ret;
1458
1459	/* Free the original amdgpu allocated pasid,
1460	 * will be replaced with kfd allocated pasid.
1461	 */
1462	if (avm->pasid) {
1463		amdgpu_pasid_free(avm->pasid);
1464		amdgpu_vm_set_pasid(adev, avm, 0);
1465	}
1466
1467	ret = amdgpu_vm_set_pasid(adev, avm, pasid);
1468	if (ret)
1469		return ret;
1470
1471	return 0;
1472}
1473
1474int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
1475					   struct amdgpu_vm *avm,
1476					   void **process_info,
1477					   struct dma_fence **ef)
1478{
1479	int ret;
1480
1481	/* Already a compute VM? */
1482	if (avm->process_info)
1483		return -EINVAL;
1484
1485	/* Convert VM into a compute VM */
1486	ret = amdgpu_vm_make_compute(adev, avm);
1487	if (ret)
1488		return ret;
1489
1490	/* Initialize KFD part of the VM and process info */
1491	ret = init_kfd_vm(avm, process_info, ef);
1492	if (ret)
1493		return ret;
1494
1495	amdgpu_vm_set_task_info(avm);
1496
1497	return 0;
1498}
1499
1500void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
1501				    struct amdgpu_vm *vm)
1502{
1503	struct amdkfd_process_info *process_info = vm->process_info;
1504
1505	if (!process_info)
1506		return;
1507
1508	/* Update process info */
1509	mutex_lock(&process_info->lock);
1510	process_info->n_vms--;
1511	list_del(&vm->vm_list_node);
1512	mutex_unlock(&process_info->lock);
1513
1514	vm->process_info = NULL;
1515
1516	/* Release per-process resources when last compute VM is destroyed */
1517	if (!process_info->n_vms) {
1518		WARN_ON(!list_empty(&process_info->kfd_bo_list));
1519		WARN_ON(!list_empty(&process_info->userptr_valid_list));
1520		WARN_ON(!list_empty(&process_info->userptr_inval_list));
1521
1522		dma_fence_put(&process_info->eviction_fence->base);
1523		cancel_delayed_work_sync(&process_info->restore_userptr_work);
1524		put_pid(process_info->pid);
1525		mutex_destroy(&process_info->lock);
1526		mutex_destroy(&process_info->notifier_lock);
1527		kfree(process_info);
1528	}
1529}
1530
/* Release the compute VM behind @drm_priv.
 *
 * Warns and returns when either argument is NULL; otherwise hands the
 * VM to amdgpu_vm_release_compute().
 */
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
					    void *drm_priv)
{
	struct amdgpu_vm *compute_vm;

	if (WARN_ON(!adev || !drm_priv))
		return;

	compute_vm = drm_priv_to_vm(drm_priv);
	pr_debug("Releasing process vm %p\n", compute_vm);

	/* Per the original author: the amdgpu-allocated pasid was already
	 * released when the VM became a compute VM, and the current pasid
	 * belongs to KFD, which releases it on process destroy. The amdgpu
	 * pasid is therefore set to 0 to avoid a duplicate release.
	 * NOTE(review): that reset presumably happens inside
	 * amdgpu_vm_release_compute() — confirm.
	 */
	amdgpu_vm_release_compute(adev, compute_vm);
}
1551
1552uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
1553{
1554	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
1555	struct amdgpu_bo *pd = avm->root.bo;
1556	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
1557
1558	if (adev->asic_type < CHIP_VEGA10)
1559		return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1560	return avm->pd_phys_addr;
1561}
1562
1563void amdgpu_amdkfd_block_mmu_notifications(void *p)
1564{
1565	struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
1566
1567	mutex_lock(&pinfo->lock);
1568	WRITE_ONCE(pinfo->block_mmu_notifications, true);
1569	mutex_unlock(&pinfo->lock);
1570}
1571
1572int amdgpu_amdkfd_criu_resume(void *p)
1573{
1574	int ret = 0;
1575	struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
1576
1577	mutex_lock(&pinfo->lock);
1578	pr_debug("scheduling work\n");
1579	mutex_lock(&pinfo->notifier_lock);
1580	pinfo->evicted_bos++;
1581	mutex_unlock(&pinfo->notifier_lock);
1582	if (!READ_ONCE(pinfo->block_mmu_notifications)) {
1583		ret = -EINVAL;
1584		goto out_unlock;
1585	}
1586	WRITE_ONCE(pinfo->block_mmu_notifications, false);
1587	schedule_delayed_work(&pinfo->restore_userptr_work, 0);
1588
1589out_unlock:
1590	mutex_unlock(&pinfo->lock);
1591	return ret;
1592}
1593
1594size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
1595					  uint8_t xcp_id)
1596{
1597	uint64_t reserved_for_pt =
1598		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
1599	ssize_t available;
1600	uint64_t vram_available, system_mem_available, ttm_mem_available;
1601
1602	spin_lock(&kfd_mem_limit.mem_limit_lock);
1603	vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
1604		- adev->kfd.vram_used_aligned[xcp_id]
1605		- atomic64_read(&adev->vram_pin_size)
1606		- reserved_for_pt;
1607
1608	if (adev->gmc.is_app_apu) {
1609		system_mem_available = no_system_mem_limit ?
1610					kfd_mem_limit.max_system_mem_limit :
1611					kfd_mem_limit.max_system_mem_limit -
1612					kfd_mem_limit.system_mem_used;
1613
1614		ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
1615				kfd_mem_limit.ttm_mem_used;
1616
1617		available = min3(system_mem_available, ttm_mem_available,
1618				 vram_available);
1619		available = ALIGN_DOWN(available, PAGE_SIZE);
1620	} else {
1621		available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
1622	}
1623
1624	spin_unlock(&kfd_mem_limit.mem_limit_lock);
1625
1626	if (available < 0)
1627		available = 0;
1628
1629	return available;
1630}
1631
/**
 * amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu() - allocate a KFD buffer object
 * @adev: device to allocate on
 * @va: GPU virtual address recorded in the new kgd_mem
 * @size: requested size in bytes; halved internally for AQL queue memory
 * @drm_priv: DRM file private data; identifies the VM and is granted
 *            access to the BO's vma node
 * @mem: out, receives the newly allocated kgd_mem
 * @offset: in/out. On input carries the user address (USERPTR) or the
 *          address handed to create_sg_table() (DOORBELL / MMIO_REMAP);
 *          on success, if non-NULL, receives the BO's mmap offset
 * @flags: KFD_IOC_ALLOC_MEM_FLAGS_* selecting domain and attributes
 * @criu_resume: forwarded to init_user_pages() for userptr BOs
 *
 * Steps: choose domain and creation flags from @flags, allocate the
 * kgd_mem, reserve against the KFD memory limit, create the GEM object,
 * link BO and kgd_mem, put the kgd_mem on the process validation list
 * and finally either initialize user pages (userptr) or pin the BO in
 * GTT (doorbell / MMIO remap).
 *
 * Return: 0 on success, negative errno on failure.
 */
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct amdgpu_device *adev, uint64_t va, uint64_t size,
		void *drm_priv, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags, bool criu_resume)
{
	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
	struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
	enum ttm_bo_type bo_type = ttm_bo_type_device;
	struct sg_table *sg = NULL;
	uint64_t user_addr = 0;
	struct amdgpu_bo *bo;
	struct drm_gem_object *gobj = NULL;
	/* domain: where the BO is validated when mapped (stored in
	 * (*mem)->domain); alloc_domain: where it is initially created.
	 */
	u32 domain, alloc_domain;
	uint64_t aligned_size;
	/* Stays -1 unless this is a VRAM allocation */
	int8_t xcp_id = -1;
	u64 alloc_flags;
	int ret;

	/*
	 * Check on which domain to allocate BO
	 */
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;

		/* On an app APU, VRAM requests are served from GTT */
		if (adev->gmc.is_app_apu) {
			domain = AMDGPU_GEM_DOMAIN_GTT;
			alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
			alloc_flags = 0;
		} else {
			alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
			alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
		}
		xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
					0 : fpriv->xcp_id;
	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_flags = 0;
	} else {
		/* Neither VRAM nor GTT: must be userptr, doorbell or
		 * MMIO remap; anything else is rejected below.
		 */
		domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
		alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;

		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
			if (!offset || !*offset)
				return -EINVAL;
			user_addr = untagged_addr(*offset);
		} else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
				    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
			bo_type = ttm_bo_type_sg;
			if (size > UINT_MAX)
				return -EINVAL;
			/* NOTE(review): *offset is dereferenced here without
			 * the NULL check used on the userptr path — confirm
			 * callers always pass a valid pointer.
			 */
			sg = create_sg_table(*offset, size);
			if (!sg)
				return -ENOMEM;
		} else {
			return -EINVAL;
		}
	}

	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
		alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
		alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem) {
		ret = -ENOMEM;
		goto err;
	}
	INIT_LIST_HEAD(&(*mem)->attachments);
	rw_init(&(*mem)->lock, "gpuvma");
	(*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);

	/* Workaround for AQL queue wraparound bug. Map the same
	 * memory twice. That means we only actually allocate half
	 * the memory.
	 */
	if ((*mem)->aql_queue)
		size >>= 1;
	aligned_size = PAGE_ALIGN(size);

	(*mem)->alloc_flags = flags;

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
					      xcp_id);
	if (ret) {
		pr_debug("Insufficient memory\n");
		goto err_reserve_limit;
	}

	/* Log the size as the caller requested it (before halving) */
	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
		 va, (*mem)->aql_queue ? size << 1 : size,
		 domain_string(alloc_domain), xcp_id);

	ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
				       bo_type, NULL, &gobj, xcp_id + 1);
	if (ret) {
		pr_debug("Failed to create BO on domain %s. ret %d\n",
			 domain_string(alloc_domain), ret);
		goto err_bo_create;
	}
	ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
	if (ret) {
		pr_debug("Failed to allow vma node access. ret %d\n", ret);
		goto err_node_allow;
	}
	bo = gem_to_amdgpu_bo(gobj);
	if (bo_type == ttm_bo_type_sg) {
		bo->tbo.sg = sg;
		bo->tbo.ttm->sg = sg;
	}
	/* Cross-link BO and kgd_mem */
	bo->kfd_bo = *mem;
	(*mem)->bo = bo;
	if (user_addr)
		bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;

	(*mem)->va = va;
	(*mem)->domain = domain;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = avm->process_info;

	/* Must be on the validation list before init_user_pages() runs;
	 * a non-zero user_addr selects the userptr list.
	 */
	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);

	if (user_addr) {
		pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
		ret = init_user_pages(*mem, user_addr, criu_resume);
		if (ret)
			goto allocate_init_user_pages_failed;
	} else  if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
		ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
		if (ret) {
			pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
			goto err_pin_bo;
		}
		bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
		bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
	}

	if (offset)
		*offset = amdgpu_bo_mmap_offset(bo);

	return 0;

allocate_init_user_pages_failed:
err_pin_bo:
	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
	/* Don't unreserve system mem limit twice */
	/* NOTE(review): this skips the explicit unreserve and relies on
	 * the drm_gem_object_put() below; when arriving via
	 * err_node_allow, bo->kfd_bo has not been assigned yet — confirm
	 * that *mem and the reserved limit are still released then.
	 */
	goto err_reserve_limit;
err_bo_create:
	amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
	amdgpu_sync_free(&(*mem)->sync);
	mutex_destroy(&(*mem)->lock);
	/* With a GEM object only the reference is dropped here; *mem is
	 * freed directly only when no GEM object exists.
	 */
	if (gobj)
		drm_gem_object_put(gobj);
	else
		kfree(*mem);
err:
	if (sg) {
		sg_free_table(sg);
		kfree(sg);
	}
	return ret;
}
1802
/**
 * amdgpu_amdkfd_gpuvm_free_memory_of_gpu() - release a KFD buffer object
 * @adev: device the BO belongs to
 * @mem: kgd_mem to release
 * @drm_priv: DRM file private data whose vma node access is revoked
 * @size: optional out. Receives the BO size when the BO is not imported
 *        and its preferred domain is VRAM (or GTT on an app APU);
 *        otherwise receives 0
 *
 * Return: 0 on success, -EBUSY while the BO is still mapped to a GPU, or
 * the error from reserving the BO and its VMs.
 */
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
		uint64_t *size)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	unsigned long bo_size = mem->bo->tbo.base.size;
	/* True when the BO points back at this kgd_mem; sampled now
	 * because mem must not be touched after the final put below.
	 */
	bool use_release_notifier = (mem->bo->kfd_bo == mem);
	struct kfd_mem_attachment *entry, *tmp;
	struct bo_vm_reservation_context ctx;
	unsigned int mapped_to_gpu_memory;
	int ret;
	bool is_imported = false;

	mutex_lock(&mem->lock);

	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
	/* NOTE(review): this happens before the "still mapped" check
	 * below, so a call that ends in -EBUSY has already dropped the
	 * pin — confirm this is intended.
	 */
	if (mem->alloc_flags &
	    (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
	     KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
		amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
	}

	mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
	is_imported = mem->is_imported;
	mutex_unlock(&mem->lock);
	/* lock is not needed after this, since mem is unused and will
	 * be freed anyway
	 */

	if (mapped_to_gpu_memory > 0) {
		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
				mem->va, bo_size);
		return -EBUSY;
	}

	/* Make sure restore workers don't access the BO any more */
	mutex_lock(&process_info->lock);
	list_del(&mem->validate_list);
	mutex_unlock(&process_info->lock);

	/* Cleanup user pages and MMU notifiers */
	if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
		amdgpu_hmm_unregister(mem->bo);
		mutex_lock(&process_info->notifier_lock);
		amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
		mutex_unlock(&process_info->notifier_lock);
	}

	/* NOTE(review): on failure here mem has already been removed
	 * from the validation list and its notifier unregistered —
	 * confirm callers cope with that.
	 */
	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;

	/* The eviction fence should be removed by the last unmap.
	 * TODO: Log an error condition if the bo still has the eviction fence
	 * attached
	 */
	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
					process_info->eviction_fence);
	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
		mem->va + bo_size * (1 + mem->aql_queue));

	/* Remove from VM internal data structures */
	/* _safe iteration: kfd_mem_detach() unlinks and frees each entry */
	list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
		kfd_mem_detach(entry);

	ret = unreserve_bo_and_vms(&ctx, false, false);

	/* Free the sync object */
	amdgpu_sync_free(&mem->sync);

	/* If the SG is not NULL, it's one we created for a doorbell or mmio
	 * remap BO. We need to free it.
	 */
	if (mem->bo->tbo.sg) {
		sg_free_table(mem->bo->tbo.sg);
		kfree(mem->bo->tbo.sg);
	}

	/* Update the size of the BO being freed if it was allocated from
	 * VRAM and is not imported. For APP APU VRAM allocations are done
	 * in GTT domain
	 */
	if (size) {
		if (!is_imported &&
		   (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
		   (adev->gmc.is_app_apu &&
		    mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
			*size = bo_size;
		else
			*size = 0;
	}

	/* Free the BO */
	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
	if (mem->dmabuf)
		dma_buf_put(mem->dmabuf);
	mutex_destroy(&mem->lock);

	/* If this releases the last reference, it will end up calling
	 * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
	 * this needs to be the last call here.
	 */
	drm_gem_object_put(&mem->bo->tbo.base);

	/*
	 * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(),
	 * explicitly free it here.
	 */
	if (!use_release_notifier)
		kfree(mem);

	return ret;
}
1916
/**
 * amdgpu_amdkfd_gpuvm_map_memory_to_gpu() - map a KFD BO into a VM
 * @adev: device whose VM receives the mapping
 * @mem: KFD BO structure to map
 * @drm_priv: DRM file private data identifying the target VM
 *
 * Attaches the BO to the VM if it is not attached yet, reserves BO and
 * VM, validates the page tables (and, on first mapping of a non-userptr
 * BO, the BO itself), then maps every not-yet-mapped attachment that
 * belongs to the VM. For userptr BOs that are invalid or still in the
 * system domain only the VA is assigned; writing the PTEs is left to
 * the restore worker.
 *
 * Holds mem->process_info->lock and mem->lock (taken in that order) for
 * the whole operation.
 *
 * Return: 0 on success, negative errno on failure.
 */
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct amdgpu_device *adev, struct kgd_mem *mem,
		void *drm_priv)
{
	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
	int ret;
	struct amdgpu_bo *bo;
	uint32_t domain;
	struct kfd_mem_attachment *entry;
	struct bo_vm_reservation_context ctx;
	unsigned long bo_size;
	bool is_invalid_userptr = false;

	bo = mem->bo;
	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		return -EINVAL;
	}

	/* Make sure restore is not running concurrently. Since we
	 * don't map invalid userptr BOs, we rely on the next restore
	 * worker to do the mapping
	 */
	mutex_lock(&mem->process_info->lock);

	/* Lock notifier lock. If we find an invalid userptr BO, we can be
	 * sure that the MMU notifier is no longer running
	 * concurrently and the queues are actually stopped
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		mutex_lock(&mem->process_info->notifier_lock);
		is_invalid_userptr = !!mem->invalid;
		mutex_unlock(&mem->process_info->notifier_lock);
	}

	mutex_lock(&mem->lock);

	domain = mem->domain;
	bo_size = bo->tbo.base.size;

	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
			mem->va,
			mem->va + bo_size * (1 + mem->aql_queue),
			avm, domain_string(domain));

	/* First use of this BO in this VM: create the attachment(s) */
	if (!kfd_mem_is_attached(avm, mem)) {
		ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
		if (ret)
			goto out;
	}

	ret = reserve_bo_and_vm(mem, avm, &ctx);
	if (unlikely(ret))
		goto out;

	/* Userptr can be marked as "not invalid", but not actually be
	 * validated yet (still in the system domain). In that case
	 * the queues are still stopped and we can leave mapping for
	 * the next restore worker
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
	    bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
		is_invalid_userptr = true;

	ret = vm_validate_pt_pd_bos(avm);
	if (unlikely(ret))
		goto out_unreserve;

	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
		 */
		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
		if (ret) {
			pr_debug("Validate failed\n");
			goto out_unreserve;
		}
	}

	list_for_each_entry(entry, &mem->attachments, list) {
		/* Only attachments of this VM that are not mapped yet */
		if (entry->bo_va->base.vm != avm || entry->is_mapped)
			continue;

		pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
			 entry->va, entry->va + bo_size, entry);

		/* is_invalid_userptr is passed as no_update_pte */
		ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
				      is_invalid_userptr);
		if (ret) {
			pr_err("Failed to map bo to gpuvm\n");
			goto out_unreserve;
		}

		ret = vm_update_pds(avm, ctx.sync);
		if (ret) {
			pr_err("Failed to update page directories\n");
			goto out_unreserve;
		}

		entry->is_mapped = true;
		mem->mapped_to_gpu_memory++;
		pr_debug("\t INC mapping count %d\n",
			 mem->mapped_to_gpu_memory);
	}

	/* Userptr BOs and pinned BOs do not get the eviction fence */
	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
		dma_resv_add_fence(bo->tbo.base.resv,
				   &avm->process_info->eviction_fence->base,
				   DMA_RESV_USAGE_BOOKKEEP);
	ret = unreserve_bo_and_vms(&ctx, false, false);

	goto out;

out_unreserve:
	/* Error path: keep the original error, ignore the unreserve result */
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->process_info->lock);
	mutex_unlock(&mem->lock);
	return ret;
}
2039
2040int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
2041		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
2042{
2043	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
2044	struct amdkfd_process_info *process_info = avm->process_info;
2045	unsigned long bo_size = mem->bo->tbo.base.size;
2046	struct kfd_mem_attachment *entry;
2047	struct bo_vm_reservation_context ctx;
2048	int ret;
2049
2050	mutex_lock(&mem->lock);
2051
2052	ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
2053	if (unlikely(ret))
2054		goto out;
2055	/* If no VMs were reserved, it means the BO wasn't actually mapped */
2056	if (ctx.n_vms == 0) {
2057		ret = -EINVAL;
2058		goto unreserve_out;
2059	}
2060
2061	ret = vm_validate_pt_pd_bos(avm);
2062	if (unlikely(ret))
2063		goto unreserve_out;
2064
2065	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
2066		mem->va,
2067		mem->va + bo_size * (1 + mem->aql_queue),
2068		avm);
2069
2070	list_for_each_entry(entry, &mem->attachments, list) {
2071		if (entry->bo_va->base.vm != avm || !entry->is_mapped)
2072			continue;
2073
2074		pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
2075			 entry->va, entry->va + bo_size, entry);
2076
2077		unmap_bo_from_gpuvm(mem, entry, ctx.sync);
2078		entry->is_mapped = false;
2079
2080		mem->mapped_to_gpu_memory--;
2081		pr_debug("\t DEC mapping count %d\n",
2082			 mem->mapped_to_gpu_memory);
2083	}
2084
2085	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
2086	 * required.
2087	 */
2088	if (mem->mapped_to_gpu_memory == 0 &&
2089	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
2090	    !mem->bo->tbo.pin_count)
2091		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
2092						process_info->eviction_fence);
2093
2094unreserve_out:
2095	unreserve_bo_and_vms(&ctx, false, false);
2096out:
2097	mutex_unlock(&mem->lock);
2098	return ret;
2099}
2100
2101int amdgpu_amdkfd_gpuvm_sync_memory(
2102		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr)
2103{
2104	struct amdgpu_sync sync;
2105	int ret;
2106
2107	amdgpu_sync_create(&sync);
2108
2109	mutex_lock(&mem->lock);
2110	amdgpu_sync_clone(&mem->sync, &sync);
2111	mutex_unlock(&mem->lock);
2112
2113	ret = amdgpu_sync_wait(&sync, intr);
2114	amdgpu_sync_free(&sync);
2115	return ret;
2116}
2117
2118/**
2119 * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
2120 * @adev: Device to which allocated BO belongs
2121 * @bo: Buffer object to be mapped
2122 *
2123 * Before return, bo reference count is incremented. To release the reference and unpin/
2124 * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
2125 */
2126int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
2127{
2128	int ret;
2129
2130	ret = amdgpu_bo_reserve(bo, true);
2131	if (ret) {
2132		pr_err("Failed to reserve bo. ret %d\n", ret);
2133		goto err_reserve_bo_failed;
2134	}
2135
2136	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
2137	if (ret) {
2138		pr_err("Failed to pin bo. ret %d\n", ret);
2139		goto err_pin_bo_failed;
2140	}
2141
2142	ret = amdgpu_ttm_alloc_gart(&bo->tbo);
2143	if (ret) {
2144		pr_err("Failed to bind bo to GART. ret %d\n", ret);
2145		goto err_map_bo_gart_failed;
2146	}
2147
2148	amdgpu_amdkfd_remove_eviction_fence(
2149		bo, bo->vm_bo->vm->process_info->eviction_fence);
2150
2151	amdgpu_bo_unreserve(bo);
2152
2153	bo = amdgpu_bo_ref(bo);
2154
2155	return 0;
2156
2157err_map_bo_gart_failed:
2158	amdgpu_bo_unpin(bo);
2159err_pin_bo_failed:
2160	amdgpu_bo_unreserve(bo);
2161err_reserve_bo_failed:
2162
2163	return ret;
2164}
2165
2166/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
2167 *
2168 * @mem: Buffer object to be mapped for CPU access
2169 * @kptr[out]: pointer in kernel CPU address space
2170 * @size[out]: size of the buffer
2171 *
2172 * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
2173 * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
2174 * validate_list, so the GPU mapping can be restored after a page table was
2175 * evicted.
2176 *
2177 * Return: 0 on success, error code on failure
2178 */
2179int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
2180					     void **kptr, uint64_t *size)
2181{
2182	int ret;
2183	struct amdgpu_bo *bo = mem->bo;
2184
2185	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
2186		pr_err("userptr can't be mapped to kernel\n");
2187		return -EINVAL;
2188	}
2189
2190	mutex_lock(&mem->process_info->lock);
2191
2192	ret = amdgpu_bo_reserve(bo, true);
2193	if (ret) {
2194		pr_err("Failed to reserve bo. ret %d\n", ret);
2195		goto bo_reserve_failed;
2196	}
2197
2198	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
2199	if (ret) {
2200		pr_err("Failed to pin bo. ret %d\n", ret);
2201		goto pin_failed;
2202	}
2203
2204	ret = amdgpu_bo_kmap(bo, kptr);
2205	if (ret) {
2206		pr_err("Failed to map bo to kernel. ret %d\n", ret);
2207		goto kmap_failed;
2208	}
2209
2210	amdgpu_amdkfd_remove_eviction_fence(
2211		bo, mem->process_info->eviction_fence);
2212
2213	if (size)
2214		*size = amdgpu_bo_size(bo);
2215
2216	amdgpu_bo_unreserve(bo);
2217
2218	mutex_unlock(&mem->process_info->lock);
2219	return 0;
2220
2221kmap_failed:
2222	amdgpu_bo_unpin(bo);
2223pin_failed:
2224	amdgpu_bo_unreserve(bo);
2225bo_reserve_failed:
2226	mutex_unlock(&mem->process_info->lock);
2227
2228	return ret;
2229}
2230
2231/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access
2232 *
2233 * @mem: Buffer object to be unmapped for CPU access
2234 *
2235 * Removes the kernel CPU mapping and unpins the BO. It does not restore the
2236 * eviction fence, so this function should only be used for cleanup before the
2237 * BO is destroyed.
2238 */
2239void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
2240{
2241	struct amdgpu_bo *bo = mem->bo;
2242
2243	amdgpu_bo_reserve(bo, true);
2244	amdgpu_bo_kunmap(bo);
2245	amdgpu_bo_unpin(bo);
2246	amdgpu_bo_unreserve(bo);
2247}
2248
2249int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
2250					  struct kfd_vm_fault_info *mem)
2251{
2252	if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
2253		*mem = *adev->gmc.vm_fault_info;
2254		mb(); /* make sure read happened */
2255		atomic_set(&adev->gmc.vm_fault_info_updated, 0);
2256	}
2257	return 0;
2258}
2259
2260int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
2261				      struct dma_buf *dma_buf,
2262				      uint64_t va, void *drm_priv,
2263				      struct kgd_mem **mem, uint64_t *size,
2264				      uint64_t *mmap_offset)
2265{
2266	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
2267	struct drm_gem_object *obj;
2268	struct amdgpu_bo *bo;
2269	int ret;
2270
2271	obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
2272	if (IS_ERR(obj))
2273		return PTR_ERR(obj);
2274
2275	bo = gem_to_amdgpu_bo(obj);
2276	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
2277				    AMDGPU_GEM_DOMAIN_GTT))) {
2278		/* Only VRAM and GTT BOs are supported */
2279		ret = -EINVAL;
2280		goto err_put_obj;
2281	}
2282
2283	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2284	if (!*mem) {
2285		ret = -ENOMEM;
2286		goto err_put_obj;
2287	}
2288
2289	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
2290	if (ret)
2291		goto err_free_mem;
2292
2293	if (size)
2294		*size = amdgpu_bo_size(bo);
2295
2296	if (mmap_offset)
2297		*mmap_offset = amdgpu_bo_mmap_offset(bo);
2298
2299	INIT_LIST_HEAD(&(*mem)->attachments);
2300	rw_init(&(*mem)->lock, "gpuvmi");
2301
2302	(*mem)->alloc_flags =
2303		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
2304		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
2305		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
2306		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
2307
2308	get_dma_buf(dma_buf);
2309	(*mem)->dmabuf = dma_buf;
2310	(*mem)->bo = bo;
2311	(*mem)->va = va;
2312	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
2313		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
2314
2315	(*mem)->mapped_to_gpu_memory = 0;
2316	(*mem)->process_info = avm->process_info;
2317	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
2318	amdgpu_sync_create(&(*mem)->sync);
2319	(*mem)->is_imported = true;
2320
2321	return 0;
2322
2323err_free_mem:
2324	kfree(*mem);
2325err_put_obj:
2326	drm_gem_object_put(obj);
2327	return ret;
2328}
2329
2330int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
2331				      struct dma_buf **dma_buf)
2332{
2333	int ret;
2334
2335	mutex_lock(&mem->lock);
2336	ret = kfd_mem_export_dmabuf(mem);
2337	if (ret)
2338		goto out;
2339
2340	get_dma_buf(mem->dmabuf);
2341	*dma_buf = mem->dmabuf;
2342out:
2343	mutex_unlock(&mem->lock);
2344	return ret;
2345}
2346
2347/* Evict a userptr BO by stopping the queues if necessary
2348 *
2349 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
2350 * cannot do any memory allocations, and cannot take any locks that
2351 * are held elsewhere while allocating memory.
2352 *
2353 * It doesn't do anything to the BO itself. The real work happens in
2354 * restore, where we get updated page addresses. This function only
2355 * ensures that GPU access to the BO is stopped.
2356 */
2357int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
2358				unsigned long cur_seq, struct kgd_mem *mem)
2359{
2360	struct amdkfd_process_info *process_info = mem->process_info;
2361	int r = 0;
2362
2363	/* Do not process MMU notifications during CRIU restore until
2364	 * KFD_CRIU_OP_RESUME IOCTL is received
2365	 */
2366	if (READ_ONCE(process_info->block_mmu_notifications))
2367		return 0;
2368
2369	mutex_lock(&process_info->notifier_lock);
2370	mmu_interval_set_seq(mni, cur_seq);
2371
2372	mem->invalid++;
2373	if (++process_info->evicted_bos == 1) {
2374		/* First eviction, stop the queues */
2375		r = kgd2kfd_quiesce_mm(mni->mm,
2376				       KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
2377		if (r)
2378			pr_err("Failed to quiesce KFD\n");
2379		schedule_delayed_work(&process_info->restore_userptr_work,
2380			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
2381	}
2382	mutex_unlock(&process_info->notifier_lock);
2383
2384	return r;
2385}
2386
/* Update invalid userptr BOs
 *
 * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
 * userptr_inval_list and updates user pages for all BOs that have
 * been invalidated since their last update.
 *
 * @process_info: process whose userptr lists are walked
 * @mm: not referenced by this function's body
 *
 * The notifier lock is taken on entry and dropped temporarily around the
 * BO reservation and the user page lookup of each BO. The early error
 * returns in that window happen with the notifier lock already released.
 *
 * Returns 0 on success (a -EFAULT from the page lookup is reported as
 * success), -EAGAIN if a BO could not be reserved or moved to the system
 * domain or was invalidated again concurrently, or any other error from
 * amdgpu_ttm_tt_get_user_pages().
 */
static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
				     struct mm_struct *mm)
{
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	uint32_t invalid;
	int ret = 0;

	mutex_lock(&process_info->notifier_lock);

	/* Move all invalidated BOs to the userptr_inval_list */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_valid_list,
				 validate_list)
		if (mem->invalid)
			list_move_tail(&mem->validate_list,
				       &process_info->userptr_inval_list);

	/* Go through userptr_inval_list and update any invalid user_pages */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list) {
		/* Snapshot the counter so a concurrent invalidation can be
		 * detected after the lock has been dropped and retaken.
		 */
		invalid = mem->invalid;
		if (!invalid)
			/* BO hasn't been invalidated since the last
			 * revalidation attempt. Keep its page list.
			 */
			continue;

		bo = mem->bo;

		/* Drop the range from the previous lookup before a new one is
		 * requested further down.
		 */
		amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
		mem->range = NULL;

		/* BO reservations and getting user pages (hmm_range_fault)
		 * must happen outside the notifier lock
		 */
		mutex_unlock(&process_info->notifier_lock);

		/* Move the BO to system (CPU) domain if necessary to unmap
		 * and free the SG table
		 */
		if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) {
			if (amdgpu_bo_reserve(bo, true))
				return -EAGAIN;
			amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			amdgpu_bo_unreserve(bo);
			if (ret) {
				pr_err("%s: Failed to invalidate userptr BO\n",
				       __func__);
				return -EAGAIN;
			}
		}

		/* Get updated user pages */
		ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
						   &mem->range);
		if (ret) {
			pr_debug("Failed %d to get user pages\n", ret);

			/* Return -EFAULT bad address error as success. It will
			 * fail later with a VM fault if the GPU tries to access
			 * it. Better than hanging indefinitely with stalled
			 * user mode queues.
			 *
			 * Return other error -EBUSY or -ENOMEM to retry restore
			 */
			if (ret != -EFAULT)
				return ret;

			ret = 0;
		}

		mutex_lock(&process_info->notifier_lock);

		/* Mark the BO as valid unless it was invalidated
		 * again concurrently.
		 */
		if (mem->invalid != invalid) {
			ret = -EAGAIN;
			goto unlock_out;
		}
		/* Set mem valid only if it has an hmm range associated */
		if (mem->range)
			mem->invalid = 0;
	}

unlock_out:
	mutex_unlock(&process_info->notifier_lock);

	return ret;
}
2486
/* Validate invalid userptr BOs
 *
 * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables
 * with new page addresses and waits for the page table updates to complete.
 *
 * All page directories of the process' VMs and all BOs on
 * userptr_inval_list are locked through one drm_exec context for the
 * duration of the call.
 *
 * Returns 0 on success or the first error encountered. On every exit path
 * the drm_exec context is finalized and the local sync object is waited on
 * and freed.
 */
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_sync sync;
	struct drm_exec exec;

	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	int ret;

	amdgpu_sync_create(&sync);

	drm_exec_init(&exec, 0);
	/* Reserve all BOs and page tables for validation */
	drm_exec_until_all_locked(&exec) {
		/* Reserve all the page directories */
		list_for_each_entry(peer_vm, &process_info->vm_list_head,
				    vm_list_node) {
			ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto unreserve_out;
		}

		/* Lock every BO on the userptr_inval_list in the same
		 * drm_exec context
		 */
		list_for_each_entry(mem, &process_info->userptr_inval_list,
				    validate_list) {
			struct drm_gem_object *gobj;

			gobj = &mem->bo->tbo.base;
			ret = drm_exec_prepare_obj(&exec, gobj, 1);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto unreserve_out;
		}
	}

	ret = process_validate_vms(process_info);
	if (ret)
		goto unreserve_out;

	/* Validate BOs and update GPUVM page tables */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list) {
		struct kfd_mem_attachment *attachment;

		bo = mem->bo;

		/* Validate the BO if we got user pages */
		if (bo->tbo.ttm->pages[0]) {
			amdgpu_bo_placement_from_domain(bo, mem->domain);
			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (ret) {
				pr_err("%s: failed to validate BO\n", __func__);
				goto unreserve_out;
			}
		}

		/* Update mapping. If the BO was not validated
		 * (because we couldn't get user pages), this will
		 * clear the page table entries, which will result in
		 * VM faults if the GPU tries to access the invalid
		 * memory.
		 */
		list_for_each_entry(attachment, &mem->attachments, list) {
			/* Only attachments that are currently mapped need
			 * their PTEs refreshed
			 */
			if (!attachment->is_mapped)
				continue;

			kfd_mem_dmaunmap_attachment(mem, attachment);
			ret = update_gpuvm_pte(mem, attachment, &sync);
			if (ret) {
				pr_err("%s: update PTE failed\n", __func__);
				/* make sure this gets validated again */
				mutex_lock(&process_info->notifier_lock);
				mem->invalid++;
				mutex_unlock(&process_info->notifier_lock);
				goto unreserve_out;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync);

unreserve_out:
	/* Common exit for success and failure; the result of the wait is
	 * not checked
	 */
	drm_exec_fini(&exec);
	amdgpu_sync_wait(&sync, false);
	amdgpu_sync_free(&sync);

	return ret;
}
2585
2586/* Confirm that all user pages are valid while holding the notifier lock
2587 *
2588 * Moves valid BOs from the userptr_inval_list back to userptr_val_list.
2589 */
2590static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info)
2591{
2592	struct kgd_mem *mem, *tmp_mem;
2593	int ret = 0;
2594
2595	list_for_each_entry_safe(mem, tmp_mem,
2596				 &process_info->userptr_inval_list,
2597				 validate_list) {
2598		bool valid;
2599
2600		/* keep mem without hmm range at userptr_inval_list */
2601		if (!mem->range)
2602			 continue;
2603
2604		/* Only check mem with hmm range associated */
2605		valid = amdgpu_ttm_tt_get_user_pages_done(
2606					mem->bo->tbo.ttm, mem->range);
2607
2608		mem->range = NULL;
2609		if (!valid) {
2610			WARN(!mem->invalid, "Invalid BO not marked invalid");
2611			ret = -EAGAIN;
2612			continue;
2613		}
2614
2615		if (mem->invalid) {
2616			WARN(1, "Valid BO is marked invalid");
2617			ret = -EAGAIN;
2618			continue;
2619		}
2620
2621		list_move_tail(&mem->validate_list,
2622			       &process_info->userptr_valid_list);
2623	}
2624
2625	return ret;
2626}
2627
/* Worker callback to restore evicted userptr BOs
 *
 * Tries to update and validate all userptr BOs. If successful and no
 * concurrent evictions happened, the queues are restarted. Otherwise,
 * reschedule for another attempt later.
 *
 * The local evicted_bos holds the eviction count sampled at entry. It is
 * reset to 0 only after everything succeeded, so a non-zero value at the
 * end of the function means "reschedule".
 */
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct amdkfd_process_info *process_info =
		container_of(dwork, struct amdkfd_process_info,
			     restore_userptr_work);
	struct task_struct *usertask;
	struct mm_struct *mm;
	uint32_t evicted_bos;

	/* Sample the eviction counter; nothing to do if it is zero */
	mutex_lock(&process_info->notifier_lock);
	evicted_bos = process_info->evicted_bos;
	mutex_unlock(&process_info->notifier_lock);
	if (!evicted_bos)
		return;

	/* Reference task and mm in case of concurrent process termination */
	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
	if (!usertask)
		return;
	mm = get_task_mm(usertask);
	if (!mm) {
		put_task_struct(usertask);
		return;
	}

	mutex_lock(&process_info->lock);

	if (update_invalid_user_pages(process_info, mm))
		goto unlock_out;
	/* userptr_inval_list can be empty if all evicted userptr BOs
	 * have been freed. In that case there is nothing to validate
	 * and we can just restart the queues.
	 */
	if (!list_empty(&process_info->userptr_inval_list)) {
		if (validate_invalid_user_pages(process_info))
			goto unlock_out;
	}
	/* Final check for concurrent eviction and atomic update. If
	 * another eviction happens after successful update, it will
	 * be a first eviction that calls quiesce_mm. The eviction
	 * reference counting inside KFD will handle this case.
	 */
	mutex_lock(&process_info->notifier_lock);
	if (process_info->evicted_bos != evicted_bos)
		goto unlock_notifier_out;

	if (confirm_valid_user_pages_locked(process_info)) {
		WARN(1, "User pages unexpectedly invalid");
		goto unlock_notifier_out;
	}

	/* Success: clear both the shared counter and the local copy, which
	 * suppresses the reschedule below
	 */
	process_info->evicted_bos = evicted_bos = 0;

	if (kgd2kfd_resume_mm(mm)) {
		pr_err("%s: Failed to resume KFD\n", __func__);
		/* No recovery from this failure. Probably the CP is
		 * hanging. No point trying again.
		 */
	}

unlock_notifier_out:
	mutex_unlock(&process_info->notifier_lock);
unlock_out:
	mutex_unlock(&process_info->lock);

	/* If validation failed, reschedule another attempt */
	if (evicted_bos) {
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));

		kfd_smi_event_queue_restore_rescheduled(mm);
	}
	mmput(mm);
	put_task_struct(usertask);
}
2710
2711/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
2712 *   KFD process identified by process_info
2713 *
2714 * @process_info: amdkfd_process_info of the KFD process
2715 *
2716 * After memory eviction, restore thread calls this function. The function
2717 * should be called when the Process is still valid. BO restore involves -
2718 *
2719 * 1.  Release old eviction fence and create new one
2720 * 2.  Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
2721 * 3   Use the second PD list and kfd_bo_list to create a list (ctx.list) of
2722 *     BOs that need to be reserved.
2723 * 4.  Reserve all the BOs
2724 * 5.  Validate of PD and PT BOs.
2725 * 6.  Validate all KFD BOs using kfd_bo_list and Map them and add new fence
2726 * 7.  Add fence to all PD and PT BOs.
2727 * 8.  Unreserve all BOs
2728 */
2729int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
2730{
2731	struct amdkfd_process_info *process_info = info;
2732	struct amdgpu_vm *peer_vm;
2733	struct kgd_mem *mem;
2734	struct amdgpu_amdkfd_fence *new_fence;
2735	struct list_head duplicate_save;
2736	struct amdgpu_sync sync_obj;
2737	unsigned long failed_size = 0;
2738	unsigned long total_size = 0;
2739	struct drm_exec exec;
2740	int ret;
2741
2742	INIT_LIST_HEAD(&duplicate_save);
2743
2744	mutex_lock(&process_info->lock);
2745
2746	drm_exec_init(&exec, 0);
2747	drm_exec_until_all_locked(&exec) {
2748		list_for_each_entry(peer_vm, &process_info->vm_list_head,
2749				    vm_list_node) {
2750			ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
2751			drm_exec_retry_on_contention(&exec);
2752			if (unlikely(ret))
2753				goto ttm_reserve_fail;
2754		}
2755
2756		/* Reserve all BOs and page tables/directory. Add all BOs from
2757		 * kfd_bo_list to ctx.list
2758		 */
2759		list_for_each_entry(mem, &process_info->kfd_bo_list,
2760				    validate_list) {
2761			struct drm_gem_object *gobj;
2762
2763			gobj = &mem->bo->tbo.base;
2764			ret = drm_exec_prepare_obj(&exec, gobj, 1);
2765			drm_exec_retry_on_contention(&exec);
2766			if (unlikely(ret))
2767				goto ttm_reserve_fail;
2768		}
2769	}
2770
2771	amdgpu_sync_create(&sync_obj);
2772
2773	/* Validate PDs and PTs */
2774	ret = process_validate_vms(process_info);
2775	if (ret)
2776		goto validate_map_fail;
2777
2778	ret = process_sync_pds_resv(process_info, &sync_obj);
2779	if (ret) {
2780		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
2781		goto validate_map_fail;
2782	}
2783
2784	/* Validate BOs and map them to GPUVM (update VM page tables). */
2785	list_for_each_entry(mem, &process_info->kfd_bo_list,
2786			    validate_list) {
2787
2788		struct amdgpu_bo *bo = mem->bo;
2789		uint32_t domain = mem->domain;
2790		struct kfd_mem_attachment *attachment;
2791		struct dma_resv_iter cursor;
2792		struct dma_fence *fence;
2793
2794		total_size += amdgpu_bo_size(bo);
2795
2796		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
2797		if (ret) {
2798			pr_debug("Memory eviction: Validate BOs failed\n");
2799			failed_size += amdgpu_bo_size(bo);
2800			ret = amdgpu_amdkfd_bo_validate(bo,
2801						AMDGPU_GEM_DOMAIN_GTT, false);
2802			if (ret) {
2803				pr_debug("Memory eviction: Try again\n");
2804				goto validate_map_fail;
2805			}
2806		}
2807		dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
2808					DMA_RESV_USAGE_KERNEL, fence) {
2809			ret = amdgpu_sync_fence(&sync_obj, fence);
2810			if (ret) {
2811				pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
2812				goto validate_map_fail;
2813			}
2814		}
2815		list_for_each_entry(attachment, &mem->attachments, list) {
2816			if (!attachment->is_mapped)
2817				continue;
2818
2819			if (attachment->bo_va->base.bo->tbo.pin_count)
2820				continue;
2821
2822			kfd_mem_dmaunmap_attachment(mem, attachment);
2823			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
2824			if (ret) {
2825				pr_debug("Memory eviction: update PTE failed. Try again\n");
2826				goto validate_map_fail;
2827			}
2828		}
2829	}
2830
2831	if (failed_size)
2832		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
2833
2834	/* Update page directories */
2835	ret = process_update_pds(process_info, &sync_obj);
2836	if (ret) {
2837		pr_debug("Memory eviction: update PDs failed. Try again\n");
2838		goto validate_map_fail;
2839	}
2840
2841	/* Wait for validate and PT updates to finish */
2842	amdgpu_sync_wait(&sync_obj, false);
2843
2844	/* Release old eviction fence and create new one, because fence only
2845	 * goes from unsignaled to signaled, fence cannot be reused.
2846	 * Use context and mm from the old fence.
2847	 */
2848	new_fence = amdgpu_amdkfd_fence_create(
2849				process_info->eviction_fence->base.context,
2850				process_info->eviction_fence->mm,
2851				NULL);
2852	if (!new_fence) {
2853		pr_err("Failed to create eviction fence\n");
2854		ret = -ENOMEM;
2855		goto validate_map_fail;
2856	}
2857	dma_fence_put(&process_info->eviction_fence->base);
2858	process_info->eviction_fence = new_fence;
2859	*ef = dma_fence_get(&new_fence->base);
2860
2861	/* Attach new eviction fence to all BOs except pinned ones */
2862	list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
2863		if (mem->bo->tbo.pin_count)
2864			continue;
2865
2866		dma_resv_add_fence(mem->bo->tbo.base.resv,
2867				   &process_info->eviction_fence->base,
2868				   DMA_RESV_USAGE_BOOKKEEP);
2869	}
2870	/* Attach eviction fence to PD / PT BOs */
2871	list_for_each_entry(peer_vm, &process_info->vm_list_head,
2872			    vm_list_node) {
2873		struct amdgpu_bo *bo = peer_vm->root.bo;
2874
2875		dma_resv_add_fence(bo->tbo.base.resv,
2876				   &process_info->eviction_fence->base,
2877				   DMA_RESV_USAGE_BOOKKEEP);
2878	}
2879
2880validate_map_fail:
2881	amdgpu_sync_free(&sync_obj);
2882ttm_reserve_fail:
2883	drm_exec_fini(&exec);
2884	mutex_unlock(&process_info->lock);
2885	return ret;
2886}
2887
2888int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
2889{
2890	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2891	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
2892	int ret;
2893
2894	if (!info || !gws)
2895		return -EINVAL;
2896
2897	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2898	if (!*mem)
2899		return -ENOMEM;
2900
2901	rw_init(&(*mem)->lock, "aggws");
2902	INIT_LIST_HEAD(&(*mem)->attachments);
2903	(*mem)->bo = amdgpu_bo_ref(gws_bo);
2904	(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
2905	(*mem)->process_info = process_info;
2906	add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
2907	amdgpu_sync_create(&(*mem)->sync);
2908
2909
2910	/* Validate gws bo the first time it is added to process */
2911	mutex_lock(&(*mem)->process_info->lock);
2912	ret = amdgpu_bo_reserve(gws_bo, false);
2913	if (unlikely(ret)) {
2914		pr_err("Reserve gws bo failed %d\n", ret);
2915		goto bo_reservation_failure;
2916	}
2917
2918	ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
2919	if (ret) {
2920		pr_err("GWS BO validate failed %d\n", ret);
2921		goto bo_validation_failure;
2922	}
2923	/* GWS resource is shared b/t amdgpu and amdkfd
2924	 * Add process eviction fence to bo so they can
2925	 * evict each other.
2926	 */
2927	ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
2928	if (ret)
2929		goto reserve_shared_fail;
2930	dma_resv_add_fence(gws_bo->tbo.base.resv,
2931			   &process_info->eviction_fence->base,
2932			   DMA_RESV_USAGE_BOOKKEEP);
2933	amdgpu_bo_unreserve(gws_bo);
2934	mutex_unlock(&(*mem)->process_info->lock);
2935
2936	return ret;
2937
2938reserve_shared_fail:
2939bo_validation_failure:
2940	amdgpu_bo_unreserve(gws_bo);
2941bo_reservation_failure:
2942	mutex_unlock(&(*mem)->process_info->lock);
2943	amdgpu_sync_free(&(*mem)->sync);
2944	remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
2945	amdgpu_bo_unref(&gws_bo);
2946	mutex_destroy(&(*mem)->lock);
2947	kfree(*mem);
2948	*mem = NULL;
2949	return ret;
2950}
2951
2952int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
2953{
2954	int ret;
2955	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2956	struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
2957	struct amdgpu_bo *gws_bo = kgd_mem->bo;
2958
2959	/* Remove BO from process's validate list so restore worker won't touch
2960	 * it anymore
2961	 */
2962	remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
2963
2964	ret = amdgpu_bo_reserve(gws_bo, false);
2965	if (unlikely(ret)) {
2966		pr_err("Reserve gws bo failed %d\n", ret);
2967		//TODO add BO back to validate_list?
2968		return ret;
2969	}
2970	amdgpu_amdkfd_remove_eviction_fence(gws_bo,
2971			process_info->eviction_fence);
2972	amdgpu_bo_unreserve(gws_bo);
2973	amdgpu_sync_free(&kgd_mem->sync);
2974	amdgpu_bo_unref(&gws_bo);
2975	mutex_destroy(&kgd_mem->lock);
2976	kfree(mem);
2977	return 0;
2978}
2979
2980/* Returns GPU-specific tiling mode information */
2981int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
2982				struct tile_config *config)
2983{
2984	config->gb_addr_config = adev->gfx.config.gb_addr_config;
2985	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
2986	config->num_tile_configs =
2987			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2988	config->macro_tile_config_ptr =
2989			adev->gfx.config.macrotile_mode_array;
2990	config->num_macro_tile_configs =
2991			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2992
2993	/* Those values are not set from GFX9 onwards */
2994	config->num_banks = adev->gfx.config.num_banks;
2995	config->num_ranks = adev->gfx.config.num_ranks;
2996
2997	return 0;
2998}
2999
3000bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem)
3001{
3002	struct kfd_mem_attachment *entry;
3003
3004	list_for_each_entry(entry, &mem->attachments, list) {
3005		if (entry->is_mapped && entry->adev == adev)
3006			return true;
3007	}
3008	return false;
3009}
3010
3011#if defined(CONFIG_DEBUG_FS)
3012
3013int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data)
3014{
3015
3016	spin_lock(&kfd_mem_limit.mem_limit_lock);
3017	seq_printf(m, "System mem used %lldM out of %lluM\n",
3018		  (kfd_mem_limit.system_mem_used >> 20),
3019		  (kfd_mem_limit.max_system_mem_limit >> 20));
3020	seq_printf(m, "TTM mem used %lldM out of %lluM\n",
3021		  (kfd_mem_limit.ttm_mem_used >> 20),
3022		  (kfd_mem_limit.max_ttm_mem_limit >> 20));
3023	spin_unlock(&kfd_mem_limit.mem_limit_lock);
3024
3025	return 0;
3026}
3027
3028#endif
3029