/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#define NVKM_VMM_LEVELS_MAX 5
#include "vmm.h"

#include <subdev/fb.h>

static void
nvkm_vmm_pt_del(struct nvkm_vmm_pt **ppgt)
{
	struct nvkm_vmm_pt *pgt = *ppgt;
	if (pgt) {
		kvfree(pgt->pde);
		kfree(pgt);
		*ppgt = NULL;
	}
}

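/* Allocate the software shadow of a page table.  PGDs get an array of
 * child pointers (pde[]); SPT/LPT levels get a per-LPTE byte appended
 * to the allocation to track SPTE reference counts and VALID/SPARSE state.
 */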
static struct nvkm_vmm_pt *
nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
		const struct nvkm_vmm_page *page)
{
	const u32 pten = 1 << desc->bits;
	struct nvkm_vmm_pt *pgt;
	u32 lpte = 0;

	if (desc->type > PGT) {
		if (desc->type == SPT) {
			const struct nvkm_vmm_desc *pair = page[-1].desc;
			lpte = pten >> (desc->bits - pair->bits);
		} else {
			lpte = pten;
		}
	}

	if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
		return NULL;
	pgt->page = page ? page->shift : 0;
	pgt->sparse = sparse;

	if (desc->type == PGD) {
		pgt->pde = kvcalloc(pten, sizeof(*pgt->pde), GFP_KERNEL);
		if (!pgt->pde) {
			kfree(pgt);
			return NULL;
		}
	}

	return pgt;
}

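/* State for a depth-first walk of the page-table tree: one PTE index and
 * software PT per level, plus the shallowest level that still needs a
 * flush before the walk completes.
 */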
struct nvkm_vmm_iter {
	const struct nvkm_vmm_page *page;
	const struct nvkm_vmm_desc *desc;
	struct nvkm_vmm *vmm;
	u64 cnt;
	u16 max, lvl;
	u32 pte[NVKM_VMM_LEVELS_MAX];
	struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX];
	int flush;
};

#ifdef CONFIG_NOUVEAU_DEBUG_MMU
static const char *
nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc)
{
	switch (desc->type) {
	case PGD: return "PGD";
	case PGT: return "PGT";
	case SPT: return "SPT";
	case LPT: return "LPT";
	default:
		return "UNKNOWN";
	}
}

static void
nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf)
{
	int lvl;
	for (lvl = it->max; lvl >= 0; lvl--) {
		if (lvl >= it->lvl)
			buf += sprintf(buf, "%05x:", it->pte[lvl]);
		else
			buf += sprintf(buf, "xxxxx:");
	}
}

#define TRA(i,f,a...) do {                                                     \
	char _buf[NVKM_VMM_LEVELS_MAX * 7];                                    \
	struct nvkm_vmm_iter *_it = (i);                                       \
	nvkm_vmm_trace(_it, _buf);                                             \
	VMM_TRACE(_it->vmm, "%s "f, _buf, ##a);                                \
} while(0)
#else
#define TRA(i,f,a...)
#endif

static inline void
nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it)
{
	it->flush = min(it->flush, it->max - it->lvl);
}

static inline void
nvkm_vmm_flush(struct nvkm_vmm_iter *it)
{
	if (it->flush != NVKM_VMM_LEVELS_MAX) {
		if (it->vmm->func->flush) {
			TRA(it, "flush: %d", it->flush);
			it->vmm->func->flush(it->vmm, it->flush);
		}
		it->flush = NVKM_VMM_LEVELS_MAX;
	}
}

static void
nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc[it->lvl].type == SPT;
	struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1];
	struct nvkm_vmm_pt *pgt = it->pt[it->lvl];
	struct nvkm_mmu_pt *pt = pgt->pt[type];
	struct nvkm_vmm *vmm = it->vmm;
	u32 pdei = it->pte[it->lvl + 1];

	/* Recurse up the tree, unreferencing/destroying unneeded PDs. */
	it->lvl++;
	if (--pgd->refs[0]) {
		const struct nvkm_vmm_desc_func *func = desc[it->lvl].func;
		/* PD has other valid PDEs, so we need a proper update. */
		TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
		pgt->pt[type] = NULL;
		if (!pgt->refs[!type]) {
			/* PDE no longer required. */
			if (pgd->pt[0]) {
				if (pgt->sparse) {
					func->sparse(vmm, pgd->pt[0], pdei, 1);
					pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE;
				} else {
					func->unmap(vmm, pgd->pt[0], pdei, 1);
					pgd->pde[pdei] = NULL;
				}
			} else {
				/* Special handling for Tesla-class GPUs,
				 * where there's no central PD, but each
				 * instance has its own embedded PD.
				 */
				func->pde(vmm, pgd, pdei);
				pgd->pde[pdei] = NULL;
			}
		} else {
			/* PDE was pointing at dual-PTs and we're removing
			 * one of them, leaving the other in place.
			 */
			func->pde(vmm, pgd, pdei);
		}

		/* GPU may have cached the PTs, flush before freeing. */
		nvkm_vmm_flush_mark(it);
		nvkm_vmm_flush(it);
	} else {
		/* PD has no valid PDEs left, so we can just destroy it. */
		nvkm_vmm_unref_pdes(it);
	}

	/* Destroy PD/PT. */
	TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
	nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt);
	if (!pgt->refs[!type])
		nvkm_vmm_pt_del(&pgt);
	it->lvl--;
}

static void
nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
		     const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
	const u32 sptb = desc->bits - pair->bits;
	const u32 sptn = 1 << sptb;
	struct nvkm_vmm *vmm = it->vmm;
	u32 spti = ptei & (sptn - 1), lpti, pteb;

	/* Determine how many SPTEs are being touched under each LPTE,
	 * and drop reference counts.
	 */
	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
		const u32 pten = min(sptn - spti, ptes);
		pgt->pte[lpti] -= pten;
		ptes -= pten;
	}

	/* We're done here if there's no corresponding LPT. */
	if (!pgt->refs[0])
		return;

	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
		/* Skip over any LPTEs that still have valid SPTEs. */
		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
					break;
			}
			continue;
		}

		/* As there are no more non-UNMAPPED SPTEs left in the range
		 * covered by a number of LPTEs, the LPTEs once again take
		 * control over their address range.
		 *
		 * Determine how many LPTEs need to transition state.
		 */
		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
				break;
			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
		}

		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
		} else
		if (pair->func->invalid) {
			/* If the MMU supports it, restore the LPTE to the
			 * INVALID state to tell the MMU there is no point
			 * trying to fetch the corresponding SPTEs.
			 */
			TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
			pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
		}
	}
}

static bool
nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = it->pt[0];
	bool dma;

	if (pfn) {
		/* Need to clear PTE valid bits before we dma_unmap_page(). */
		dma = desc->func->pfn_clear(it->vmm, pgt->pt[type], ptei, ptes);
		if (dma) {
			/* GPU may have cached the PT, flush before unmap. */
			nvkm_vmm_flush_mark(it);
			nvkm_vmm_flush(it);
			desc->func->pfn_unmap(it->vmm, pgt->pt[type], ptei, ptes);
		}
	}

	/* Drop PTE references. */
	pgt->refs[type] -= ptes;

	/* Dual-PTs need special handling, unless PDE becoming invalid. */
	if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
		nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);

	/* PT no longer needed? Destroy it. */
	if (!pgt->refs[type]) {
		it->lvl++;
		TRA(it, "%s empty", nvkm_vmm_desc_type(desc));
		it->lvl--;
		nvkm_vmm_unref_pdes(it);
		return false; /* PTE writes for unmap() not necessary. */
	}

	return true;
}

static void
nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
		   const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
	const u32 sptb = desc->bits - pair->bits;
	const u32 sptn = 1 << sptb;
	struct nvkm_vmm *vmm = it->vmm;
	u32 spti = ptei & (sptn - 1), lpti, pteb;

	/* Determine how many SPTEs are being touched under each LPTE,
	 * and increase reference counts.
	 */
	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
		const u32 pten = min(sptn - spti, ptes);
		pgt->pte[lpti] += pten;
		ptes -= pten;
	}

	/* We're done here if there's no corresponding LPT. */
	if (!pgt->refs[0])
		return;

	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
		/* Skip over any LPTEs that already have valid SPTEs. */
		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
					break;
			}
			continue;
		}

		/* As there are now non-UNMAPPED SPTEs in the range covered
		 * by a number of LPTEs, we need to transfer control of the
		 * address range to the SPTEs.
		 *
		 * Determine how many LPTEs need to transition state.
		 */
		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
				break;
			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
		}

		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
			const u32 spti = pteb * sptn;
			const u32 sptc = ptes * sptn;
			/* The entire LPTE is marked as sparse, we need
			 * to make sure that the SPTEs are too.
			 */
			TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc);
			desc->func->sparse(vmm, pgt->pt[1], spti, sptc);
			/* Sparse LPTEs prevent SPTEs from being accessed. */
			TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes);
			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
		} else
		if (pair->func->invalid) {
			/* MMU supports blocking SPTEs by marking an LPTE
			 * as INVALID.  We need to reverse that here.
			 */
			TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes);
			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
		}
	}
}

static bool
nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = it->pt[0];

	/* Take PTE references. */
	pgt->refs[type] += ptes;

	/* Dual-PTs need special handling. */
	if (desc->type == SPT)
		nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);

	return true;
}

static void
nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
		     struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes)
{
	if (desc->type == PGD) {
		while (ptes--)
			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
	} else
	if (desc->type == LPT) {
		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
	}
}

static bool
nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
{
	struct nvkm_vmm_pt *pt = it->pt[0];
	if (it->desc->type == PGD)
		memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
	else
	if (it->desc->type == LPT)
		memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
	return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
}

static bool
nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
{
	nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes);
	return nvkm_vmm_ref_ptes(it, pfn, ptei, ptes);
}

static bool
nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
{
	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
	const bool zero = !pgt->sparse && !desc->func->invalid;
	struct nvkm_vmm *vmm = it->vmm;
	struct nvkm_mmu *mmu = vmm->mmu;
	struct nvkm_mmu_pt *pt;
	u32 pten = 1 << desc->bits;
	u32 pteb, ptei, ptes;
	u32 size = desc->size * pten;

	pgd->refs[0]++;

	pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero);
	if (!pgt->pt[type]) {
		it->lvl--;
		nvkm_vmm_unref_pdes(it);
		return false;
	}

	if (zero)
		goto done;

	pt = pgt->pt[type];

	if (desc->type == LPT && pgt->refs[1]) {
		/* SPT already exists covering the same range as this LPT,
		 * which means we need to be careful that any LPTEs which
		 * overlap valid SPTEs are unmapped as opposed to invalid
		 * or sparse, which would prevent the MMU from looking at
		 * the SPTEs on some GPUs.
		 */
		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
				if (spte != next)
					break;
			}

			if (!spte) {
				if (pgt->sparse)
					desc->func->sparse(vmm, pt, pteb, ptes);
				else
					desc->func->invalid(vmm, pt, pteb, ptes);
				memset(&pgt->pte[pteb], 0x00, ptes);
			} else {
				desc->func->unmap(vmm, pt, pteb, ptes);
				while (ptes--)
					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
			}
		}
	} else {
		if (pgt->sparse) {
			nvkm_vmm_sparse_ptes(desc, pgt, 0, pten);
			desc->func->sparse(vmm, pt, 0, pten);
		} else {
			desc->func->invalid(vmm, pt, 0, pten);
		}
	}

done:
	TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc));
	it->desc[it->lvl].func->pde(it->vmm, pgd, pdei);
	nvkm_vmm_flush_mark(it);
	return true;
}

static bool
nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
{
	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];

	pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page);
	if (!pgt) {
		if (!pgd->refs[0])
			nvkm_vmm_unref_pdes(it);
		return false;
	}

	pgd->pde[pdei] = pgt;
	return true;
}

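/* Core page-table walker.  Walks 'size' bytes of address-space starting at
 * 'addr' for the given page size, (de)referencing software/hardware PTs as
 * needed when 'ref' is set, and calling REF_PTES/MAP_PTES/CLR_PTES on each
 * run of PTEs.  Returns ~0ULL on success, or the failure address so the
 * caller can unwind a partially-completed operation.
 */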
static inline u64
nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
	      u64 addr, u64 size, const char *name, bool ref, bool pfn,
	      bool (*REF_PTES)(struct nvkm_vmm_iter *, bool pfn, u32, u32),
	      nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map,
	      nvkm_vmm_pxe_func CLR_PTES)
{
	const struct nvkm_vmm_desc *desc = page->desc;
	struct nvkm_vmm_iter it;
	u64 bits = addr >> page->shift;

	it.page = page;
	it.desc = desc;
	it.vmm = vmm;
	it.cnt = size >> page->shift;
	it.flush = NVKM_VMM_LEVELS_MAX;

	/* Deconstruct address into PTE indices for each mapping level. */
	for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) {
		it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1);
		bits >>= desc[it.lvl].bits;
	}
	it.max = --it.lvl;
	it.pt[it.max] = vmm->pd;

	it.lvl = 0;
	TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name,
	         addr, size, page->shift, it.cnt);
	it.lvl = it.max;

	/* Depth-first traversal of page tables. */
	while (it.cnt) {
		struct nvkm_vmm_pt *pgt = it.pt[it.lvl];
		const int type = desc->type == SPT;
		const u32 pten = 1 << desc->bits;
		const u32 ptei = it.pte[0];
		const u32 ptes = min_t(u64, it.cnt, pten - ptei);

		/* Walk down the tree, finding page tables for each level. */
		for (; it.lvl; it.lvl--) {
			const u32 pdei = it.pte[it.lvl];
			struct nvkm_vmm_pt *pgd = pgt;

			/* Software PT. */
			if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) {
				if (!nvkm_vmm_ref_swpt(&it, pgd, pdei))
					goto fail;
			}
			it.pt[it.lvl - 1] = pgt = pgd->pde[pdei];

			/* Hardware PT.
			 *
			 * This is a separate step from above due to GF100 and
			 * newer having dual page tables at some levels, which
			 * are refcounted independently.
			 */
			if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) {
				if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei))
					goto fail;
			}
		}

		/* Handle PTE updates. */
		if (!REF_PTES || REF_PTES(&it, pfn, ptei, ptes)) {
			struct nvkm_mmu_pt *pt = pgt->pt[type];
			if (MAP_PTES || CLR_PTES) {
				if (MAP_PTES)
					MAP_PTES(vmm, pt, ptei, ptes, map);
				else
					CLR_PTES(vmm, pt, ptei, ptes);
				nvkm_vmm_flush_mark(&it);
			}
		}

		/* Walk back up the tree to the next position. */
		it.pte[it.lvl] += ptes;
		it.cnt -= ptes;
		if (it.cnt) {
			while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) {
				it.pte[it.lvl++] = 0;
				it.pte[it.lvl]++;
			}
		}
	}

	nvkm_vmm_flush(&it);
	return ~0ULL;

fail:
	/* Reconstruct the failure address so the caller is able to
	 * reverse any partially completed operations.
	 */
	addr = it.pte[it.max--];
	do {
		addr  = addr << desc[it.max].bits;
		addr |= it.pte[it.max];
	} while (it.max--);

	return addr << page->shift;
}

static void
nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			 u64 addr, u64 size)
{
	nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false, false,
		      nvkm_vmm_sparse_unref_ptes, NULL, NULL,
		      page->desc->func->invalid ?
		      page->desc->func->invalid : page->desc->func->unmap);
}

static int
nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			 u64 addr, u64 size)
{
	if ((page->type & NVKM_VMM_PAGE_SPARSE)) {
		u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref",
					 true, false, nvkm_vmm_sparse_ref_ptes,
					 NULL, NULL, page->desc->func->sparse);
		if (fail != ~0ULL) {
			if ((size = fail - addr))
				nvkm_vmm_ptes_sparse_put(vmm, page, addr, size);
			return -ENOMEM;
		}
		return 0;
	}
	return -EINVAL;
}

static int
nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	int m = 0, i;
	u64 start = addr;
	u64 block;

	while (size) {
		/* Limit maximum page size based on remaining size. */
		while (size < (1ULL << page[m].shift))
			m++;
		i = m;

		/* Find largest page size suitable for alignment. */
		while (!IS_ALIGNED(addr, 1ULL << page[i].shift))
			i++;

		/* Determine number of PTEs at this page size. */
		if (i != m) {
			/* Limited to alignment boundary of next page size. */
			u64 next = 1ULL << page[i - 1].shift;
			u64 part = ALIGN(addr, next) - addr;
			if (size - part >= next)
				block = (part >> page[i].shift) << page[i].shift;
			else
				block = (size >> page[i].shift) << page[i].shift;
		} else {
			block = (size >> page[i].shift) << page[i].shift;
		}

		/* Perform operation. */
		if (ref) {
			int ret = nvkm_vmm_ptes_sparse_get(vmm, &page[i], addr, block);
			if (ret) {
				if ((size = addr - start))
					nvkm_vmm_ptes_sparse(vmm, start, size, false);
				return ret;
			}
		} else {
			nvkm_vmm_ptes_sparse_put(vmm, &page[i], addr, block);
		}

		size -= block;
		addr += block;
	}

	return 0;
}

static void
nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		    u64 addr, u64 size, bool sparse, bool pfn)
{
	const struct nvkm_vmm_desc_func *func = page->desc->func;

	mutex_lock(&vmm->mutex.map);
	nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, pfn,
		      NULL, NULL, NULL,
		      sparse ? func->sparse : func->invalid ? func->invalid :
							      func->unmap);
	mutex_unlock(&vmm->mutex.map);
}

static void
nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size, struct nvkm_vmm_map *map,
		  nvkm_vmm_pte_func func)
{
	mutex_lock(&vmm->mutex.map);
	nvkm_vmm_iter(vmm, page, addr, size, "map", false, false,
		      NULL, func, map, NULL);
	mutex_unlock(&vmm->mutex.map);
}

static void
nvkm_vmm_ptes_put_locked(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			 u64 addr, u64 size)
{
	nvkm_vmm_iter(vmm, page, addr, size, "unref", false, false,
		      nvkm_vmm_unref_ptes, NULL, NULL, NULL);
}

static void
nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size)
{
	mutex_lock(&vmm->mutex.ref);
	nvkm_vmm_ptes_put_locked(vmm, page, addr, size);
	mutex_unlock(&vmm->mutex.ref);
}

static int
nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size)
{
	u64 fail;

	mutex_lock(&vmm->mutex.ref);
	fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, false,
			     nvkm_vmm_ref_ptes, NULL, NULL, NULL);
	if (fail != ~0ULL) {
		if (fail != addr)
			nvkm_vmm_ptes_put_locked(vmm, page, addr, fail - addr);
		mutex_unlock(&vmm->mutex.ref);
		return -ENOMEM;
	}
	mutex_unlock(&vmm->mutex.ref);
	return 0;
}

static void
__nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			  u64 addr, u64 size, bool sparse, bool pfn)
{
	const struct nvkm_vmm_desc_func *func = page->desc->func;

	nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref",
		      false, pfn, nvkm_vmm_unref_ptes, NULL, NULL,
		      sparse ? func->sparse : func->invalid ? func->invalid :
							      func->unmap);
}

static void
nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			u64 addr, u64 size, bool sparse, bool pfn)
{
	if (vmm->managed.raw) {
		nvkm_vmm_ptes_unmap(vmm, page, addr, size, sparse, pfn);
		nvkm_vmm_ptes_put(vmm, page, addr, size);
	} else {
		__nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, sparse, pfn);
	}
}

static int
__nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			u64 addr, u64 size, struct nvkm_vmm_map *map,
			nvkm_vmm_pte_func func)
{
	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true,
				 false, nvkm_vmm_ref_ptes, func, map, NULL);
	if (fail != ~0ULL) {
		if ((size = fail - addr))
			nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false, false);
		return -ENOMEM;
	}
	return 0;
}

static int
nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		      u64 addr, u64 size, struct nvkm_vmm_map *map,
		      nvkm_vmm_pte_func func)
{
	int ret;

	if (vmm->managed.raw) {
		ret = nvkm_vmm_ptes_get(vmm, page, addr, size);
		if (ret)
			return ret;

		nvkm_vmm_ptes_map(vmm, page, addr, size, map, func);

		return 0;
	} else {
		return __nvkm_vmm_ptes_get_map(vmm, page, addr, size, map, func);
	}
}

struct nvkm_vma *
nvkm_vma_new(u64 addr, u64 size)
{
	struct nvkm_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma) {
		vma->addr = addr;
		vma->size = size;
		vma->page = NVKM_VMA_PAGE_NONE;
		vma->refd = NVKM_VMA_PAGE_NONE;
	}
	return vma;
}

struct nvkm_vma *
nvkm_vma_tail(struct nvkm_vma *vma, u64 tail)
{
	struct nvkm_vma *new;

	BUG_ON(vma->size == tail);

	if (!(new = nvkm_vma_new(vma->addr + (vma->size - tail), tail)))
		return NULL;
	vma->size -= tail;

	new->mapref = vma->mapref;
	new->sparse = vma->sparse;
	new->page = vma->page;
	new->refd = vma->refd;
	new->used = vma->used;
	new->part = vma->part;
	new->busy = vma->busy;
	new->mapped = vma->mapped;
	list_add(&new->head, &vma->head);
	return new;
}

static inline void
nvkm_vmm_free_remove(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	rb_erase(&vma->tree, &vmm->free);
}

static inline void
nvkm_vmm_free_delete(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	nvkm_vmm_free_remove(vmm, vma);
	list_del(&vma->head);
	kfree(vma);
}

static void
nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct rb_node **ptr = &vmm->free.rb_node;
	struct rb_node *parent = NULL;

	while (*ptr) {
		struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree);
		parent = *ptr;
		if (vma->size < this->size)
			ptr = &parent->rb_left;
		else
		if (vma->size > this->size)
			ptr = &parent->rb_right;
		else
		if (vma->addr < this->addr)
			ptr = &parent->rb_left;
		else
		if (vma->addr > this->addr)
			ptr = &parent->rb_right;
		else
			BUG();
	}

	rb_link_node(&vma->tree, parent, ptr);
	rb_insert_color(&vma->tree, &vmm->free);
}

static inline void
nvkm_vmm_node_remove(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	rb_erase(&vma->tree, &vmm->root);
}

static inline void
nvkm_vmm_node_delete(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	nvkm_vmm_node_remove(vmm, vma);
	list_del(&vma->head);
	kfree(vma);
}

static void
nvkm_vmm_node_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct rb_node **ptr = &vmm->root.rb_node;
	struct rb_node *parent = NULL;

	while (*ptr) {
		struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree);
		parent = *ptr;
		if (vma->addr < this->addr)
			ptr = &parent->rb_left;
		else
		if (vma->addr > this->addr)
			ptr = &parent->rb_right;
		else
			BUG();
	}

	rb_link_node(&vma->tree, parent, ptr);
	rb_insert_color(&vma->tree, &vmm->root);
}

struct nvkm_vma *
nvkm_vmm_node_search(struct nvkm_vmm *vmm, u64 addr)
{
	struct rb_node *node = vmm->root.rb_node;
	while (node) {
		struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree);
		if (addr < vma->addr)
			node = node->rb_left;
		else
		if (addr >= vma->addr + vma->size)
			node = node->rb_right;
		else
			return vma;
	}
	return NULL;
}

#define node(root, dir) (((root)->head.dir == &vmm->list) ? NULL :             \
	list_entry((root)->head.dir, struct nvkm_vma, head))

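/* Fold 'size' bytes of 'vma' into adjacent compatible nodes: either
 * absorbing whole nodes when the sizes line up, or shifting bytes into
 * 'prev'/'next' while keeping the rb-tree and ordered list consistent.
 */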
static struct nvkm_vma *
nvkm_vmm_node_merge(struct nvkm_vmm *vmm, struct nvkm_vma *prev,
		    struct nvkm_vma *vma, struct nvkm_vma *next, u64 size)
{
	if (next) {
		if (vma->size == size) {
			vma->size += next->size;
			nvkm_vmm_node_delete(vmm, next);
			if (prev) {
				prev->size += vma->size;
				nvkm_vmm_node_delete(vmm, vma);
				return prev;
			}
			return vma;
		}
		BUG_ON(prev);

		nvkm_vmm_node_remove(vmm, next);
		vma->size -= size;
		next->addr -= size;
		next->size += size;
		nvkm_vmm_node_insert(vmm, next);
		return next;
	}

	if (prev) {
		if (vma->size != size) {
			nvkm_vmm_node_remove(vmm, vma);
			prev->size += size;
			vma->addr += size;
			vma->size -= size;
			nvkm_vmm_node_insert(vmm, vma);
		} else {
			prev->size += vma->size;
			nvkm_vmm_node_delete(vmm, vma);
		}
		return prev;
	}

	return vma;
}

struct nvkm_vma *
nvkm_vmm_node_split(struct nvkm_vmm *vmm,
		    struct nvkm_vma *vma, u64 addr, u64 size)
{
	struct nvkm_vma *prev = NULL;

	if (vma->addr != addr) {
		prev = vma;
		if (!(vma = nvkm_vma_tail(vma, vma->size + vma->addr - addr)))
			return NULL;
		vma->part = true;
		nvkm_vmm_node_insert(vmm, vma);
	}

	if (vma->size != size) {
		struct nvkm_vma *tmp;
		if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) {
			nvkm_vmm_node_merge(vmm, prev, vma, NULL, vma->size);
			return NULL;
		}
		tmp->part = true;
		nvkm_vmm_node_insert(vmm, tmp);
	}

	return vma;
}

static void
nvkm_vma_dump(struct nvkm_vma *vma)
{
	printk(KERN_ERR "%016llx %016llx %c%c%c%c%c%c%c%c %p\n",
	       vma->addr, (u64)vma->size,
	       vma->used ? '-' : 'F',
	       vma->mapref ? 'R' : '-',
	       vma->sparse ? 'S' : '-',
	       vma->page != NVKM_VMA_PAGE_NONE ? '0' + vma->page : '-',
	       vma->refd != NVKM_VMA_PAGE_NONE ? '0' + vma->refd : '-',
	       vma->part ? 'P' : '-',
	       vma->busy ? 'B' : '-',
	       vma->mapped ? 'M' : '-',
	       vma->memory);
}

static void
nvkm_vmm_dump(struct nvkm_vmm *vmm)
{
	struct nvkm_vma *vma;
	list_for_each_entry(vma, &vmm->list, head) {
		nvkm_vma_dump(vma);
	}
}

static void
nvkm_vmm_dtor(struct nvkm_vmm *vmm)
{
	struct nvkm_vma *vma;
	struct rb_node *node;

	if (vmm->rm.client.gsp) {
		nvkm_gsp_rm_free(&vmm->rm.object);
		nvkm_gsp_device_dtor(&vmm->rm.device);
		nvkm_gsp_client_dtor(&vmm->rm.client);
		nvkm_vmm_put(vmm, &vmm->rm.rsvd);
	}

	if (0)
		nvkm_vmm_dump(vmm);

	while ((node = rb_first(&vmm->root))) {
		struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree);
		nvkm_vmm_put(vmm, &vma);
	}

	if (vmm->bootstrapped) {
		const struct nvkm_vmm_page *page = vmm->func->page;
		const u64 limit = vmm->limit - vmm->start;

		while (page[1].shift)
			page++;

		nvkm_mmu_ptc_dump(vmm->mmu);
		nvkm_vmm_ptes_put(vmm, page, vmm->start, limit);
	}

	vma = list_first_entry(&vmm->list, typeof(*vma), head);
	list_del(&vma->head);
	kfree(vma);
	WARN_ON(!list_empty(&vmm->list));

	if (vmm->nullp) {
		dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024,
				  vmm->nullp, vmm->null);
	}

	if (vmm->pd) {
		nvkm_mmu_ptc_put(vmm->mmu, true, &vmm->pd->pt[0]);
		nvkm_vmm_pt_del(&vmm->pd);
	}
}

static int
nvkm_vmm_ctor_managed(struct nvkm_vmm *vmm, u64 addr, u64 size)
{
	struct nvkm_vma *vma;
	if (!(vma = nvkm_vma_new(addr, size)))
		return -ENOMEM;
	vma->mapref = true;
	vma->sparse = false;
	vma->used = true;
	nvkm_vmm_node_insert(vmm, vma);
	list_add_tail(&vma->head, &vmm->list);
	return 0;
}

static int
nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
	      u32 pd_header, bool managed, u64 addr, u64 size,
	      struct lock_class_key *key, const char *name,
	      struct nvkm_vmm *vmm)
{
	static struct lock_class_key _key;
	const struct nvkm_vmm_page *page = func->page;
	const struct nvkm_vmm_desc *desc;
	struct nvkm_vma *vma;
	int levels, bits = 0, ret;

	vmm->func = func;
	vmm->mmu = mmu;
	vmm->name = name;
	vmm->debug = mmu->subdev.debug;
	kref_init(&vmm->kref);

	__mutex_init(&vmm->mutex.vmm, "&vmm->mutex.vmm", key ? key : &_key);
	mutex_init(&vmm->mutex.ref);
	mutex_init(&vmm->mutex.map);

	/* Locate the smallest page size supported by the backend, it will
	 * have the deepest nesting of page tables.
	 */
	while (page[1].shift)
		page++;

	/* Locate the structure that describes the layout of the top-level
	 * page table, and determine the number of valid bits in a virtual
	 * address.
	 */
	for (levels = 0, desc = page->desc; desc->bits; desc++, levels++)
		bits += desc->bits;
	bits += page->shift;
	desc--;

	if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX))
		return -EINVAL;

	/* Allocate top-level page table. */
	vmm->pd = nvkm_vmm_pt_new(desc, false, NULL);
	if (!vmm->pd)
		return -ENOMEM;
	vmm->pd->refs[0] = 1;
	INIT_LIST_HEAD(&vmm->join);

	/* ... and the GPU storage for it, except on Tesla-class GPUs that
	 * have the PD embedded in the instance structure.
	 */
	if (desc->size) {
		const u32 size = pd_header + desc->size * (1 << desc->bits);
		vmm->pd->pt[0] = nvkm_mmu_ptc_get(mmu, size, desc->align, true);
		if (!vmm->pd->pt[0])
			return -ENOMEM;
	}

	/* Initialise address-space MM. */
	INIT_LIST_HEAD(&vmm->list);
	vmm->free = RB_ROOT;
	vmm->root = RB_ROOT;

	if (managed) {
		/* Address-space will be managed by the client for the most
		 * part, except for a specified area where NVKM allocations
		 * are allowed to be placed.
		 */
		vmm->start = 0;
		vmm->limit = 1ULL << bits;
		if (addr + size < addr || addr + size > vmm->limit)
			return -EINVAL;

		/* Client-managed area before the NVKM-managed area. */
		if (addr && (ret = nvkm_vmm_ctor_managed(vmm, 0, addr)))
			return ret;

		vmm->managed.p.addr = 0;
		vmm->managed.p.size = addr;

		/* NVKM-managed area. */
		if (size) {
			if (!(vma = nvkm_vma_new(addr, size)))
				return -ENOMEM;
			nvkm_vmm_free_insert(vmm, vma);
			list_add_tail(&vma->head, &vmm->list);
		}

		/* Client-managed area after the NVKM-managed area. */
		addr = addr + size;
		size = vmm->limit - addr;
		if (size && (ret = nvkm_vmm_ctor_managed(vmm, addr, size)))
			return ret;

		vmm->managed.n.addr = addr;
		vmm->managed.n.size = size;
	} else {
		/* Address-space fully managed by NVKM, requiring calls to
		 * nvkm_vmm_get()/nvkm_vmm_put() to allocate address-space.
		 */
		vmm->start = addr;
		vmm->limit = size ? (addr + size) : (1ULL << bits);
		if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits))
			return -EINVAL;

		if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start)))
			return -ENOMEM;

		nvkm_vmm_free_insert(vmm, vma);
		list_add(&vma->head, &vmm->list);
	}

	return 0;
}

int
nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
	      u32 hdr, bool managed, u64 addr, u64 size,
	      struct lock_class_key *key, const char *name,
	      struct nvkm_vmm **pvmm)
{
	if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL)))
		return -ENOMEM;
	return nvkm_vmm_ctor(func, mmu, hdr, managed, addr, size, key, name, *pvmm);
}

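/* For PFN (HMM) mappings: reshape the VMA covering [addr, addr + size) so
 * that exactly this range carries the new mapped/unmapped state, merging
 * with compatible neighbouring nodes where possible, splitting otherwise.
 */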
static struct nvkm_vma *
nvkm_vmm_pfn_split_merge(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
			 u64 addr, u64 size, u8 page, bool map)
{
	struct nvkm_vma *prev = NULL;
	struct nvkm_vma *next = NULL;

	if (vma->addr == addr && vma->part && (prev = node(vma, prev))) {
		if (prev->memory || prev->mapped != map)
			prev = NULL;
	}

	if (vma->addr + vma->size == addr + size && (next = node(vma, next))) {
		if (!next->part ||
		    next->memory || next->mapped != map)
			next = NULL;
	}

	if (prev || next)
		return nvkm_vmm_node_merge(vmm, prev, vma, next, size);
	return nvkm_vmm_node_split(vmm, vma, addr, size);
}

int
nvkm_vmm_pfn_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size)
{
	struct nvkm_vma *vma = nvkm_vmm_node_search(vmm, addr);
	struct nvkm_vma *next;
	u64 limit = addr + size;
	u64 start = addr;

	if (!vma)
		return -EINVAL;

	do {
		if (!vma->mapped || vma->memory)
			continue;

		size = min(limit - start, vma->size - (start - vma->addr));

		nvkm_vmm_ptes_unmap_put(vmm, &vmm->func->page[vma->refd],
					start, size, false, true);

		next = nvkm_vmm_pfn_split_merge(vmm, vma, start, size, 0, false);
		if (!WARN_ON(!next)) {
			vma = next;
			vma->refd = NVKM_VMA_PAGE_NONE;
			vma->mapped = false;
		}
	} while ((vma = node(vma, next)) && (start = vma->addr) < limit);

	return 0;
}

/*TODO:
 * - Avoid PT readback (for dma_unmap etc), this might end up being dealt
 *   with inside HMM, which would be a lot nicer for us to deal with.
 * - Support for systems without a 4KiB page size.
 */
int
nvkm_vmm_pfn_map(struct nvkm_vmm *vmm, u8 shift, u64 addr, u64 size, u64 *pfn)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	struct nvkm_vma *vma, *tmp;
	u64 limit = addr + size;
	u64 start = addr;
	int pm = size >> shift;
	int pi = 0;

	/* Only support mapping where the page size of the incoming page
	 * array matches a page size available for direct mapping.
	 */
	while (page->shift && (page->shift != shift ||
	       page->desc->func->pfn == NULL))
		page++;

	if (!page->shift || !IS_ALIGNED(addr, 1ULL << shift) ||
			    !IS_ALIGNED(size, 1ULL << shift) ||
	    addr + size < addr || addr + size > vmm->limit) {
		VMM_DEBUG(vmm, "paged map %d %d %016llx %016llx\n",
			  shift, page->shift, addr, size);
		return -EINVAL;
	}

	if (!(vma = nvkm_vmm_node_search(vmm, addr)))
		return -ENOENT;

	do {
		bool map = !!(pfn[pi] & NVKM_VMM_PFN_V);
		bool mapped = vma->mapped;
		u64 size = limit - start;
		u64 addr = start;
		int pn, ret = 0;

		/* Narrow the operation window to cover a single action (page
		 * should be mapped or not) within a single VMA.
		 */
		for (pn = 0; pi + pn < pm; pn++) {
			if (map != !!(pfn[pi + pn] & NVKM_VMM_PFN_V))
				break;
		}
		size = min_t(u64, size, pn << page->shift);
		size = min_t(u64, size, vma->size + vma->addr - addr);

		/* Reject any operation to unmanaged regions, and areas that
		 * have nvkm_memory objects mapped in them already.
		 */
		if (!vma->mapref || vma->memory) {
			ret = -EINVAL;
			goto next;
		}

		/* In order to both properly refcount GPU page tables, and
		 * prevent "normal" mappings and these direct mappings from
		 * interfering with each other, we need to track contiguous
		 * ranges that have been mapped with this interface.
		 *
		 * Here we attempt to either split an existing VMA so we're
		 * able to flag the region as either unmapped/mapped, or to
		 * merge with adjacent VMAs that are already compatible.
		 *
		 * If the region is already compatible, nothing is required.
		 */
		if (map != mapped) {
			tmp = nvkm_vmm_pfn_split_merge(vmm, vma, addr, size,
						       page -
						       vmm->func->page, map);
			if (WARN_ON(!tmp)) {
				ret = -ENOMEM;
				goto next;
			}

			if ((tmp->mapped = map))
				tmp->refd = page - vmm->func->page;
			else
				tmp->refd = NVKM_VMA_PAGE_NONE;
			vma = tmp;
		}

		/* Update HW page tables. */
		if (map) {
			struct nvkm_vmm_map args;
			args.page = page;
			args.pfn = &pfn[pi];

			if (!mapped) {
				ret = nvkm_vmm_ptes_get_map(vmm, page, addr,
							    size, &args, page->
							    desc->func->pfn);
			} else {
				nvkm_vmm_ptes_map(vmm, page, addr, size, &args,
						  page->desc->func->pfn);
			}
		} else {
			if (mapped) {
				nvkm_vmm_ptes_unmap_put(vmm, page, addr, size,
							false, true);
			}
		}

next:
		/* Iterate to next operation. */
		if (vma->addr + vma->size == addr + size)
			vma = node(vma, next);
		start += size;

		if (ret) {
			/* Failure is signalled by clearing the valid bit on
			 * any PFN that couldn't be modified as requested.
			 */
			while (size) {
				pfn[pi++] = NVKM_VMM_PFN_NONE;
				size -= 1 << page->shift;
			}
		} else {
			pi += size >> page->shift;
		}
	} while (vma && start < limit);

	return 0;
}

void
nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct nvkm_vma *prev = NULL;
	struct nvkm_vma *next;

	nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
	nvkm_memory_unref(&vma->memory);
	vma->mapped = false;

	if (vma->part && (prev = node(vma, prev)) && prev->mapped)
		prev = NULL;
	if ((next = node(vma, next)) && (!next->part || next->mapped))
		next = NULL;
	nvkm_vmm_node_merge(vmm, prev, vma, next, vma->size);
}

void
nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, bool pfn)
{
	const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd];

	if (vma->mapref) {
		nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse, pfn);
		vma->refd = NVKM_VMA_PAGE_NONE;
	} else {
		nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse, pfn);
	}

	nvkm_vmm_unmap_region(vmm, vma);
}

void
nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	if (vma->memory) {
		mutex_lock(&vmm->mutex.vmm);
		nvkm_vmm_unmap_locked(vmm, vma, false);
		mutex_unlock(&vmm->mutex.vmm);
	}
}

static int
nvkm_vmm_map_valid(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
		   void *argv, u32 argc, struct nvkm_vmm_map *map)
{
	switch (nvkm_memory_target(map->memory)) {
	case NVKM_MEM_TARGET_VRAM:
		if (!(map->page->type & NVKM_VMM_PAGE_VRAM)) {
			VMM_DEBUG(vmm, "%d !VRAM", map->page->shift);
			return -EINVAL;
		}
		break;
	case NVKM_MEM_TARGET_HOST:
	case NVKM_MEM_TARGET_NCOH:
		if (!(map->page->type & NVKM_VMM_PAGE_HOST)) {
			VMM_DEBUG(vmm, "%d !HOST", map->page->shift);
			return -EINVAL;
		}
		break;
	default:
		WARN_ON(1);
		return -ENOSYS;
	}

	if (!IS_ALIGNED(     vma->addr, 1ULL << map->page->shift) ||
	    !IS_ALIGNED((u64)vma->size, 1ULL << map->page->shift) ||
	    !IS_ALIGNED(   map->offset, 1ULL << map->page->shift) ||
	    nvkm_memory_page(map->memory) < map->page->shift) {
		VMM_DEBUG(vmm, "alignment %016llx %016llx %016llx %d %d",
		    vma->addr, (u64)vma->size, map->offset, map->page->shift,
		    nvkm_memory_page(map->memory));
		return -EINVAL;
	}

	return vmm->func->valid(vmm, argv, argc, map);
}

static int
nvkm_vmm_map_choose(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
		    void *argv, u32 argc, struct nvkm_vmm_map *map)
{
	for (map->page = vmm->func->page; map->page->shift; map->page++) {
		VMM_DEBUG(vmm, "trying %d", map->page->shift);
		if (!nvkm_vmm_map_valid(vmm, vma, argv, argc, map))
			return 0;
	}
	return -EINVAL;
}

static int
nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
		    void *argv, u32 argc, struct nvkm_vmm_map *map)
{
	nvkm_vmm_pte_func func;
	int ret;

	map->no_comp = vma->no_comp;

	/* Make sure we won't overrun the end of the memory object. */
	if (unlikely(nvkm_memory_size(map->memory) < map->offset + vma->size)) {
		VMM_DEBUG(vmm, "overrun %016llx %016llx %016llx",
			  nvkm_memory_size(map->memory),
			  map->offset, (u64)vma->size);
		return -EINVAL;
	}

	/* Check remaining arguments for validity. */
	if (vma->page == NVKM_VMA_PAGE_NONE &&
	    vma->refd == NVKM_VMA_PAGE_NONE) {
		/* Find the largest page size we can perform the mapping at. */
		const u32 debug = vmm->debug;
		vmm->debug = 0;
		ret = nvkm_vmm_map_choose(vmm, vma, argv, argc, map);
		vmm->debug = debug;
		if (ret) {
			VMM_DEBUG(vmm, "invalid at any page size");
			nvkm_vmm_map_choose(vmm, vma, argv, argc, map);
			return -EINVAL;
		}
	} else {
		/* Page size of the VMA is already pre-determined. */
		if (vma->refd != NVKM_VMA_PAGE_NONE)
			map->page = &vmm->func->page[vma->refd];
		else
			map->page = &vmm->func->page[vma->page];

		ret = nvkm_vmm_map_valid(vmm, vma, argv, argc, map);
		if (ret) {
			VMM_DEBUG(vmm, "invalid %d\n", ret);
			return ret;
		}
	}

	/* Deal with the 'offset' argument, and fetch the backend function. */
	map->off = map->offset;
	if (map->mem) {
		for (; map->off; map->mem = map->mem->next) {
			u64 size = (u64)map->mem->length << NVKM_RAM_MM_SHIFT;
			if (size > map->off)
				break;
			map->off -= size;
		}
		func = map->page->desc->func->mem;
	} else
	if (map->sgl) {
		for (; map->off; map->sgl = sg_next(map->sgl)) {
			u64 size = sg_dma_len(map->sgl);
			if (size > map->off)
				break;
			map->off -= size;
		}
		func = map->page->desc->func->sgl;
	} else {
		map->dma += map->offset >> PAGE_SHIFT;
		map->off  = map->offset & PAGE_MASK;
		func = map->page->desc->func->dma;
	}

	/* Perform the map. */
	if (vma->refd == NVKM_VMA_PAGE_NONE) {
		ret = nvkm_vmm_ptes_get_map(vmm, map->page, vma->addr, vma->size, map, func);
		if (ret)
			return ret;

		vma->refd = map->page - vmm->func->page;
	} else {
		nvkm_vmm_ptes_map(vmm, map->page, vma->addr, vma->size, map, func);
	}

	nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
	nvkm_memory_unref(&vma->memory);
	vma->memory = nvkm_memory_ref(map->memory);
	vma->mapped = true;
	vma->tags = map->tags;
	return 0;
}

int
nvkm_vmm_map(struct nvkm_vmm *vmm, struct nvkm_vma *vma, void *argv, u32 argc,
	     struct nvkm_vmm_map *map)
{
	int ret;

	if (nvkm_vmm_in_managed_range(vmm, vma->addr, vma->size) &&
	    vmm->managed.raw)
		return nvkm_vmm_map_locked(vmm, vma, argv, argc, map);

	mutex_lock(&vmm->mutex.vmm);
	ret = nvkm_vmm_map_locked(vmm, vma, argv, argc, map);
	vma->busy = false;
	mutex_unlock(&vmm->mutex.vmm);
	return ret;
}

static void
nvkm_vmm_put_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct nvkm_vma *prev, *next;

	if ((prev = node(vma, prev)) && !prev->used) {
		vma->addr  = prev->addr;
		vma->size += prev->size;
		nvkm_vmm_free_delete(vmm, prev);
	}

	if ((next = node(vma, next)) && !next->used) {
		vma->size += next->size;
		nvkm_vmm_free_delete(vmm, next);
	}

	nvkm_vmm_free_insert(vmm, vma);
}

void
nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	struct nvkm_vma *next = vma;

	BUG_ON(vma->part);

	if (vma->mapref || !vma->sparse) {
		do {
			const bool mem = next->memory != NULL;
			const bool map = next->mapped;
			const u8  refd = next->refd;
			const u64 addr = next->addr;
			u64 size = next->size;

			/* Merge regions that are in the same state. */
			while ((next = node(next, next)) && next->part &&
			       (next->mapped == map) &&
			       (next->memory != NULL) == mem &&
			       (next->refd == refd))
				size += next->size;

			if (map) {
				/* Region(s) are mapped, merge the unmap
				 * and dereference into a single walk of
				 * the page tree.
				 */
				nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr,
							size, vma->sparse,
							!mem);
			} else
			if (refd != NVKM_VMA_PAGE_NONE) {
				/* Drop allocation-time PTE references. */
				nvkm_vmm_ptes_put(vmm, &page[refd], addr, size);
			}
		} while (next && next->part);
	}

	/* Merge any mapped regions that were split from the initial
	 * address-space allocation back into the allocated VMA, and
	 * release memory/compression resources.
	 */
	next = vma;
	do {
		if (next->mapped)
			nvkm_vmm_unmap_region(vmm, next);
	} while ((next = node(vma, next)) && next->part);

	if (vma->sparse && !vma->mapref) {
		/* Sparse region that was allocated with a fixed page size,
		 * meaning all relevant PTEs were referenced once when the
		 * region was allocated, and remained that way, regardless
		 * of whether memory was mapped into it afterwards.
		 *
		 * The process of unmapping, unsparsing, and dereferencing
		 * PTEs can be done in a single page tree walk.
		 */
		nvkm_vmm_ptes_sparse_put(vmm, &page[vma->refd], vma->addr, vma->size);
	} else
	if (vma->sparse) {
		/* Sparse region that wasn't allocated with a fixed page size,
		 * PTE references were taken both at allocation time (to make
		 * the GPU see the region as sparse), and when mapping memory
		 * into the region.
		 *
		 * The latter was handled above, and the remaining references
		 * are dealt with here.
		 */
		nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, false);
	}

	/* Remove VMA from the list of allocated nodes. */
	nvkm_vmm_node_remove(vmm, vma);

	/* Merge VMA back into the free list. */
	vma->page = NVKM_VMA_PAGE_NONE;
	vma->refd = NVKM_VMA_PAGE_NONE;
	vma->used = false;
	nvkm_vmm_put_region(vmm, vma);
}

void
nvkm_vmm_put(struct nvkm_vmm *vmm, struct nvkm_vma **pvma)
{
	struct nvkm_vma *vma = *pvma;
	if (vma) {
		mutex_lock(&vmm->mutex.vmm);
		nvkm_vmm_put_locked(vmm, vma);
		mutex_unlock(&vmm->mutex.vmm);
		*pvma = NULL;
	}
}

int
nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse,
		    u8 shift, u8 align, u64 size, struct nvkm_vma **pvma)
{
	const struct nvkm_vmm_page *page = &vmm->func->page[NVKM_VMA_PAGE_NONE];
	struct rb_node *node = NULL, *temp;
	struct nvkm_vma *vma = NULL, *tmp;
	u64 addr, tail;
	int ret;

	VMM_TRACE(vmm, "getref %d mapref %d sparse %d "
		       "shift: %d align: %d size: %016llx",
		  getref, mapref, sparse, shift, align, size);

	/* Zero-sized, or lazily-allocated sparse VMAs, make no sense. */
	if (unlikely(!size || (!getref && !mapref && sparse))) {
		VMM_DEBUG(vmm, "args %016llx %d %d %d",
			  size, getref, mapref, sparse);
		return -EINVAL;
	}

	/* Tesla-class GPUs can only select page size per-PDE, which means
	 * we're required to know the mapping granularity up-front to find
	 * a suitable region of address-space.
	 *
	 * The same goes if we're requesting up-front allocation of PTES.
	 */
	if (unlikely((getref || vmm->func->page_block) && !shift)) {
		VMM_DEBUG(vmm, "page size required: %d %016llx",
			  getref, vmm->func->page_block);
		return -EINVAL;
	}

	/* If a specific page size was requested, determine its index and
	 * make sure the requested size is a multiple of the page size.
	 */
	if (shift) {
		for (page = vmm->func->page; page->shift; page++) {
			if (shift == page->shift)
				break;
		}

		if (!page->shift || !IS_ALIGNED(size, 1ULL << page->shift)) {
			VMM_DEBUG(vmm, "page %d %016llx", shift, size);
			return -EINVAL;
		}
		align = max_t(u8, align, shift);
	} else {
		align = max_t(u8, align, 12);
	}

	/* Locate smallest block that can possibly satisfy the allocation. */
	temp = vmm->free.rb_node;
	while (temp) {
		struct nvkm_vma *this = rb_entry(temp, typeof(*this), tree);
		if (this->size < size) {
			temp = temp->rb_right;
		} else {
			node = temp;
			temp = temp->rb_left;
		}
	}

	if (unlikely(!node))
		return -ENOSPC;

	/* Take into account alignment restrictions, trying larger blocks
	 * in turn until we find a suitable free block.
	 */
	do {
		struct nvkm_vma *this = rb_entry(node, typeof(*this), tree);
		struct nvkm_vma *prev = node(this, prev);
		struct nvkm_vma *next = node(this, next);
		const int p = page - vmm->func->page;

		addr = this->addr;
		if (vmm->func->page_block && prev && prev->page != p)
			addr = ALIGN(addr, vmm->func->page_block);
		addr = ALIGN(addr, 1ULL << align);

		tail = this->addr + this->size;
		if (vmm->func->page_block && next && next->page != p)
			tail = ALIGN_DOWN(tail, vmm->func->page_block);

		if (addr <= tail && tail - addr >= size) {
			nvkm_vmm_free_remove(vmm, this);
			vma = this;
			break;
		}
	} while ((node = rb_next(node)));

	if (unlikely(!vma))
		return -ENOSPC;

	/* If the VMA we found isn't already exactly the requested size,
	 * it needs to be split, and the remaining free blocks returned.
	 */
	if (addr != vma->addr) {
		if (!(tmp = nvkm_vma_tail(vma, vma->size + vma->addr - addr))) {
			nvkm_vmm_put_region(vmm, vma);
			return -ENOMEM;
		}
		nvkm_vmm_free_insert(vmm, vma);
		vma = tmp;
	}

	if (size != vma->size) {
		if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) {
			nvkm_vmm_put_region(vmm, vma);
			return -ENOMEM;
		}
		nvkm_vmm_free_insert(vmm, tmp);
	}

	/* Pre-allocate page tables and/or setup sparse mappings. */
	if (sparse && getref)
		ret = nvkm_vmm_ptes_sparse_get(vmm, page, vma->addr, vma->size);
	else if (sparse)
		ret = nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, true);
	else if (getref)
		ret = nvkm_vmm_ptes_get(vmm, page, vma->addr, vma->size);
	else
		ret = 0;
	if (ret) {
		nvkm_vmm_put_region(vmm, vma);
		return ret;
	}

	vma->mapref = mapref && !getref;
	vma->sparse = sparse;
	vma->page = page - vmm->func->page;
	vma->refd = getref ? vma->page : NVKM_VMA_PAGE_NONE;
	vma->used = true;
	nvkm_vmm_node_insert(vmm, vma);
	*pvma = vma;
	return 0;
}

int
nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma)
{
	int ret;
	mutex_lock(&vmm->mutex.vmm);
	ret = nvkm_vmm_get_locked(vmm, false, true, false, page, 0, size, pvma);
	mutex_unlock(&vmm->mutex.vmm);
	return ret;
}

void
nvkm_vmm_raw_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size,
		   bool sparse, u8 refd)
{
	const struct nvkm_vmm_page *page = &vmm->func->page[refd];

	nvkm_vmm_ptes_unmap(vmm, page, addr, size, sparse, false);
}

void
nvkm_vmm_raw_put(struct nvkm_vmm *vmm, u64 addr, u64 size, u8 refd)
{
	const struct nvkm_vmm_page *page = vmm->func->page;

	nvkm_vmm_ptes_put(vmm, &page[refd], addr, size);
}

int
nvkm_vmm_raw_get(struct nvkm_vmm *vmm, u64 addr, u64 size, u8 refd)
{
	const struct nvkm_vmm_page *page = vmm->func->page;

	if (unlikely(!size))
		return -EINVAL;

	return nvkm_vmm_ptes_get(vmm, &page[refd], addr, size);
}

int
nvkm_vmm_raw_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref)
{
	int ret;

	mutex_lock(&vmm->mutex.ref);
	ret = nvkm_vmm_ptes_sparse(vmm, addr, size, ref);
	mutex_unlock(&vmm->mutex.ref);

	return ret;
}

void
nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
{
	if (inst && vmm && vmm->func->part) {
		mutex_lock(&vmm->mutex.vmm);
		vmm->func->part(vmm, inst);
		mutex_unlock(&vmm->mutex.vmm);
	}
}

int
nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
{
	int ret = 0;
	if (vmm->func->join) {
		mutex_lock(&vmm->mutex.vmm);
		ret = vmm->func->join(vmm, inst);
		mutex_unlock(&vmm->mutex.vmm);
	}
	return ret;
}

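/* Bootstrap: reference page tables covering the entire address-space
 * up-front and boot their backing memory, so page-table allocation is
 * never needed at map time.
 */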
static bool
nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm);
	return false;
}

int
nvkm_vmm_boot(struct nvkm_vmm *vmm)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	const u64 limit = vmm->limit - vmm->start;
	int ret;

	while (page[1].shift)
		page++;

	ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit);
	if (ret)
		return ret;

	nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false, false,
		      nvkm_vmm_boot_ptes, NULL, NULL, NULL);
	vmm->bootstrapped = true;
	return 0;
}

static void
nvkm_vmm_del(struct kref *kref)
{
	struct nvkm_vmm *vmm = container_of(kref, typeof(*vmm), kref);
	nvkm_vmm_dtor(vmm);
	kfree(vmm);
}

void
nvkm_vmm_unref(struct nvkm_vmm **pvmm)
{
	struct nvkm_vmm *vmm = *pvmm;
	if (vmm) {
		kref_put(&vmm->kref, nvkm_vmm_del);
		*pvmm = NULL;
	}
}

struct nvkm_vmm *
nvkm_vmm_ref(struct nvkm_vmm *vmm)
{
	if (vmm)
		kref_get(&vmm->kref);
	return vmm;
}

int
nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv,
	     u32 argc, struct lock_class_key *key, const char *name,
	     struct nvkm_vmm **pvmm)
{
	struct nvkm_mmu *mmu = device->mmu;
	struct nvkm_vmm *vmm = NULL;
	int ret;
	ret = mmu->func->vmm.ctor(mmu, false, addr, size, argv, argc,
				  key, name, &vmm);
	if (ret)
		nvkm_vmm_unref(&vmm);
	*pvmm = vmm;
	return ret;
}