// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table v2 allocator.
 *
 * Copyright (C) 2022, 2023 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 * Author: Vasant Hegde <vasant.hegde@amd.com>
 */

#define pr_fmt(fmt)	"AMD-Vi: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"

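/*
 * The v2 format mirrors the x86 long-mode CPU page-table layout, so the PTE
 * bits below sit at the same positions as their x86 counterparts.
 */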
#define IOMMU_PAGE_PRESENT	BIT_ULL(0)	/* Is present */
#define IOMMU_PAGE_RW		BIT_ULL(1)	/* Writeable */
#define IOMMU_PAGE_USER		BIT_ULL(2)	/* Userspace addressable */
#define IOMMU_PAGE_PWT		BIT_ULL(3)	/* Page write through */
#define IOMMU_PAGE_PCD		BIT_ULL(4)	/* Page cache disabled */
#define IOMMU_PAGE_ACCESS	BIT_ULL(5)	/* Was accessed (updated by IOMMU) */
#define IOMMU_PAGE_DIRTY	BIT_ULL(6)	/* Was written to (updated by IOMMU) */
#define IOMMU_PAGE_PSE		BIT_ULL(7)	/* Page Size Extensions */
#define IOMMU_PAGE_NX		BIT_ULL(63)	/* No execute */

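/*
 * A 4K table page holds 512 64-bit entries; 2M and 1G are the large (PSE)
 * leaf page sizes handled by this allocator.
 */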
#define MAX_PTRS_PER_PAGE	512

#define IOMMU_PAGE_SIZE_2M	BIT_ULL(21)
#define IOMMU_PAGE_SIZE_1G	BIT_ULL(30)

static inline int get_pgtable_level(void)
{
	return amd_iommu_gpt_level;
}

static inline bool is_large_pte(u64 pte)
{
	return (pte & IOMMU_PAGE_PSE);
}

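/*
 * Build a non-leaf entry pointing at the next-level table @page: its
 * physical address plus present/writable/user/accessed/dirty bits.
 */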
static inline u64 set_pgtable_attr(u64 *page)
{
	u64 prot;

	prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
	prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;

	return (iommu_virt_to_phys(page) | prot);
}

static inline void *get_pgtable_pte(u64 pte)
{
	return iommu_phys_to_virt(pte & PM_ADDR_MASK);
}

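/*
 * Build a leaf PTE for @paddr: apply the SME encryption mask, grant write
 * access only when IOMMU_PROT_IW is requested, and set the PSE bit for
 * 2M/1G mappings.
 */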
static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot)
{
	u64 pte;

	pte = __sme_set(paddr & PM_ADDR_MASK);
	pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER;
	pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;

	if (prot & IOMMU_PROT_IW)
		pte |= IOMMU_PAGE_RW;

	/* Large page */
	if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M)
		pte |= IOMMU_PAGE_PSE;

	return pte;
}

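/*
 * Return the largest supported page size that does not exceed @size,
 * e.g. 5M yields 2M and 8K yields 4K.
 */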
static inline u64 get_alloc_page_size(u64 size)
{
	if (size >= IOMMU_PAGE_SIZE_1G)
		return IOMMU_PAGE_SIZE_1G;

	if (size >= IOMMU_PAGE_SIZE_2M)
		return IOMMU_PAGE_SIZE_2M;

	return PAGE_SIZE;
}

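/*
 * Map a leaf page size to the level at which the table walk stops:
 * v2_alloc_pte() descends while level >= this value, so the returned PTE
 * sits one level below it (level 0 for 4K, level 1 for 2M, level 2 for 1G).
 */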
static inline int page_size_to_level(u64 pg_size)
{
	if (pg_size == IOMMU_PAGE_SIZE_1G)
		return PAGE_MODE_3_LEVEL;
	if (pg_size == IOMMU_PAGE_SIZE_2M)
		return PAGE_MODE_2_LEVEL;

	return PAGE_MODE_1_LEVEL;
}

static inline void free_pgtable_page(u64 *pt)
{
	free_page((unsigned long)pt);
}

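/*
 * Recursively free the sub-tables reachable from @pt, which sits at @level,
 * then free @pt itself. Large (PSE) leaf entries reference no lower table
 * and are skipped.
 */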
static void free_pgtable(u64 *pt, int level)
{
	u64 *p;
	int i;

	for (i = 0; i < MAX_PTRS_PER_PAGE; i++) {
		/* PTE present? */
		if (!IOMMU_PTE_PRESENT(pt[i]))
			continue;

		if (is_large_pte(pt[i]))
			continue;

		/*
		 * Free the next level. No need to look at l1 tables here since
		 * they can only contain leaf PTEs; just free them directly.
		 */
		p = get_pgtable_pte(pt[i]);
		if (level > 2)
			free_pgtable(p, level - 1);
		else
			free_pgtable_page(p);
	}

	free_pgtable_page(pt);
}

/*
 * Walk the page table for @iova, allocating any missing intermediate tables,
 * and return a pointer to the PTE at the level required for @pg_size.
 * Existing mappings that are in the way are torn down and *@updated is set
 * so the caller knows a TLB flush is needed.
 */
static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
			 unsigned long pg_size, gfp_t gfp, bool *updated)
{
	u64 *pte, *page;
	int level, end_level;

	level = get_pgtable_level() - 1;
	end_level = page_size_to_level(pg_size);
	pte = &pgd[PM_LEVEL_INDEX(level, iova)];
	iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE);

	while (level >= end_level) {
		u64 __pte, __npte;

		__pte = *pte;

		if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) {
			/* Unmap large pte */
			cmpxchg64(pte, *pte, 0ULL);
			*updated = true;
			continue;
		}

		if (!IOMMU_PTE_PRESENT(__pte)) {
			page = alloc_pgtable_page(nid, gfp);
			if (!page)
				return NULL;

			__npte = set_pgtable_attr(page);
			/*
			 * The pte may have changed under us; if so, drop the
			 * page we allocated.
			 */
			if (cmpxchg64(pte, __pte, __npte) != __pte)
				free_pgtable_page(page);
			else if (IOMMU_PTE_PRESENT(__pte))
				*updated = true;

			continue;
		}

		level -= 1;
		pte = get_pgtable_pte(__pte);
		pte = &pte[PM_LEVEL_INDEX(level, iova)];
	}

	/* Tear down existing pte entries */
	if (IOMMU_PTE_PRESENT(*pte)) {
		u64 *__pte;

		*updated = true;
		__pte = get_pgtable_pte(*pte);
		cmpxchg64(pte, *pte, 0ULL);
		if (pg_size == IOMMU_PAGE_SIZE_1G)
			free_pgtable(__pte, end_level - 1);
		else if (pg_size == IOMMU_PAGE_SIZE_2M)
			free_pgtable_page(__pte);
	}

	return pte;
}

/*
 * Look up the PTE for a given DMA address (IOVA). If the walk reaches one,
 * return a pointer to it and report the page size it covers via *page_size;
 * otherwise return NULL.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
		      unsigned long iova, unsigned long *page_size)
{
	u64 *pte;
	int level;

	level = get_pgtable_level() - 1;
	pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)];
	/* Default page size is 4K */
	*page_size = PAGE_SIZE;

	while (level) {
		/* Not present */
		if (!IOMMU_PTE_PRESENT(*pte))
			return NULL;

		/* Walk to the next level */
		pte = get_pgtable_pte(*pte);
		pte = &pte[PM_LEVEL_INDEX(level - 1, iova)];

		/* Large page */
		if (is_large_pte(*pte)) {
			if (level == PAGE_MODE_3_LEVEL)
				*page_size = IOMMU_PAGE_SIZE_1G;
			else if (level == PAGE_MODE_2_LEVEL)
				*page_size = IOMMU_PAGE_SIZE_2M;
			else
				return NULL;	/* Wrongly set PSE bit in PTE */

			break;
		}

		level -= 1;
	}

	return pte;
}

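/*
 * Map @pgcount pages of @pgsize starting at @iova. Each iteration installs
 * the largest supported page size that does not exceed @pgsize; if an
 * existing mapping had to be replaced, the affected range is flushed before
 * returning.
 */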
static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int prot, gfp_t gfp, size_t *mapped)
{
	struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
	struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg;
	u64 *pte;
	unsigned long map_size;
	unsigned long mapped_size = 0;
	unsigned long o_iova = iova;
	size_t size = pgcount << __ffs(pgsize);
	int ret = 0;
	bool updated = false;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount)
		return -EINVAL;

	if (!(prot & IOMMU_PROT_MASK))
		return -EINVAL;

	while (mapped_size < size) {
		map_size = get_alloc_page_size(pgsize);
		pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd,
				   iova, map_size, gfp, &updated);
		if (!pte) {
			ret = -EINVAL;
			goto out;
		}

		*pte = set_pte_attr(paddr, map_size, prot);

		iova += map_size;
		paddr += map_size;
		mapped_size += map_size;
	}

out:
	if (updated)
		amd_iommu_domain_flush_pages(pdom, o_iova, size);

	if (mapped)
		*mapped += mapped_size;

	return ret;
}

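/*
 * Clear the leaf PTEs covering @pgcount pages of @pgsize at @iova and return
 * the number of bytes unmapped. Intermediate tables are left in place; TLB
 * invalidation is handled by the caller.
 */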
static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
					  unsigned long iova,
					  size_t pgsize, size_t pgcount,
					  struct iommu_iotlb_gather *gather)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &pgtable->iop.cfg;
	unsigned long unmap_size;
	unsigned long unmapped = 0;
	size_t size = pgcount << __ffs(pgsize);
	u64 *pte;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
		return 0;

	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
		if (!pte)
			return unmapped;

		*pte = 0ULL;

		iova = (iova & ~(unmap_size - 1)) + unmap_size;
		unmapped += unmap_size;
	}

	return unmapped;
}

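/*
 * Translate @iova to a physical address by combining the PTE's page frame
 * (with the SME encryption mask cleared) with the offset within the page.
 */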
static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long offset_mask, pte_pgsize;
	u64 *pte, __pte;

	pte = fetch_pte(pgtable, iova, &pte_pgsize);
	if (!pte || !IOMMU_PTE_PRESENT(*pte))
		return 0;

	offset_mask = pte_pgsize - 1;
	__pte = __sme_clr(*pte & PM_ADDR_MASK);

	return (__pte & ~offset_mask) | (iova & offset_mask);
}

/*
 * ----------------------------------------------------
 */
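/*
 * The io-pgtable TLB callbacks are intentionally empty: the AMD IOMMU driver
 * issues its own invalidations (e.g. amd_iommu_domain_flush_pages()) rather
 * than going through these hooks.
 */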
static void v2_tlb_flush_all(void *cookie)
{
}

static void v2_tlb_flush_walk(unsigned long iova, size_t size,
			      size_t granule, void *cookie)
{
}

static void v2_tlb_add_page(struct iommu_iotlb_gather *gather,
			    unsigned long iova, size_t granule,
			    void *cookie)
{
}

static const struct iommu_flush_ops v2_flush_ops = {
	.tlb_flush_all	= v2_tlb_flush_all,
	.tlb_flush_walk = v2_tlb_flush_walk,
	.tlb_add_page	= v2_tlb_add_page,
};

static void v2_free_pgtable(struct io_pgtable *iop)
{
	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);

	if (!pgtable || !pgtable->pgd)
		return;

	/* Free page table */
	free_pgtable(pgtable->pgd, get_pgtable_level());
	pgtable->pgd = NULL;
}

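/*
 * Allocate the top-level table and wire up the v2 map/unmap/iova_to_phys
 * callbacks. The input address width (ias) is set to 57 bits when 5-level
 * paging is in use and to IOMMU_IN_ADDR_BIT_SIZE otherwise.
 */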
static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
	struct protection_domain *pdom = (struct protection_domain *)cookie;
	int ias = IOMMU_IN_ADDR_BIT_SIZE;

	pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC);
	if (!pgtable->pgd)
		return NULL;

	if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
		ias = 57;

	pgtable->iop.ops.map_pages    = iommu_v2_map_pages;
	pgtable->iop.ops.unmap_pages  = iommu_v2_unmap_pages;
	pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;

	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
	cfg->ias           = ias;
	cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE;
	cfg->tlb           = &v2_flush_ops;

	return &pgtable->iop;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
	.alloc	= v2_alloc_pgtable,
	.free	= v2_free_pgtable,
};