1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright IBM Corp. 2011
4 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
5 */
6#include <linux/hugetlb.h>
7#include <linux/proc_fs.h>
8#include <linux/vmalloc.h>
9#include <linux/mm.h>
10#include <asm/cacheflush.h>
11#include <asm/facility.h>
12#include <asm/pgalloc.h>
13#include <asm/kfence.h>
14#include <asm/page.h>
15#include <asm/set_memory.h>
16
/*
 * Issue the instruction encoded as ".insn rrf,0xb22b0000" with @skey as the
 * key operand and @addr as the address operand.
 *
 * @addr is an in/out operand ("+a"): the value the instruction leaves in it
 * is returned, which lets a caller loop on the result until a boundary is
 * reached.
 *
 * NOTE(review): opcode 0xb22b with the mask field set to 1 is presumably
 * SSKE with the multiple-block control - confirm against the z/Architecture
 * Principles of Operation.
 */
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
		     : [addr] "+a" (addr) : [skey] "d" (skey));
	return addr;
}
23
24void __storage_key_init_range(unsigned long start, unsigned long end)
25{
26	unsigned long boundary, size;
27
28	while (start < end) {
29		if (MACHINE_HAS_EDAT1) {
30			/* set storage keys for a 1MB frame */
31			size = 1UL << 20;
32			boundary = (start + size) & ~(size - 1);
33			if (boundary <= end) {
34				do {
35					start = sske_frame(start, PAGE_DEFAULT_KEY);
36				} while (start < boundary);
37				continue;
38			}
39		}
40		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
41		start += PAGE_SIZE;
42	}
43}
44
45#ifdef CONFIG_PROC_FS
/*
 * Counters indexed by PG_DIRECT_MAP_*; arch_report_meminfo() converts them
 * to kB for the DirectMap4k/DirectMap1M/DirectMap2G lines.
 * NOTE(review): __bootdata_preserved presumably means the array is shared
 * with the early boot code - confirm against its definition.
 */
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
47
48void arch_report_meminfo(struct seq_file *m)
49{
50	seq_printf(m, "DirectMap4k:    %8lu kB\n",
51		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
52	seq_printf(m, "DirectMap1M:    %8lu kB\n",
53		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
54	seq_printf(m, "DirectMap2G:    %8lu kB\n",
55		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
56}
57#endif /* CONFIG_PROC_FS */
58
59static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
60		    unsigned long dtt)
61{
62	unsigned long *table, mask;
63
64	mask = 0;
65	if (MACHINE_HAS_EDAT2) {
66		switch (dtt) {
67		case CRDTE_DTT_REGION3:
68			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
69			break;
70		case CRDTE_DTT_SEGMENT:
71			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
72			break;
73		case CRDTE_DTT_PAGE:
74			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
75			break;
76		}
77		table = (unsigned long *)((unsigned long)old & mask);
78		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce.val);
79	} else if (MACHINE_HAS_IDTE) {
80		cspg(old, *old, new);
81	} else {
82		csp((unsigned int *)old + 1, *old, new);
83	}
84}
85
86static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
87			  unsigned long flags)
88{
89	pte_t *ptep, new;
90
91	if (flags == SET_MEMORY_4K)
92		return 0;
93	ptep = pte_offset_kernel(pmdp, addr);
94	do {
95		new = *ptep;
96		if (pte_none(new))
97			return -EINVAL;
98		if (flags & SET_MEMORY_RO)
99			new = pte_wrprotect(new);
100		else if (flags & SET_MEMORY_RW)
101			new = pte_mkwrite_novma(pte_mkdirty(new));
102		if (flags & SET_MEMORY_NX)
103			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
104		else if (flags & SET_MEMORY_X)
105			new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
106		if (flags & SET_MEMORY_INV) {
107			new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
108		} else if (flags & SET_MEMORY_DEF) {
109			new = __pte(pte_val(new) & PAGE_MASK);
110			new = set_pte_bit(new, PAGE_KERNEL);
111			if (!MACHINE_HAS_NX)
112				new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
113		}
114		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
115		ptep++;
116		addr += PAGE_SIZE;
117		cond_resched();
118	} while (addr < end);
119	return 0;
120}
121
122static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
123{
124	unsigned long pte_addr, prot;
125	pte_t *pt_dir, *ptep;
126	pmd_t new;
127	int i, ro, nx;
128
129	pt_dir = vmem_pte_alloc();
130	if (!pt_dir)
131		return -ENOMEM;
132	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
133	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
134	nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
135	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
136	if (!nx)
137		prot &= ~_PAGE_NOEXEC;
138	ptep = pt_dir;
139	for (i = 0; i < PTRS_PER_PTE; i++) {
140		set_pte(ptep, __pte(pte_addr | prot));
141		pte_addr += PAGE_SIZE;
142		ptep++;
143	}
144	new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
145	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
146	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
147	update_page_count(PG_DIRECT_MAP_1M, -1);
148	return 0;
149}
150
151static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
152			    unsigned long flags)
153{
154	pmd_t new = *pmdp;
155
156	if (flags & SET_MEMORY_RO)
157		new = pmd_wrprotect(new);
158	else if (flags & SET_MEMORY_RW)
159		new = pmd_mkwrite_novma(pmd_mkdirty(new));
160	if (flags & SET_MEMORY_NX)
161		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
162	else if (flags & SET_MEMORY_X)
163		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
164	if (flags & SET_MEMORY_INV) {
165		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
166	} else if (flags & SET_MEMORY_DEF) {
167		new = __pmd(pmd_val(new) & PMD_MASK);
168		new = set_pmd_bit(new, SEGMENT_KERNEL);
169		if (!MACHINE_HAS_NX)
170			new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
171	}
172	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
173}
174
175static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
176			  unsigned long flags)
177{
178	unsigned long next;
179	int need_split;
180	pmd_t *pmdp;
181	int rc = 0;
182
183	pmdp = pmd_offset(pudp, addr);
184	do {
185		if (pmd_none(*pmdp))
186			return -EINVAL;
187		next = pmd_addr_end(addr, end);
188		if (pmd_leaf(*pmdp)) {
189			need_split  = !!(flags & SET_MEMORY_4K);
190			need_split |= !!(addr & ~PMD_MASK);
191			need_split |= !!(addr + PMD_SIZE > next);
192			if (need_split) {
193				rc = split_pmd_page(pmdp, addr);
194				if (rc)
195					return rc;
196				continue;
197			}
198			modify_pmd_page(pmdp, addr, flags);
199		} else {
200			rc = walk_pte_level(pmdp, addr, next, flags);
201			if (rc)
202				return rc;
203		}
204		pmdp++;
205		addr = next;
206		cond_resched();
207	} while (addr < end);
208	return rc;
209}
210
211static int split_pud_page(pud_t *pudp, unsigned long addr)
212{
213	unsigned long pmd_addr, prot;
214	pmd_t *pm_dir, *pmdp;
215	pud_t new;
216	int i, ro, nx;
217
218	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
219	if (!pm_dir)
220		return -ENOMEM;
221	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
222	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
223	nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
224	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
225	if (!nx)
226		prot &= ~_SEGMENT_ENTRY_NOEXEC;
227	pmdp = pm_dir;
228	for (i = 0; i < PTRS_PER_PMD; i++) {
229		set_pmd(pmdp, __pmd(pmd_addr | prot));
230		pmd_addr += PMD_SIZE;
231		pmdp++;
232	}
233	new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
234	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
235	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
236	update_page_count(PG_DIRECT_MAP_2G, -1);
237	return 0;
238}
239
240static void modify_pud_page(pud_t *pudp, unsigned long addr,
241			    unsigned long flags)
242{
243	pud_t new = *pudp;
244
245	if (flags & SET_MEMORY_RO)
246		new = pud_wrprotect(new);
247	else if (flags & SET_MEMORY_RW)
248		new = pud_mkwrite(pud_mkdirty(new));
249	if (flags & SET_MEMORY_NX)
250		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
251	else if (flags & SET_MEMORY_X)
252		new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
253	if (flags & SET_MEMORY_INV) {
254		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
255	} else if (flags & SET_MEMORY_DEF) {
256		new = __pud(pud_val(new) & PUD_MASK);
257		new = set_pud_bit(new, REGION3_KERNEL);
258		if (!MACHINE_HAS_NX)
259			new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
260	}
261	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
262}
263
264static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
265			  unsigned long flags)
266{
267	unsigned long next;
268	int need_split;
269	pud_t *pudp;
270	int rc = 0;
271
272	pudp = pud_offset(p4d, addr);
273	do {
274		if (pud_none(*pudp))
275			return -EINVAL;
276		next = pud_addr_end(addr, end);
277		if (pud_leaf(*pudp)) {
278			need_split  = !!(flags & SET_MEMORY_4K);
279			need_split |= !!(addr & ~PUD_MASK);
280			need_split |= !!(addr + PUD_SIZE > next);
281			if (need_split) {
282				rc = split_pud_page(pudp, addr);
283				if (rc)
284					break;
285				continue;
286			}
287			modify_pud_page(pudp, addr, flags);
288		} else {
289			rc = walk_pmd_level(pudp, addr, next, flags);
290		}
291		pudp++;
292		addr = next;
293		cond_resched();
294	} while (addr < end && !rc);
295	return rc;
296}
297
298static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
299			  unsigned long flags)
300{
301	unsigned long next;
302	p4d_t *p4dp;
303	int rc = 0;
304
305	p4dp = p4d_offset(pgd, addr);
306	do {
307		if (p4d_none(*p4dp))
308			return -EINVAL;
309		next = p4d_addr_end(addr, end);
310		rc = walk_pud_level(p4dp, addr, next, flags);
311		p4dp++;
312		addr = next;
313		cond_resched();
314	} while (addr < end && !rc);
315	return rc;
316}
317
/* Held by __set_memory() around the page table walk and the alias update. */
DEFINE_MUTEX(cpa_mutex);
319
320static int change_page_attr(unsigned long addr, unsigned long end,
321			    unsigned long flags)
322{
323	unsigned long next;
324	int rc = -EINVAL;
325	pgd_t *pgdp;
326
327	pgdp = pgd_offset_k(addr);
328	do {
329		if (pgd_none(*pgdp))
330			break;
331		next = pgd_addr_end(addr, end);
332		rc = walk_p4d_level(pgdp, addr, next, flags);
333		if (rc)
334			break;
335		cond_resched();
336	} while (pgdp++, addr = next, addr < end && !rc);
337	return rc;
338}
339
340static int change_page_attr_alias(unsigned long addr, unsigned long end,
341				  unsigned long flags)
342{
343	unsigned long alias, offset, va_start, va_end;
344	struct vm_struct *area;
345	int rc = 0;
346
347	/*
348	 * Changes to read-only permissions on kernel VA mappings are also
349	 * applied to the kernel direct mapping. Execute permissions are
350	 * intentionally not transferred to keep all allocated pages within
351	 * the direct mapping non-executable.
352	 */
353	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
354	if (!flags)
355		return 0;
356	area = NULL;
357	while (addr < end) {
358		if (!area)
359			area = find_vm_area((void *)addr);
360		if (!area || !(area->flags & VM_ALLOC))
361			return 0;
362		va_start = (unsigned long)area->addr;
363		va_end = va_start + area->nr_pages * PAGE_SIZE;
364		offset = (addr - va_start) >> PAGE_SHIFT;
365		alias = (unsigned long)page_address(area->pages[offset]);
366		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
367		if (rc)
368			break;
369		addr += PAGE_SIZE;
370		if (addr >= va_end)
371			area = NULL;
372	}
373	return rc;
374}
375
376int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
377{
378	unsigned long end;
379	int rc;
380
381	if (!MACHINE_HAS_NX)
382		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
383	if (!flags)
384		return 0;
385	if (!numpages)
386		return 0;
387	addr &= PAGE_MASK;
388	end = addr + numpages * PAGE_SIZE;
389	mutex_lock(&cpa_mutex);
390	rc = change_page_attr(addr, end, flags);
391	if (rc)
392		goto out;
393	rc = change_page_attr_alias(addr, end, flags);
394out:
395	mutex_unlock(&cpa_mutex);
396	return rc;
397}
398
399int set_direct_map_invalid_noflush(struct page *page)
400{
401	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
402}
403
404int set_direct_map_default_noflush(struct page *page)
405{
406	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
407}
408
409#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
410
411static void ipte_range(pte_t *pte, unsigned long address, int nr)
412{
413	int i;
414
415	if (test_facility(13)) {
416		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
417		return;
418	}
419	for (i = 0; i < nr; i++) {
420		__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
421		address += PAGE_SIZE;
422		pte++;
423	}
424}
425
/*
 * Make @numpages pages starting at @page valid (@enable != 0) or invalid
 * (@enable == 0) in the kernel mapping returned by page_to_virt().
 *
 * The pages are processed in chunks that never cross the end of a page
 * table, so that a chunk is always a run of consecutive ptes.
 *
 * NOTE(review): the result of virt_to_kpte() is used unchecked; presumably
 * the mapping is guaranteed to be 4K-mapped when this code is built in -
 * confirm.
 */
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	unsigned long address;
	pte_t *ptep, pte;
	int nr, i, j;

	for (i = 0; i < numpages;) {
		address = (unsigned long)page_to_virt(page + i);
		ptep = virt_to_kpte(address);
		/*
		 * Pointer divided by sizeof(long) and masked with
		 * PTRS_PER_PTE - 1 is the index of ptep within its page
		 * table (assumes a pte is sizeof(long) bytes - TODO confirm).
		 */
		nr = (unsigned long)ptep >> ilog2(sizeof(long));
		/* number of ptes from ptep to the end of that page table */
		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
		/* ... but not more than the pages still to be processed */
		nr = min(numpages - i, nr);
		if (enable) {
			/* clear the invalid bit of each pte in the chunk */
			for (j = 0; j < nr; j++) {
				pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
				set_pte(ptep, pte);
				address += PAGE_SIZE;
				ptep++;
			}
		} else {
			/* invalidate the whole chunk */
			ipte_range(ptep, address, nr);
		}
		i += nr;
	}
}
451
#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
453