// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

/*
 * HugeTLB Support Matrix
 *
 * ---------------------------------------------------
 * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
 * ---------------------------------------------------
 * |     4K    |   64K    |   2M  |    32M   |   1G  |
 * |    16K    |    2M    |  32M  |     1G   |       |
 * |    64K    |    2M    | 512M  |    16G   |       |
 * ---------------------------------------------------
 */
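/*
 * The contiguous sizes follow directly from the base granule:
 * CONT_PTE_SIZE = CONT_PTES * PAGE_SIZE and
 * CONT_PMD_SIZE = CONT_PMDS * PMD_SIZE. For example, with 4K pages
 * CONT_PTES is 16, giving the 64K contiguous-PTE size, and CONT_PMDS
 * is 16, giving the 32M contiguous-PMD size above.
 */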

/*
 * Reserve CMA areas for the largest supported gigantic
 * huge page when requested. Any other smaller gigantic
 * huge pages could still be served from those areas.
 */
#ifdef CONFIG_CMA
void __init arm64_hugetlb_cma_reserve(void)
{
	int order;

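	/*
	 * For example, on a 4K granule with section-level PUD mappings,
	 * order = PUD_SHIFT - PAGE_SHIFT = 30 - 12 = 18, i.e. enough
	 * contiguous memory for one 1G gigantic page. On 16K and 64K
	 * granules, where PUD sections are unavailable, the CONT_PMD
	 * order (1G and 16G respectively) is used instead.
	 */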
	if (pud_sect_supported())
		order = PUD_SHIFT - PAGE_SHIFT;
	else
		order = CONT_PMD_SHIFT - PAGE_SHIFT;

	hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */

static bool __hugetlb_valid_size(unsigned long size)
{
	switch (size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		return pud_sect_supported();
#endif
	case CONT_PMD_SIZE:
	case PMD_SIZE:
	case CONT_PTE_SIZE:
		return true;
	}

	return false;
}

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t pagesize = huge_page_size(h);

	if (!__hugetlb_valid_size(pagesize)) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
		return false;
	}
	return true;
}
#endif

int pmd_huge(pmd_t pmd)
{
	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
	return 0;
#endif
}

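/*
 * Work out which level ptep refers to by re-walking the page table: if
 * the walk lands back on ptep at the pmd level, this is a contiguous
 * set of PMDs; otherwise a contiguous set of PTEs is assumed.
 */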
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	*pgsize = PAGE_SIZE;
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_offset(p4dp, addr);
	pmdp = pmd_offset(pudp, addr);
	if ((pte_t *)pmdp == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}

static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		if (pud_sect_supported())
			contig_ptes = 1;
		break;
#endif
	case PMD_SIZE:
		contig_ptes = 1;
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

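/*
 * For a contiguous mapping the hardware may have set the dirty or
 * accessed bit on any entry of the set, so fold those bits from every
 * entry into the pte returned for the first one.
 */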
pte_t huge_ptep_get(pte_t *ptep)
{
	int ncontig, i;
	size_t pgsize;
	pte_t orig_pte = __ptep_get(ptep);

	if (!pte_present(orig_pte) || !pte_cont(orig_pte))
		return orig_pte;

	ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
	for (i = 0; i < ncontig; i++, ptep++) {
		pte_t pte = __ptep_get(ptep);

		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_contig(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = __ptep_get(ptep);
	unsigned long i;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = __ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}

static pte_t get_clear_contig_flush(struct mm_struct *mm,
				    unsigned long addr,
				    pte_t *ptep,
				    unsigned long pgsize,
				    unsigned long ncontig)
{
	pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);

	flush_tlb_range(&vma, addr, addr + (pgsize * ncontig));
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		__ptep_get_and_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}

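/*
 * Install a huge pte. Non-present and non-contiguous entries are
 * written directly; a present contiguous set must first be broken and
 * flushed (break-before-make) before the new entries are written one
 * by one, advancing the pfn by pgsize each step.
 */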
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t pte, unsigned long sz)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	ncontig = num_contig_ptes(sz, &pgsize);

	if (!pte_present(pte)) {
		for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
			__set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	if (!pte_cont(pte)) {
		__set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		__set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
}

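/*
 * Allocate the page-table levels needed to map sz bytes at addr and
 * return a pointer to the entry to populate: the pud itself for
 * PUD_SIZE, a pmd for PMD_SIZE and CONT_PMD_SIZE, or a pte for
 * CONT_PTE_SIZE. PMD_SIZE mappings may instead share a pmd page with
 * other processes via huge_pmd_share().
 */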
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_alloc(mm, pgdp, addr);
	if (!p4dp)
		return NULL;

	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		return NULL;

	if (sz == PUD_SIZE) {
		ptep = (pte_t *)pudp;
	} else if (sz == (CONT_PTE_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		if (!pmdp)
			return NULL;

		WARN_ON(addr & (sz - 1));
		ptep = pte_alloc_huge(mm, pmdp, addr);
	} else if (sz == PMD_SIZE) {
		if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
			ptep = huge_pmd_share(mm, vma, addr, pudp);
		else
			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
	} else if (sz == (CONT_PMD_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmdp;
	}

	return ptep;
}

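/*
 * Look up the entry mapping addr without allocating. Block mappings
 * and non-present (e.g. swap or migration) entries at the pud or pmd
 * level are returned so callers can inspect them; NULL means there is
 * no entry to operate on at the level implied by sz.
 */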
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (!p4d_present(READ_ONCE(*p4dp)))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	if (sz == CONT_PTE_SIZE)
		return pte_offset_huge(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}

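/*
 * Tell the generic hugetlb walker how far it may skip when it finds an
 * empty upper-level entry: the returned mask covers up to the last
 * huge page within that entry, e.g. PUD_SIZE - PMD_SIZE for PMD-sized
 * pages, so an empty pud is stepped over in a single iteration.
 */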
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
	unsigned long hp_size = huge_page_size(h);

	switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		return PGDIR_SIZE - PUD_SIZE;
#endif
	case CONT_PMD_SIZE:
		return PUD_SIZE - CONT_PMD_SIZE;
	case PMD_SIZE:
		return PUD_SIZE - PMD_SIZE;
	case CONT_PTE_SIZE:
		return PMD_SIZE - CONT_PTE_SIZE;
	default:
		break;
	}

	return 0UL;
}

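/*
 * Build the huge pte for a given page-table shift: the two contiguous
 * sizes additionally need the contiguous bit set so that
 * set_huge_pte_at() later installs a full CONT_PTE or CONT_PMD set.
 */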
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
	size_t pagesize = 1UL << shift;

	entry = pte_mkhuge(entry);
	if (pagesize == CONT_PTE_SIZE) {
		entry = pte_mkcont(entry);
	} else if (pagesize == CONT_PMD_SIZE) {
		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
	}
	return entry;
}

void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		__pte_clear(mm, addr, ptep);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	int ncontig;
	size_t pgsize;
	pte_t orig_pte = __ptep_get(ptep);

	if (!pte_cont(orig_pte))
		return __ptep_get_and_clear(mm, addr, ptep);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

	return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}

/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, write permission only needs to be
 * checked on the first pte in the set; the dirty and young bits,
 * however, must be compared on every contiguous pte, since hardware
 * may have set them on any entry.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(__ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = __ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	struct mm_struct *mm = vma->vm_mm;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return __ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		__set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);

	return 1;
}

void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(__ptep_get(ptep))) {
		__ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		__set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
}

pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
			    unsigned long addr, pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	size_t pgsize;
	int ncontig;

	if (!pte_cont(__ptep_get(ptep)))
		return ptep_clear_flush(vma, addr, ptep);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
}

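/*
 * Register an hstate for every size in the support matrix above; the
 * PUD size is included only when section-level PUD mappings are
 * supported (currently 4K granule only).
 */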
static int __init hugetlbpage_init(void)
{
	if (pud_sect_supported())
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);

	hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);

	return 0;
}
arch_initcall(hugetlbpage_init);

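/*
 * Called by generic code when parsing the hugepagesz= and
 * default_hugepagesz= kernel parameters, e.g. hugepagesz=32M is
 * accepted on 4K and 16K granules but rejected on 64K, per the
 * support matrix above.
 */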
bool __init arch_hugetlb_valid_size(unsigned long size)
{
	return __hugetlb_valid_size(size);
}

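/*
 * Start/commit pair used by generic protection-change code: start
 * clears the old entry (with a full flush when erratum #2645198
 * requires break-before-make for executable user mappings) and commit
 * installs the modified pte via set_huge_pte_at().
 */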
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
		/*
		 * Break-before-make (BBM) is required for all user space
		 * mappings when the permission changes from executable to
		 * non-executable on CPUs affected by erratum #2645198.
		 */
		if (pte_user_exec(__ptep_get(ptep)))
			return huge_ptep_clear_flush(vma, addr, ptep);
	}
	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
}

void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
				  pte_t old_pte, pte_t pte)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}