/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/tlbflush.h
 *
 * Copyright (C) 1999-2003 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H

#ifndef __ASSEMBLY__

#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
#include <asm/mmu.h>

/*
 * Raw TLBI operations.
 *
 * Where necessary, use the __tlbi() macro to avoid asm()
 * boilerplate. Drivers and most kernel code should use the TLB
 * management routines in preference to the macro below.
 *
 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
 * on whether a particular TLBI operation takes an argument or
 * not. The macro handles invoking the asm with or without the
 * register argument as appropriate.
 */
#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE			       \
			       "tlbi " #op "\n"				       \
		   ALTERNATIVE("nop\n			nop",		       \
			       "dsb ish\n		tlbi " #op,	       \
			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
			    : : )

#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE			       \
			       "tlbi " #op ", %0\n"			       \
		   ALTERNATIVE("nop\n			nop",		       \
			       "dsb ish\n		tlbi " #op ", %0",     \
			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
			    : : "r" (arg))

#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)
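
/*
 * Illustrative examples (not part of the interface documentation above):
 *
 *	__tlbi(vmalle1is);		// emits "tlbi vmalle1is"
 *	__tlbi(vale1is, addr);		// emits "tlbi vale1is, %0" with
 *					// 'addr' supplied in a register
 *
 * On systems affected by ARM64_WORKAROUND_REPEAT_TLBI, the alternative in
 * the macros above additionally emits "dsb ish" followed by a second,
 * identical TLBI.
 */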

#define __tlbi_user(op, arg) do {						\
	if (arm64_kernel_unmapped_at_el0())					\
		__tlbi(op, (arg) | USER_ASID_FLAG);				\
} while (0)

/* This macro creates a properly formatted VA operand for the TLBI */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})
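
/*
 * For illustration: the helpers below build their operands this way, e.g.
 * __flush_tlb_page_nosync() passes __TLBI_VADDR(uaddr, ASID(mm)) to
 * __tlbi(vale1is, ...), placing VA[55:12] in bits [43:0] and the ASID in
 * bits [63:48] of the TLBI operand.
 */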

/*
 * Get the translation granule of the system, which is decided by
 * PAGE_SIZE. Used by TTL.
 *  - 4KB	: 1
 *  - 16KB	: 2
 *  - 64KB	: 3
 */
#define TLBI_TTL_TG_4K		1
#define TLBI_TTL_TG_16K		2
#define TLBI_TTL_TG_64K		3

static inline unsigned long get_trans_granule(void)
{
	switch (PAGE_SIZE) {
	case SZ_4K:
		return TLBI_TTL_TG_4K;
	case SZ_16K:
		return TLBI_TTL_TG_16K;
	case SZ_64K:
		return TLBI_TTL_TG_64K;
	default:
		return 0;
	}
}

/*
 * Level-based TLBI operations.
 *
 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
 * the level at which the invalidation must take place. If the level is
 * wrong, no invalidation may take place. In the case where the level
 * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
 * a non-hinted invalidation. Any provided level outside the hint range
 * will also cause fall-back to non-hinted invalidation.
 *
 * For Stage-2 invalidation, use the level values provided to that effect
 * in asm/stage2_pgtable.h.
 */
#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)

#define TLBI_TTL_UNKNOWN	INT_MAX

#define __tlbi_level(op, addr, level) do {				\
	u64 arg = addr;							\
									\
	if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) &&	\
	    level >= 0 && level <= 3) {					\
		u64 ttl = level & 3;					\
		ttl |= get_trans_granule() << 2;			\
		arg &= ~TLBI_TTL_MASK;					\
		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);			\
	}								\
									\
	__tlbi(op, arg);						\
} while (0)
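
/*
 * For example (illustrative only): __tlbi_level(vae1is, addr, 3) encodes a
 * TTL hint of (granule << 2) | 3 into bits [47:44] of the operand when
 * ARMv8.4-TTL is available, telling the CPU that the entry being
 * invalidated is a level-3 (last-level) entry; without the capability it
 * degenerates to a plain __tlbi(vae1is, addr).
 */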

#define __tlbi_user_level(op, arg, level) do {				\
	if (arm64_kernel_unmapped_at_el0())				\
		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
} while (0)

/*
 * This macro creates a properly formatted VA operand for the TLB RANGE. The
 * value bit assignments are:
 *
 * +----------+------+-------+-------+-------+----------------------+
 * |   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
 * +----------+------+-------+-------+-------+----------------------+
 * |63      48|47  46|45   44|43   39|38   37|36                   0|
 *
 * The address range is determined by the formula:
 * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
 *
 * Note that the first argument, baddr, is pre-shifted; if LPA2 is in use,
 * BADDR holds addr[52:16], otherwise BADDR holds the page number. See for
 * example ARM DDI 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB
 * Invalidate by VA, EL1, Inner Shareable".
 */
#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl)			\
	({									\
		unsigned long __ta = (baddr);					\
		unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0;		\
		__ta &= GENMASK_ULL(36, 0);					\
		__ta |= __ttl << 37;						\
		__ta |= (unsigned long)(num) << 39;				\
		__ta |= (unsigned long)(scale) << 44;				\
		__ta |= get_trans_granule() << 46;				\
		__ta |= (unsigned long)(asid) << 48;				\
		__ta;								\
	})

/* These macros are used by the TLBI RANGE feature. */
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)
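
/*
 * Worked example (for illustration): __TLBI_RANGE_PAGES(31, 3) is
 * (31 + 1) << (5 * 3 + 1) = 32 << 16 = 2^21 pages, i.e. 8GB of virtual
 * address space with 4KB pages. A single range TLBI can therefore never
 * cover more than MAX_TLBI_RANGE_PAGES pages.
 */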

/*
 * Generate 'num' values from -1 to 31 with -1 rejected by the
 * __flush_tlb_range() loop below. Its return value is only
 * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
 * 'pages' is more than that, you must iterate over the overall
 * range.
 */
#define __TLBI_RANGE_NUM(pages, scale)					\
	({								\
		int __pages = min((pages),				\
				  __TLBI_RANGE_PAGES(31, (scale)));	\
		(__pages >> (5 * (scale) + 1)) - 1;			\
	})
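
/*
 * Worked example (for illustration): with pages = 512, scale 3 and scale 2
 * both yield -1 ((512 >> 16) - 1 and (512 >> 11) - 1), which the loop below
 * rejects; scale 1 yields (512 >> 6) - 1 = 7, and num = 7 covers exactly
 * (7 + 1) << 6 = 512 pages in a single range operation.
 */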

/*
 *	TLB Invalidation
 *	================
 *
 *	This header file implements the low-level TLB invalidation routines
 *	(sometimes referred to as "flushing" in the kernel) for arm64.
 *
 *	Every invalidation operation uses the following template:
 *
 *	DSB ISHST	// Ensure prior page-table updates have completed
 *	TLBI ...	// Invalidate the TLB
 *	DSB ISH		// Ensure the TLB invalidation has completed
 *	if (invalidated kernel mappings)
 *		ISB	// Discard any instructions fetched from the old mapping
 *
 *
 *	The following functions form part of the "core" TLB invalidation API,
 *	as documented in Documentation/core-api/cachetlb.rst:
 *
 *	flush_tlb_all()
 *		Invalidate the entire TLB (kernel + user) on all CPUs
 *
 *	flush_tlb_mm(mm)
 *		Invalidate an entire user address space on all CPUs.
 *		The 'mm' argument identifies the ASID to invalidate.
 *
 *	flush_tlb_range(vma, start, end)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		Note that this operation also invalidates any walk-cache
 *		entries associated with translations for the specified address
 *		range.
 *
 *	flush_tlb_kernel_range(start, end)
 *		Same as flush_tlb_range(..., start, end), but applies to
 *		kernel mappings rather than a particular user address space.
 *		Whilst not explicitly documented, this function is used when
 *		unmapping pages from vmalloc/io space.
 *
 *	flush_tlb_page(vma, addr)
 *		Invalidate a single user mapping for address 'addr' in the
 *		address space corresponding to 'vma->mm'.  Note that this
 *		operation only invalidates a single, last-level page-table
 *		entry and therefore does not affect any walk-caches.
 *
 *
 *	Next, we have some undocumented invalidation routines that you probably
 *	don't want to call unless you know what you're doing:
 *
 *	local_flush_tlb_all()
 *		Same as flush_tlb_all(), but only applies to the calling CPU.
 *
 *	__flush_tlb_kernel_pgtable(addr)
 *		Invalidate a single kernel mapping for address 'addr' on all
 *		CPUs, ensuring that any walk-cache entries associated with the
 *		translation are also invalidated.
 *
 *	__flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride' and only affect walk-cache entries
 *		if 'last_level' is false. 'tlb_level' is the level at which
 *		the invalidation must take place. If the level is wrong,
 *		no invalidation may take place. In the case where the level
 *		cannot be easily determined, the value TLBI_TTL_UNKNOWN will
 *		perform a non-hinted invalidation.
 *
 *
 *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
 *	on top of these routines, since that is our interface to the mmu_gather
 *	API as used by munmap() and friends.
 */
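
/*
 * As a concrete illustration of the template above, flush_tlb_page() below
 * roughly amounts to:
 *
 *	dsb(ishst);
 *	__tlbi(vale1is, __TLBI_VADDR(uaddr, ASID(mm)));
 *	__tlbi_user(vale1is, ...);	// only if the kernel is unmapped at EL0
 *	dsb(ish);
 *
 * plus the secondary-TLB notifier call. No ISB is required because only a
 * user mapping was invalidated.
 */
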
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}

static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}

static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid;

	dsb(ishst);
	asid = __TLBI_VADDR(0, ASID(mm));
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
					   unsigned long uaddr)
{
	unsigned long addr;

	dsb(ishst);
	addr = __TLBI_VADDR(uaddr, ASID(mm));
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
						(uaddr & PAGE_MASK) + PAGE_SIZE);
}

static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}

static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
	/*
	 * TLB flush deferral is not required on systems affected by
	 * ARM64_WORKAROUND_REPEAT_TLBI, as the __tlbi()/__tlbi_user()
	 * implementation there emits two consecutive TLBI instructions with
	 * a dsb(ish) in between, defeating the purpose of deferral (i.e.
	 * saving the overall 'dsb ish' cost).
	 */
	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
		return false;

	return true;
}

static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
					     struct mm_struct *mm,
					     unsigned long uaddr)
{
	__flush_tlb_page_nosync(mm, uaddr);
}

/*
 * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
 * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
 * flush_tlb_batched_pending().
 */
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
	dsb(ish);
}

/*
 * To support batched TLB flushing when unmapping multiple pages, we only
 * issue the TLBI for each page in arch_tlbbatch_add_pending() and wait for
 * completion at the end in arch_tlbbatch_flush(). Since a TLBI has already
 * been issued for each page, a single DSB is enough to synchronise its
 * effect on the other CPUs.
 *
 * This saves the time that would otherwise be spent waiting on a DSB for
 * every TLBI;DSB sequence issued per page.
 */
static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	dsb(ish);
}

/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
 * necessarily a performance improvement.
 */
#define MAX_DVM_OPS	PTRS_PER_PTE

/*
 * __flush_tlb_range_op - Perform TLBI operation upon a range
 *
 * @op:	TLBI instruction that operates on a range (has 'r' prefix)
 * @start:	The start address of the range
 * @pages:	Range as the number of pages from 'start'
 * @stride:	Flush granularity
 * @asid:	The ASID of the task (0 for IPA instructions)
 * @tlb_level:	Translation Table level hint, if known
 * @tlbi_user:	If 'true', call an additional __tlbi_user()
 *		(typically for user ASIDs). 'false' for IPA instructions
 * @lpa2:	If 'true', the lpa2 scheme is used as set out below
 *
 * When the CPU does not support TLB range operations, flush the TLB
 * entries one by one at the granularity of 'stride'. If the TLB
 * range ops are supported, then:
 *
 * 1. If FEAT_LPA2 is in use, the start address of a range operation must be
 *    64KB aligned, so flush pages one by one until the alignment is reached
 *    using the non-range operations. This step is skipped if LPA2 is not in
 *    use.
 *
 * 2. The minimum range granularity is decided by 'scale', so multiple range
 *    TLBI operations may be required. Start from scale = 3, flush the largest
 *    possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
 *    requested range, then decrement scale and continue until one or zero pages
 *    are left. We must start from highest scale to ensure 64KB start alignment
 *    is maintained in the LPA2 case.
 *
 * 3. If there is 1 page remaining, flush it through non-range operations. Range
 *    operations can only span an even number of pages. We save this for last to
 *    ensure 64KB start alignment is maintained for the LPA2 case.
 */
#define __flush_tlb_range_op(op, start, pages, stride,			\
				asid, tlb_level, tlbi_user, lpa2)	\
do {									\
	int num = 0;							\
	int scale = 3;							\
	int shift = lpa2 ? 16 : PAGE_SHIFT;				\
	unsigned long addr;						\
									\
	while (pages > 0) {						\
		if (!system_supports_tlb_range() ||			\
		    pages == 1 ||					\
		    (lpa2 && start != ALIGN(start, SZ_64K))) {		\
			addr = __TLBI_VADDR(start, asid);		\
			__tlbi_level(op, addr, tlb_level);		\
			if (tlbi_user)					\
				__tlbi_user_level(op, addr, tlb_level);	\
			start += stride;				\
			pages -= stride >> PAGE_SHIFT;			\
			continue;					\
		}							\
									\
		num = __TLBI_RANGE_NUM(pages, scale);			\
		if (num >= 0) {						\
			addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
						scale, num, tlb_level);	\
			__tlbi(r##op, addr);				\
			if (tlbi_user)					\
				__tlbi_user(r##op, addr);		\
			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
			pages -= __TLBI_RANGE_PAGES(num, scale);	\
		}							\
		scale--;						\
	}								\
} while (0)
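
/*
 * Worked example (for illustration, assuming TLB range support, 4KB pages
 * and no LPA2): for pages = 513, scale 3 and scale 2 produce num = -1 and
 * are skipped; scale 1 gives num = 7, so a single "r"-prefixed TLBI covers
 * (7 + 1) << 6 = 512 pages; the one remaining page then takes the
 * non-range path because 'pages == 1'.
 */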

#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());

static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	unsigned long asid, pages;

	start = round_down(start, stride);
	end = round_up(end, stride);
	pages = (end - start) >> PAGE_SHIFT;

	/*
	 * When not using TLB range ops, we can handle up to
	 * (MAX_DVM_OPS - 1) pages;
	 * when using TLB range ops, we can handle up to
	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_DVM_OPS * stride)) ||
	    pages >= MAX_TLBI_RANGE_PAGES) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	dsb(ishst);
	asid = ASID(vma->vm_mm);

	if (last_level)
		__flush_tlb_range_op(vale1is, start, pages, stride, asid,
				     tlb_level, true, lpa2_is_enabled());
	else
		__flush_tlb_range_op(vae1is, start, pages, stride, asid,
				     tlb_level, true, lpa2_is_enabled());

	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}

static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	__flush_tlb_range_nosync(vma, start, end, stride,
				 last_level, tlb_level);
	dsb(ish);
}
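
/*
 * For illustration only: a caller tearing down PMD-level mappings might,
 * for example, invoke this as
 *
 *	__flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2);
 *
 * i.e. a PMD-sized stride, a non-leaf invalidation (so that walk-cache
 * entries are dropped too) and a level-2 TTL hint.
 */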

static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be
	 * invalidating table entries as part of collapsing hugepages or
	 * moving page tables. Set the tlb_level to TLBI_TTL_UNKNOWN because
	 * we cannot get enough information here.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
}

static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
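	/*
	 * Note: __TLBI_VADDR() encodes the address shifted right by 12, so
	 * advancing by one page corresponds to adding 1 << (PAGE_SHIFT - 12)
	 * to the encoded operand.
	 */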
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}

/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
#endif

#endif