/*
 * Copyright 2019-2023 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


static constexpr bool kTraceMemoryMap = false;
static constexpr bool kTracePageDirectory = false;


// Ignore memory above 512GB
#define PHYSICAL_MEMORY_LOW		0x00000000
#define PHYSICAL_MEMORY_HIGH	0x8000000000ull

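// Translation regime for a 4KB granule and a 48 bit address space, using
// four levels of lookup (L0 to L3). The field order below is assumed to be
// (shift, mask, blocks allowed, tables allowed, pages allowed): L0 only
// points to tables, L1 and L2 allow blocks, and L3 holds pages.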
ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;

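// Return a human readable name for the given translation granule size.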
const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}

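// Recursively dump the translation table descriptors found under "table",
// starting at the given level. Only produces output when TRACE_MMU is
// enabled.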
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value());
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE("Table! Next Level:\n");
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE("Block/Page");

				if (i & 1) { // 2 entries per row
					TRACE("\n");
				} else {
					TRACE("\t");
				}
			}
		}
		ttd.Next();
	}
}

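// Dump the tables currently active under TTBR0 as well as the kernel tables
// allocated under sPageDirectory (TTBR1_EL1).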
void
arch_mmu_dump_present_tables()
{
	uint64 address = arch_mmu_base_register();
	dprintf("Under TTBR0: %lx\n", address);

	arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

	/* While we are still in EL2 and about to transition, the MMU configuration
	 * currently in use is the one referenced by TTBR0_EL2. The kernel side is
	 * not active yet, but has already been allocated under sPageDirectory and
	 * installed in TTBR1_EL1.
	 */
	dprintf("Under allocated TTBR1_EL1:\n");
	arch_mmu_dump_table(sPageDirectory, 0);
}

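// Prepare TCR_EL1 from the given value: enable translation walks through
// TTBR1 and set T1SZ to cover the kernel address space.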
void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable translation through TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	WRITE_SPECIALREG(TCR_EL1, tcr);
}

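// Map a physical range at virt_addr into the translation table referenced by
// "descriptor" at the given level. Block or page descriptors are used
// whenever address and size allow it; otherwise the next table level is
// allocated (or followed) and the function recurses. Returns the number of
// bytes that did not fit into the table at this level, so the caller can
// continue with the remainder in the next table.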
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location());

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment, we can use a block
		// here instead of filling a complete next level table.
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Expand!
			int64 expandedSize = (size > remainingSizeInTable) ? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;
		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// The region is too small for a block at this level;
					// create the next level table.
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is already allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page for %lx\n",
						ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset = remainingSize - unprocessedSize;
				remainingSize = unprocessedSize;

				ttd.Next();
			} while (remainingSize > 0);

			return 0;
		}
	} else {
		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}

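// Map a virtual range onto a physical range using the page tables rooted at
// the TTBR matching the address (TTBR1_EL1 for kernel addresses, TTBR0_EL1
// otherwise), and record it as an allocated virtual range.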
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags);

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE("Requesting 0 size map\n");
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE("Trying to map the TTBR itself?!\n");
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE("Range already covered in current MMU\n");
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// OK, but use TTBR0 instead
		address = READ_SPECIALREG(TTBR0_EL1);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

// 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
// 		map_page(virt_addr + offset, phys_addr + offset, flags);
// 	}

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}

void
arch_mmu_init()
{
	// Stub
}

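// Finish the MMU setup once the final EFI memory map is known: build the
// list of allocated physical ranges, switch the EFI runtime services to
// virtual mode and, when enabled, dump the gathered memory ranges.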
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

	if (kTraceMemoryMap) {
		dprintf("phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
			uint64 start = gKernelArgs.physical_memory_range[i].start;
			uint64 size = gKernelArgs.physical_memory_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
			uint64 start = gKernelArgs.physical_allocated_range[i].start;
			uint64 size = gKernelArgs.physical_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated virt memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
			uint64 start = gKernelArgs.virtual_allocated_range[i].start;
			uint64 size = gKernelArgs.virtual_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("virt memory ranges to keep:\n");
		for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
			uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
			uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}
	}
}

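// Set up sPageDirectory, the root table of the kernel address space
// (TTBR1_EL1). An existing TTBR1_EL1 is only reused when we are already
// running at EL1; otherwise a fresh page is allocated and installed.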
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust possible previous allocations of TTBR1
	// only if we come from a preset EL1 context
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE("Reusing present TTBR1_EL1: %" B_PRIx64 "\n", ttbr1);
		} else if (arch_exception_level() == 2) {
			TRACE("Ignoring EL1 TTBR1(%" B_PRIx64 ") tables\n", ttbr1);
		}
	}

	// NOTE: On devices supporting multiple translation base registers,
	// only TTBR0 must be used.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	}

	sPageDirectory = page;
}

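// Build the kernel page tables from the EFI memory map: map the
// EFI_MEMORY_RUNTIME regions, the regions reported by mmu_next_region(),
// the physical map area and the boot UART, then publish the page directory
// location through gKernelArgs. Returns the physical address of the kernel
// page directory.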
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

// 	arch_mmu_allocate_page_tables();
	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

	TRACE("Mapping EFI_MEMORY_RUNTIME\n");
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0) {
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
					| currentMair.MaskOf(MAIR_NORMAL_WB));
		}
	}

	TRACE("Mapping \"next\" regions\n");
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	// TODO: We actually can only map physical RAM, mapping everything
	// could cause unwanted MMIO or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
		| currentMair.MaskOf(MAIR_NORMAL_WB));

	if (gKernelArgs.arch_args.uart.kind[0] != 0) {
		// Map the UART because we want to use it during early boot.
		uint64 regs_start = gKernelArgs.arch_args.uart.regs.start;
		uint64 regs_size = ROUNDUP(gKernelArgs.arch_args.uart.regs.size, B_PAGE_SIZE);
		uint64 base = get_next_virtual_address(regs_size);

		map_range(base, regs_start, regs_size,
			ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
			| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));

		gKernelArgs.arch_args.uart.regs.start = base;
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE("gKernelArgs.arch_args.phys_pgdir     = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir      = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable);

	if (kTracePageDirectory)
		arch_mmu_dump_present_tables();

	return (uint64_t)sPageDirectory;
}