/*
 * Copyright 2019-2022 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */


#include <algorithm>

#include <kernel.h>
#include <arch_kernel.h>
#include <boot/platform.h>
#include <boot/stage2.h>
#include <efi/types.h>
#include <efi/boot-services.h>
#include <string.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"


//#define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


//#define TRACE_MEMORY_MAP

// Ignore memory above 512GB
#define PHYSICAL_MEMORY_LOW		0x00000000
#define PHYSICAL_MEMORY_HIGH	0x8000000000ull

#define RESERVED_MEMORY_BASE	0x80000000

phys_addr_t sPageTable = 0;


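// The boot loader still runs with EFI's identity mapping in place, so a
// physical address can be used directly as a pointer.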
static inline
void *VirtFromPhys(uint64_t physAdr)
{
	return (void*)physAdr;
}


#ifdef TRACE_MEMORY_MAP
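// Sv39 virtual addresses are 39 bits wide; sign-extend bit 38 to get the
// canonical 64-bit form for printing.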
static uint64_t
SignExtendVirtAdr(uint64_t virtAdr)
{
	if (((uint64_t)1 << 38) & virtAdr)
		return virtAdr | 0xFFFFFF8000000000;
	return virtAdr;
}


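// Print the PTE flag bits in symbolic form, e.g. "{valid, read, write}".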
static void
WritePteFlags(uint32 flags)
{
	bool first = true;
	dprintf("{");
	for (uint32 i = 0; i < 32; i++) {
		if ((1 << i) & flags) {
			if (first) first = false; else dprintf(", ");
			switch (i) {
			case 0:  dprintf("valid"); break;
			case 1:  dprintf("read"); break;
			case 2:  dprintf("write"); break;
			case 3:  dprintf("exec"); break;
			case 4:  dprintf("user"); break;
			case 5:  dprintf("global"); break;
			case 6:  dprintf("accessed"); break;
			case 7:  dprintf("dirty"); break;
			default: dprintf("%" B_PRIu32, i);
			}
		}
	}
	dprintf("}");
}


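// Coalesce consecutive pages with contiguous physical addresses and identical
// flags into one run, printing the previous run once a new one starts.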
static void
DumpPageWrite(uint64_t virtAdr, uint64_t physAdr, size_t size, uint64 flags, uint64& firstVirt,
	uint64& firstPhys, uint64& firstFlags, uint64& len)
{
	if (virtAdr == firstVirt + len && physAdr == firstPhys + len && flags == firstFlags) {
		len += size;
	} else {
		if (len != 0) {
			dprintf("  0x%08" B_PRIxADDR " - 0x%08" B_PRIxADDR,
				firstVirt, firstVirt + (len - 1));
			dprintf(": 0x%08" B_PRIxADDR " - 0x%08" B_PRIxADDR ", %#" B_PRIxADDR ", ",
				firstPhys, firstPhys + (len - 1), len);
			WritePteFlags(firstFlags); dprintf("\n");
		}
		firstVirt = virtAdr;
		firstPhys = physAdr;
		firstFlags = flags;
		len = size;
	}
}


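// Recursively walk one level of the page table: entries without R/W/X bits
// point to a lower-level table, everything else is a leaf mapping.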
static void
DumpPageTableInt(Pte* pte, uint64_t virtAdr, uint32_t level, uint64& firstVirt, uint64& firstPhys,
	uint64& firstFlags, uint64& len)
{
	for (uint32 i = 0; i < pteCount; i++) {
		if (pte[i].isValid) {
			if (!pte[i].isRead && !pte[i].isWrite && !pte[i].isExec) {
				if (level == 0)
					panic("internal page table on level 0");

				DumpPageTableInt((Pte*)VirtFromPhys(B_PAGE_SIZE*pte[i].ppn),
					virtAdr + ((uint64_t)i << (pageBits + pteIdxBits*level)),
					level - 1, firstVirt, firstPhys, firstFlags, len);
			} else {
				DumpPageWrite(
					SignExtendVirtAdr(virtAdr + ((uint64_t)i << (pageBits + pteIdxBits*level))),
					pte[i].ppn * B_PAGE_SIZE,
					1 << (pageBits + pteIdxBits*level),
					pte[i].val & 0xff,
					firstVirt, firstPhys, firstFlags, len);
			}
		}
	}
}


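// Dump all mappings of the page table referenced by the given SATP value.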
static int
DumpPageTable(uint64 satp)
{
	SatpReg satpReg{.val = satp};
	Pte* root = (Pte*)VirtFromPhys(satpReg.ppn * B_PAGE_SIZE);

	dprintf("PageTable:\n");
	uint64 firstVirt = 0;
	uint64 firstPhys = 0;
	uint64 firstFlags = 0;
	uint64 len = 0;
	DumpPageTableInt(root, 0, 2, firstVirt, firstPhys, firstFlags, len);
	DumpPageWrite(0, 0, 0, 0, firstVirt, firstPhys, firstFlags, len);

	return 0;
}
#endif /* TRACE_MEMORY_MAP */


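// Walk the three-level Sv39 page table and return a pointer to the level 0
// entry for virtAdr, allocating missing intermediate tables when alloc is true.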
static Pte*
LookupPte(addr_t virtAdr, bool alloc)
{
	Pte *pte = (Pte*)VirtFromPhys(sPageTable);
	for (int level = 2; level > 0; level--) {
		pte += VirtAdrPte(virtAdr, level);
		if (!pte->isValid) {
			if (!alloc)
				return NULL;
			uint64 ppn = mmu_allocate_page() / B_PAGE_SIZE;
			if (ppn == 0)
				return NULL;
			memset((Pte*)VirtFromPhys(B_PAGE_SIZE * ppn), 0, B_PAGE_SIZE);
			Pte newPte {
				.isValid = true,
				.isGlobal = IS_KERNEL_ADDRESS(virtAdr),
				.ppn = ppn
			};
			pte->val = newPte.val;
		}
		pte = (Pte*)VirtFromPhys(B_PAGE_SIZE * pte->ppn);
	}
	pte += VirtAdrPte(virtAdr, 0);
	return pte;
}


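// Map a single page. The accessed and dirty bits are preset, since hardware
// A/D updating is optional on RISC-V.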
static void
Map(addr_t virtAdr, phys_addr_t physAdr, uint64 flags)
{
	// TRACE("Map(%#" B_PRIxADDR ", %#" B_PRIxADDR ")\n", virtAdr, physAdr);
	Pte* pte = LookupPte(virtAdr, true);
	if (pte == NULL) panic("can't allocate page table");

	Pte newPte {
		.isValid = true,
		.isGlobal = IS_KERNEL_ADDRESS(virtAdr),
		.isAccessed = true,
		.isDirty = true,
		.ppn = physAdr / B_PAGE_SIZE,
	};
	newPte.val |= flags;

	pte->val = newPte.val;
}


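// Map a physically contiguous range page by page and record it in the list of
// allocated virtual ranges.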
static void
MapRange(addr_t virtAdr, phys_addr_t physAdr, size_t size, uint64 flags)
{
	TRACE("MapRange(%#" B_PRIxADDR " - %#" B_PRIxADDR ", %#" B_PRIxADDR " - %#" B_PRIxADDR ", %#"
		B_PRIxADDR ")\n", virtAdr, virtAdr + (size - 1), physAdr, physAdr + (size - 1), size);
	for (size_t i = 0; i < size; i += B_PAGE_SIZE)
		Map(virtAdr + i, physAdr + i, flags);

	ASSERT_ALWAYS(insert_virtual_allocated_range(virtAdr, size) >= B_OK);
}


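// Record a virtual range that the kernel must keep mapped after it takes over
// (used below for device MMIO ranges).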
static void
insert_virtual_range_to_keep(uint64 start, uint64 size)
{
	status_t status = insert_address_range(
		gKernelArgs.arch_args.virtual_ranges_to_keep,
		&gKernelArgs.arch_args.num_virtual_ranges_to_keep,
		MAX_VIRTUAL_RANGES_TO_KEEP, start, size);

	if (status == B_ENTRY_NOT_FOUND)
		panic("too many virtual ranges to keep");
	else if (status != B_OK)
		panic("failed to add virtual range to keep");
}


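// Allocate a kernel virtual address for the physical range, map it, and mark
// it as a range to keep; range.start is rewritten to the new virtual address.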
static void
MapAddrRange(addr_range& range, uint64 flags)
{
	if (range.size == 0) {
		range.start = 0;
		return;
	}

	phys_addr_t physAdr = range.start;
	range.start = get_next_virtual_address(range.size);

	MapRange(range.start, physAdr, range.size, flags);
	insert_virtual_range_to_keep(range.start, range.size);
}


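// Pre-allocate a level 1 table for every top-level entry covering the kernel
// address space, so the root table is fully populated for kernel addresses.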
static void
PreallocKernelRange()
{
	Pte* root = (Pte*)VirtFromPhys(sPageTable);
	for (uint64 i = VirtAdrPte(KERNEL_BASE, 2); i <= VirtAdrPte(KERNEL_TOP, 2);
		i++) {
		Pte* pte = &root[i];
		uint64 ppn = mmu_allocate_page() / B_PAGE_SIZE;
		if (ppn == 0) panic("can't alloc early physical page");
		memset(VirtFromPhys(B_PAGE_SIZE * ppn), 0, B_PAGE_SIZE);
		Pte newPte {
			.isValid = true,
			.isGlobal = true,
			.ppn = ppn
		};
		pte->val = newPte.val;
	}
}


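// Build the SATP value that points the MMU at sPageTable in Sv39 mode.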
static uint64
GetSatp()
{
	return SatpReg{
		.ppn = sPageTable / B_PAGE_SIZE,
		.asid = 0,
		.mode = satpModeSv39
	}.val;
}


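// Return one range spanning from the lowest to the highest reported physical
// memory address.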
static void
GetPhysMemRange(addr_range& range)
{
	phys_addr_t beg = (phys_addr_t)(-1), end = 0;
	if (gKernelArgs.num_physical_memory_ranges <= 0)
		beg = 0;
	else {
		for (size_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
			beg = std::min(beg, gKernelArgs.physical_memory_range[i].start);
			end = std::max(end, gKernelArgs.physical_memory_range[i].start
				+ gKernelArgs.physical_memory_range[i].size);
		}
	}
	range.start = beg;
	range.size = end - beg;
}


//#pragma mark -


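// Nothing to do here; the page tables are built in
// arch_mmu_generate_post_efi_page_tables() below.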
void
arch_mmu_init()
{
}


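// Called after exiting boot services: record the physical ranges that remain
// allocated and switch the EFI runtime services over to their virtual addresses.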
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor *memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

#ifdef TRACE_MEMORY_MAP
	dprintf("phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint64 start = gKernelArgs.physical_memory_range[i].start;
		uint64 size = gKernelArgs.physical_memory_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint64 start = gKernelArgs.physical_allocated_range[i].start;
		uint64 size = gKernelArgs.physical_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint64 start = gKernelArgs.virtual_allocated_range[i].start;
		uint64 size = gKernelArgs.virtual_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("virt memory ranges to keep:\n");
	for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
		uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
		uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}
#endif
}


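// Mark the range at RESERVED_MEMORY_BASE as reserved; it holds the M-mode
// firmware (such as OpenSBI) but may be reported as usable in the memory map.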
static void
fix_memory_map_for_m_mode(size_t memoryMapSize, efi_memory_descriptor* memoryMap,
	size_t descriptorSize, uint32_t descriptorVersion)
{
	addr_t addr = (addr_t)memoryMap;

	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize);
		if (entry->PhysicalStart == RESERVED_MEMORY_BASE) {
			entry->Type = EfiReservedMemoryType;
		}
	}
}


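// Build the kernel page tables from the final EFI memory map: the physical
// memory window, the boot loader image, EFI runtime services, loader regions
// and device MMIO. Returns the SATP value for the kernel to load.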
uint64
arch_mmu_generate_post_efi_page_tables(size_t memoryMapSize, efi_memory_descriptor* memoryMap,
	size_t descriptorSize, uint32_t descriptorVersion)
{
	sPageTable = mmu_allocate_page();
	memset(VirtFromPhys(sPageTable), 0, B_PAGE_SIZE);
	TRACE("sPageTable: %#" B_PRIxADDR "\n", sPageTable);

	PreallocKernelRange();

	gKernelArgs.num_virtual_allocated_ranges = 0;
	gKernelArgs.arch_args.num_virtual_ranges_to_keep = 0;

	fix_memory_map_for_m_mode(memoryMapSize, memoryMap, descriptorSize, descriptorVersion);

	build_physical_memory_list(memoryMapSize, memoryMap, descriptorSize, descriptorVersion,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

	addr_range physMemRange;
	GetPhysMemRange(physMemRange);
	TRACE("physMemRange: %#" B_PRIxADDR ", %#" B_PRIxSIZE "\n",
		physMemRange.start, physMemRange.size);

	// Physical memory mapping
	gKernelArgs.arch_args.physMap.start = KERNEL_TOP + 1 - physMemRange.size;
	gKernelArgs.arch_args.physMap.size = physMemRange.size;
	MapRange(gKernelArgs.arch_args.physMap.start, physMemRange.start, physMemRange.size,
		Pte {.isRead = true, .isWrite = true}.val);

	// Boot loader
	TRACE("Boot loader:\n");
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = &memoryMap[i];
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
			MapRange(entry->VirtualStart, entry->PhysicalStart, entry->NumberOfPages * B_PAGE_SIZE,
				Pte {.isRead = true, .isWrite = true, .isExec = true}.val);
			break;
		default:
			;
		}
	}
	TRACE("Boot loader stack\n");
	addr_t sp = Sp();
	TRACE("  SP: %#" B_PRIxADDR "\n", sp);

	// EFI runtime services
	TRACE("EFI runtime services:\n");
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = &memoryMap[i];
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			MapRange(entry->VirtualStart, entry->PhysicalStart, entry->NumberOfPages * B_PAGE_SIZE,
				Pte {.isRead = true, .isWrite = true, .isExec = true}.val);
	}

	// Memory regions
	TRACE("Regions:\n");
	void* cookie = NULL;
	addr_t virtAdr;
	phys_addr_t physAdr;
	size_t size;
	while (mmu_next_region(&cookie, &virtAdr, &physAdr, &size)) {
		MapRange(virtAdr, physAdr, size, Pte {.isRead = true, .isWrite = true, .isExec = true}.val);
	}

	// Devices
	TRACE("Devices:\n");
	MapAddrRange(gKernelArgs.arch_args.clint, Pte {.isRead = true, .isWrite = true}.val);
	MapAddrRange(gKernelArgs.arch_args.htif, Pte {.isRead = true, .isWrite = true}.val);
	MapAddrRange(gKernelArgs.arch_args.plic, Pte {.isRead = true, .isWrite = true}.val);

	if (strcmp(gKernelArgs.arch_args.uart.kind, "") != 0) {
		MapRange(gKernelArgs.arch_args.uart.regs.start,
			gKernelArgs.arch_args.uart.regs.start,
			gKernelArgs.arch_args.uart.regs.size,
			Pte {.isRead = true, .isWrite = true}.val);
		MapAddrRange(gKernelArgs.arch_args.uart.regs,
			Pte {.isRead = true, .isWrite = true}.val);
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

#ifdef TRACE_MEMORY_MAP
	DumpPageTable(GetSatp());
#endif

	return GetSatp();
}