/*
 * Copyright 2021-2022 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */


#include <algorithm>

#include <kernel.h>
#include <arch_kernel.h>
#include <arch/cpu.h>
#include <arch/x86/descriptors.h>
#include <boot/platform.h>
#include <boot/stage2.h>
#include <efi/types.h>
#include <efi/boot-services.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"


//#define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


//#define TRACE_MEMORY_MAP
//#define TRACE_PAGE_DIRECTORY

// Ignore memory below 1 MB and above 64 GB (the maximum amount of physical
// memory on x86 with PAE).
#define PHYSICAL_MEMORY_LOW		0x00100000
#define PHYSICAL_MEMORY_HIGH	0x1000000000ull

#define VADDR_TO_PDENT(va)		(((va) / B_PAGE_SIZE) / 1024)
#define VADDR_TO_PTENT(va)		(((va) / B_PAGE_SIZE) % 1024)
#define X86_PDE_ADDRESS_MASK	0xfffff000
#define X86_PTE_ADDRESS_MASK	0xfffff000
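// For example, for an assumed virtual address of 0x80123000:
// VADDR_TO_PDENT(0x80123000) = 0x200 (the top 10 bits select the page
// directory entry) and VADDR_TO_PTENT(0x80123000) = 0x123 (the next 10 bits
// select the entry within that page table).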

#define ALIGN_PAGEDIR			B_PAGE_SIZE


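// The packed 6-byte layout below (a 16-bit limit followed by a 32-bit linear
// base address) matches the operand format expected by the lgdt/lidt
// instructions.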
struct gdt_idt_descr {
	uint16_t	limit;
	uint32_t	base;
} _PACKED;


static const uint32_t kDefaultPageTableFlags = 0x07;	// present, user, R/W


static uint32_t *sPageDirectory = NULL;

#ifdef TRACE_PAGE_DIRECTORY
static void
dump_page_dir(void)
{
	dprintf("=== Page Directory ===\n");
	for (uint32_t i = 0; i < 1024; i++) {
		uint32_t directoryEntry = sPageDirectory[i];
		if (directoryEntry != 0) {
			dprintf("virt 0x%08x --> page table 0x%08x type 0x%08x\n",
				i << 22, directoryEntry & X86_PDE_ADDRESS_MASK,
				directoryEntry & (~X86_PDE_ADDRESS_MASK));
			uint32_t *pageTable = (uint32_t *)(directoryEntry & X86_PDE_ADDRESS_MASK);
			for (uint32_t j = 0; j < 1024; j++) {
				uint32_t tableEntry = pageTable[j];
				if (tableEntry != 0) {
					dprintf("virt 0x%08x     --> page 0x%08x type+flags 0x%08x\n",
						(i << 22) | (j << 12),
						tableEntry & X86_PTE_ADDRESS_MASK,
						tableEntry & (~X86_PTE_ADDRESS_MASK));
				}
			}
		}
	}
}
#endif

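// Allocates a physical page via mmu_allocate_page() and zeroes it for use as
// a new page table. The bootloader still runs under the firmware's identity
// mapping, so the returned physical address can be dereferenced directly and
// later written into the page directory as-is.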
static uint32_t *
get_next_page_table(void)
{
	uint32_t *pageTable = (uint32_t *)mmu_allocate_page();
	memset(pageTable, 0, B_PAGE_SIZE);
	return pageTable;
}


void
arch_mmu_init_gdt(gdt_idt_descr &bootGDTDescriptor)
{
	segment_descriptor *bootGDT = NULL;

	if (platform_allocate_region((void **)&bootGDT,
			BOOT_GDT_SEGMENT_COUNT * sizeof(segment_descriptor), 0, false) != B_OK) {
		panic("Failed to allocate GDT.\n");
	}

	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);

	// set up a new GDT

	// put standard segment descriptors in the GDT
	clear_segment_descriptor(&bootGDT[0]);

	// seg 0x08 - kernel 4GB code
	set_segment_descriptor(&bootGDT[KERNEL_CODE_SEGMENT], 0, 0xffffffff,
		DT_CODE_READABLE, DPL_KERNEL);

	// seg 0x10 - kernel 4GB data
	set_segment_descriptor(&bootGDT[KERNEL_DATA_SEGMENT], 0, 0xffffffff,
		DT_DATA_WRITEABLE, DPL_KERNEL);

	// seg 0x1b - ring 3 user 4GB code
	set_segment_descriptor(&bootGDT[USER_CODE_SEGMENT], 0, 0xffffffff,
		DT_CODE_READABLE, DPL_USER);

	// seg 0x23 - ring 3 user 4GB data
	set_segment_descriptor(&bootGDT[USER_DATA_SEGMENT], 0, 0xffffffff,
		DT_DATA_WRITEABLE, DPL_USER);

	addr_t virtualGDT;
	platform_bootloader_address_to_kernel_address(bootGDT, &virtualGDT);

	bootGDTDescriptor.limit = BOOT_GDT_SEGMENT_COUNT * sizeof(segment_descriptor);
	bootGDTDescriptor.base = (uint32_t)virtualGDT;

	TRACE("gdt phys 0x%08x virt 0x%08" B_PRIxADDR " desc 0x%08x\n",
		(uint32_t)bootGDT, virtualGDT,
		(uint32_t)&bootGDTDescriptor);
	TRACE("gdt limit=%d base=0x%08x\n",
		bootGDTDescriptor.limit, bootGDTDescriptor.base);
}
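// Note: a caller would typically hand the filled-in descriptor to the CPU
// with an lgdt instruction once the kernel address space is in place, e.g.
// (sketch only, not part of this file):
//
//	asm volatile("lgdt %0" : : "m"(bootGDTDescriptor));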


static void
map_page(addr_t virtAddr, phys_addr_t physAddr, uint32_t flags)
{
	physAddr &= ~(B_PAGE_SIZE - 1);

	uint32_t *pageTable = NULL;
	uint32_t pageDirectoryIndex = VADDR_TO_PDENT(virtAddr);
	uint32_t pageDirectoryEntry = sPageDirectory[pageDirectoryIndex];

	if (pageDirectoryEntry == 0) {
		//TRACE("get next page table for address 0x%08" B_PRIxADDR "\n",
		//	virtAddr);
		pageTable = get_next_page_table();
		sPageDirectory[pageDirectoryIndex]
			= (uint32_t)pageTable | kDefaultPageTableFlags;
	} else {
		pageTable = (uint32_t *)(pageDirectoryEntry & X86_PDE_ADDRESS_MASK);
	}

	uint32_t pageTableIndex = VADDR_TO_PTENT(virtAddr);
	pageTable[pageTableIndex] = physAddr | flags;
}


static void
map_range(addr_t virtAddr, phys_addr_t physAddr, size_t size, uint32_t flags)
{
	//TRACE("map 0x%08" B_PRIxADDR " --> 0x%08" B_PRIxPHYSADDR
	//	", len=0x%08" B_PRIxSIZE ", flags=0x%08" B_PRIx32 "\n",
	//	virtAddr, physAddr, size, flags);

	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
		map_page(virtAddr + offset, physAddr + offset, flags);
	}

	if (virtAddr >= KERNEL_LOAD_BASE)
		ASSERT_ALWAYS(insert_virtual_allocated_range(virtAddr, size) >= B_OK);
}
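// Example with hypothetical addresses: map_range(KERNEL_LOAD_BASE, 0x200000,
// 16 * B_PAGE_SIZE, kDefaultPageFlags) installs 16 consecutive page table
// entries and, because the target lies at or above KERNEL_LOAD_BASE, also
// records the range in gKernelArgs' virtual allocated list.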


void
arch_mmu_post_efi_setup(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	build_physical_allocated_list(memoryMapSize, memoryMap,
		descriptorSize, descriptorVersion);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memoryMapSize, descriptorSize,
		descriptorVersion, memoryMap);

#ifdef TRACE_MEMORY_MAP
	dprintf("phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint64 start = gKernelArgs.physical_memory_range[i].start;
		uint64 size = gKernelArgs.physical_memory_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint64 start = gKernelArgs.physical_allocated_range[i].start;
		uint64 size = gKernelArgs.physical_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint64 start = gKernelArgs.virtual_allocated_range[i].start;
		uint64 size = gKernelArgs.virtual_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}
#endif
}


static void
arch_mmu_allocate_page_directory(void)
{
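	// Over-allocate by ALIGN_PAGEDIR bytes so the directory can be rounded up
	// to a page boundary below while still spanning a full page.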
	if (platform_allocate_region((void **)&sPageDirectory,
			B_PAGE_SIZE + ALIGN_PAGEDIR, 0, false) != B_OK)
		panic("Failed to allocate page directory.");
	sPageDirectory = (uint32_t *)ROUNDUP((uint32_t)sPageDirectory, ALIGN_PAGEDIR);
	memset(sPageDirectory, 0, B_PAGE_SIZE);

	TRACE("sPageDirectory  = 0x%08x\n", (uint32_t)sPageDirectory);
}


uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	build_physical_memory_list(memoryMapSize, memoryMap,
		descriptorSize, descriptorVersion,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

	// TODO: Find out how to map EFI runtime services.
	// They are not mapped for now because the kernel doesn't use them anyway.
#if 0
	addr_t memoryMapAddr = (addr_t)memoryMap;
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry =
			(efi_memory_descriptor *)(memoryMapAddr + i * descriptorSize);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				kDefaultPageFlags);
	}
#endif

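	// Map every region the loader has allocated so far (kernel image, heap,
	// and other loader allocations) at the kernel virtual address it was
	// assigned.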
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size, kDefaultPageFlags);
	}

	// identity map the first 1 MB
	map_range((addr_t)0, (phys_addr_t)0, 1024 * 1024, kDefaultPageFlags);

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	// Map the page directory into kernel space at 0xffc00000-0xffffffff.
	// This enables an MMU trick where the 4 MB region that this page
	// directory entry represents now maps the 4 MB of potential page tables
	// that the page directory points to. Thrown away later in VM bringup,
	// but useful for now.
	sPageDirectory[1023] = (uint32_t)sPageDirectory | kDefaultPageFlags;
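	// With this self-referencing entry in slot 1023, the page table backing
	// page directory index i becomes visible at 0xffc00000 + i * B_PAGE_SIZE,
	// and the page directory itself appears at 0xfffff000.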

	addr_t virtPageDirectory;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory,
		&virtPageDirectory);

	gKernelArgs.arch_args.phys_pgdir = (uint32_t)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32_t)virtPageDirectory;
	gKernelArgs.arch_args.page_hole = 0xffc00000;
	gKernelArgs.arch_args.virtual_end
		= gKernelArgs.virtual_allocated_range[gKernelArgs.num_virtual_allocated_ranges - 1].start
		+ gKernelArgs.virtual_allocated_range[gKernelArgs.num_virtual_allocated_ranges - 1].size;

	TRACE("gKernelArgs.arch_args.phys_pgdir  = 0x%08" B_PRIx32 "\n",
		gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir   = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.page_hole   = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.page_hole);
	TRACE("gKernelArgs.arch_args.virtual_end = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.virtual_end);

#ifdef TRACE_PAGE_DIRECTORY
	dump_page_dir();
#endif

	return (uint32_t)sPageDirectory;
}


void
arch_mmu_init(void)
{
	arch_mmu_allocate_page_directory();
}