/*
 * Copyright 2014-2016 Haiku, Inc. All rights reserved.
 * Copyright 2013-2014, Fredrik Holmqvist, fredrik.holmqvist@gmail.com.
 * Copyright 2014, Henry Harrington, henry.harrington@gmail.com.
 * All rights reserved.
 * Distributed under the terms of the MIT License.
 */


#include <algorithm>

#include <kernel.h>
#include <arch_kernel.h>
#include <boot/platform.h>
#include <boot/stage2.h>
#include <arch/x86/descriptors.h>

#include <efi/types.h>
#include <efi/boot-services.h>

#include "mmu.h"
#include "efi_platform.h"


//#define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


//#define TRACE_MEMORY_MAP


extern uint64 gLongGDT;
extern uint64 gLongGDTR;
segment_descriptor gBootGDT[BOOT_GDT_SEGMENT_COUNT];


static void
long_gdt_init()
{
	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);

	clear_segment_descriptor(&gBootGDT[0]);

	// Set up code/data segments (TSS segments set up later in the kernel).
	set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_USER);
	set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_USER);

	// Used by long_enter_kernel().
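	// 0xFFFFFF0000000000 is the canonical address selected by PML4 entry 510
	// (510 << 39, sign-extended); the page tables built in
	// arch_mmu_generate_post_efi_page_tables() below map that slot to all of
	// physical memory, so adding the offset yields the address the GDT will
	// have once the kernel's address space is active.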
	gLongGDT = (addr_t)gBootGDT + 0xFFFFFF0000000000;
	dprintf("GDT at 0x%lx\n", gLongGDT);
}


// Called after EFI boot services exit.
// Currently assumes that the memory map is sane: sorted, with no overlapping
// regions.
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor *memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	// Add physical memory to the kernel args and update virtual addresses for
	// EFI regions.
	addr_t addr = (addr_t)memory_map;
	gKernelArgs.num_physical_memory_ranges = 0;

	// First scan: Add all usable ranges
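	// Note: NumberOfPages in an EFI memory descriptor always counts 4KiB
	// pages, independent of the architecture's native page size, hence the
	// fixed 4096 multiplier below.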
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor *entry
			= (efi_memory_descriptor *)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory: {
			// Usable memory.
			// Ignore memory below 1MB and above 512GB.
			uint64_t base = entry->PhysicalStart;
			uint64_t end = entry->PhysicalStart + entry->NumberOfPages * 4096;
			uint64_t originalSize = end - base;
			if (base < 0x100000)
				base = 0x100000;
			if (end > (512ull * 1024 * 1024 * 1024))
				end = 512ull * 1024 * 1024 * 1024;

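			// Account for whatever the clamping above cut off; max_c() covers
			// the case where the whole range lies outside the accepted window,
			// in which case the entire original size is ignored.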
			gKernelArgs.ignored_physical_memory
				+= originalSize - (max_c(end, base) - base);

			if (base >= end)
				break;
			uint64_t size = end - base;

			insert_physical_memory_range(base, size);
			// LoaderData memory is bootloader allocated memory, possibly
			// containing the kernel or loaded drivers.
			if (entry->Type == EfiLoaderData)
				insert_physical_allocated_range(base, size);
			break;
		}
		case EfiACPIReclaimMemory:
			// ACPI reclaim -- physical memory we could actually use later
			break;
		case EfiRuntimeServicesCode:
		case EfiRuntimeServicesData:
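			// Keep runtime services regions identity-mapped; these
			// VirtualStart values are handed back to the firmware via
			// SetVirtualAddressMap() below.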
			entry->VirtualStart = entry->PhysicalStart;
			break;
		}
	}

	uint64_t initialPhysicalMemory = total_physical_memory();

	// Second scan: Remove everything reserved that may overlap
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor *entry
			= (efi_memory_descriptor *)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory:
			break;
		default:
			uint64_t base = entry->PhysicalStart;
			uint64_t end = entry->PhysicalStart + entry->NumberOfPages * 4096;
			remove_physical_memory_range(base, end - base);
		}
	}

	gKernelArgs.ignored_physical_memory
		+= initialPhysicalMemory - total_physical_memory();

	// Sort the address ranges.
	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);
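	// From this point on, runtime services have to be called through the
	// virtual addresses established above.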

#ifdef TRACE_MEMORY_MAP
	dprintf("phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint64 start = gKernelArgs.physical_memory_range[i].start;
		uint64 size = gKernelArgs.physical_memory_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint64 start = gKernelArgs.physical_allocated_range[i].start;
		uint64 size = gKernelArgs.physical_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint64 start = gKernelArgs.virtual_allocated_range[i].start;
		uint64 size = gKernelArgs.virtual_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}
#endif

	// Important.  Make sure supervisor threads can fault on read-only pages...
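	// CR0 is loaded with PG (bit 31), WP (bit 16), NE (bit 5) and PE (bit 0)
	// set; WP is what makes ring 0 writes respect read-only mappings.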
	asm("mov %%rax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
}


uint64_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor *memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	// Generate page tables, matching bios_ia32/long.cpp.
	uint64_t *pml4;
	uint64_t *pdpt;
	uint64_t *pageDir;
	uint64_t *pageTable;

	// Allocate the top level PML4.
	pml4 = NULL;
	if (platform_allocate_region((void**)&pml4, B_PAGE_SIZE, 0, false) != B_OK)
		panic("Failed to allocate PML4.");
	gKernelArgs.arch_args.phys_pgdir = (uint32_t)(addr_t)pml4;
	memset(pml4, 0, B_PAGE_SIZE);
	platform_bootloader_address_to_kernel_address(pml4,
		&gKernelArgs.arch_args.vir_pgdir);

	// Store the virtual memory usage information.
	gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE;
	gKernelArgs.virtual_allocated_range[0].size
		= get_current_virtual_address() - KERNEL_LOAD_BASE;
	gKernelArgs.num_virtual_allocated_ranges = 1;
	gKernelArgs.arch_args.virtual_end = ROUNDUP(KERNEL_LOAD_BASE
		+ gKernelArgs.virtual_allocated_range[0].size, 0x200000);
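	// 0x200000 is the 2MiB coverage of one page directory entry; rounding
	// virtual_end up to it keeps the kernel's initial virtual area aligned to
	// whole page tables.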

	// Find the highest physical memory address. We map all physical memory
	// into the kernel address space, so we want to make sure we map everything
	// we have available.
	uint64 maxAddress = 0;
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor *entry
			= (efi_memory_descriptor *)((addr_t)memory_map + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory:
		case EfiRuntimeServicesCode:
		case EfiRuntimeServicesData:
		case EfiPersistentMemory:
		case EfiACPIReclaimMemory:
		case EfiACPIMemoryNVS:
			maxAddress = std::max(maxAddress,
				entry->PhysicalStart + entry->NumberOfPages * 4096);
			break;
		default:
			break;
		}
	}

	// Want to map at least 4GB; there may be stuff other than usable RAM that
	// could be in the first 4GB of physical address space.
	maxAddress = std::max(maxAddress, (uint64)0x100000000ll);
	maxAddress = ROUNDUP(maxAddress, 0x40000000);
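	// 0x40000000 is 1GiB, the amount of address space covered by one PDPT
	// entry (i.e. one page directory), so the loop below can step through
	// whole page directories.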

	// Currently only use 1 PDPT (512GB). This will need to change if someone
	// wants to use Haiku on a box with more than 512GB of RAM but that's
	// probably not going to happen any time soon.
	if (maxAddress / 0x40000000 > 512)
		panic("Can't currently support more than 512GB of RAM!");

	// Create page tables for the physical map area. Also map this PDPT
	// temporarily at the bottom of the address space so that we are identity
	// mapped.
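	// PML4 entry 510 corresponds to the canonical address 0xFFFFFF0000000000
	// (the same offset long_gdt_init() adds above), entry 0 to address 0. The
	// identity mapping through entry 0 is only needed while switching address
	// spaces and is expected to be dropped again by the kernel.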

	pdpt = (uint64*)mmu_allocate_page();
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[510] = (addr_t)pdpt | kTableMappingFlags;
	pml4[0] = (addr_t)pdpt | kTableMappingFlags;

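	// One page directory per 1GiB slot; its 512 entries map the slot with
	// 2MiB large pages, so no 4KiB page tables are needed for the physical
	// map area.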
	for (uint64 i = 0; i < maxAddress; i += 0x40000000) {
		pageDir = (uint64*)mmu_allocate_page();
		memset(pageDir, 0, B_PAGE_SIZE);
		pdpt[i / 0x40000000] = (addr_t)pageDir | kTableMappingFlags;

		for (uint64 j = 0; j < 0x40000000; j += 0x200000) {
			pageDir[j / 0x200000] = (i + j) | kLargePageMappingFlags;
		}
	}

	// Allocate tables for the kernel mappings.

	pdpt = (uint64*)mmu_allocate_page();
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[511] = (addr_t)pdpt | kTableMappingFlags;

	pageDir = (uint64*)mmu_allocate_page();
	memset(pageDir, 0, B_PAGE_SIZE);
	pdpt[510] = (addr_t)pageDir | kTableMappingFlags;
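	// PML4 entry 511 covers the top 512GiB of the address space and PDPT
	// entry 510 the 1GiB window starting at 0xFFFFFFFF80000000, which is
	// where KERNEL_LOAD_BASE is expected to fall.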

	// We can now allocate page tables and duplicate the mappings across from
	// the 32-bit address space to them.
	pageTable = NULL; // shush, compiler.
	for (uint32 i = 0; i < gKernelArgs.virtual_allocated_range[0].size
			/ B_PAGE_SIZE; i++) {
		if ((i % 512) == 0) {
			pageTable = (uint64*)mmu_allocate_page();
			memset(pageTable, 0, B_PAGE_SIZE);
			pageDir[i / 512] = (addr_t)pageTable | kTableMappingFlags;
		}

		// Get the physical address to map.
		void *phys;
		if (platform_kernel_address_to_bootloader_address(
			KERNEL_LOAD_BASE + (i * B_PAGE_SIZE), &phys) != B_OK) {
			continue;
		}

		pageTable[i % 512] = (addr_t)phys | kPageMappingFlags;
	}

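	// Hand back the PML4's physical address; the long mode entry code is
	// expected to load it into CR3 when switching to these page tables.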
	return (uint64)pml4;
}


void
arch_mmu_init()
{
	long_gdt_init();
}