/*
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 */


#include "long.h"

#include <algorithm>

#include <KernelExport.h>

// Include the x86_64 version of descriptors.h
#define __x86_64__
#include <arch/x86/descriptors.h>
#undef __x86_64__

#include <arch_system_info.h>
#include <boot/platform.h>
#include <boot/heap.h>
#include <boot/stage2.h>
#include <boot/stdio.h>
#include <kernel.h>
#include <safemode.h>

#include "debug.h"
#include "mmu.h"
#include "smp.h"


static const uint64 kTableMappingFlags = 0x7;
	// Present, R/W, User
static const uint64 kLargePageMappingFlags = 0x183;
	// Global, Large (PS), R/W, Present
static const uint64 kPageMappingFlags = 0x103;
	// Global, R/W, Present

extern "C" void long_enter_kernel(int currentCPU, uint64 stackTop);

extern uint64 gLongGDT;
extern uint32 gLongPhysicalPMLTop;
extern bool gLongLA57;
extern uint64 gLongKernelEntry;


/*! Convert a 32-bit address to a 64-bit address. */
static inline uint64
fix_address(uint64 address)
{
	if (address >= KERNEL_LOAD_BASE)
		return address + KERNEL_FIXUP_FOR_LONG_MODE;
	else
		return address;
}


template<typename Type>
inline void
fix_address(FixedWidthPointer<Type>& p)
{
	if (p != NULL)
		p.SetTo(fix_address(p.Get()));
}


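/*! Set up the boot GDT with the kernel and user code/data segments, and
	store its 64-bit address for long_enter_kernel(). */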
static void
long_gdt_init()
{
	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);

	clear_segment_descriptor(&gBootGDT[0]);

	// Set up code/data segments (TSS segments set up later in the kernel).
	set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_USER);
	set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_USER);

	// Used by long_enter_kernel().
	gLongGDT = fix_address((addr_t)gBootGDT);
	dprintf("GDT at 0x%llx\n", gLongGDT);
}


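/*! Build the 64-bit page tables: map all physical memory into the physical
	map area and copy the kernel mappings over from the 32-bit address
	space. */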
static void
long_mmu_init()
{
	uint64* pmlTop;
	// Allocate the top level page table (PML4, or the PML5 when LA57 is in
	// use).
	pmlTop = (uint64*)mmu_allocate_page(&gKernelArgs.arch_args.phys_pgdir);
	memset(pmlTop, 0, B_PAGE_SIZE);
	gKernelArgs.arch_args.vir_pgdir = fix_address((uint64)(addr_t)pmlTop);

	// Store the virtual memory usage information.
	gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE_64_BIT;
	gKernelArgs.virtual_allocated_range[0].size = mmu_get_virtual_usage();
	gKernelArgs.num_virtual_allocated_ranges = 1;
	gKernelArgs.arch_args.virtual_end = ROUNDUP(KERNEL_LOAD_BASE_64_BIT
		+ gKernelArgs.virtual_allocated_range[0].size, 0x200000);

	// Find the highest physical memory address. We map all physical memory
	// into the kernel address space, so we want to make sure we map everything
	// we have available.
	uint64 maxAddress = 0;
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		maxAddress = std::max(maxAddress,
			gKernelArgs.physical_memory_range[i].start
				+ gKernelArgs.physical_memory_range[i].size);
	}

	// We want to map at least 4GB; there may be things other than usable RAM
	// in the first 4GB of the physical address space.
	maxAddress = std::max(maxAddress, (uint64)0x100000000ll);
	maxAddress = ROUNDUP(maxAddress, 0x40000000);

	// Currently we only use one PDPT (512GB). This will need to change if
	// someone wants to use Haiku on a machine with more than 512GB of RAM,
	// but that is unlikely to happen any time soon.
	if (maxAddress / 0x40000000 > 512)
		panic("Can't currently support more than 512GB of RAM!");

	uint64* pml4 = pmlTop;
	addr_t physicalAddress;
	cpuid_info info;
	if (get_current_cpuid(&info, 7, 0) == B_OK
		&& (info.regs.ecx & IA32_FEATURE_LA57) != 0) {

		if (get_safemode_boolean(B_SAFEMODE_256_TB_MEMORY_LIMIT, false)) {
			// LA57 has been disabled!
			dprintf("la57 disabled per safemode setting\n");
		} else {
			dprintf("la57 enabled\n");
			gLongLA57 = true;
			pml4 = (uint64*)mmu_allocate_page(&physicalAddress);
			memset(pml4, 0, B_PAGE_SIZE);
			pmlTop[511] = physicalAddress | kTableMappingFlags;
			pmlTop[0] = physicalAddress | kTableMappingFlags;
		}
	}

	uint64* pdpt;
	uint64* pageDir;
	uint64* pageTable;

	// Create page tables for the physical map area. Also map this PDPT
	// temporarily at the bottom of the address space so that we are identity
	// mapped.

	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[510] = physicalAddress | kTableMappingFlags;
	pml4[0] = physicalAddress | kTableMappingFlags;

	for (uint64 i = 0; i < maxAddress; i += 0x40000000) {
		pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
		memset(pageDir, 0, B_PAGE_SIZE);
		pdpt[i / 0x40000000] = physicalAddress | kTableMappingFlags;

		for (uint64 j = 0; j < 0x40000000; j += 0x200000) {
			pageDir[j / 0x200000] = (i + j) | kLargePageMappingFlags;
		}

		mmu_free(pageDir, B_PAGE_SIZE);
	}

	mmu_free(pdpt, B_PAGE_SIZE);

	// Allocate tables for the kernel mappings.
	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[511] = physicalAddress | kTableMappingFlags;

	pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pageDir, 0, B_PAGE_SIZE);
	pdpt[510] = physicalAddress | kTableMappingFlags;

	// We can now allocate page tables and duplicate the mappings across from
	// the 32-bit address space to them.
	pageTable = NULL;
	for (uint32 i = 0; i < gKernelArgs.virtual_allocated_range[0].size
			/ B_PAGE_SIZE; i++) {
		if ((i % 512) == 0) {
			if (pageTable)
				mmu_free(pageTable, B_PAGE_SIZE);

			pageTable = (uint64*)mmu_allocate_page(&physicalAddress);
			memset(pageTable, 0, B_PAGE_SIZE);
			pageDir[i / 512] = physicalAddress | kTableMappingFlags;
		}

		// Get the physical address to map.
		if (!mmu_get_virtual_mapping(KERNEL_LOAD_BASE + (i * B_PAGE_SIZE),
				&physicalAddress))
			continue;

		pageTable[i % 512] = physicalAddress | kPageMappingFlags;
	}

	if (pageTable)
		mmu_free(pageTable, B_PAGE_SIZE);
	mmu_free(pageDir, B_PAGE_SIZE);
	mmu_free(pdpt, B_PAGE_SIZE);
	if (pml4 != pmlTop)
		mmu_free(pml4, B_PAGE_SIZE);

	// Sort the address ranges.
	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	dprintf("phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_memory_range[i].start,
			gKernelArgs.physical_memory_range[i].size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_allocated_range[i].start,
			gKernelArgs.physical_allocated_range[i].size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.virtual_allocated_range[i].start,
			gKernelArgs.virtual_allocated_range[i].size);
	}

	gLongPhysicalPMLTop = gKernelArgs.arch_args.phys_pgdir;
}


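/*! Convert all pointers in a preloaded ELF64 image to 64-bit addresses. */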
static void
convert_preloaded_image(preloaded_elf64_image* image)
{
	fix_address(image->next);
	fix_address(image->name);
	fix_address(image->debug_string_table);
	fix_address(image->syms);
	fix_address(image->rel);
	fix_address(image->rela);
	fix_address(image->pltrel);
	fix_address(image->debug_symbols);
}


/*!	Convert all addresses in kernel_args to 64-bit addresses. */
static void
convert_kernel_args()
{
	fix_address(gKernelArgs.boot_volume);
	fix_address(gKernelArgs.vesa_modes);
	fix_address(gKernelArgs.edid_info);
	fix_address(gKernelArgs.debug_output);
	fix_address(gKernelArgs.previous_debug_output);
	fix_address(gKernelArgs.boot_splash);
	fix_address(gKernelArgs.ucode_data);
	fix_address(gKernelArgs.arch_args.apic);
	fix_address(gKernelArgs.arch_args.hpet);

	convert_preloaded_image(static_cast<preloaded_elf64_image*>(
		gKernelArgs.kernel_image.Pointer()));
	fix_address(gKernelArgs.kernel_image);

	// Iterate over the preloaded images. Must save the next address before
	// converting, as the next pointer will be converted.
	preloaded_image* image = gKernelArgs.preloaded_images;
	fix_address(gKernelArgs.preloaded_images);
	while (image != NULL) {
		preloaded_image* next = image->next;
		convert_preloaded_image(static_cast<preloaded_elf64_image*>(image));
		image = next;
	}

	// Set correct kernel args range addresses.
	dprintf("kernel args ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_kernel_args_ranges; i++) {
		gKernelArgs.kernel_args_range[i].start = fix_address(
			gKernelArgs.kernel_args_range[i].start);
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.kernel_args_range[i].start,
			gKernelArgs.kernel_args_range[i].size);
	}

	// Fix driver settings files.
	driver_settings_file* file = gKernelArgs.driver_settings;
	fix_address(gKernelArgs.driver_settings);
	while (file != NULL) {
		driver_settings_file* next = file->next;
		fix_address(file->next);
		fix_address(file->buffer);
		file = next;
	}
}


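/*! Enable SSE by setting the necessary bits in CR4 and CR0. */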
static void
enable_sse()
{
	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
}


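/*! Entry function run by each secondary CPU: set up CR0, the FPU and SSE,
	then enter the 64-bit kernel on this CPU's stack. */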
static void
long_smp_start_kernel(void)
{
	uint32 cpu = smp_get_current_cpu();

	// Important: make sure supervisor threads can fault on read-only pages...
	// This sets CR0 to PG | WP | NE | PE.
	asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
	asm("cld");
	asm("fninit");
	enable_sse();

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[cpu].start
		= fix_address(gKernelArgs.cpu_kstack[cpu].start);

	long_enter_kernel(cpu, gKernelArgs.cpu_kstack[cpu].start
		+ gKernelArgs.cpu_kstack[cpu].size);

	panic("Shouldn't get here");
}


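/*! Verify long mode support, set up the GDT and 64-bit page tables, convert
	kernel_args, and enter the 64-bit kernel on the boot CPU. */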
void
long_start_kernel()
{
	// Check whether long mode is supported (CPUID function 0x80000001,
	// LM bit in EDX).
	cpuid_info info;
	get_current_cpuid(&info, 0x80000001, 0);
	if ((info.regs.edx & (1 << 29)) == 0)
		panic("64-bit kernel requires a 64-bit CPU");

	enable_sse();

	preloaded_elf64_image *image = static_cast<preloaded_elf64_image *>(
		gKernelArgs.kernel_image.Pointer());

	smp_init_other_cpus();

	long_gdt_init();
	debug_cleanup();
	long_mmu_init();
	convert_kernel_args();

	// Save the kernel entry point address.
	gLongKernelEntry = image->elf_header.e_entry;
	dprintf("kernel entry at %#llx\n", gLongKernelEntry);

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[0].start
		= fix_address(gKernelArgs.cpu_kstack[0].start);

	// We're about to enter the kernel -- disable console output.
	stdout = NULL;

	smp_boot_other_cpus(long_smp_start_kernel);

	// Enter the kernel!
	long_enter_kernel(0, gKernelArgs.cpu_kstack[0].start
		+ gKernelArgs.cpu_kstack[0].size);

	panic("Shouldn't get here");
}
