/*
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 */


#include "long.h"

#include <algorithm>
#include <string.h>

#include <KernelExport.h>

// Include the x86_64 version of descriptors.h
#define __x86_64__
#include <arch/x86/descriptors.h>
#undef __x86_64__

#include <arch_system_info.h>
#include <boot/platform.h>
#include <boot/heap.h>
#include <boot/stage2.h>
#include <boot/stdio.h>
#include <kernel.h>

#include "debug.h"
#include "smp.h"
#include "mmu.h"


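// The mapping flags below are standard x86-64 page table entry bits:
// bit 0 = Present, bit 1 = Writable, bit 2 = User, bit 7 = Page Size
// (2MB pages, in a page directory entry), bit 8 = Global.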
static const uint64 kTableMappingFlags = 0x7;
static const uint64 kLargePageMappingFlags = 0x183;
static const uint64 kPageMappingFlags = 0x103;
	// Global, R/W, Present

extern "C" void long_enter_kernel(int currentCPU, uint64 stackTop);

extern uint32 gLongPhysicalGDT;
extern uint64 gLongVirtualGDT;
extern uint32 gLongPhysicalPML4;
extern uint64 gLongKernelEntry;


/*!	Convert an address in the loader's 32-bit kernel address space to the
	corresponding 64-bit kernel address, by rebasing it from KERNEL_LOAD_BASE
	to KERNEL_LOAD_BASE_64_BIT.
*/
static inline uint64
fix_address(uint64 address)
{
	return address - KERNEL_LOAD_BASE + KERNEL_LOAD_BASE_64_BIT;
}


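/*!	Overload for FixedWidthPointer: rewrites the pointer in place to the
	64-bit address (no-op for NULL pointers).
*/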
template<typename Type>
inline void
fix_address(FixedWidthPointer<Type>& p)
{
	if (p != NULL)
		p.SetTo(fix_address(p.Get()));
}


static void
long_gdt_init()
{
	// Allocate memory for the GDT.
	segment_descriptor* gdt = (segment_descriptor*)
		mmu_allocate_page(&gKernelArgs.arch_args.phys_gdt);
	gKernelArgs.arch_args.vir_gdt = fix_address((addr_t)gdt);

	dprintf("GDT at phys 0x%lx, virt 0x%llx\n", gKernelArgs.arch_args.phys_gdt,
		gKernelArgs.arch_args.vir_gdt);

	clear_segment_descriptor(&gdt[0]);

	// Set up code/data segments (TSS segments set up later in the kernel).
	set_segment_descriptor(&gdt[KERNEL_CODE_SEG / 8], DT_CODE_EXECUTE_ONLY,
		DPL_KERNEL);
	set_segment_descriptor(&gdt[KERNEL_DATA_SEG / 8], DT_DATA_WRITEABLE,
		DPL_KERNEL);
	set_segment_descriptor(&gdt[USER_CODE_SEG / 8], DT_CODE_EXECUTE_ONLY,
		DPL_USER);
	set_segment_descriptor(&gdt[USER_DATA_SEG / 8], DT_DATA_WRITEABLE,
		DPL_USER);

	// Used by long_enter_kernel().
	gLongPhysicalGDT = gKernelArgs.arch_args.phys_gdt;
	gLongVirtualGDT = gKernelArgs.arch_args.vir_gdt;
}


static void
long_idt_init()
{
	interrupt_descriptor* idt = (interrupt_descriptor*)
		mmu_allocate_page(&gKernelArgs.arch_args.phys_idt);
	gKernelArgs.arch_args.vir_idt = fix_address((addr_t)idt);

	dprintf("IDT at phys %#lx, virt %#llx\n", gKernelArgs.arch_args.phys_idt,
		gKernelArgs.arch_args.vir_idt);

	// The 32-bit kernel gets an IDT with the loader's exception handlers until
	// it can set up its own. Can't do that here because they won't work after
	// switching to long mode. Therefore, just clear the IDT and leave the
	// kernel to set it up.
	memset(idt, 0, B_PAGE_SIZE);
}


static void
long_mmu_init()
{
	uint64* pml4;
	uint64* pdpt;
	uint64* pageDir;
	uint64* pageTable;
	addr_t physicalAddress;

	// Allocate the top level PML4.
	pml4 = (uint64*)mmu_allocate_page(&gKernelArgs.arch_args.phys_pgdir);
	memset(pml4, 0, B_PAGE_SIZE);
	gKernelArgs.arch_args.vir_pgdir = fix_address((uint64)(addr_t)pml4);

	// Store the virtual memory usage information.
	gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE_64_BIT;
	gKernelArgs.virtual_allocated_range[0].size = mmu_get_virtual_usage();
	gKernelArgs.num_virtual_allocated_ranges = 1;
	gKernelArgs.arch_args.virtual_end = ROUNDUP(KERNEL_LOAD_BASE_64_BIT
		+ gKernelArgs.virtual_allocated_range[0].size, 0x200000);

	// Find the highest physical memory address. We map all physical memory
	// into the kernel address space, so we want to make sure we map everything
	// we have available.
	uint64 maxAddress = 0;
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		maxAddress = std::max(maxAddress,
			gKernelArgs.physical_memory_range[i].start
				+ gKernelArgs.physical_memory_range[i].size);
	}

	// Map at least 4GB: there may be things other than usable RAM (such as
	// memory-mapped devices) in the first 4GB of the physical address space.
	// Round up to a 1GB boundary, since each PDPT entry below covers 1GB.
	maxAddress = std::max(maxAddress, (uint64)0x100000000ll);
	maxAddress = ROUNDUP(maxAddress, 0x40000000);

	// We currently use only a single PDPT (512GB). This will need to change
	// if someone wants to run Haiku on a machine with more than 512GB of RAM,
	// but that is unlikely to be an issue any time soon.
	if (maxAddress / 0x40000000 > 512)
		panic("Can't currently support more than 512GB of RAM!");

	// Create page tables for the physical map area. Also map this PDPT
	// temporarily at the bottom of the address space so that we are identity
	// mapped.

	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[510] = physicalAddress | kTableMappingFlags;
	pml4[0] = physicalAddress | kTableMappingFlags;

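	// Each PDPT entry covers 1GB of address space; fill one page directory
	// per 1GB slot with 512 2MB large-page entries, so that all physical
	// memory up to maxAddress gets mapped.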
	for (uint64 i = 0; i < maxAddress; i += 0x40000000) {
		pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
		memset(pageDir, 0, B_PAGE_SIZE);
		pdpt[i / 0x40000000] = physicalAddress | kTableMappingFlags;

		for (uint64 j = 0; j < 0x40000000; j += 0x200000) {
			pageDir[j / 0x200000] = (i + j) | kLargePageMappingFlags;
		}

		mmu_free(pageDir, B_PAGE_SIZE);
	}

	mmu_free(pdpt, B_PAGE_SIZE);

	// Allocate tables for the kernel mappings.

	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[511] = physicalAddress | kTableMappingFlags;

	pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pageDir, 0, B_PAGE_SIZE);
	pdpt[510] = physicalAddress | kTableMappingFlags;
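	// (PML4 entry 511 together with PDPT entry 510 selects the 1GB slot
	// starting at 0xffffffff80000000, which is assumed to be where
	// KERNEL_LOAD_BASE_64_BIT points.)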

	// We can now allocate page tables and duplicate the mappings across from
	// the 32-bit address space to them.
	pageTable = NULL;
	for (uint32 i = 0; i < gKernelArgs.virtual_allocated_range[0].size
			/ B_PAGE_SIZE; i++) {
		if ((i % 512) == 0) {
			if (pageTable)
				mmu_free(pageTable, B_PAGE_SIZE);

			pageTable = (uint64*)mmu_allocate_page(&physicalAddress);
			memset(pageTable, 0, B_PAGE_SIZE);
			pageDir[i / 512] = physicalAddress | kTableMappingFlags;
		}

		// Get the physical address to map.
		if (!mmu_get_virtual_mapping(KERNEL_LOAD_BASE + (i * B_PAGE_SIZE),
				&physicalAddress))
			continue;

		pageTable[i % 512] = physicalAddress | kPageMappingFlags;
	}

	if (pageTable)
		mmu_free(pageTable, B_PAGE_SIZE);
	mmu_free(pageDir, B_PAGE_SIZE);
	mmu_free(pdpt, B_PAGE_SIZE);

	// Sort the address ranges.
	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	dprintf("phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_memory_range[i].start,
			gKernelArgs.physical_memory_range[i].size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_allocated_range[i].start,
			gKernelArgs.physical_allocated_range[i].size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.virtual_allocated_range[i].start,
			gKernelArgs.virtual_allocated_range[i].size);
	}

	gLongPhysicalPML4 = gKernelArgs.arch_args.phys_pgdir;
}


static void
convert_preloaded_image(preloaded_elf64_image* image)
{
	fix_address(image->next);
	fix_address(image->name);
	fix_address(image->debug_string_table);
	fix_address(image->syms);
	fix_address(image->rel);
	fix_address(image->rela);
	fix_address(image->pltrel);
	fix_address(image->debug_symbols);
}


/*!	Convert all addresses in kernel_args to 64-bit addresses. */
static void
convert_kernel_args()
{
	fix_address(gKernelArgs.boot_volume);
	fix_address(gKernelArgs.vesa_modes);
	fix_address(gKernelArgs.edid_info);
	fix_address(gKernelArgs.debug_output);
	fix_address(gKernelArgs.boot_splash);
	fix_address(gKernelArgs.arch_args.apic);
	fix_address(gKernelArgs.arch_args.hpet);

	convert_preloaded_image(static_cast<preloaded_elf64_image*>(
		gKernelArgs.kernel_image.Pointer()));
	fix_address(gKernelArgs.kernel_image);

	// Iterate over the preloaded images. Must save the next address before
	// converting, as the next pointer will be converted.
	preloaded_image* image = gKernelArgs.preloaded_images;
	fix_address(gKernelArgs.preloaded_images);
	while (image != NULL) {
		preloaded_image* next = image->next;
		convert_preloaded_image(static_cast<preloaded_elf64_image*>(image));
		image = next;
	}

	// Set correct kernel args range addresses.
	dprintf("kernel args ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_kernel_args_ranges; i++) {
		gKernelArgs.kernel_args_range[i].start = fix_address(
			gKernelArgs.kernel_args_range[i].start);
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.kernel_args_range[i].start,
			gKernelArgs.kernel_args_range[i].size);
	}

	// Fix driver settings files.
	driver_settings_file* file = gKernelArgs.driver_settings;
	fix_address(gKernelArgs.driver_settings);
	while (file != NULL) {
		driver_settings_file* next = file->next;
		fix_address(file->next);
		fix_address(file->buffer);
		file = next;
	}
}


static void
long_smp_start_kernel(void)
{
	uint32 cpu = smp_get_current_cpu();

	// Important: make sure supervisor threads can fault on read-only pages...
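	// CR0 bits set here: PE (bit 0, protected mode), NE (bit 5, native FPU
	// error reporting), WP (bit 16, write protect in ring 0) and PG (bit 31,
	// paging).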
	asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
	asm("cld");
	asm("fninit");

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[cpu].start
		= fix_address(gKernelArgs.cpu_kstack[cpu].start);

	long_enter_kernel(cpu, gKernelArgs.cpu_kstack[cpu].start
		+ gKernelArgs.cpu_kstack[cpu].size);

	panic("Shouldn't get here");
}


void
long_start_kernel()
{
	// Check whether long mode is supported.
	cpuid_info info;
	get_current_cpuid(&info, 0x80000001);
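	// Bit 29 of EDX for CPUID function 0x80000001 is the long mode (LM) flag.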
	if ((info.regs.edx & (1 << 29)) == 0)
		panic("64-bit kernel requires a 64-bit CPU");

	preloaded_elf64_image *image = static_cast<preloaded_elf64_image *>(
		gKernelArgs.kernel_image.Pointer());

	smp_init_other_cpus();

	long_gdt_init();
	long_idt_init();
	long_mmu_init();
	convert_kernel_args();

	debug_cleanup();

	// Save the kernel entry point address.
	gLongKernelEntry = image->elf_header.e_entry;
	dprintf("kernel entry at %#llx\n", gLongKernelEntry);

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[0].start
		= fix_address(gKernelArgs.cpu_kstack[0].start);

	// We're about to enter the kernel -- disable console output.
	stdout = NULL;

	smp_boot_other_cpus(long_smp_start_kernel);

	// Enter the kernel!
	long_enter_kernel(0, gKernelArgs.cpu_kstack[0].start
		+ gKernelArgs.cpu_kstack[0].size);

	panic("Shouldn't get here");
}
