/**
 * \file
 * \brief x86-64 architecture initialization.
 */

/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
 */

#include <kernel.h>
#include <string.h>
#include <stdio.h>
#include <paging_kernel_arch.h>
#include <elf/elf.h>
#include <init.h>
#include <irq.h>
#include <x86.h>
#include <serial.h>
#include <kernel_multiboot.h>
#include <syscall.h>
#include <getopt/getopt.h>
#include <exec.h>
#include <kputchar.h>
#include <systime.h>
#include <arch/x86/conio.h>
#include <arch/x86/pic.h>
#include <arch/x86/apic.h>
#include <arch/x86/mcheck.h>
#include <arch/x86/perfmon.h>
#include <arch/x86/rtc.h>
#include <target/x86/barrelfish_kpi/coredata_target.h>
#include <arch/x86/timing.h>
#include <arch/x86/startup_x86.h>
#include <arch/x86/start_aps.h>
#include <arch/x86/ipi_notify.h>
#include <barrelfish_kpi/cpu_arch.h>
#include <target/x86_64/barrelfish_kpi/cpu_target.h>
#include <coreboot.h>
#include <kcb.h>

#include <dev/xapic_dev.h> // XXX
#include <dev/ia32_dev.h>
#include <dev/amd64_dev.h>

/**
 * Used to store the address of the global struct passed during boot across
 * kernel relocations.
 */
static uint64_t addr_global;

/**
 * \brief Kernel stack.
 *
 * This is the one and only kernel stack for a kernel instance.
 */
uintptr_t x86_64_kernel_stack[X86_64_KERNEL_STACK_SIZE/sizeof(uintptr_t)];

/**
 * \brief Global Task State Segment (TSS).
 *
 * This is the global, static and only Task State Segment (TSS). It is used
 * for interrupt and exception handling (stack setup) while in user-space.
 */
static struct task_state_segment tss __attribute__ ((aligned (4)));

/**
 * \brief Global Descriptor Table (GDT) for the processor this kernel is
 * running on.
 *
 * This descriptor table is completely static, as segments are basically
 * turned off in 64-bit mode. They map flat-mode code and stack segments for
 * both kernel- and user-space and the only Task State Segment (TSS).
 */
union segment_descriptor gdt[] __attribute__ ((aligned (4))) = {
    [NULL_SEL] = {   // Null segment
        .raw = 0
    },
    [KCODE_SEL] = {   // Kernel code segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [KSTACK_SEL] = {   // Kernel stack segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [USTACK_SEL] = {   // User stack segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [UCODE_SEL] = {   // User code segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [TSS_LO_SEL] = {   // Global Task State Segment (TSS), lower 8 bytes
        .sys_lo = {
            .lo_limit = sizeof(tss) & 0xffff,
            .type = SDT_SYSTSS,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = (sizeof(tss) >> 16) & 0xf,
            .available = 0,
            .granularity = 0,
        }
    },
    [TSS_HI_SEL] = {   // Global Task State Segment (TSS), upper 8 bytes
        .sys_hi = {
            .base = 0
        }
    },
    [LDT_LO_SEL] = {    // Local descriptor table (LDT), lower 8 bytes
        .sys_lo = {
            .lo_limit = 0, // # 4k pages (since granularity = 1)
            .lo_base = 0, // changed by context switch path when doing lldt
            .type = 2, // LDT
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0,
            .available = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [LDT_HI_SEL] = {    // Local descriptor table (LDT), upper 8 bytes
        .sys_hi = {
            .base = 0 // changed by context switch path when doing lldt
        }
    },
};

union segment_descriptor *ldt_descriptor = &gdt[LDT_LO_SEL];
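
/*
 * Illustrative only: how the code below turns these GDT indices into selector
 * values. This is a minimal sketch, assuming GSEL uses the usual x86 selector
 * encoding ((index << 3) | rpl); see the GSEL definition in the x86 headers
 * for the authoritative version. Note that the TSS and LDT descriptors are
 * 16 bytes in long mode, which is why each occupies a _LO_SEL/_HI_SEL pair of
 * 8-byte slots above.
 *
 * \code
 * uint16_t kcs = GSEL(KCODE_SEL, SEL_KPL);  // kernel code selector, RPL 0
 * uint16_t ucs = GSEL(UCODE_SEL, SEL_UPL);  // user code selector, RPL 3
 * // The CPU uses bits 15..3 of a selector as the descriptor index, so
 * // kcs >> 3 == KCODE_SEL picks the kernel code descriptor from 'gdt'.
 * \endcode
 */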

/**
 * Bootup PML4, used to map both low (identity-mapped) memory and relocated
 * memory at the same time.
 */
static union x86_64_pdir_entry boot_pml4[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));

/**
 * Bootup low-map PDPT and hi-map PDPT.
 */
static union x86_64_pdir_entry boot_pdpt[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE))),
    boot_pdpt_hi[PTABLE_SIZE] __attribute__ ((aligned(BASE_PAGE_SIZE)));

/**
 * Bootup low-map PDIR, hi-map PDIR, and 1GB PDIR.
 */
static union x86_64_ptable_entry boot_pdir[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE))),
    boot_pdir_hi[PTABLE_SIZE] __attribute__ ((aligned(BASE_PAGE_SIZE))),
    boot_pdir_1GB[PTABLE_SIZE] __attribute__ ((aligned(BASE_PAGE_SIZE)));

/**
 * This flag is set to true once the IDT is initialized and exceptions can be
 * caught.
 */
bool idt_initialized = false;

/**
 * \brief Setup bootup page table.
 *
 * This function sets up the page table needed to boot the kernel
 * proper.  The table identity maps the first 1 GByte of physical
 * memory in order to have access to various data structures and the
 * first MByte containing bootloader-passed data structures. It also
 * identity maps the local copy of the kernel in low memory and
 * aliases it in kernel address space.
 *
 * \param base  Start address of kernel image in physical address space.
 * \param size  Size of kernel image.
 */
static void paging_init(lpaddr_t base, size_t size)
{
    lvaddr_t vbase = local_phys_to_mem(base);

    // Align base to kernel page size
    if(base & X86_64_MEM_PAGE_MASK) {
        size += base & X86_64_MEM_PAGE_MASK;
        base -= base & X86_64_MEM_PAGE_MASK;
    }

    // Align vbase to kernel page size
    if(vbase & X86_64_MEM_PAGE_MASK) {
        vbase -= vbase & X86_64_MEM_PAGE_MASK;
    }

    // Align size to kernel page size
    if(size & X86_64_MEM_PAGE_MASK) {
        size += X86_64_MEM_PAGE_SIZE - (size & X86_64_MEM_PAGE_MASK);
    }

    // XXX: Cannot currently map more than one table of pages
    assert(size <= X86_64_MEM_PAGE_SIZE * X86_64_PTABLE_SIZE);

    for(size_t i = 0; i < size; i += X86_64_MEM_PAGE_SIZE,
            base += X86_64_MEM_PAGE_SIZE, vbase += X86_64_MEM_PAGE_SIZE) {
        // No kernel image above 4 GByte
        assert(base < ((lpaddr_t)4 << 30));

        // Identity-map the kernel's physical region, so we don't lose ground
        paging_x86_64_map_table(&boot_pml4[X86_64_PML4_BASE(base)], (lpaddr_t)boot_pdpt);
        paging_x86_64_map_table(&boot_pdpt[X86_64_PDPT_BASE(base)], (lpaddr_t)boot_pdir);
        paging_x86_64_map_large(&boot_pdir[X86_64_PDIR_BASE(base)], base, PTABLE_PRESENT
                                | PTABLE_READ_WRITE | PTABLE_USER_SUPERVISOR);

        // Alias the same region at MEMORY_OFFSET
        paging_x86_64_map_table(&boot_pml4[X86_64_PML4_BASE(vbase)], (lpaddr_t)boot_pdpt_hi);
        paging_x86_64_map_table(&boot_pdpt_hi[X86_64_PDPT_BASE(vbase)], (lpaddr_t)boot_pdir_hi);
        paging_x86_64_map_large(&boot_pdir_hi[X86_64_PDIR_BASE(vbase)], base, PTABLE_PRESENT
                                | PTABLE_READ_WRITE | PTABLE_USER_SUPERVISOR);
    }

    // Identity-map the first 1G of physical memory for bootloader data
    paging_x86_64_map_table(&boot_pml4[0], (lpaddr_t)boot_pdpt);
    paging_x86_64_map_table(&boot_pdpt[0], (lpaddr_t)boot_pdir_1GB);
    for (int i = 0; i < X86_64_PTABLE_SIZE; i++) {
        paging_x86_64_map_large(&boot_pdir_1GB[X86_64_PDIR_BASE(X86_64_MEM_PAGE_SIZE * i)],
                                X86_64_MEM_PAGE_SIZE * i, PTABLE_PRESENT
                                | PTABLE_READ_WRITE | PTABLE_USER_SUPERVISOR);
    }

    // Activate new page tables
    paging_x86_64_context_switch((lpaddr_t)boot_pml4);
}
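
/*
 * A sketch (not executed anywhere) of the table walk paging_init() builds
 * for one 2 MByte kernel page. Standard 4-level paging splits an address
 * into PML4 index (bits 47..39), PDPT index (bits 38..30) and PDIR index
 * (bits 29..21), which is what the X86_64_PML4_BASE/PDPT_BASE/PDIR_BASE
 * macros extract. For the high alias vbase = local_phys_to_mem(base):
 *
 * \code
 * union x86_64_pdir_entry   *l4 = &boot_pml4[X86_64_PML4_BASE(vbase)];
 * union x86_64_pdir_entry   *l3 = &boot_pdpt_hi[X86_64_PDPT_BASE(vbase)];
 * union x86_64_ptable_entry *l2 = &boot_pdir_hi[X86_64_PDIR_BASE(vbase)];
 * // l2 is the entry that maps a 2 MByte frame at physical address 'base'.
 * \endcode
 */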

/**
 * \brief Setup default GDT.
 *
 * Loads the GDT register with the default GDT and reloads CS and SS
 * to point to the new entries. Resets all other segment registers to null.
 * Finally, completes setup of GDT to include TSS base address mapping and
 * loads TSS into task register.
 */
static void gdt_reset(void)
{
    lvaddr_t                     ptss = (lvaddr_t)&tss;
    struct region_descriptor    region = {
        .rd_limit = sizeof(gdt),
        .rd_base = (uint64_t)&gdt
    };

    // Load default GDT
    __asm volatile("lgdt %[region]" :: [region] "m" (region));

    // Reload segments
    __asm volatile("mov %[null], %%ds      \n\t"
                   "mov %[null], %%es      \n\t"
                   "mov %[ss], %%ss        \n\t"
                   "mov %[null], %%gs      \n\t"
                   "mov %[null], %%fs      \n\t"
                   "pushq %[cs]            \n\t"          // new CS
                   "lea 1f(%%rip), %%rax   \n\t"          // jumps to after lret
                   "pushq %%rax            \n\t"          // new IP
                   "lretq                  \n\t"          // fake return
                   "1:                     \n\t"          // we'll continue here
                   : /* No Output */
                   :
                   [null] "r" (0),
                   [ss] "r" (GSEL(KSTACK_SEL, SEL_KPL)),
                   [cs] "i" (GSEL(KCODE_SEL, SEL_KPL))
                   : "rax"
                   );

    // Complete setup of TSS descriptor (by inserting base address of TSS)
    gdt[TSS_LO_SEL].sys_lo.lo_base = ptss & 0xffffff;
    gdt[TSS_LO_SEL].sys_lo.hi_base = (ptss >> 24) & 0xff;
    gdt[TSS_HI_SEL].sys_hi.base = ptss >> 32;

    // Complete setup of TSS
    tss.rsp[0] = (lvaddr_t)&x86_64_kernel_stack[X86_64_KERNEL_STACK_SIZE / sizeof(uintptr_t)];

    // Load task state register
    __asm volatile("ltr %%ax" :: "a" (GSEL(TSS_LO_SEL, SEL_KPL)));
}

/**
 * \brief Relocates the active stack.
 *
 * This function relocates the stack, by adding 'offset' to the stack
 * pointer.
 *
 * \param offset        Offset to add to the stack pointer.
 */
static inline void __attribute__ ((always_inline))
relocate_stack(lvaddr_t offset)
{
    __asm volatile("add %[stack], %%rsp\n\t"
                   : /* No output */
                   : [stack] "er" (offset)
                   : "rsp"
                   );
}
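
/*
 * Usage note: arch_init() below calls relocate_stack(X86_64_MEMORY_OFFSET)
 * once the kernel image is aliased high, so that the already-active stack
 * keeps working from the aliased mapping before text_init() unmaps low
 * memory.
 */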

/**
 * \brief Enable SYSCALL/SYSRET fast system calls.
 *
 * This function enables the SYSCALL/SYSRET pair of fast system calls in
 * long mode. Also sets the IA32_STAR and IA32_FMASK MSRs to point to the
 * user-space base selector and RFLAGS mask for SYSCALL/SYSRET fast system
 * calls.
 */
static inline void enable_fast_syscalls(void)
{
    // Segment selector bases for both kernel- and user-space for fast
    // system calls
    ia32_star_t star = ia32_star_rd(NULL);
    star = ia32_star_call_insert(star, GSEL(KCODE_SEL,  SEL_KPL));
    star = ia32_star_ret_insert( star, GSEL(KSTACK_SEL, SEL_UPL));
    ia32_star_wr(NULL, star);

    // Set ia32_lstar MSR to point to kernel-space system call multiplexer
    ia32_lstar_wr(NULL, (lvaddr_t)syscall_entry);

    // Set the IA32_FMASK MSR to our OS's RFLAGS mask.
    // We mask out everything (including interrupts).
    ia32_fmask_v_wrf(NULL, ~(RFLAGS_ALWAYS1) );

    // Enable fast system calls
    ia32_efer_sce_wrf(NULL, 1);
}
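
/*
 * Why the STAR values above work: a sketch of the hardware's fixed selector
 * arithmetic (per the AMD64 architecture manual).
 *
 * \code
 * // On SYSCALL:  CS = STAR.call,     SS = STAR.call + 8
 * //   -> GSEL(KCODE_SEL, SEL_KPL), then the adjacent KSTACK_SEL entry.
 * // On SYSRET:   CS = STAR.ret + 16, SS = STAR.ret + 8   (64-bit mode)
 * //   -> starting from GSEL(KSTACK_SEL, SEL_UPL), this lands on
 * //      UCODE_SEL and USTACK_SEL, matching the GDT layout above.
 * \endcode
 */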

static inline void enable_tlb_flush_filter(void)
{
    uint32_t eax, ebx, ecx, edx;

    // Must read "AuthenticAMD"
    cpuid(0, &eax, &ebx, &ecx, &edx);
    if(ebx != 0x68747541 || ecx != 0x444d4163 || edx != 0x69746e65) {
        return;
    }

    // Is at least family 0fh?
    cpuid(1, &eax, &ebx, &ecx, &edx);
    if(((eax >> 8) & 0xf) != 0xf) {
        return;
    }

    debug(SUBSYS_STARTUP, "Enabling TLB flush filter\n");
    ia32_amd_hwcr_ffdis_wrf(NULL, 1);
}
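
/*
 * How the magic constants above spell out the vendor string: CPUID leaf 0
 * returns "AuthenticAMD" spread across ebx, edx, ecx in that order, four
 * little-endian bytes each.
 *
 * \code
 * // ebx = 0x68747541 -> 'A' 'u' 't' 'h'
 * // edx = 0x69746e65 -> 'e' 'n' 't' 'i'
 * // ecx = 0x444d4163 -> 'c' 'A' 'M' 'D'
 * \endcode
 */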

static inline void enable_monitor_mwait(void)
{
    uint32_t eax, ebx, ecx, edx;

    if (has_monitor_mwait()) {
        cpuid(5, &eax, &ebx, &ecx, &edx);
        debug(SUBSYS_STARTUP, "MONITOR/MWAIT supported: "
              "min size %u bytes, max %u bytes. %s %s\n",
              eax, ebx, (ecx & 2) ? "IBE" : "", (ecx & 1) ? "EMX" : "");
    }
    else {
        debug(SUBSYS_STARTUP, "MONITOR/MWAIT are not supported.\n");
    }
}
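
/*
 * Illustrative only -- nothing in this file arms a monitor; the hypothetical
 * helper below is a minimal sketch of the MONITOR/MWAIT sequence the check
 * above probes for, assuming 'line' is an address the core should sleep on.
 *
 * \code
 * static inline void wait_on(void *line)
 * {
 *     // MONITOR: rax = linear address, ecx = extensions, edx = hints
 *     __asm volatile("monitor" :: "a" (line), "c" (0), "d" (0));
 *     // Re-check the wakeup condition here to close the race, then sleep:
 *     __asm volatile("mwait" :: "a" (0), "c" (0));
 * }
 * \endcode
 */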

/**
 * \brief Continue kernel initialization in kernel address space.
 *
 * This function resets paging to map out low memory and map in physical
 * address space, relocating all remaining data structures. It resets the
 * Global Descriptor Table for flat mode and to exclude legacy segments from
 * boot initialization code. It sets up the IDT for exception and interrupt
 * handling, initializes the local APIC and enables interrupts. After that it
 * calls kernel_startup(), which should not return (if it does, this function
 * halts the kernel).
 */
static void  __attribute__ ((noreturn, noinline)) text_init(void)
{
    // Reset global and locks to point to the memory in the pristine image
    global = (struct global*)addr_global;

    /*
     * Reset paging once more to use relocated data structures and map in
     * whole of kernel and available physical memory. Map out low memory.
     */
    paging_x86_64_reset();

    // Relocate global to "memory"
    global = (struct global*)local_phys_to_mem((lpaddr_t)global);

    // Relocate glbl_core_data to "memory"
    glbl_core_data = (struct x86_core_data *)
        local_phys_to_mem((lpaddr_t)glbl_core_data);

    /*
     * Use the new physical address space for video memory -- nothing that
     * ends up calling a conio.c function may run between paging_reset()
     * and conio_relocate_vidmem()!
     */
    conio_relocate_vidmem(local_phys_to_mem(VIDEO_MEM));

    // Re-map physical memory
    /* XXX: Currently we are statically mapping a fixed amount of
       memory. We should not map in more memory than the machine
       actually has, or else when the kernel accesses an address not
       backed by real memory it will experience weird faults instead
       of a simple page fault.

       Ideally, we should use the ACPI information to figure out which
       memory to map in. Look at ticket #218 for more
       information. -Akhi
    */
    if(paging_x86_64_map_memory(0, X86_64_PADDR_SPACE_LIMIT) != 0) {
        panic("error while mapping physical memory!");
    }

    /*
     * Also reset the global descriptor table (GDT), so we get
     * segmentation again and can catch interrupts/exceptions (the IDT
     * needs the GDT).
     */
    gdt_reset();

    // Arch-independent early startup
    kernel_startup_early();

    // XXX: re-init the serial driver, in case the port changed after parsing args
    serial_console_init(false);

    // Setup IDT
    setup_default_idt();
    idt_initialized = true;

    // Enable machine check reporting
    mcheck_init();

    // Initialize local APIC
    apic_init();

    // do not remove/change this printf: needed by regression harness
    printf("Barrelfish CPU driver starting on x86_64 apic_id %u\n", apic_id);

    if(apic_is_bsp()) {
        // Initialize classic (8259A) PIC
        pic_init();
    }

    // Initialize real-time clock
    rtc_init();

    // Initialize local APIC timer
    if (kernel_ticks_enabled) {
        timing_calibrate();
        apic_timer_init(false, false);
        apic_timer_set_divide(xapic_by1);
        kernel_timeslice = ns_to_systime(config_timeslice * 1000000);
#ifndef CONFIG_ONESHOT_TIMER
        systime_set_timeout(systime_now() + kernel_timeslice);
#endif
    } else {
        printk(LOG_WARN, "APIC timer disabled: NO timeslicing\n");
        apic_mask_timer();
    }

    // Initialize IPI notification mechanism
    ipi_notify_init();

    // Enable SYSCALL/SYSRET fast system calls
    enable_fast_syscalls();

    // Enable "no execute" page-level protection bit
    ia32_efer_nxe_wrf(NULL, 1);

    // Enable FPU and MMX
    enable_fpu();

    // Enable user-mode RDPMC opcode
    amd64_cr4_pce_wrf(NULL, 1);

    // AMD64: Check if TLB flush filter is enabled
    enable_tlb_flush_filter();

    // Enable global pages
    amd64_cr4_pge_wrf(NULL, 1);

    // Check/Enable MONITOR/MWAIT opcodes
    enable_monitor_mwait();

    // Setup Page Attribute Table MSR
    configure_page_attribute_table();

    // Call main kernel startup function -- this should never return
    kernel_startup();

    halt();
    // Returning here will crash! -- low pages not mapped anymore!
}

/**
 * \brief Architecture-specific initialization function.
 *
 * This function is called by the bootup code in boot.S to initialize
 * architecture-specific stuff. It is expected to call the kernel main
 * loop. This function never returns.
 *
 * The kernel expects one of two magic values in 'magic' that determine how it
 * has been booted. If 'magic' is #MULTIBOOT_INFO_MAGIC the kernel has been
 * booted by a (Multiboot-compliant) bootloader and this is the first image on
 * the boot CPU. It will relocate itself to a default position. If 'magic' is
 * #KERNEL_BOOT_MAGIC it has been booted by another image of itself and is
 * running on a so-called application CPU.
 *
 * This function sets up new page tables to alias the kernel
 * at #MEMORY_OFFSET. It also does any relocations necessary to the
 * "position-independent" code to make it run at the new location (e.g.
 * relocating the GOT). After all relocations, it calls text_init() of
 * the relocated image, which destroys the lower alias and may never return.
 *
 * For BSP kernels, the void pointer is of type multiboot_info; for application
 * CPUs, it is of type global. Global carries a pointer to multiboot_info.
 * Global also contains pointers to memory that is shared between kernels.
 *
 * \param magic         Boot magic value
 * \param pointer       Pointer to Multiboot Info or to Global structure
 */
void arch_init(uint64_t magic, void *pointer)
{
    // Sanitize the screen
    conio_cls();
    // Initialize serial; only initialize the HW if we are the first kernel
    serial_console_init((magic == MULTIBOOT_INFO_MAGIC));

    void __attribute__ ((noreturn)) (*reloc_text_init)(void) =
        (void *)local_phys_to_mem((lpaddr_t)text_init);
    struct Elf64_Shdr *rela, *symtab;
    struct multiboot_info *mb = NULL;

    apic_bsp = magic == MULTIBOOT_INFO_MAGIC;

    /*
     * If this is the boot image, make the Multiboot information structure
     * globally known. Otherwise the passed value should equal the original
     * structure. If the magic value does not match what we expect, we cannot
     * proceed safely.
     */
    switch(magic) {
    case MULTIBOOT_INFO_MAGIC:
        mb = (struct multiboot_info *)pointer;

        // Construct the global structure and store its address so it can be
        // retrieved across relocation
        memset(&global->locks, 0, sizeof(global->locks));
        addr_global            = (uint64_t)global;
        break;

    case KERNEL_BOOT_MAGIC:
        global = (struct global*)pointer;
        // Store the address of global to retrieve it across relocation
        addr_global = (uint64_t)global;
        break;

    default:
        panic("Magic value does not match! (0x%x != 0x%lx != 0x%x)",
              KERNEL_BOOT_MAGIC, magic, MULTIBOOT_INFO_MAGIC);
        break;
    }

    /* determine the page-aligned start address of the kernel image */
    lvaddr_t dest = (lvaddr_t)&_start_kernel;
    if (dest & (BASE_PAGE_SIZE - 1)) {
        dest &= ~(BASE_PAGE_SIZE - 1);
        dest += BASE_PAGE_SIZE;
    }
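
    // Worked example: with BASE_PAGE_SIZE = 0x1000, a dest of 0x100234
    // is rounded up to 0x101000 by the masking above.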

    // XXX: print kernel address for debugging with gdb
    printf("Kernel starting at address 0x%"PRIxLVADDR"\n",
           local_phys_to_mem(dest));

    struct x86_coredata_elf *elf;
    uint32_t multiboot_flags;
    if (mb != NULL) { /* Multiboot info was passed */
        multiboot_flags = mb->flags;
        elf = (struct x86_coredata_elf *)&mb->syms.elf;

        // We need the ELF section header table for relocation
        if (!(multiboot_flags & MULTIBOOT_INFO_FLAG_HAS_ELF_SYMS)) {
            panic("Multiboot information structure does not include ELF "
                  "section header information -- Relocation impossible!");
        }

        // Determine where free RAM starts
        glbl_core_data->start_free_ram =
            ROUND_UP(max(multiboot_end_addr(mb), (uintptr_t)&_end_kernel),
                     BASE_PAGE_SIZE);

        glbl_core_data->mods_addr = mb->mods_addr;
        glbl_core_data->mods_count = mb->mods_count;
        glbl_core_data->cmdline = mb->cmdline;
        glbl_core_data->mmap_length = mb->mmap_length;
        glbl_core_data->mmap_addr = mb->mmap_addr;

    } else { /* No multiboot info, use the core_data struct */
        struct x86_core_data *core_data =
            (struct x86_core_data*)(dest - BASE_PAGE_SIZE);
        multiboot_flags = core_data->multiboot_flags;
        elf = &core_data->elf;
        glbl_core_data = core_data;
        core_data->cmdline = (lpaddr_t)&core_data->kernel_cmdline;
        my_core_id = core_data->dst_core_id;

        kcb_current = (struct kcb*) glbl_core_data->kcb;
        if (core_data->module_end > 4ul * (1ul << 30)) {
            panic("The cpu module is outside the initial 4GB mapping."
                  " Either move the module or increase initial mapping.");
        }
    }

    // We're only able to process Elf64_Rela entries
    if (elf->size != sizeof(struct Elf64_Shdr)) {
        panic("ELF section header entry size mismatch!");
    }

    // Find relocation section
    rela = elf64_find_section_header_type((struct Elf64_Shdr *)
                                          (lpaddr_t)elf->addr,
                                          elf->num, SHT_RELA);
    if (rela == NULL) {
        panic("Kernel image does not include relocation section!");
    }

    // Find symbol table section
    symtab = elf64_find_section_header_type((struct Elf64_Shdr *)
                                            (lpaddr_t)elf->addr,
                                            elf->num, SHT_DYNSYM);
    if (symtab == NULL) {
        panic("Kernel image does not include symbol table!");
    }

    // Alias kernel on top of memory, keep low memory
    paging_init((lpaddr_t)&_start_kernel, SIZE_KERNEL_IMAGE);

    // Relocate kernel image to top of memory
    elf64_relocate(X86_64_MEMORY_OFFSET + (lvaddr_t)&_start_kernel,
                   (lvaddr_t)&_start_kernel,
                   (struct Elf64_Rela *)(rela->sh_addr - X86_64_START_KERNEL_PHYS + &_start_kernel),
                   rela->sh_size,
                   (struct Elf64_Sym *)(symtab->sh_addr - X86_64_START_KERNEL_PHYS + &_start_kernel),
                   symtab->sh_size,
                   X86_64_START_KERNEL_PHYS, &_start_kernel);

    /*** Aliased kernel available now -- low memory still mapped ***/

    // Relocate stack to aliased location
    relocate_stack(X86_64_MEMORY_OFFSET);

    // Call aliased text_init() function and continue initialization
    reloc_text_init();
}