/**
 * \file
 * \brief x86-32 architecture initialization.
 */

/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011, 2012, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <kernel.h>
#include <string.h>
#include <stdio.h>
#include <paging_kernel_arch.h>
#include <elf/elf.h>
#include <init.h>
#include <irq.h>
#include <x86.h>
#include <serial.h>
#include <kernel_multiboot.h>
#include <syscall.h>
#include <getopt/getopt.h>
#include <exec.h>
#include <kputchar.h>
#include <arch/x86/conio.h>
#include <arch/x86/pic.h>
#include <arch/x86/apic.h>
#include <arch/x86/perfmon_intel.h>
#include <arch/x86/perfmon_amd.h>
#include <arch/x86/rtc.h>
#include <arch/x86/ipi_notify.h>
#include <target/x86/barrelfish_kpi/coredata_target.h>
#include <arch/x86/timing.h>
#include <arch/x86/startup_x86.h>
#include <arch/x86/start_aps.h>
#include <coreboot.h>
#include <kcb.h>

#include <dev/xapic_dev.h> // XXX

/**
 * Stores the address of the 'global' struct passed during boot, so that it
 * can be recovered across kernel relocations.
 */
// XXX: This won't work if this kernel is not relocated from a pristine image!
static uint32_t addr_global;

/**
 * EFLAGS mask for fast system calls. Flags to be masked out are listed
 * here. We mask out everything (including interrupts).
 */
#define SYSCALL_FMASK   (~(EFLAGS_ALWAYS1) & 0xffffffff)

/**
 * Segment selector bases for both kernel- and user-space for fast
 * system calls. In the STAR MSR, bits 47:32 hold the kernel base
 * selector (used by SYSCALL) and bits 63:48 the user base selector
 * (used by SYSRET).
 */
#define SYSCALL_STAR \
    ((((uint64_t)GSEL(KSTACK_SEL, SEL_UPL)) << 48) | \
     ((uint64_t)GSEL(KCODE_SEL, SEL_KPL) << 32))

/**
 * \brief Kernel stack.
 *
 * This is the one and only kernel stack for a kernel instance.
 */
uintptr_t x86_32_kernel_stack[X86_32_KERNEL_STACK_SIZE/sizeof(uintptr_t)];
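
// The stack grows downward; tss.esp0 (set in gdt_reset() below) points one
// element past the end of this array.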

/**
 * \brief Global Task State Segment (TSS).
 *
 * This is the global, static and only Task State Segment (TSS). It is used
 * for interrupt and exception handling (stack setup) while in user-space.
 */
static struct task_state_segment tss __attribute__ ((aligned (4)));

/**
 * \brief Global Descriptor Table (GDT) for the processor this kernel is
 * running on.
 *
 * This descriptor table is completely static. It maps flat-mode code and
 * stack segments for both kernel- and user-space, the only Task State
 * Segment (TSS), and the dispatcher "segment".
 */
static union segment_descriptor gdt[] __attribute__ ((aligned (4))) = {
    {   // Null segment
        .raw = 0
    },
    {   // Kernel code segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 0,
            .operation_size = 1,
            .granularity = 1,
            .hi_base = 0
        }
    },
    {   // Kernel stack segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 0,
            .operation_size = 1,
            .granularity = 1,
            .hi_base = 0
        }
    },
    {   // User stack segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 0,
            .operation_size = 1,
            .granularity = 1,
            .hi_base = 0
        }
    },
    {   // User code segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 0,
            .operation_size = 1,
            .granularity = 1,
            .hi_base = 0
        }
    },
    {   // Global Task State Segment (TSS)
        .tss = {
            .lo_limit = sizeof(tss) & 0xffff,
            .type = SDT_SYSTSS,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = (sizeof(tss) >> 16) & 0xf,
            .available = 0,
            .granularity = 0,
        }
    },
    {
        // Dispatcher "segment"
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 0,
            .operation_size = 1,
            .granularity = 1,
            .hi_base = 0
        }
    }
};

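/*
 * Points at the dispatcher "segment" descriptor (entry 6 above). The entry
 * order of this table must match the *_SEL selector indices used with
 * GSEL() throughout this file.
 */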
volatile union segment_descriptor *curdisp = &gdt[6];

#ifdef CONFIG_PAE
/**
 * Bootup PDPTE.
 */
static union x86_32_pdpte_entry boot_pdpte[X86_32_PDPTE_SIZE]
__attribute__ ((aligned(X86_32_BASE_PAGE_SIZE)));

/**
 * Bootup low-map PDIR and hi-map PDIR.
 */
static union x86_32_ptable_entry boot_pdir[X86_32_PTABLE_SIZE]
__attribute__ ((aligned(X86_32_BASE_PAGE_SIZE))),
    boot_pdir_hi[X86_32_PTABLE_SIZE] __attribute__ ((aligned(X86_32_BASE_PAGE_SIZE)));
#else
#       ifdef CONFIG_PSE
/**
 * Bootup PDIR.
 */
static union x86_32_ptable_entry boot_pdir[X86_32_PTABLE_SIZE]
__attribute__ ((aligned(X86_32_BASE_PAGE_SIZE)));
#       else
/**
 * Bootup PDIR.
 */
static union x86_32_pdir_entry boot_pdir[X86_32_PTABLE_SIZE]
__attribute__ ((aligned(X86_32_BASE_PAGE_SIZE)));

/**
 * Bootup low-map PTABLE and hi-map PTABLE.
 */
static union x86_32_ptable_entry
boot_ptable[MEM_PTABLE_SIZE][X86_32_PTABLE_SIZE]
__attribute__ ((aligned(X86_32_BASE_PAGE_SIZE)));
#       endif
#endif
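
/*
 * Sizing note (architectural; assuming the table-size macros match x86,
 * i.e. 512 entries per directory with PAE and 1024 without): a PAE page
 * directory maps 512 * 2 MB = 1 GB, a PSE directory maps
 * 1024 * 4 MB = 4 GB, and each 4 KB-page table maps 1024 * 4 KB = 4 MB.
 */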

/**
 * This flag is set to true once the IDT is initialized and exceptions can be
 * caught.
 */
bool idt_initialized = false;

/**
 * \brief Setup bootup page table.
 *
 * This function sets up the page table needed to boot the kernel proper.
 * The table identity-maps low physical memory, using large pages where
 * available (2 MBytes with PAE, 4 MBytes with PSE), so that the first
 * MByte containing bootloader-passed data structures stays accessible.
 * It also identity maps the local copy of the kernel in low memory and
 * aliases it in kernel address space.
 */
static void paging_init(void)
{
    lvaddr_t vbase = X86_32_MEMORY_OFFSET, base = 0;

    // Align vbase to kernel page size
    if(vbase & X86_32_MEM_PAGE_MASK) {
        vbase -= vbase & X86_32_MEM_PAGE_MASK;
    }
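    /* e.g. (hypothetical values) with 4 MB pages (mask 0x3fffff), a vbase
       of 0x80100000 rounds down to 0x80000000 */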

#ifdef CONFIG_PAE
    for(size_t i = 0; i < X86_32_PTABLE_SIZE; i++,
            base += X86_32_MEM_PAGE_SIZE, vbase += X86_32_MEM_PAGE_SIZE) {
        // Identity-map the kernel's physical region, so we don't lose ground
        paging_x86_32_map_pdpte(&boot_pdpte[X86_32_PDPTE_BASE(base)],
                                (lpaddr_t)boot_pdir);
        paging_x86_32_map_large(&boot_pdir[X86_32_PDIR_BASE(base)], base,
                                X86_32_PTABLE_PRESENT
                                | X86_32_PTABLE_READ_WRITE
                                | X86_32_PTABLE_USER_SUPERVISOR);

        // Alias the same region at MEMORY_OFFSET
        paging_x86_32_map_pdpte(&boot_pdpte[X86_32_PDPTE_BASE(vbase)],
                                (lpaddr_t)boot_pdir_hi);
        paging_x86_32_map_large(&boot_pdir_hi[X86_32_PDIR_BASE(vbase)], base,
                                X86_32_PTABLE_PRESENT
                                | X86_32_PTABLE_READ_WRITE
                                | X86_32_PTABLE_USER_SUPERVISOR);
    }

    // Activate new page tables
    paging_x86_32_context_switch((lpaddr_t)boot_pdpte);
#else
    for(size_t i = 0; i < X86_32_PADDR_SPACE_LIMIT; i += X86_32_MEM_PAGE_SIZE,
            base += X86_32_MEM_PAGE_SIZE, vbase += X86_32_MEM_PAGE_SIZE) {
#       ifdef CONFIG_PSE
        // Identity-map the kernel's physical region, so we don't lose ground
        paging_x86_32_map_large(&boot_pdir[X86_32_PDIR_BASE(base)], base,
                                X86_32_PTABLE_PRESENT
                                | X86_32_PTABLE_READ_WRITE
                                | X86_32_PTABLE_USER_SUPERVISOR);

        // Alias the same region at MEMORY_OFFSET
        paging_x86_32_map_large(&boot_pdir[X86_32_PDIR_BASE(vbase)], base,
                                X86_32_PTABLE_PRESENT
                                | X86_32_PTABLE_READ_WRITE
                                | X86_32_PTABLE_USER_SUPERVISOR);
#       else
        // Identity-map the kernel's physical region, so we don't lose ground
        paging_x86_32_map_table(&boot_pdir[X86_32_PDIR_BASE(base)],
                                (lpaddr_t)boot_ptable[X86_32_PDIR_BASE(base)]);
        paging_x86_32_map(&boot_ptable[X86_32_PDIR_BASE(base)][X86_32_PTABLE_BASE(base)],
                          base,
                          X86_32_PTABLE_PRESENT
                          | X86_32_PTABLE_READ_WRITE
                          | X86_32_PTABLE_USER_SUPERVISOR);

        // Alias the same region at MEMORY_OFFSET, sharing the same page table
        paging_x86_32_map_table(&boot_pdir[X86_32_PDIR_BASE(vbase)],
                                (lpaddr_t)boot_ptable[X86_32_PDIR_BASE(base)]);
#       endif
    }

    // Activate new page tables
    paging_x86_32_context_switch((lpaddr_t)boot_pdir);
#endif
}
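
/*
 * Example (hypothetical offset): with X86_32_MEMORY_OFFSET = 0x80000000,
 * virtual addresses 0x00100000 and 0x80100000 both map physical address
 * 0x00100000 afterwards, so execution can continue from either alias.
 */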

/**
 * \brief Setup default GDT.
 *
 * Loads the GDT register with the default GDT and reloads CS and SS
 * to point to the new entries. Resets all other segment registers to null.
 * Finally, completes setup of GDT to include TSS base address mapping and
 * loads TSS into task register.
 */
static void gdt_reset(void)
{
    lvaddr_t                     ptss = (lvaddr_t)&tss;
    struct region_descriptor    region = {
        // The GDTR limit is the offset of the last valid byte, hence -1
        .rd_limit = sizeof(gdt) - 1,
        .rd_base = (uint32_t)&gdt
    };

    // Load default GDT
    __asm volatile("lgdt %[region]" :: [region] "m" (region));

    // Reload segments
    __asm volatile("mov %[ds], %%ds        \n\t"
                   "mov %[ds], %%es        \n\t"
                   "mov %[ss], %%ss        \n\t"
                   "mov %[null], %%gs      \n\t"
                   "mov %[null], %%fs      \n\t"
                   "pushl %[cs]            \n\t"          // new CS
                   "lea 1f, %%eax          \n\t"          // jumps to after lret
                   "pushl %%eax            \n\t"          // new IP
                   "lretl                  \n\t"          // fake return
                   "1:                     \n\t"          // we'll continue here
                   : /* No Output */
                   :
                   [null] "r" (0),
                   [ss] "r" (GSEL(KSTACK_SEL, SEL_KPL)),
                   [cs] "i" (GSEL(KCODE_SEL, SEL_KPL)),
                   [ds] "r" (GSEL(USTACK_SEL, SEL_UPL))
                   : "eax"
                   );

    // Complete setup of TSS descriptor (by inserting base address of TSS)
    gdt[TSS_SEL].tss.lo_base = ptss & 0xffffff;
    gdt[TSS_SEL].tss.hi_base = (ptss >> 24) & 0xff;

    // Complete setup of TSS
    tss.esp0 = (lvaddr_t)&x86_32_kernel_stack[X86_32_KERNEL_STACK_SIZE / sizeof(uintptr_t)];
    tss.ss0 = GSEL(KSTACK_SEL, SEL_KPL);

    // Load task state register
    __asm volatile("ltr %%ax" :: "a" (GSEL(TSS_SEL, SEL_KPL)));
}

/**
 * \brief Relocates the active stack.
 *
 * This function relocates the stack, by adding 'offset' to the stack
 * pointer.
 *
 * \param offset        Offset to add to the stack pointer.
 */
static inline void __attribute__ ((always_inline))
relocate_stack(lvaddr_t offset)
{
    __asm volatile("add %[stack], %%esp\n\t"
                   : /* No output */
                   : [stack] "g" (offset)
                   : "esp"
                   );
}
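
/*
 * Note: this is used below with X86_32_MEMORY_OFFSET to move onto the
 * high alias of the same physical stack; it is forced inline, presumably
 * so the compiler never emits a call that returns over a stack pointer
 * the function has just changed.
 */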

/**
 * \brief Enable SYSCALL/SYSRET fast system calls.
 *
 * This function enables the SYSCALL/SYSRET pair of fast system calls.
 * It also sets the IA32_STAR and IA32_FMASK MSRs to the user-space base
 * selector and EFLAGS mask for SYSCALL/SYSRET fast system calls.
 */
static inline void enable_fast_syscalls(void)
{
    // Set IA32_STAR MSR to point to user-space base selector
    wrmsr(MSR_IA32_STAR, SYSCALL_STAR);

    // Set IA32_LSTAR MSR to point to kernel-space system call multiplexer
    wrmsr(MSR_IA32_LSTAR, (lvaddr_t)syscall_entry);

    // Set IA32_FMASK MSR to our OS's EFLAGS mask
    wrmsr(MSR_IA32_FMASK, SYSCALL_FMASK);

    // Enable fast system calls
    addmsr(MSR_IA32_EFER, IA32_EFER_SCE);
}
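
/*
 * Note: fast system calls are currently not enabled on x86-32; the call
 * to enable_fast_syscalls() in text_init() below is commented out.
 */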

#define CR0_CD  (1 << 30)       // Cache disable
#define CR0_NW  (1 << 29)       // Not write-through
#define CR0_PG  (1 << 31)       // Paging enable
#define CR4_MPE (1 << 11)
#define CR4_PCE (1 << 8)        // Performance-monitoring counter enable (user RDPMC)
#define CR4_PGE (1 << 7)        // Page global enable
#define CR4_PAE (1 << 5)        // Physical address extension
#define CR4_PSE (1 << 4)        // Page size extensions

// Allow user-mode code to execute RDPMC by setting CR4.PCE
static inline void enable_user_rdpmc(void)
{
    uint32_t cr4;

    __asm volatile("mov %%cr4, %[cr4]" : [cr4] "=r" (cr4));
    cr4 |= CR4_PCE;
    __asm volatile("mov %[cr4], %%cr4" :: [cr4] "r" (cr4));
}

// Re-enable the TLB flush filter on AMD K8 (family 0fh) processors by
// clearing HWCR.FFDIS ("flush filter disable").
static inline void enable_tlb_flush_filter(void)
{
    uint32_t eax, ebx, ecx, edx;

    // Must read "AuthenticAMD": ebx = "Auth", edx = "enti", ecx = "cAMD"
    cpuid(0, &eax, &ebx, &ecx, &edx);
    if(ebx != 0x68747541 || ecx != 0x444d4163 || edx != 0x69746e65) {
        return;
    }

    // Is at least family 0fh? (the 4-bit base family field saturates at 0xf)
    cpuid(1, &eax, &ebx, &ecx, &edx);
    if(((eax >> 8) & 0xf) != 0xf) {
        return;
    }

    debug(SUBSYS_STARTUP, "Enabling TLB flush filter\n");
    uint64_t hwcr = rdmsr(MSR_AMD_HWCR);
    hwcr &= ~AMD_HWCR_FFDIS;
    wrmsr(MSR_AMD_HWCR, hwcr);
}

static inline void enable_pge(void)
{
    uint32_t cr4;

    __asm volatile("mov %%cr4, %[cr4]" : [cr4] "=r" (cr4));
    cr4 |= CR4_PGE;
    __asm volatile("mov %[cr4], %%cr4" :: [cr4] "r" (cr4));
}

static inline void enable_pae(void)
{
    uint32_t cr4;

    __asm volatile("mov %%cr4, %[cr4]" : [cr4] "=r" (cr4));
    cr4 |= CR4_PAE;
    __asm volatile("mov %[cr4], %%cr4" :: [cr4] "r" (cr4));
}

static inline void enable_pse(void)
{
    uint32_t cr4;

    __asm volatile("mov %%cr4, %[cr4]" : [cr4] "=r" (cr4));
    cr4 |= CR4_PSE;
    __asm volatile("mov %[cr4], %%cr4" :: [cr4] "r" (cr4));
}

static inline void enable_pg(void)
{
    uint32_t cr0;

    __asm volatile("mov %%cr0, %[cr0]" : [cr0] "=r" (cr0));
    cr0 |= CR0_PG;
    __asm volatile("mov %[cr0], %%cr0" :: [cr0] "r" (cr0));
}

static inline void enable_monitor_mwait(void)
{
    uint32_t eax, ebx, ecx, edx;

    // CPUID.01H:ECX bit 3 indicates MONITOR/MWAIT support
    cpuid(1, &eax, &ebx, &ecx, &edx);

    if (ecx & (1 << 3)) {
        // Leaf 5: EAX/EBX report the smallest/largest monitor-line size
        cpuid(5, &eax, &ebx, &ecx, &edx);
        debug(SUBSYS_STARTUP, "MONITOR/MWAIT supported: "
              "min size %"PRIu32" bytes, max %"PRIu32" bytes. %s %s\n",
              eax, ebx, (ecx & 2) ? "IBE" : "", (ecx & 1) ? "EMX" : "");
    }
}

/**
 * \brief Continue kernel initialization in kernel address space.
 *
 * This function resets paging to map out low memory and map in physical
 * address space, relocating all remaining data structures. It resets the
 * Global Descriptor Table for flat mode and to exclude legacy segments from
 * boot initialization code. It sets up the IDT for exception and interrupt
 * handling, initializes the local APIC and enables interrupts. After that it
 * calls kernel_startup(), which should not return (if it does, this function
 * halts the kernel).
 */
static void  __attribute__ ((noreturn, noinline)) text_init(void)
{
    // Relocate global to "memory"
    global = (struct global*)local_phys_to_mem((lpaddr_t)addr_global);

    // Relocate glbl_core_data to "memory"
    glbl_core_data = (struct x86_core_data *)
        local_phys_to_mem((lpaddr_t)glbl_core_data);

    // Map-out low memory
    paging_x86_32_reset();

    /*
     * Use new physical address space for video memory -- no calls to
     * functions that end up calling a conio.c function may be made between
     * paging_reset() and conio_relocate_vidmem()!
     */
    conio_relocate_vidmem(local_phys_to_mem(VIDEO_MEM));

    /*
     * Also reset the global descriptor table (GDT), so we get
     * segmentation again and can catch interrupts/exceptions (the IDT
     * needs the GDT).
     */
    gdt_reset();

    // Arch-independent early startup
    kernel_startup_early();

    // XXX: re-init the serial driver, in case the port changed after parsing args
    serial_console_init(false);

    // Setup IDT
    setup_default_idt();
    idt_initialized = true;

    // Initialize local APIC
    apic_init();

    // do not remove/change this printf: needed by regression harness
    printf("Barrelfish CPU driver starting on x86_32 core %u\n", apic_id);

    if(apic_is_bsp()) {
        // Initialize classic (8259A) PIC
        pic_init();
    }

    // Initialize real-time clock
    rtc_init();

    // Initialize local APIC timer
    if (kernel_ticks_enabled) {
        timing_calibrate();
        apic_timer_init(false, true);
        timing_apic_timer_set_ms(kernel_timeslice);
    } else {
        printk(LOG_WARN, "APIC timer disabled: NO timeslicing\n");
        apic_mask_timer();
    }

    // Initialize IPI notification mechanism
    ipi_notify_init();

    // Enable SYSCALL/SYSRET fast system calls (currently disabled)
    /* enable_fast_syscalls(); */

#ifdef CONFIG_NXE
    // Enable "no execute" page-level protection bit
    addmsr(MSR_IA32_EFER, IA32_EFER_NXE);
#endif

    // Enable FPU and MMX
    enable_fpu();

    // Enable user-mode RDPMC opcode
    enable_user_rdpmc();

    // AMD K8: re-enable the TLB flush filter if it has been disabled
    enable_tlb_flush_filter();

    // Enable global pages
    enable_pge();

    // Check/Enable MONITOR/MWAIT opcodes
    enable_monitor_mwait();

    // Setup Page Attribute Table MSR
    configure_page_attribute_table();

    // Call main kernel startup function -- this should never return
    kernel_startup();

    halt();
    // Returning here will crash! -- low pages not mapped anymore!
}

/**
 * \brief Architecture-specific initialization function.
 *
 * This function is called by the bootup code in boot.S to initialize
 * architecture-specific stuff. It is expected to call the kernel main
 * loop. This function never returns.
 *
 * The kernel expects one of two magic values in 'magic' that determine how it
 * has been booted. If 'magic' is #MULTIBOOT_INFO_MAGIC the kernel has been
 * booted by a (Multiboot-compliant) bootloader and this is the first image on
 * the boot CPU. It will relocate itself to a default position. If 'magic' is
 * #KERNEL_BOOT_MAGIC it has been booted by another image of itself and is
 * running on a (so-called) application CPU. It expects 'dest' to be a physical
 * address pointing to the base of a memory area to relocate itself to.
 *
 * After performing some sanity checks on the kernel image, this function
 * relocates the image to its alias at the top of the virtual address space,
 * relocates the stack, and calls the aliased text_init() to continue
 * initialization. text_init() never returns.
 *
 * For BSP kernels, the void pointer is of type multiboot_info; for
 * application CPUs, it is of type global. Global carries a pointer to
 * multiboot_info. Global also contains pointers to memory that is shared
 * between kernels.
 *
 * \param magic         Boot magic value
 * \param pointer       Pointer to Multiboot Info or to Global structure
 */
void arch_init(uint32_t magic, void *pointer)
{
    // Sanitize the screen
    conio_cls();
    // Initialize serial; only initialize the HW if we are the first kernel
    serial_console_init((magic == MULTIBOOT_INFO_MAGIC));

    /* round the kernel start address up to a page boundary */
    lvaddr_t dest = (lvaddr_t)&_start_kernel;
    if (dest & (BASE_PAGE_SIZE - 1)) {
        dest &= ~(BASE_PAGE_SIZE - 1);
        dest += BASE_PAGE_SIZE;
    }
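    /* e.g. (hypothetical) a start address of 0x101234 rounds up to
       0x102000 with 4 KB base pages */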

    // XXX: print kernel address for debugging with gdb
    printf("Kernel starting at address 0x%"PRIxLVADDR"\n", local_phys_to_mem(dest));

    void __attribute__ ((noreturn)) (*reloc_text_init)(void) =
        (void *)local_phys_to_mem((lpaddr_t)text_init);
    struct Elf32_Shdr *rela, *symtab;
    struct x86_coredata_elf *elf;

    /*
     * If this is the boot image, make Multiboot information structure globally
     * known. Otherwise the passed value should equal the original structure.
     * If the magic value does not match what we expect, we cannot proceed
     * safely.
     */
    switch(magic) {
    case MULTIBOOT_INFO_MAGIC:
        {
            struct multiboot_info *mb = (struct multiboot_info *)pointer;

            elf = (struct x86_coredata_elf *)&mb->syms.elf;
            // We need the ELF section header table for relocation
            if (!(mb->flags & MULTIBOOT_INFO_FLAG_HAS_ELF_SYMS)) {
                panic("Multiboot information structure does not include ELF "
                      "section header information -- relocation impossible!");
            }
            assert(mb->flags & MULTIBOOT_INFO_FLAG_HAS_MMAP);

            // Determine where free RAM starts
            memset(glbl_core_data, 0, sizeof(struct x86_core_data));
            glbl_core_data->start_free_ram =
                ROUND_UP(max(multiboot_end_addr(mb), (uintptr_t)&_end_kernel),
                         BASE_PAGE_SIZE);

            glbl_core_data->mods_addr = mb->mods_addr;
            glbl_core_data->mods_count = mb->mods_count;
            glbl_core_data->cmdline = mb->cmdline;
            glbl_core_data->mmap_length = mb->mmap_length;
            glbl_core_data->mmap_addr = mb->mmap_addr;
        }
        break;

    case KERNEL_BOOT_MAGIC:
        global = (struct global*)pointer;
        // Store the address of global to retrieve it across relocation
        addr_global = (uint32_t)global;
        memset(&global->locks, 0, sizeof(global->locks));
        struct x86_core_data *core_data =
            (struct x86_core_data*)(dest - BASE_PAGE_SIZE);
        glbl_core_data = core_data;
        glbl_core_data->cmdline = (lpaddr_t)&core_data->kernel_cmdline;
        my_core_id = core_data->dst_core_id;
        kcb_current = (struct kcb*) (lpaddr_t)glbl_core_data->kcb;
        elf = &core_data->elf;
        break;

    default:
        panic("Magic value does not match! (0x%x != 0x%"PRIx32" != 0x%x)",
              KERNEL_BOOT_MAGIC, magic, MULTIBOOT_INFO_MAGIC);
        break;
    }

    if(magic != KERNEL_BOOT_MAGIC) {
        // Construct the global structure and store its address to retrieve
        // it across relocation
        memset(&global->locks, 0, sizeof(global->locks));
        addr_global            = (uint32_t)global;
    }

    // Sanity check: we can only process section headers of the expected size
    if (elf->size != sizeof(struct Elf32_Shdr)) {
        panic("ELF section header entry size mismatch!");
    }

    // Find relocation section
    rela = elf32_find_section_header_type((struct Elf32_Shdr *)
                                          (lpaddr_t)elf->addr,
                                          elf->num, SHT_REL);

    if (rela == NULL) {
        panic("Kernel image does not include relocation section!");
    }

    // Find symbol table section
    symtab = elf32_find_section_header_type((struct Elf32_Shdr *)
                                            (lpaddr_t)elf->addr,
                                            elf->num, SHT_DYNSYM);

    if (symtab == NULL) {
        panic("Kernel image does not include symbol table!");
    }

    // Kernel has to fit in mappable area
    assert((lvaddr_t)&_end_kernel < X86_32_PADDR_SPACE_LIMIT);

    // Map alias at MEMORY_OFFSET
    paging_init();

#ifdef CONFIG_PAE
    // Put CPU in PAE mode
    enable_pae();
#elif defined(CONFIG_PSE)
    // Enable page-size extensions
    enable_pse();
#endif

    // Enable paging
    enable_pg();

    // Relocate kernel image to its alias at the top of the address space
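    /* sh_addr values are link-time addresses based at
       X86_32_START_KERNEL_PHYS; rebase them onto this copy of the image
       at &_start_kernel before dereferencing */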
    elf32_relocate(X86_32_MEMORY_OFFSET + (lvaddr_t)&_start_kernel,
                   (lvaddr_t)&_start_kernel,
                   (struct Elf32_Rel *)(rela->sh_addr - X86_32_START_KERNEL_PHYS + &_start_kernel),
                   rela->sh_size,
                   (struct Elf32_Sym *)(symtab->sh_addr - X86_32_START_KERNEL_PHYS + &_start_kernel),
                   symtab->sh_size,
                   X86_32_START_KERNEL_PHYS, &_start_kernel);

    /*** Aliased kernel available now -- low memory still mapped ***/

    // Relocate stack to aliased location
    relocate_stack(X86_32_MEMORY_OFFSET);

    // Call aliased text_init() function and continue initialization
    reloc_text_init();
}