1/**
2 * \file
3 * \brief x86-64 architecture initialization.
4 */
5
6/*
7 * Copyright (c) 2007, 2008, 2009, 2010, 2011, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <kernel.h>
16#include <string.h>
17#include <stdio.h>
18#include <paging_kernel_arch.h>
19#include <elf/elf.h>
20#include <init.h>
21#include <irq.h>
22#include <x86.h>
23#include <serial.h>
24#include <kernel_multiboot.h>
25#include <kernel_boot_param.h>
26#include <syscall.h>
27#include <getopt/getopt.h>
28#include <exec.h>
29#include <kputchar.h>
30#include <arch/x86/conio.h>
31#include <arch/x86/pic.h>
32#include <arch/x86/apic.h>
33#include <arch/x86/mcheck.h>
34#include <arch/x86/perfmon.h>
35#include <arch/x86/rtc.h>
36#include <target/x86/barrelfish_kpi/coredata_target.h>
37#include <arch/x86/timing.h>
38#include <arch/x86/startup_x86.h>
39#include <arch/x86/start_aps.h>
40#include <arch/x86/ipi_notify.h>
41#include <barrelfish_kpi/cpu_arch.h>
42#include <target/x86_64/barrelfish_kpi/cpu_target.h>
43#include <barrelfish_kpi/asm_inlines_arch.h>
44#include <coreboot.h>
45#include <kcb.h>
46#include <systime.h>
47#include <xeon_phi.h>
48#include <xeon_phi/xeon_phi.h>
49
50#include <dev/xapic_dev.h> // XXX
51#include <dev/ia32_dev.h>
52#include <dev/amd64_dev.h>
53
54#include <linux_host.h>
55
extern lvaddr_t kernel_sbox_base_address; // SBOX MMIO base; assigned in text_init()

/**
 * Used to store the address of global struct passed during boot across kernel
 * relocations. (Re-read by text_init() after the page tables are switched.)
 */
static uint64_t addr_global;

/**
 * \brief Kernel stack.
 *
 * This is the one and only kernel stack for a kernel instance.
 * Its top end is installed as the ring-0 stack (tss.rsp[0]) in gdt_reset().
 */
uintptr_t k1om_kernel_stack[K1OM_KERNEL_STACK_SIZE / sizeof(uintptr_t)];

/**
 * \brief Global Task State Segment (TSS).
 *
 * This is the global, static and only Task State Segment (TSS). It is used
 * for interrupt and exception handling (stack setup) while in user-space.
 * Its base address is patched into the TSS GDT entries by gdt_reset().
 */
static struct task_state_segment tss __attribute__ ((aligned (4)));
78
79/**
80 * \brief Global Descriptor Table (GDT) for processor this kernel is running on.
81 *
82 * This descriptor table is completely static, as segments are basically
83 * turned off in 64-bit mode. They map flat-mode code and stack segments for
84 * both kernel- and user-space and the only Task State Segment (TSS).
85 */
union segment_descriptor gdt[] __attribute__ ((aligned (4))) = {
    [NULL_SEL] = {   // Null segment (required first entry)
        .raw = 0
    },
    [KCODE_SEL] = {   // Kernel code segment (flat, 64-bit, ring 0)
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,              // code segment: execute/read
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,           // 64-bit code segment
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [KSTACK_SEL] = {   // Kernel stack segment (flat data, ring 0)
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,                // data segment: read/write
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [USTACK_SEL] = {   // User stack segment (flat data, ring 3)
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,                // data segment: read/write
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [UCODE_SEL] = {   // User code segment (flat, 64-bit, ring 3)
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,              // code segment: execute/read
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    // System descriptors (TSS, LDT) are 16 bytes wide in long mode and
    // therefore occupy two consecutive 8-byte GDT slots each.
    [TSS_LO_SEL] = {   // Global Task State Segment (TSS), lower 8 bytes
        .sys_lo = {
            .lo_limit = sizeof(tss) & 0xffff,
            .type = SDT_SYSTSS,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = (sizeof(tss) >> 16) & 0xf,
            .available = 0,
            .granularity = 0,
        }
        // Base address fields are filled in at runtime by gdt_reset()
    },
    [TSS_HI_SEL] = {   // Global Task State Segment (TSS), upper 8 bytes
        .sys_hi = {
            .base = 0   // filled in by gdt_reset()
        }
    },
    [LDT_LO_SEL] = {    // Local descriptor table (LDT), lower 8 bytes
        .sys_lo = {
            .lo_limit = 0, // # 4k pages (since granularity = 1)
            .lo_base = 0, // changed by context switch path when doing lldt
            .type = 2, // LDT
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0,
            .available = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [LDT_HI_SEL] = {    // Local descriptor table (LDT), upper 8 bytes
        .sys_hi = {
            .base = 0 // changed by context switch path when doing lldt
        }
    },
};

// Convenience pointer to the (runtime-rewritten) LDT slot in the GDT.
union segment_descriptor *ldt_descriptor = &gdt[LDT_LO_SEL];
191
192/**
193 * Bootup PML4, used to map both low (identity-mapped) memory and relocated
194 * memory at the same time.
195 */
static union x86_64_pdir_entry boot_pml4[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));

/**
 * Bootup low-map PDPT and hi-map PDPT.
 */
static union x86_64_pdir_entry boot_pdpt[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE))),
    boot_pdpt_hi[PTABLE_SIZE] __attribute__ ((aligned(BASE_PAGE_SIZE)));

/**
 * Bootup low-map PDIR, hi-map PDIR, and 1GB PDIR.
 */
static union x86_64_ptable_entry boot_pdir[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));

static union x86_64_ptable_entry boot_pdir_hi[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));

static union x86_64_ptable_entry boot_pdir_1GB[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));

// PDIR used to identity-map the Xeon Phi SBOX MMIO region (serial output);
// see paging_init().
static union x86_64_ptable_entry boot_pdir_mmio[PTABLE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));

/**
 * This flag is set to true once the IDT is initialized and exceptions can be
 * caught.
 */
bool idt_initialized = false;
226
227/**
228 * \brief Setup bootup page table.
229 *
230 * This function sets up the page table needed to boot the kernel
231 * proper.  The table identity maps the first 1 GByte of physical
232 * memory in order to have access to various data structures and the
233 * first MByte containing bootloader-passed data structures. It also
234 * identity maps the local copy of the kernel in low memory and
235 * aliases it in kernel address space.
236 *
237 * \param base  Start address of kernel image in physical address space.
238 * \param size  Size of kernel image.
239 */
static void paging_init(lpaddr_t base, size_t size)
{
    lvaddr_t vbase = local_phys_to_mem(base);

    // Align base to kernel page size (growing size to compensate)
    if(base & X86_64_MEM_PAGE_MASK) {
        size += base & X86_64_MEM_PAGE_MASK;
        base -= base & X86_64_MEM_PAGE_MASK;
    }

    // Align vbase to kernel page size
    if(vbase & X86_64_MEM_PAGE_MASK) {
        vbase -= vbase & X86_64_MEM_PAGE_MASK;
    }

    // Align size to kernel page size (round up)
    if(size & X86_64_MEM_PAGE_MASK) {
        size += X86_64_MEM_PAGE_SIZE - (size & X86_64_MEM_PAGE_MASK);
    }

    // XXX: Cannot currently map more than one table of pages
    assert(size <= X86_64_MEM_PAGE_SIZE * X86_64_PTABLE_SIZE);
/*     assert(size <= MEM_PAGE_SIZE); */

    // Map the kernel image with one PDIR-level (large) page per iteration.
    for(size_t i = 0; i < size; i += X86_64_MEM_PAGE_SIZE,
            base += X86_64_MEM_PAGE_SIZE, vbase += X86_64_MEM_PAGE_SIZE) {
        // No kernel image above 4 GByte
        assert(base < ((lpaddr_t)4 << 30));

        // Identity-map the kernel's physical region, so we don't lose ground
        paging_x86_64_map_table(&boot_pml4[X86_64_PML4_BASE(base)], (lpaddr_t)boot_pdpt);
        paging_x86_64_map_table(&boot_pdpt[X86_64_PDPT_BASE(base)], (lpaddr_t)boot_pdir);
        paging_x86_64_map_large(&boot_pdir[X86_64_PDIR_BASE(base)], base, PTABLE_PRESENT
                                | PTABLE_READ_WRITE | PTABLE_USER_SUPERVISOR);

        // Alias the same region at MEMORY_OFFSET
        paging_x86_64_map_table(&boot_pml4[X86_64_PML4_BASE(vbase)], (lpaddr_t)boot_pdpt_hi);
        paging_x86_64_map_table(&boot_pdpt_hi[X86_64_PDPT_BASE(vbase)], (lpaddr_t)boot_pdir_hi);
        paging_x86_64_map_large(&boot_pdir_hi[X86_64_PDIR_BASE(vbase)], base, PTABLE_PRESENT
                                | PTABLE_READ_WRITE | PTABLE_USER_SUPERVISOR);
    }

    // Identity-map the first 1G of physical memory for bootloader data.
    // NOTE(review): this re-points boot_pdpt[0] to boot_pdir_1GB; if the
    // kernel image lies below 1 GB, the identity mapping installed in the
    // loop above is superseded by this 1 GB identity map -- presumably
    // intentional, since it also covers the kernel. Confirm.
    paging_x86_64_map_table(&boot_pml4[0], (lpaddr_t)boot_pdpt);
    paging_x86_64_map_table(&boot_pdpt[0], (lpaddr_t)boot_pdir_1GB);
    for (int i = 0; i < X86_64_PTABLE_SIZE; i++) {
        paging_x86_64_map_large(&boot_pdir_1GB[X86_64_PDIR_BASE(X86_64_MEM_PAGE_SIZE * i)],
                                X86_64_MEM_PAGE_SIZE * i, PTABLE_PRESENT
                                | PTABLE_READ_WRITE | PTABLE_USER_SUPERVISOR);
    }

    /*
     * Identity MAP MMIO Register Region for "Serial Out" support
     * 0x08007D0000ULL
     *
     * PML4[0], PDIR[32]
     *
     * Mapped uncached (PTABLE_CACHE_DISABLED) as required for device MMIO.
     */
    paging_x86_64_map_table(&boot_pml4[X86_64_PML4_BASE(local_phys_to_mem(XEON_PHI_SBOX_BASE))],
                          (lpaddr_t) boot_pdpt_hi);
    paging_x86_64_map_table(&boot_pdpt_hi[X86_64_PDPT_BASE(local_phys_to_mem(XEON_PHI_SBOX_BASE))],
                          (lpaddr_t) boot_pdir_mmio);

    paging_x86_64_map_large(&boot_pdir_mmio[X86_64_PDIR_BASE(local_phys_to_mem(XEON_PHI_SBOX_BASE))],
                          XEON_PHI_SBOX_BASE,
                          PTABLE_PRESENT | PTABLE_READ_WRITE | PTABLE_USER_SUPERVISOR
                          | PTABLE_CACHE_DISABLED);

    // Activate new page tables (load CR3 with the boot PML4)
    paging_x86_64_context_switch((lpaddr_t)boot_pml4);
}
310
311/**
312 * \brief Setup default GDT.
313 *
314 * Loads the GDT register with the default GDT and reloads CS and SS
315 * to point to the new entries. Resets all other segment registers to null.
316 * Finally, completes setup of GDT to include TSS base address mapping and
317 * loads TSS into task register.
318 */
static void gdt_reset(void)
{
    lvaddr_t                     ptss = (lvaddr_t)&tss;
    struct region_descriptor    region = {
        .rd_limit = sizeof(gdt),
        .rd_base = (uint64_t)&gdt
    };

    // Load default GDT
    __asm volatile("lgdt %[region]" :: [region] "m" (region));

    // Reload segments: DS/ES/GS/FS get the null selector, SS the kernel
    // stack selector. CS cannot be written with mov; instead we push the
    // new CS and a return address and execute a far return (lretq), which
    // reloads CS and lands at label 1.
    __asm volatile("mov %[null], %%ds      \n\t"
                   "mov %[null], %%es      \n\t"
                   "mov %[ss], %%ss        \n\t"
                   "mov %[null], %%gs      \n\t"
                   "mov %[null], %%fs      \n\t"
                   "pushq %[cs]            \n\t"          // new CS
                   "lea 1f(%%rip), %%rax   \n\t"          // jumps to after lret
                   "pushq %%rax            \n\t"          // new IP
                   "lretq                  \n\t"          // fake return
                   "1:                     \n\t"          // we'll continue here
                   : /* No Output */
                   :
                   [null] "r" (0),
                   [ss] "r" (GSEL(KSTACK_SEL, SEL_KPL)),
                   [cs] "i" (GSEL(KCODE_SEL, SEL_KPL))
                   : "rax"
                   );

    // Complete setup of TSS descriptor (by inserting base address of TSS)
    gdt[TSS_LO_SEL].sys_lo.lo_base = ptss & 0xffffff;       // base bits  0..23
    gdt[TSS_LO_SEL].sys_lo.hi_base = (ptss >> 24) & 0xff;   // base bits 24..31
    gdt[TSS_HI_SEL].sys_hi.base = ptss >> 32;               // base bits 32..63

    // Complete setup of TSS: ring-0 stack = top of the kernel stack array
    tss.rsp[0] = (lvaddr_t) &k1om_kernel_stack[K1OM_KERNEL_STACK_SIZE
                    / sizeof(uintptr_t)];

    // Load task state register
    __asm volatile("ltr %%ax" :: "a" (GSEL(TSS_LO_SEL, SEL_KPL)));
}
361
362/**
363 * \brief Relocates the active stack.
364 *
365 * This function relocates the stack, by adding 'offset' to the stack
366 * pointer.
367 *
368 * \param offset        Offset to add to the stack pointer.
369 */
static inline void __attribute__ ((always_inline))
relocate_stack(lvaddr_t offset)
{
    // Must be inlined: a real call would return via the old stack pointer.
    // The asm simply adds 'offset' to RSP; the caller continues on the
    // aliased stack, which maps the same physical memory.
    __asm volatile("add %[stack], %%rsp\n\t"
                   : /* No output */
                   : [stack] "er" (offset)
                   : "rsp"
                   );
}
379
380/**
381 * \brief Enable SYSCALL/SYSRET fast system calls.
382 *
383 * This function enables the SYSCALL/SYSRET pair of fast system calls in
384 * long mode. Also sets the IA32_STAR and IA32_FMASK MSRs to point to the
385 * user-space base selector and RFLAGS mask for SYSCALL/SYSRET fast system
386 * calls.
387 */
static inline void enable_fast_syscalls(void)
{
    // Segment selector bases for both kernel- and user-space for fast
    // system calls. SYSCALL loads CS from STAR.call; SYSRET derives user
    // CS/SS from STAR.ret (given the GDT layout above, KSTACK_SEL + 8 is
    // USTACK_SEL and + 16 is UCODE_SEL, as the hardware expects).
    ia32_star_t star = ia32_star_rd(NULL);
    star = ia32_star_call_insert(star, GSEL(KCODE_SEL,  SEL_KPL));
    star = ia32_star_ret_insert( star, GSEL(KSTACK_SEL, SEL_UPL));
    ia32_star_wr(NULL, star);

    // Set ia32_lstar MSR to point to kernel-space system call multiplexer
    ia32_lstar_wr(NULL, (lvaddr_t)syscall_entry);

    // Set IA32_FMASK MSR for our OSes EFLAGS mask
    // We mask out everything (including interrupts).
    ia32_fmask_v_wrf(NULL, ~(RFLAGS_ALWAYS1) );

    // Enable fast system calls (SCE bit in EFER)
    ia32_efer_sce_wrf(NULL, 1);
}
407
408static inline void enable_tlb_flush_filter(void)
409{
410    uint32_t eax, ebx, ecx, edx;
411
412    // Must read "AuthenticAMD"
413    cpuid(0, &eax, &ebx, &ecx, &edx);
414    if(ebx != 0x68747541 || ecx != 0x444d4163 || edx != 0x69746e65) {
415        return;
416    }
417
418    // Is at least family 0fh?
419    cpuid(1, &eax, &ebx, &ecx, &edx);
420    if(((eax >> 8) & 0xf) != 0xf) {
421        return;
422    }
423
424    debug(SUBSYS_STARTUP, "Enabling TLB flush filter\n");
425    ia32_amd_hwcr_ffdis_wrf(NULL, 1);
426}
427
428/**
429 * \brief Continue kernel initialization in kernel address space.
430 *
431 * This function resets paging to map out low memory and map in physical
432 * address space, relocating all remaining data structures. It resets the
433 * Global Descriptor Table for flat mode and to exclude legacy segments from
434 * boot initialization code. It sets up the IDT for exception and interrupt
435 * handling, initializes the local APIC and enables interrupts. After that it
436 * calls kernel_startup(), which should not return (if it does, this function
437 * halts the kernel).
438 */
static void  __attribute__ ((noreturn, noinline)) text_init(void)
{
    // Reset global and locks to point to the memory in the pristine image
    // (addr_global was stashed by arch_init() before relocation)
    global = (struct global*)addr_global;

    // Publish the kernel-virtual address of the SBOX MMIO region
    kernel_sbox_base_address = local_phys_to_mem(XEON_PHI_SBOX_BASE);

    // re-initialize the console with the relocated address
    serial_console_init(0);
    /*
     * Reset paging once more to use relocated data structures and map in
     * whole of kernel and available physical memory. Map out low memory.
     */
    paging_x86_64_reset();

    // Relocate global to "memory"
    global = (struct global*)local_phys_to_mem((lpaddr_t)global);

    // Relocate glbl_core_data to "memory"
    glbl_core_data = (struct x86_core_data *)
        local_phys_to_mem((lpaddr_t)glbl_core_data);

    /*
     * We know how much memory we have based on the card model
     */
    if (paging_x86_64_map_memory(0, K1OM_PHYSICAL_MEMORY_SIZE) != 0) {
        panic("error while mapping physical memory!");
    }

    /*
     * Also reset the global descriptor table (GDT), so we get
     * segmentation again and can catch interrupts/exceptions (the IDT
     * needs the GDT).
     */
    gdt_reset();

    // Arch-independent early startup
    kernel_startup_early();

    // Setup IDT; from here on exceptions can be caught
    setup_default_idt();
    idt_initialized = true;

    // initialize the Xeon Phi
    xeon_phi_init_early();

    // Enable machine check reporting
    mcheck_init();

    /**
     * 2.1.8.2.2 Interrupt Handling
     *
     * There are three different types of interrupt flows that are supported in
     * the Intel Xeon Phi coprocessor:
     *
     * + Local Interrupts These are the interrupts that are destined for one
     *   (or more) of the Intel Xeon Phi coprocessor cores located on the
     *   originating device. They appear in the form of APIC messages on the
     *   APIC serial bus.
     * + Remote Interrupts These are the interrupts which are destined for one
     *   (or more) of the Intel Xeon Phi coprocessor cores in other Intel Xeon
     *   PhiTM coprocessor devices. They appear as MMIO accesses on the PEG port.
     * + System Interrupts These are the interrupts which are destined for
     *   the host processor(s). They appear as INTx/MSI/MSI-X messages on the
     *   PEG port, depending upon the PCI configuration settings.
     */

    // Initialize local APIC
    apic_init();

    // do not remove/change this printf: needed by regression harness
    printf("Barrelfish CPU driver starting on k1om apic_id %u\n", apic_id);

    // Initialize local APIC timer
    if (kernel_ticks_enabled) {
        timing_calibrate();
        apic_timer_init(false, false);
        apic_timer_set_divide(xapic_by1);
        // config_timeslice is in milliseconds; convert to system time units
        kernel_timeslice = ns_to_systime(config_timeslice * 1000000);
#ifndef CONFIG_ONESHOT_TIMER
        systime_set_timer(kernel_timeslice);
#endif
    } else {
        printk(LOG_WARN, "APIC timer disabled: NO timeslicing\n");
        apic_mask_timer();
    }

    // Initialize IPI notification mechanism
    ipi_notify_init();

    // Enable SYSCALL/SYSRET fast system calls
    /*
     * NOTE: the xeon phi does not support SYSENTER/SYSEXIT
     */
    enable_fast_syscalls();

    // Enable "no execute" page-level protection bit
    ia32_efer_nxe_wrf(NULL, 1);

    // Enable FPU and MMX
    enable_fpu();

    // Enable user-mode RDPMC opcode
    amd64_cr4_pce_wrf(NULL, 1);

    // AMD64: Check if TLB flush filter is enabled
    enable_tlb_flush_filter();

    // Call main kernel startup function -- this should never return
    kernel_startup();

    halt();
    // Returning here will crash! -- low pages not mapped anymore!
}
553
554/**
555 * \brief Architecture-specific initialization function.
556 *
557 * This function is called by the bootup code in boot.S to initialize
558 * architecture-specific stuff. It is expected to call the kernel main
559 * loop. This function never returns.
560 *
561 * The kernel expects one of two magic values in 'magic' that determine how it
562 * has been booted. If 'magic' is #MULTIBOOT_INFO_MAGIC the kernel has been
563 * booted by a (Multiboot-compliant) bootloader and this is the first image on
564 * the boot CPU. It will relocate itself to a default position. If 'magic' is
565 * #KERNEL_BOOT_MAGIC it has been booted by another image of itself and is
566 * running on an (so-called) application CPU.
567 *
568 * This function sets up new page tables to alias the kernel
569 * at #MEMORY_OFFSET. It also does any relocations necessary to the
570 * "position-independent" code to make it run at the new location (e.g.
571 * relocating the GOT). After all relocations, it calls text_init() of
572 * the relocated image, which destroys the lower alias and may never return.
573 *
574 * For bsp kernels, the void pointer is of type multiboot_info, for application
575 * CPUs, it is of type global. Global carries a pointer to multiboot_info.
576 * Global also contains pointers to memory that is shared between kernels.
577 *
578 * \param magic         Boot magic value
579 * \param pointer       Pointer to Multiboot Info or to Global structure
580 */
581void arch_init(uint64_t magic,
582               void *pointer)
583{
584    /* pointer to the boot param struct set up by the boot loader */
585    struct multiboot_info *mb = NULL;
586
587    /* initialize the console port to the host */
588    serial_console_init(0);
589
590    /* notify the host that we are running */
591    notify_host();
592
593    void __attribute__ ((noreturn))
594    (*reloc_text_init)(void) = (void *)local_phys_to_mem((lpaddr_t)text_init);
595
596    /* determine page-aligned physical address past end of multiboot */
597    lvaddr_t dest = (lvaddr_t) &_start_kernel;
598    if (dest & (BASE_PAGE_SIZE - 1)) {
599        dest &= ~(BASE_PAGE_SIZE - 1);
600        dest += BASE_PAGE_SIZE;
601    }
602
603    apic_bsp = magic == K1OM_BOOT_MAGIC;
604
605    // XXX: print kernel address for debugging with gdb
606    printf("Kernel starting at address 0x%"PRIxLVADDR"\n", local_phys_to_mem(dest));
607
608    /*
609     * If this is the boot image, make Multiboot information structure globally
610     * known. Otherwise the passed value should equal the original structure.
611     * If magic value does not match what we expect, we cannot proceed safely.
612     */
613    switch (magic) {
614        case KERNEL_BOOT_MAGIC:
615            /* kernel is started by another kernel */
616            global = (struct global*) pointer;
617            // Store the address of global to retrive it across relocation
618            addr_global = (uint64_t) global;
619            break;
620
621        case K1OM_BOOT_MAGIC:
622            /* kernel is started by the K1OM boot loader */
623            mb = (struct multiboot_info *) pointer;
624
625            printf("Barrelfish from weever: MBI: 0x%"PRIxLVADDR", Xeon Phi: [%u]\n",
626                   (lpaddr_t) pointer,
627                   mb->xeon_phi_id);
628
629            struct xeon_phi_boot_params *bp;
630            bp = (struct xeon_phi_boot_params *) (uintptr_t) mb->config_table;
631
632            /*
633             * XXX: The multiboot structure when invoked from the xloader will
634             *      contain additional information.
635             *
636             *      CMDLINE:    the cmd line as set by the host OS
637             *      MEM_LOWER:  the start of the multiboot image
638             *      MEM_UPPER:  the end of the multiboot image
639             */
640            glbl_core_data->bp = (struct xeon_phi_boot_params *) local_phys_to_mem(mb
641                            ->config_table);
642
643            // Construct the global structure and store its address to retrieve it
644            // across relocation
645            memset(&global->locks, 0, sizeof(global->locks));
646            addr_global = (uint64_t) global;
647
648            break;
649
650        default:
651            addr_global = (uint64_t) global;
652            break;
653    }
654
655    struct Elf64_Shdr *rela, *symtab;
656    struct x86_coredata_elf *elf;
657    uint32_t multiboot_flags;
658    if (mb != NULL) { /* Multiboot info was passed */
659        multiboot_flags = mb->flags;
660        elf = (struct x86_coredata_elf *)&mb->syms.elf;
661
662        // We need the ELF section header table for relocation
663        if (!(multiboot_flags & MULTIBOOT_INFO_FLAG_HAS_ELF_SYMS)) {
664            panic("Multiboot information structure does not include ELF section"
665                  "header information -- Relocation impossible!");
666        }
667
668        // Determine where free RAM starts
669        glbl_core_data->start_free_ram =
670            ROUND_UP(max(multiboot_end_addr(mb), (uintptr_t)&_end_kernel),
671                     BASE_PAGE_SIZE);
672        printf("Start Free RAM at 0x%x (%i MB)\n",
673               glbl_core_data->start_free_ram,
674               glbl_core_data->start_free_ram >> 20);
675
676        glbl_core_data->mods_addr = mb->mods_addr;
677        glbl_core_data->mods_count = mb->mods_count;
678        glbl_core_data->cmdline = mb->cmdline;
679        glbl_core_data->mmap_length = mb->mmap_length;
680        glbl_core_data->mmap_addr = mb->mmap_addr;
681        glbl_core_data->xeon_phi_id = mb->xeon_phi_id;
682    } else {
683        /* No multiboot info, use the core_data struct */
684        struct x86_core_data *core_data =
685            (struct x86_core_data*)(dest - BASE_PAGE_SIZE);
686        multiboot_flags = core_data->multiboot_flags;
687        elf = &core_data->elf;
688        glbl_core_data = core_data;
689        core_data->cmdline = (lpaddr_t)&core_data->kernel_cmdline;
690        my_core_id = core_data->dst_core_id;
691
692        kcb_current = (struct kcb*) glbl_core_data->kcb;
693        if (core_data->module_end > 4ul * (1ul << 30)) {
694            panic("The cpu module is outside the initial 4GB mapping."
695                  " Either move the module or increase initial mapping.");
696        }
697    }
698
699    // We're only able to process Elf64_Rela entries
700    if (elf->size != sizeof(struct Elf64_Shdr)) {
701        panic("ELF section header entry size mismatch!");
702    }
703
704    // Find relocation section
705    rela = elf64_find_section_header_type((struct Elf64_Shdr *)
706                                          (lpaddr_t)elf->addr,
707                                          elf->num, SHT_RELA);
708    if (rela == NULL) {
709        panic("Kernel image does not include relocation section!");
710    }
711
712    // Find symbol table section
713    symtab = elf64_find_section_header_type((struct Elf64_Shdr *)
714                                            (lpaddr_t)elf->addr,
715                                            elf->num, SHT_DYNSYM);
716    if (symtab == NULL) {
717        panic("Kernel image does not include symbol table!");
718    }
719
720    // Alias kernel on top of memory, keep low memory
721    paging_init((lpaddr_t)&_start_kernel, SIZE_KERNEL_IMAGE);
722
723    // Relocate kernel image for top of memory
724    elf64_relocate(K1OM_MEMORY_OFFSET + (lvaddr_t)&_start_kernel,
725                   (lvaddr_t)&_start_kernel,
726                   (struct Elf64_Rela *)(rela->sh_addr - K1OM_START_KERNEL_PHYS + &_start_kernel),
727                   rela->sh_size,
728                   (struct Elf64_Sym *)(symtab->sh_addr - K1OM_START_KERNEL_PHYS + &_start_kernel),
729                   symtab->sh_size,
730                   K1OM_START_KERNEL_PHYS, &_start_kernel);
731    /*** Aliased kernel available now -- low memory still mapped ***/
732
733    // Relocate stack to aliased location
734    relocate_stack(K1OM_MEMORY_OFFSET);
735
736    // Call aliased text_init() function and continue initialization
737    reloc_text_init();
738}
739