1/*
2 * Copyright 2014, General Dynamics C4 Systems
3 *
4 * This software may be distributed and modified according to the terms of
5 * the GNU General Public License version 2. Note that NO WARRANTY is provided.
6 * See "LICENSE_GPLv2.txt" for details.
7 *
8 * @TAG(GD_GPL)
9 */
10
11#include <config.h>
12#include <util.h>
13#include <machine/io.h>
14#include <arch/machine.h>
15#include <arch/kernel/apic.h>
16#include <arch/kernel/cmdline.h>
17#include <arch/kernel/boot.h>
18#include <arch/kernel/boot_sys.h>
19#include <arch/kernel/smp_sys.h>
20#include <arch/kernel/vspace.h>
21#include <arch/kernel/elf.h>
22#include <smp/lock.h>
23#include <linker.h>
24#include <plat/machine/acpi.h>
25#include <plat/machine/devices.h>
26#include <plat/machine/pic.h>
27#include <plat/machine/ioapic.h>
28#include <arch/api/bootinfo_types.h>
29
/* addresses defined in linker script */
/* need a fake array to get the pointer from the linker script */

/* start/end of CPU boot code */
extern char boot_cpu_start[1];
extern char boot_cpu_end[1];

/* start/end of boot stack */
extern char boot_stack_bottom[1];
extern char boot_stack_top[1];

/* locations in kernel image */
extern char ki_boot_end[1];   /* end of the boot (reclaimable) part of the kernel image */
extern char ki_end[1];        /* end of the entire kernel image */
extern char ki_skim_start[1]; /* start of the SKIM window region (see map_skim_window) */
extern char ki_skim_end[1];   /* end of the SKIM window region */

#ifdef CONFIG_PRINTING
/* kernel entry point */
extern char _start[1];
#endif

/* constants */

/* 1MiB boundary: only physical memory at or above this address is registered
 * as usable RAM (see parse_mem_map / try_boot_sys_mbi2) */
#define HIGHMEM_PADDR 0x100000
55
/* type definitions (directly corresponding to abstract specification) */

/* All state accumulated while parsing bootloader-provided information and
 * probing the platform (multiboot 1/2, ACPI, CPUID), consumed later by
 * try_boot_sys()/try_boot_sys_node() when setting up the kernel proper. */
typedef struct boot_state {
    p_region_t   avail_p_reg; /* region of available physical memory on platform */
    p_region_t   ki_p_reg;    /* region where the kernel image is in */
    ui_info_t    ui_info;     /* info about userland images */
    uint32_t     num_ioapic;  /* number of IOAPICs detected */
    paddr_t      ioapic_paddr[CONFIG_MAX_NUM_IOAPIC]; /* physical addresses of detected IOAPICs */
    uint32_t     num_drhu; /* number of IOMMUs */
    paddr_t      drhu_list[MAX_NUM_DRHU]; /* list of physical addresses of the IOMMUs */
    acpi_rmrr_list_t rmrr_list; /* filled in by acpi_dmar_scan() when IOMMUs are used */
    acpi_rsdp_t  acpi_rsdp; /* copy of the rsdp */
    paddr_t      mods_end_paddr; /* physical address where boot modules end */
    paddr_t      boot_module_start; /* physical address of first boot module */
    uint32_t     num_cpus;    /* number of detected cpus */
    uint32_t     mem_lower;   /* lower memory size for boot code of APs to run in real mode */
    cpu_id_t     cpus[CONFIG_MAX_NUM_NODES];
    mem_p_regs_t mem_p_regs;  /* physical memory regions */
    seL4_X86_BootInfo_VBE vbe_info; /* Potential VBE information from multiboot */
    seL4_X86_BootInfo_mmap_t mb_mmap_info; /* memory map information from multiboot */
    seL4_X86_BootInfo_fb_t fb_info; /* framebuffer information as set by bootloader */
} boot_state_t;
78
/* Zero-initialised boot-time state; BOOT_BSS presumably places it in a
 * boot-only section so its memory can be reclaimed after boot — see linker
 * script for the section layout. */
BOOT_BSS
boot_state_t boot_state;

/* global variables (not covered by abstract specification) */

/* kernel command line options as parsed by cmdline_parse() */
BOOT_BSS
cmdline_opt_t cmdline_opt;
86
87/* check the module occupies in a contiguous physical memory region */
88BOOT_CODE static bool_t
89module_paddr_region_valid(paddr_t pa_start, paddr_t pa_end)
90{
91    int i = 0;
92    for (i = 0; i < boot_state.mem_p_regs.count; i++) {
93        paddr_t start = boot_state.mem_p_regs.list[i].start;
94        paddr_t end = boot_state.mem_p_regs.list[i].end;
95        if (pa_start >= start && pa_end < end) {
96            return true;
97        }
98    }
99    return false;
100}
101
102/* functions not modeled in abstract specification */
103
104BOOT_CODE static paddr_t
105find_load_paddr(paddr_t min_paddr, word_t image_size)
106{
107    int i;
108
109    for (i = 0; i < boot_state.mem_p_regs.count; i++) {
110        paddr_t start = MAX(min_paddr, boot_state.mem_p_regs.list[i].start);
111        paddr_t end = boot_state.mem_p_regs.list[i].end;
112        word_t region_size = end - start;
113
114        if (region_size >= image_size) {
115            return start;
116        }
117    }
118
119    return 0;
120}
121
/*
 * Validate and ELF-load the userland image found at boot_module_start,
 * placing it in physical memory at (or above) load_paddr.
 *
 * On success boot_state.ui_info is filled in (physical region, virtual
 * entry point and phys/virt offset) and the physical address one past the
 * end of the loaded image is returned. Returns 0 on any validation failure.
 */
BOOT_CODE static paddr_t
load_boot_module(word_t boot_module_start, paddr_t load_paddr)
{
    v_region_t v_reg;
    word_t entry;
    Elf_Header_t* elf_file = (Elf_Header_t*)boot_module_start;

    if (!elf_checkFile(elf_file)) {
        printf("Boot module does not contain a valid ELF image\n");
        return 0;
    }

    v_reg = elf_getMemoryBounds(elf_file);
    entry = elf_file->e_entry;

    if (v_reg.end == 0) {
        printf("ELF image in boot module does not contain any segments\n");
        return 0;
    }
    /* round the virtual footprint up to a whole number of pages */
    v_reg.end = ROUND_UP(v_reg.end, PAGE_BITS);

    printf("size=0x%lx v_entry=%p v_start=%p v_end=%p ",
           v_reg.end - v_reg.start,
           (void*)entry,
           (void*)v_reg.start,
           (void*)v_reg.end
          );

    if (!IS_ALIGNED(v_reg.start, PAGE_BITS)) {
        printf("Userland image virtual start address must be 4KB-aligned\n");
        return 0;
    }
    if (v_reg.end + 2 * BIT(PAGE_BITS) > PPTR_USER_TOP) {
        /* for IPC buffer frame and bootinfo frame, need 2*4K of additional userland virtual memory */
        printf("Userland image virtual end address too high\n");
        return 0;
    }
    /* the entry point must lie inside the image itself */
    if ((entry < v_reg.start) || (entry >= v_reg.end)) {
        printf("Userland image entry point does not lie within userland image\n");
        return 0;
    }

    /* find a physical region big enough to hold the whole image */
    load_paddr = find_load_paddr(load_paddr, v_reg.end - v_reg.start);
    assert(load_paddr);

    /* fill ui_info struct */
    boot_state.ui_info.pv_offset = load_paddr - v_reg.start;
    boot_state.ui_info.p_reg.start = load_paddr;
    load_paddr += v_reg.end - v_reg.start;
    boot_state.ui_info.p_reg.end = load_paddr;
    boot_state.ui_info.v_entry = entry;

    printf("p_start=0x%lx p_end=0x%lx\n",
           boot_state.ui_info.p_reg.start,
           boot_state.ui_info.p_reg.end
          );

    /* the image must fit inside a single usable physical memory region */
    if (!module_paddr_region_valid(
                boot_state.ui_info.p_reg.start,
                boot_state.ui_info.p_reg.end)) {
        printf("End of loaded userland image lies outside of usable physical memory\n");
        return 0;
    }

    /* initialise all initial userland memory and load potentially sparse ELF image */
    memzero(
        (void*)boot_state.ui_info.p_reg.start,
        boot_state.ui_info.p_reg.end - boot_state.ui_info.p_reg.start
    );
    elf_load(elf_file, boot_state.ui_info.pv_offset);

    return load_paddr;
}
195
/*
 * Per-node (per-CPU) kernel initialisation: map the kernel window, switch
 * to the kernel address space, optionally map the SKIM window, initialise
 * the CPU and finally the kernel's system state.
 *
 * The statement order here matters; in particular the address-space switch
 * must take effect before subsequent mapping operations.
 *
 * Returns false if any step fails.
 */
static BOOT_CODE bool_t
try_boot_sys_node(cpu_id_t cpu_id)
{
    p_region_t boot_mem_reuse_p_reg;

    if (!map_kernel_window(
                boot_state.num_ioapic,
                boot_state.ioapic_paddr,
                boot_state.num_drhu,
                boot_state.drhu_list
            )) {
        return false;
    }
    /* switch to the kernel's own address space */
    setCurrentVSpaceRoot(kpptr_to_paddr(X86_KERNEL_VSPACE_ROOT), 0);
    /* Sync up the compilers view of the world here to force the PD to actually
     * be set *right now* instead of delayed */
    asm volatile("" ::: "memory");

#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* map the SKIM (kernel image) window used for Meltdown mitigation */
    if (!map_skim_window((vptr_t)ki_skim_start, (vptr_t)ki_skim_end)) {
        return false;
    }
#endif

    /* reuse boot code/data memory */
    boot_mem_reuse_p_reg.start = PADDR_LOAD;
    boot_mem_reuse_p_reg.end = (paddr_t)ki_boot_end - KERNEL_BASE_OFFSET;

    /* initialise the CPU */
    if (!init_cpu(config_set(CONFIG_IRQ_IOAPIC) ? 1 : 0)) {
        return false;
    }

    /* initialise NDKS and kernel heap */
    if (!init_sys_state(
                cpu_id,
                boot_state.mem_p_regs,
                boot_state.ui_info,
                boot_mem_reuse_p_reg,
                /* parameters below not modeled in abstract specification */
                boot_state.num_drhu,
                boot_state.drhu_list,
                &boot_state.rmrr_list,
                &boot_state.acpi_rsdp,
                &boot_state.vbe_info,
                &boot_state.mb_mmap_info,
                &boot_state.fb_info
            )) {
        return false;
    }

    return true;
}
249
250static BOOT_CODE bool_t
251add_mem_p_regs(p_region_t reg)
252{
253    if (reg.end > PADDR_TOP) {
254        reg.end = PADDR_TOP;
255    }
256    if (reg.start > PADDR_TOP) {
257        reg.start = PADDR_TOP;
258    }
259    if (reg.start == reg.end) {
260        /* Return true here as it's not an error for there to exist memory outside the kernel window,
261         * we're just going to ignore it and leave it to be given out as device memory */
262        return true;
263    }
264    if (boot_state.mem_p_regs.count == MAX_NUM_FREEMEM_REG) {
265        printf("Dropping memory region 0x%lx-0x%lx, try increasing MAX_NUM_FREEMEM_REG\n", reg.start, reg.end);
266        return false;
267    }
268    printf("Adding physical memory region 0x%lx-0x%lx\n", reg.start, reg.end);
269    boot_state.mem_p_regs.list[boot_state.mem_p_regs.count] = reg;
270    boot_state.mem_p_regs.count++;
271    return add_allocated_p_region(reg);
272}
273
274/*
275 * the code relies that the GRUB provides correct information
276 * about the actual physical memory regions.
277 */
278static BOOT_CODE bool_t
279parse_mem_map(uint32_t mmap_length, uint32_t mmap_addr)
280{
281    multiboot_mmap_t *mmap = (multiboot_mmap_t *)((word_t)mmap_addr);
282    printf("Parsing GRUB physical memory map\n");
283
284    while ((word_t)mmap < (word_t)(mmap_addr + mmap_length)) {
285        uint64_t mem_start = mmap->base_addr;
286        uint64_t mem_length = mmap->length;
287        uint32_t type = mmap->type;
288        if (mem_start != (uint64_t)(word_t)mem_start) {
289            printf("\tPhysical memory region not addressable\n");
290        } else {
291            printf("\tPhysical Memory Region from %lx size %lx type %d\n", (long)mem_start, (long)mem_length, type);
292            if (type == MULTIBOOT_MMAP_USEABLE_TYPE && mem_start >= HIGHMEM_PADDR) {
293                if (!add_mem_p_regs((p_region_t) {
294                mem_start, mem_start + mem_length
295            })) {
296                    return false;
297                }
298            }
299        }
300        mmap++;
301    }
302    return true;
303}
304
305static BOOT_CODE bool_t
306is_compiled_for_microarchitecture(void)
307{
308    word_t microarch_generation = 0;
309    x86_cpu_identity_t *model_info = x86_cpuid_get_model_info();
310
311    if (config_set(CONFIG_ARCH_X86_SKYLAKE) ) {
312        microarch_generation = 7;
313    } else if (config_set(CONFIG_ARCH_X86_BROADWELL) ) {
314        microarch_generation = 6;
315    } else if (config_set(CONFIG_ARCH_X86_HASWELL) ) {
316        microarch_generation = 5;
317    } else if (config_set(CONFIG_ARCH_X86_IVY) ) {
318        microarch_generation = 4;
319    } else if (config_set(CONFIG_ARCH_X86_SANDY) ) {
320        microarch_generation = 3;
321    } else if (config_set(CONFIG_ARCH_X86_WESTMERE) ) {
322        microarch_generation = 2;
323    } else if (config_set(CONFIG_ARCH_X86_NEHALEM) ) {
324        microarch_generation = 1;
325    }
326
327    switch (model_info->model) {
328    case SKYLAKE_1_MODEL_ID:
329    case SKYLAKE_2_MODEL_ID:
330        if (microarch_generation > 7) {
331            return false;
332        }
333        break;
334
335    case BROADWELL_1_MODEL_ID:
336    case BROADWELL_2_MODEL_ID:
337    case BROADWELL_3_MODEL_ID:
338    case BROADWELL_4_MODEL_ID:
339    case BROADWELL_5_MODEL_ID:
340        if (microarch_generation > 6) {
341            return false;
342        }
343        break;
344
345    case HASWELL_1_MODEL_ID:
346    case HASWELL_2_MODEL_ID:
347    case HASWELL_3_MODEL_ID:
348    case HASWELL_4_MODEL_ID:
349        if (microarch_generation > 5) {
350            return false;
351        }
352        break;
353
354    case IVY_BRIDGE_1_MODEL_ID:
355    case IVY_BRIDGE_2_MODEL_ID:
356    case IVY_BRIDGE_3_MODEL_ID:
357        if (microarch_generation > 4) {
358            return false;
359        }
360        break;
361
362    case SANDY_BRIDGE_1_MODEL_ID:
363    case SANDY_BRIDGE_2_MODEL_ID:
364        if (microarch_generation > 3) {
365            return false;
366        }
367        break;
368
369    case WESTMERE_1_MODEL_ID:
370    case WESTMERE_2_MODEL_ID:
371    case WESTMERE_3_MODEL_ID:
372        if (microarch_generation > 2) {
373            return false;
374        }
375        break;
376
377    case NEHALEM_1_MODEL_ID:
378    case NEHALEM_2_MODEL_ID:
379    case NEHALEM_3_MODEL_ID:
380        if (microarch_generation > 1) {
381            return false;
382        }
383        break;
384
385    default:
386        if (!config_set(CONFIG_ARCH_X86_GENERIC)) {
387            return false;
388        }
389    }
390
391    return true;
392}
393
/*
 * Bootloader-independent part of system boot on the boot processor:
 * sanity-check the CPU and Meltdown mitigation configuration, parse the
 * ACPI tables, ELF-load and relocate the userland image, then bring up
 * node 0 (and, with SMP, the application processors).
 *
 * Expects boot_state to have been populated by try_boot_sys_mbi1/2.
 * Returns false on any failure; on success the system is ready to drop
 * to user space.
 */
static BOOT_CODE bool_t
try_boot_sys(void)
{
    paddr_t mods_end_paddr = boot_state.mods_end_paddr;
    p_region_t ui_p_regs;
    paddr_t load_paddr;

    /* the kernel image occupies [PADDR_LOAD, ki_end) in physical memory */
    boot_state.ki_p_reg.start = PADDR_LOAD;
    boot_state.ki_p_reg.end = kpptr_to_paddr(ki_end);

    if (!x86_cpuid_initialize()) {
        printf("Warning: Your x86 CPU has an unsupported vendor, '%s'.\n"
               "\tYour setup may not be able to competently run seL4 as "
               "intended.\n"
               "\tCurrently supported x86 vendors are AMD and Intel.\n",
               x86_cpuid_get_identity()->vendor_string);
    }

    if (!is_compiled_for_microarchitecture()) {
        printf("Warning: Your kernel was not compiled for the current microarchitecture.\n");
    }

    cpuid_007h_edx_t edx;
    edx.words[0] = x86_cpuid_edx(0x7, 0);
    /* see if we can definitively say whether or not we need the skim window by
     * checking whether the CPU is vulnerable to rogue data cache loads (rdcl) */
    if (cpuid_007h_edx_get_ia32_arch_cap_msr(edx)) {
        ia32_arch_capabilities_msr_t cap_msr;
        cap_msr.words[0] = x86_rdmsr(IA32_ARCH_CAPABILITIES_MSR);
        if (ia32_arch_capabilities_msr_get_rdcl_no(cap_msr) && config_set(CONFIG_KERNEL_SKIM_WINDOW)) {
            printf("CPU reports not vulnerable to Rogue Data Cache Load (aka Meltdown https://meltdownattack.com) "
                   "yet SKIM window is enabled. Performance is needlessly being impacted, consider disabling.\n");
        } else if (!ia32_arch_capabilities_msr_get_rdcl_no(cap_msr) && !config_set(CONFIG_KERNEL_SKIM_WINDOW)) {
            printf("CPU reports vulernable to Rogue Data Cache Load (aka Meltdown https://meltdownattack.com) "
                   "yet SKIM window is *not* enabled. Please re-build with SKIM window enabled.");
            return false;
        }
    } else {
        /* hardware doesn't tell us directly so guess based on CPU vendor */
        if (config_set(CONFIG_KERNEL_SKIM_WINDOW) && x86_cpuid_get_identity()->vendor == X86_VENDOR_AMD) {
            printf("SKIM window for mitigating Meltdown (https://www.meltdownattack.com) "
                   "not necessary for AMD and performance is being needlessly affected, "
                   "consider disabling\n");
        }
        if (!config_set(CONFIG_KERNEL_SKIM_WINDOW) && x86_cpuid_get_identity()->vendor == X86_VENDOR_INTEL) {
            printf("***WARNING*** SKIM window not enabled, this machine is probably vulernable "
                   "to Meltdown (https://www.meltdownattack.com), consider enabling\n");
        }
    }

#ifdef ENABLE_SMP_SUPPORT
    /* copy boot code for APs to lower memory to run in real mode */
    if (!copy_boot_code_aps(boot_state.mem_lower)) {
        return false;
    }
    /* Initialize any kernel TLS */
    mode_init_tls(0);
#endif /* ENABLE_SMP_SUPPORT */

    printf("Kernel loaded to: start=0x%lx end=0x%lx size=0x%lx entry=0x%lx\n",
           boot_state.ki_p_reg.start,
           boot_state.ki_p_reg.end,
           boot_state.ki_p_reg.end - boot_state.ki_p_reg.start,
           (paddr_t)_start
          );

    /* remapping legacy IRQs to their correct vectors */
    pic_remap_irqs(IRQ_INT_OFFSET);
    if (config_set(CONFIG_IRQ_IOAPIC)) {
        /* Disable the PIC so that it does not generate any interrupts. We need to
         * do this *before* we initialize the apic */
        pic_disable();
    }

    /* validate the ACPI table */
    if (!acpi_validate_rsdp(&boot_state.acpi_rsdp)) {
        return false;
    }

    /* check if kernel configuration matches platform requirements */
    if (!acpi_fadt_scan(&boot_state.acpi_rsdp)) {
        return false;
    }

    if (!config_set(CONFIG_IOMMU) || cmdline_opt.disable_iommu) {
        boot_state.num_drhu = 0;
    } else {
        /* query available IOMMUs from ACPI */
        acpi_dmar_scan(
            &boot_state.acpi_rsdp,
            boot_state.drhu_list,
            &boot_state.num_drhu,
            MAX_NUM_DRHU,
            &boot_state.rmrr_list
        );
    }

    /* query available CPUs from ACPI */
    boot_state.num_cpus = acpi_madt_scan(&boot_state.acpi_rsdp, boot_state.cpus, &boot_state.num_ioapic, boot_state.ioapic_paddr);
    if (boot_state.num_cpus == 0) {
        printf("No CPUs detected\n");
        return false;
    }

    /* an IOAPIC is mandatory when configured to use it; otherwise ignore any found */
    if (config_set(CONFIG_IRQ_IOAPIC)) {
        if (boot_state.num_ioapic == 0) {
            printf("No IOAPICs detected\n");
            return false;
        }
    } else {
        if (boot_state.num_ioapic > 0) {
            printf("Detected %d IOAPICs, but configured to use PIC instead\n", boot_state.num_ioapic);
        }
    }

    /* userland images are loaded page-aligned directly after the boot modules */
    mods_end_paddr = ROUND_UP(mods_end_paddr, PAGE_BITS);
    assert(mods_end_paddr > boot_state.ki_p_reg.end);

    printf("ELF-loading userland images from boot modules:\n");
    load_paddr = mods_end_paddr;

    load_paddr = load_boot_module(boot_state.boot_module_start, load_paddr);
    if (!load_paddr) {
        return false;
    }

    /* calculate final location of userland images */
    ui_p_regs.start = boot_state.ki_p_reg.end;
    ui_p_regs.end = ui_p_regs.start + load_paddr - mods_end_paddr;

    printf(
        "Moving loaded userland images to final location: from=0x%lx to=0x%lx size=0x%lx\n",
        mods_end_paddr,
        ui_p_regs.start,
        ui_p_regs.end - ui_p_regs.start
    );
    memcpy((void*)ui_p_regs.start, (void*)mods_end_paddr, ui_p_regs.end - ui_p_regs.start);

    /* adjust p_reg and pv_offset to final load address */
    boot_state.ui_info.p_reg.start -= mods_end_paddr - ui_p_regs.start;
    boot_state.ui_info.p_reg.end   -= mods_end_paddr - ui_p_regs.start;
    boot_state.ui_info.pv_offset   -= mods_end_paddr - ui_p_regs.start;

    /* ==== following code corresponds to abstract specification after "select" ==== */

    if (!platAddDevices()) {
        return false;
    }

    /* Total number of cores we intend to boot */
    ksNumCPUs = boot_state.num_cpus;

    printf("Starting node #0 with APIC ID %lu\n", boot_state.cpus[0]);
    if (!try_boot_sys_node(boot_state.cpus[0])) {
        return false;
    }

    if (config_set(CONFIG_IRQ_IOAPIC)) {
        ioapic_init(1, boot_state.cpus, boot_state.num_ioapic);
    }

    /* initialize BKL before booting up APs */
    SMP_COND_STATEMENT(clh_lock_init());
    SMP_COND_STATEMENT(start_boot_aps());

    /* grab BKL before leaving the kernel */
    NODE_LOCK_SYS;

    printf("Booting all finished, dropped to user space\n");

    return true;
}
566
/*
 * Parse a multiboot-1 information structure: command line, boot modules,
 * physical memory map, VBE video information and lower-memory size, then
 * initialise ACPI. Populates boot_state. Returns false on any failure.
 */
static BOOT_CODE bool_t
try_boot_sys_mbi1(
    multiboot_info_t* mbi
)
{
    word_t i;
    multiboot_module_t *modules = (multiboot_module_t*)(word_t)mbi->part1.mod_list;

    cmdline_parse((const char *)(word_t)mbi->part1.cmdline, &cmdline_opt);

    if ((mbi->part1.flags & MULTIBOOT_INFO_MEM_FLAG) == 0) {
        printf("Boot loader did not provide information about physical memory size\n");
        return false;
    }

    if (!(mbi->part1.flags & MULTIBOOT_INFO_MODS_FLAG)) {
        printf("Boot loader did not provide information about boot modules\n");
        return false;
    }

    printf("Detected %d boot module(s):\n", mbi->part1.mod_count);

    if (mbi->part1.mod_count < 1) {
        printf("Expect at least one boot module (containing a userland image)\n");
        return false;
    }

    /* print the modules and record the highest physical address they occupy */
    for (i = 0; i < mbi->part1.mod_count; i++) {
        printf(
            "  module #%ld: start=0x%x end=0x%x size=0x%x name='%s'\n",
            i,
            modules[i].start,
            modules[i].end,
            modules[i].end - modules[i].start,
            (char *) (long)modules[i].name
        );
        if ((sword_t)(modules[i].end - modules[i].start) <= 0) {
            printf("Invalid boot module size! Possible cause: boot module file not found by QEMU\n");
            return false;
        }
        if (boot_state.mods_end_paddr < modules[i].end) {
            boot_state.mods_end_paddr = modules[i].end;
        }
    }

    /* initialize the memory. We track two kinds of memory regions. Physical memory
     * that we will use for the kernel, and physical memory regions that we must
     * not give to the user. Memory regions that must not be given to the user
     * include all the physical memory in the kernel window, but also includes any
     * important or kernel devices. */
    boot_state.mem_p_regs.count = 0;
    init_allocated_p_regions();
    if (mbi->part1.flags & MULTIBOOT_INFO_MMAP_FLAG) {
        if (!parse_mem_map(mbi->part2.mmap_length, mbi->part2.mmap_addr)) {
            return false;
        }
        /* copy at most SEL4_MULTIBOOT_MAX_MMAP_ENTRIES entries into the bootinfo */
        uint32_t multiboot_mmap_length = mbi->part2.mmap_length;
        if (multiboot_mmap_length > (SEL4_MULTIBOOT_MAX_MMAP_ENTRIES * sizeof(seL4_X86_mb_mmap_t))) {
            multiboot_mmap_length = SEL4_MULTIBOOT_MAX_MMAP_ENTRIES * sizeof(seL4_X86_mb_mmap_t);
            printf("Warning: Multiboot has reported more memory map entries, %zd, "
                   "than the max amount that will be passed in the bootinfo, %d. "
                   "These extra regions will still be turned into untyped caps.",
                   multiboot_mmap_length / sizeof(seL4_X86_mb_mmap_t), SEL4_MULTIBOOT_MAX_MMAP_ENTRIES);
        }
        memcpy(&boot_state.mb_mmap_info.mmap, (void*)(word_t)mbi->part2.mmap_addr, multiboot_mmap_length);
        boot_state.mb_mmap_info.mmap_length = multiboot_mmap_length;
    } else {
        /* calculate memory the old way */
        p_region_t avail;
        avail.start = HIGHMEM_PADDR;
        avail.end = ROUND_DOWN(avail.start + (mbi->part1.mem_upper << 10), PAGE_BITS);
        if (!add_mem_p_regs(avail)) {
            return false;
        }
    }

    /* copy VESA information from multiboot header */
    if ((mbi->part1.flags & MULTIBOOT_INFO_GRAPHICS_FLAG) == 0) {
        boot_state.vbe_info.vbeMode = -1;
        printf("Multiboot gave us no video information\n");
    } else {
        boot_state.vbe_info.vbeInfoBlock = *(seL4_VBEInfoBlock_t*)(seL4_Word)mbi->part2.vbe_control_info;
        boot_state.vbe_info.vbeModeInfoBlock = *(seL4_VBEModeInfoBlock_t*)(seL4_Word)mbi->part2.vbe_mode_info;
        boot_state.vbe_info.vbeMode = mbi->part2.vbe_mode;
        printf("Got VBE info in multiboot. Current video mode is %d\n", mbi->part2.vbe_mode);
        boot_state.vbe_info.vbeInterfaceSeg = mbi->part2.vbe_interface_seg;
        boot_state.vbe_info.vbeInterfaceOff = mbi->part2.vbe_interface_off;
        boot_state.vbe_info.vbeInterfaceLen = mbi->part2.vbe_interface_len;
    }

    boot_state.mem_lower = mbi->part1.mem_lower;
    /* the first module is the userland image to be ELF-loaded later */
    boot_state.boot_module_start = modules->start;

    /* Initialize ACPI */
    if (!acpi_init(&boot_state.acpi_rsdp)) {
        return false;
    }

    return true;
}
667
/*
 * Parse a multiboot-2 information structure by walking its tag list:
 * command line, ACPI RSDP (v1 or v2), boot modules, memory map and
 * framebuffer information. Populates boot_state. Returns false if no
 * valid boot module was found.
 */
static BOOT_CODE bool_t
try_boot_sys_mbi2(
    multiboot2_header_t* mbi2
)
{
    int mod_count                  = 0;
    /* the tag list starts immediately after the fixed multiboot2 header */
    multiboot2_tag_t const * tag   = (multiboot2_tag_t *)(mbi2 + 1);
    multiboot2_tag_t const * tag_e = (multiboot2_tag_t *)((word_t)mbi2 + mbi2->total_size);

    /* initialize the memory. We track two kinds of memory regions. Physical memory
     * that we will use for the kernel, and physical memory regions that we must
     * not give to the user. Memory regions that must not be given to the user
     * include all the physical memory in the kernel window, but also includes any
     * important or kernel devices. */
    boot_state.mem_p_regs.count = 0;
    init_allocated_p_regions();
    boot_state.mb_mmap_info.mmap_length = 0;
    boot_state.vbe_info.vbeMode = -1;

    while (tag < tag_e && tag->type != MULTIBOOT2_TAG_END) {
        /* payload of the current tag, directly behind the tag header */
        word_t const behind_tag = (word_t)tag + sizeof(*tag);

        if (tag->type == MULTIBOOT2_TAG_CMDLINE) {
            char const * const cmdline = (char const * const)(behind_tag);
            cmdline_parse(cmdline, &cmdline_opt);
        } else if (tag->type == MULTIBOOT2_TAG_ACPI_1) {
            /* only copy the v1 RSDP if the payload size matches exactly */
            if (ACPI_V1_SIZE == tag->size - sizeof(*tag)) {
                memcpy(&boot_state.acpi_rsdp, (void *)behind_tag, tag->size - sizeof(*tag));
            }
        } else if (tag->type == MULTIBOOT2_TAG_ACPI_2) {
            if (sizeof(boot_state.acpi_rsdp) == tag->size - sizeof(*tag)) {
                memcpy(&boot_state.acpi_rsdp, (void *)behind_tag, sizeof(boot_state.acpi_rsdp));
            }
        } else if (tag->type == MULTIBOOT2_TAG_MODULE) {
            multiboot2_module_t const * module = (multiboot2_module_t const *)behind_tag;
            printf(
                "  module #%d: start=0x%x end=0x%x size=0x%x name='%s'\n",
                mod_count,
                module->start,
                module->end,
                module->end - module->start,
                module->string
            );

            /* the first module is the userland image to be ELF-loaded later */
            if (mod_count == 0) {
                boot_state.boot_module_start = module->start;
            }

            mod_count ++;
            if ((sword_t)(module->end - module->start) <= 0) {
                printf("Invalid boot module size! Possible cause: boot module file not found\n");
                return false;
            }
            if (boot_state.mods_end_paddr < module->end) {
                boot_state.mods_end_paddr = module->end;
            }
        } else if (tag->type == MULTIBOOT2_TAG_MEMORY) {
            /* +8 skips the entry_size/entry_version words preceding the
             * entry array — TODO confirm against multiboot2 tag layout */
            multiboot2_memory_t const * s = (multiboot2_memory_t *)(behind_tag + 8);
            multiboot2_memory_t const * e = (multiboot2_memory_t *)((word_t)tag + tag->size);

            for (multiboot2_memory_t const * m = s; m < e; m++) {
                /* the entry starting at physical 0 gives the lower memory size */
                if (!m->addr) {
                    boot_state.mem_lower = m->size;
                }

                printf("\tPhysical Memory Region from %llx size %llx type %u\n", m->addr, m->size, m->type);
                if (m->addr != (uint64_t)(word_t)m->addr) {
                    printf("\t\tPhysical memory region not addressable\n");
                }

                /* only usable RAM at or above the 1MiB boundary is registered */
                if (m->type == MULTIBOOT_MMAP_USEABLE_TYPE && m->addr >= HIGHMEM_PADDR) {
                    if (!add_mem_p_regs((p_region_t) {
                    m->addr, m->addr + m->size
                }))
                    return false;
                }
            }
        } else if (tag->type == MULTIBOOT2_TAG_FB) {
            multiboot2_fb_t const * fb = (multiboot2_fb_t const *)behind_tag;
            printf("Got framebuffer info in multiboot2. Current video mode is at physical address=%llx pitch=%u resolution=%ux%u@%u type=%u\n",
                   fb->addr, fb->pitch, fb->width, fb->height, fb->bpp, fb->type);
            boot_state.fb_info = *fb;
        }

        /* advance to the next tag; tags are aligned to 8 bytes (2^3) */
        tag = (multiboot2_tag_t const *)((word_t)tag + ROUND_UP(tag->size, 3));
    }

    printf("Detected %d boot module(s):\n", mod_count);

    if (mod_count < 1) {
        printf("Expect at least one boot module (containing a userland image)\n");
        return false;
    }

    return true;
}
764
765BOOT_CODE VISIBLE void
766boot_sys(
767    unsigned long multiboot_magic,
768    void* mbi)
769{
770    bool_t result = false;
771
772    if (multiboot_magic == MULTIBOOT_MAGIC) {
773        result = try_boot_sys_mbi1(mbi);
774    } else if (multiboot_magic == MULTIBOOT2_MAGIC) {
775        result = try_boot_sys_mbi2(mbi);
776    } else {
777        printf("Boot loader is not multiboot 1 or 2 compliant %lx\n", multiboot_magic);
778    }
779
780    if (result) {
781        result = try_boot_sys();
782    }
783
784    if (!result) {
785        fail("boot_sys failed for some reason :(\n");
786    }
787
788    ARCH_NODE_STATE(x86KScurInterrupt) = int_invalid;
789    ARCH_NODE_STATE(x86KSPendingInterrupt) = int_invalid;
790
791    schedule();
792    activateThread();
793}
794
795