1/*
2 * Copyright 2014, General Dynamics C4 Systems
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 */
6
7#include <config.h>
8#include <util.h>
9#include <hardware.h>
10#include <machine/io.h>
11#include <arch/machine.h>
12#include <arch/kernel/apic.h>
13#include <arch/kernel/cmdline.h>
14#include <arch/kernel/boot.h>
15#include <arch/kernel/boot_sys.h>
16#include <arch/kernel/smp_sys.h>
17#include <arch/kernel/vspace.h>
18#include <arch/kernel/elf.h>
19#include <smp/lock.h>
20#include <linker.h>
21#include <plat/machine/acpi.h>
22#include <plat/machine/devices.h>
23#include <plat/machine/pic.h>
24#include <plat/machine/ioapic.h>
25#include <sel4/arch/bootinfo_types.h>
26
27/* addresses defined in linker script */
28/* need a fake array to get the pointer from the linker script */
29
30/* start/end of CPU boot code */
31extern char boot_cpu_start[1];
32extern char boot_cpu_end[1];
33
34/* start/end of boot stack */
35extern char boot_stack_bottom[1];
36extern char boot_stack_top[1];
37
38/* locations in kernel image */
39extern char ki_boot_end[1];
40extern char ki_end[1];
41extern char ki_skim_start[1];
42extern char ki_skim_end[1];
43
44#ifdef CONFIG_PRINTING
45/* kernel entry point */
46extern char _start[1];
47#endif
48
49/* constants */
50
51#define HIGHMEM_PADDR 0x100000
52
53BOOT_BSS
54boot_state_t boot_state;
55
56/* global variables (not covered by abstract specification) */
57
58BOOT_BSS
59cmdline_opt_t cmdline_opt;
60
61/* functions not modeled in abstract specification */
62
63BOOT_CODE static paddr_t find_load_paddr(paddr_t min_paddr, word_t image_size)
64{
65    int i;
66
67    for (i = 0; i < boot_state.mem_p_regs.count; i++) {
68        paddr_t start = MAX(min_paddr, boot_state.mem_p_regs.list[i].start);
69        paddr_t end = boot_state.mem_p_regs.list[i].end;
70        word_t region_size = end - start;
71
72        if (region_size >= image_size) {
73            return start;
74        }
75    }
76
77    return 0;
78}
79
80BOOT_CODE static paddr_t load_boot_module(word_t boot_module_start, paddr_t load_paddr)
81{
82    v_region_t v_reg;
83    word_t entry;
84    Elf_Header_t *elf_file = (Elf_Header_t *)boot_module_start;
85
86    if (!elf_checkFile(elf_file)) {
87        printf("Boot module does not contain a valid ELF image\n");
88        return 0;
89    }
90
91    v_reg = elf_getMemoryBounds(elf_file);
92    entry = elf_file->e_entry;
93
94    if (v_reg.end == 0) {
95        printf("ELF image in boot module does not contain any segments\n");
96        return 0;
97    }
98    v_reg.end = ROUND_UP(v_reg.end, PAGE_BITS);
99
100    printf("size=0x%lx v_entry=%p v_start=%p v_end=%p ",
101           v_reg.end - v_reg.start,
102           (void *)entry,
103           (void *)v_reg.start,
104           (void *)v_reg.end
105          );
106
107    if (!IS_ALIGNED(v_reg.start, PAGE_BITS)) {
108        printf("Userland image virtual start address must be 4KB-aligned\n");
109        return 0;
110    }
111    if (v_reg.end + 2 * BIT(PAGE_BITS) > USER_TOP) {
112        /* for IPC buffer frame and bootinfo frame, need 2*4K of additional userland virtual memory */
113        printf("Userland image virtual end address too high\n");
114        return 0;
115    }
116    if ((entry < v_reg.start) || (entry >= v_reg.end)) {
117        printf("Userland image entry point does not lie within userland image\n");
118        return 0;
119    }
120
121    load_paddr = find_load_paddr(load_paddr, v_reg.end - v_reg.start);
122    assert(load_paddr);
123
124    /* fill ui_info struct */
125    boot_state.ui_info.pv_offset = load_paddr - v_reg.start;
126    boot_state.ui_info.p_reg.start = load_paddr;
127    load_paddr += v_reg.end - v_reg.start;
128    boot_state.ui_info.p_reg.end = load_paddr;
129    boot_state.ui_info.v_entry = entry;
130
131    printf("p_start=0x%lx p_end=0x%lx\n",
132           boot_state.ui_info.p_reg.start,
133           boot_state.ui_info.p_reg.end
134          );
135
136    /* initialise all initial userland memory and load potentially sparse ELF image */
137    memzero(
138        (void *)boot_state.ui_info.p_reg.start,
139        boot_state.ui_info.p_reg.end - boot_state.ui_info.p_reg.start
140    );
141    elf_load(elf_file, boot_state.ui_info.pv_offset);
142
143    return load_paddr;
144}
145
/*
 * Perform the node-local (per-CPU) part of system boot: map the kernel
 * window, install the kernel address space, optionally map the SKIM
 * window, then initialise the CPU and the kernel's system state.
 *
 * @param cpu_id  APIC ID of the CPU being booted
 * @return true on success, false on any initialisation failure
 */
static BOOT_CODE bool_t try_boot_sys_node(cpu_id_t cpu_id)
{
    p_region_t boot_mem_reuse_p_reg;

    if (!map_kernel_window(
            boot_state.num_ioapic,
            boot_state.ioapic_paddr,
            boot_state.num_drhu,
            boot_state.drhu_list
        )) {
        return false;
    }
    setCurrentVSpaceRoot(kpptr_to_paddr(X86_KERNEL_VSPACE_ROOT), 0);
    /* Sync up the compilers view of the world here to force the PD to actually
     * be set *right now* instead of delayed */
    asm volatile("" ::: "memory");

#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* Map the SKIM window covering ki_skim_start..ki_skim_end; this is the
     * reduced kernel mapping kept visible while running user code. */
    if (!map_skim_window((vptr_t)ki_skim_start, (vptr_t)ki_skim_end)) {
        return false;
    }
#endif

    /* reuse boot code/data memory */
    boot_mem_reuse_p_reg.start = KERNEL_ELF_PADDR_BASE;
    boot_mem_reuse_p_reg.end = kpptr_to_paddr(ki_boot_end);

    /* initialise the CPU */
    if (!init_cpu(config_set(CONFIG_IRQ_IOAPIC) ? 1 : 0)) {
        return false;
    }

    /* initialise NDKS and kernel heap */
    if (!init_sys_state(
            cpu_id,
            &boot_state.mem_p_regs,
            boot_state.ui_info,
            boot_mem_reuse_p_reg,
            /* parameters below not modeled in abstract specification */
            boot_state.num_drhu,
            boot_state.drhu_list,
            &boot_state.rmrr_list,
            &boot_state.acpi_rsdp,
            &boot_state.vbe_info,
            &boot_state.mb_mmap_info,
            &boot_state.fb_info
        )) {
        return false;
    }

    return true;
}
198
199static BOOT_CODE bool_t add_mem_p_regs(p_region_t reg)
200{
201    if (reg.end > PADDR_TOP && reg.start > PADDR_TOP) {
202        /* Return true here as it's not an error for there to exist memory outside the kernel window,
203         * we're just going to ignore it and leave it to be given out as device memory */
204        return true;
205    }
206    if (boot_state.mem_p_regs.count == MAX_NUM_FREEMEM_REG) {
207        printf("Dropping memory region 0x%lx-0x%lx, try increasing MAX_NUM_FREEMEM_REG\n", reg.start, reg.end);
208        return false;
209    }
210    if (reg.end > PADDR_TOP) {
211        assert(reg.start <= PADDR_TOP);
212        /* Clamp a region to the top of the kernel window if it extends beyond */
213        reg.end = PADDR_TOP;
214    }
215    printf("Adding physical memory region 0x%lx-0x%lx\n", reg.start, reg.end);
216    boot_state.mem_p_regs.list[boot_state.mem_p_regs.count] = reg;
217    boot_state.mem_p_regs.count++;
218    return reserve_region(reg);
219}
220
221/*
222 * the code relies that the GRUB provides correct information
223 * about the actual physical memory regions.
224 */
/*
 * Walk the multiboot (v1) memory map and register every usable
 * high-memory region with add_mem_p_regs.
 *
 * @param mmap_length  total length in bytes of the memory map
 * @param mmap_addr    physical address of the first map entry
 * @return false if add_mem_p_regs rejected a region, true otherwise
 *
 * NOTE(review): this iterates with 'mmap++', i.e. it assumes fixed-size
 * entries; the multiboot spec allows entries to declare their own size —
 * presumably GRUB always emits sizeof(multiboot_mmap_t) entries, confirm.
 */
static BOOT_CODE bool_t parse_mem_map(uint32_t mmap_length, uint32_t mmap_addr)
{
    multiboot_mmap_t *mmap = (multiboot_mmap_t *)((word_t)mmap_addr);
    printf("Parsing GRUB physical memory map\n");

    while ((word_t)mmap < (word_t)(mmap_addr + mmap_length)) {
        uint64_t mem_start = mmap->base_addr;
        uint64_t mem_length = mmap->length;
        uint32_t type = mmap->type;
        /* On 32-bit builds a 64-bit base address may not fit in word_t;
         * such regions cannot be mapped and are skipped. */
        if (mem_start != (uint64_t)(word_t)mem_start) {
            printf("\tPhysical memory region not addressable\n");
        } else {
            printf("\tPhysical Memory Region from %lx size %lx type %d\n", (long)mem_start, (long)mem_length, type);
            /* Only usable RAM above 1MiB is handed to the kernel. */
            if (type == MULTIBOOT_MMAP_USEABLE_TYPE && mem_start >= HIGHMEM_PADDR) {
                if (!add_mem_p_regs((p_region_t) {
                mem_start, mem_start + mem_length
            })) {
                    return false;
                }
            }
        }
        mmap++;
    }
    return true;
}
250
251static BOOT_CODE bool_t is_compiled_for_microarchitecture(void)
252{
253    word_t microarch_generation = 0;
254    x86_cpu_identity_t *model_info = x86_cpuid_get_model_info();
255
256    if (config_set(CONFIG_ARCH_X86_SKYLAKE)) {
257        microarch_generation = 7;
258    } else if (config_set(CONFIG_ARCH_X86_BROADWELL)) {
259        microarch_generation = 6;
260    } else if (config_set(CONFIG_ARCH_X86_HASWELL)) {
261        microarch_generation = 5;
262    } else if (config_set(CONFIG_ARCH_X86_IVY)) {
263        microarch_generation = 4;
264    } else if (config_set(CONFIG_ARCH_X86_SANDY)) {
265        microarch_generation = 3;
266    } else if (config_set(CONFIG_ARCH_X86_WESTMERE)) {
267        microarch_generation = 2;
268    } else if (config_set(CONFIG_ARCH_X86_NEHALEM)) {
269        microarch_generation = 1;
270    }
271
272    switch (model_info->model) {
273    case SKYLAKE_1_MODEL_ID:
274    case SKYLAKE_2_MODEL_ID:
275        if (microarch_generation > 7) {
276            return false;
277        }
278        break;
279
280    case BROADWELL_1_MODEL_ID:
281    case BROADWELL_2_MODEL_ID:
282    case BROADWELL_3_MODEL_ID:
283    case BROADWELL_4_MODEL_ID:
284    case BROADWELL_5_MODEL_ID:
285        if (microarch_generation > 6) {
286            return false;
287        }
288        break;
289
290    case HASWELL_1_MODEL_ID:
291    case HASWELL_2_MODEL_ID:
292    case HASWELL_3_MODEL_ID:
293    case HASWELL_4_MODEL_ID:
294        if (microarch_generation > 5) {
295            return false;
296        }
297        break;
298
299    case IVY_BRIDGE_1_MODEL_ID:
300    case IVY_BRIDGE_2_MODEL_ID:
301    case IVY_BRIDGE_3_MODEL_ID:
302        if (microarch_generation > 4) {
303            return false;
304        }
305        break;
306
307    case SANDY_BRIDGE_1_MODEL_ID:
308    case SANDY_BRIDGE_2_MODEL_ID:
309        if (microarch_generation > 3) {
310            return false;
311        }
312        break;
313
314    case WESTMERE_1_MODEL_ID:
315    case WESTMERE_2_MODEL_ID:
316    case WESTMERE_3_MODEL_ID:
317        if (microarch_generation > 2) {
318            return false;
319        }
320        break;
321
322    case NEHALEM_1_MODEL_ID:
323    case NEHALEM_2_MODEL_ID:
324    case NEHALEM_3_MODEL_ID:
325        if (microarch_generation > 1) {
326            return false;
327        }
328        break;
329
330    default:
331        if (!config_set(CONFIG_ARCH_X86_GENERIC)) {
332            return false;
333        }
334    }
335
336    return true;
337}
338
/*
 * Platform-independent part of system boot, run after the multiboot info
 * has been parsed into boot_state: validates the CPU, warns about
 * Meltdown/SKIM configuration mismatches, configures interrupt delivery,
 * scans ACPI, loads and relocates the userland image, and boots node 0
 * (plus, with SMP, the application processors).
 *
 * @return true if the system booted to the point of dropping to user space
 */
static BOOT_CODE bool_t try_boot_sys(void)
{
    paddr_t mods_end_paddr = boot_state.mods_end_paddr;
    p_region_t ui_p_regs;
    paddr_t load_paddr;

    boot_state.ki_p_reg.start = KERNEL_ELF_PADDR_BASE;
    boot_state.ki_p_reg.end = kpptr_to_paddr(ki_end);

    if (!x86_cpuid_initialize()) {
        printf("Warning: Your x86 CPU has an unsupported vendor, '%s'.\n"
               "\tYour setup may not be able to competently run seL4 as "
               "intended.\n"
               "\tCurrently supported x86 vendors are AMD and Intel.\n",
               x86_cpuid_get_identity()->vendor_string);
    }

    if (!is_compiled_for_microarchitecture()) {
        printf("Warning: Your kernel was not compiled for the current microarchitecture.\n");
    }

    cpuid_007h_edx_t edx;
    edx.words[0] = x86_cpuid_edx(0x7, 0);
    /* see if we can definitively say whether or not we need the skim window by
     * checking whether the CPU is vulnerable to rogue data cache loads (rdcl) */
    if (cpuid_007h_edx_get_ia32_arch_cap_msr(edx)) {
        ia32_arch_capabilities_msr_t cap_msr;
        cap_msr.words[0] = x86_rdmsr(IA32_ARCH_CAPABILITIES_MSR);
        if (ia32_arch_capabilities_msr_get_rdcl_no(cap_msr) && config_set(CONFIG_KERNEL_SKIM_WINDOW)) {
            printf("CPU reports not vulnerable to Rogue Data Cache Load (aka Meltdown https://meltdownattack.com) "
                   "yet SKIM window is enabled. Performance is needlessly being impacted, consider disabling.\n");
        } else if (!ia32_arch_capabilities_msr_get_rdcl_no(cap_msr) && !config_set(CONFIG_KERNEL_SKIM_WINDOW)) {
            printf("CPU reports vulnerable to Rogue Data Cache Load (aka Meltdown https://meltdownattack.com) "
                   "yet SKIM window is *not* enabled. Please re-build with SKIM window enabled.");
            return false;
        }
    } else {
        /* hardware doesn't tell us directly so guess based on CPU vendor */
        if (config_set(CONFIG_KERNEL_SKIM_WINDOW) && x86_cpuid_get_identity()->vendor == X86_VENDOR_AMD) {
            printf("SKIM window for mitigating Meltdown (https://www.meltdownattack.com) "
                   "not necessary for AMD and performance is being needlessly affected, "
                   "consider disabling\n");
        }
        if (!config_set(CONFIG_KERNEL_SKIM_WINDOW) && x86_cpuid_get_identity()->vendor == X86_VENDOR_INTEL) {
            printf("***WARNING*** SKIM window not enabled, this machine is probably vulnerable "
                   "to Meltdown (https://www.meltdownattack.com), consider enabling\n");
        }
    }

#ifdef ENABLE_SMP_SUPPORT
    /* copy boot code for APs to lower memory to run in real mode */
    if (!copy_boot_code_aps(boot_state.mem_lower)) {
        return false;
    }
    /* Initialize any kernel TLS */
    mode_init_tls(0);
#endif /* ENABLE_SMP_SUPPORT */

    printf("Kernel loaded to: start=0x%lx end=0x%lx size=0x%lx entry=0x%lx\n",
           boot_state.ki_p_reg.start,
           boot_state.ki_p_reg.end,
           boot_state.ki_p_reg.end - boot_state.ki_p_reg.start,
           (paddr_t)_start
          );

    /* remapping legacy IRQs to their correct vectors */
    pic_remap_irqs(IRQ_INT_OFFSET);
    if (config_set(CONFIG_IRQ_IOAPIC)) {
        /* Disable the PIC so that it does not generate any interrupts. We need to
         * do this *before* we initialize the apic */
        pic_disable();
    }

    /* validate the ACPI table */
    if (!acpi_validate_rsdp(&boot_state.acpi_rsdp)) {
        return false;
    }

    /* check if kernel configuration matches platform requirements */
    if (!acpi_fadt_scan(&boot_state.acpi_rsdp)) {
        return false;
    }

    if (!config_set(CONFIG_IOMMU) || cmdline_opt.disable_iommu) {
        boot_state.num_drhu = 0;
    } else {
        /* query available IOMMUs from ACPI */
        acpi_dmar_scan(
            &boot_state.acpi_rsdp,
            boot_state.drhu_list,
            &boot_state.num_drhu,
            MAX_NUM_DRHU,
            &boot_state.rmrr_list
        );
    }

    /* query available CPUs from ACPI */
    boot_state.num_cpus = acpi_madt_scan(&boot_state.acpi_rsdp, boot_state.cpus, &boot_state.num_ioapic,
                                         boot_state.ioapic_paddr);
    if (boot_state.num_cpus == 0) {
        printf("No CPUs detected\n");
        return false;
    }

    if (config_set(CONFIG_IRQ_IOAPIC)) {
        if (boot_state.num_ioapic == 0) {
            printf("No IOAPICs detected\n");
            return false;
        }
    } else {
        /* Not fatal: the PIC will be used even though IOAPICs exist. */
        if (boot_state.num_ioapic > 0) {
            printf("Detected %d IOAPICs, but configured to use PIC instead\n", boot_state.num_ioapic);
        }
    }

    /* The userland image is first loaded just past the boot modules. */
    mods_end_paddr = ROUND_UP(mods_end_paddr, PAGE_BITS);
    assert(mods_end_paddr > boot_state.ki_p_reg.end);

    printf("ELF-loading userland images from boot modules:\n");
    load_paddr = mods_end_paddr;

    load_paddr = load_boot_module(boot_state.boot_module_start, load_paddr);
    if (!load_paddr) {
        return false;
    }

    /* calculate final location of userland images */
    ui_p_regs.start = boot_state.ki_p_reg.end;
    ui_p_regs.end = ui_p_regs.start + load_paddr - mods_end_paddr;

    /* Move the loaded image down so it sits directly after the kernel. */
    printf(
        "Moving loaded userland images to final location: from=0x%lx to=0x%lx size=0x%lx\n",
        mods_end_paddr,
        ui_p_regs.start,
        ui_p_regs.end - ui_p_regs.start
    );
    memcpy((void *)ui_p_regs.start, (void *)mods_end_paddr, ui_p_regs.end - ui_p_regs.start);

    /* adjust p_reg and pv_offset to final load address */
    boot_state.ui_info.p_reg.start -= mods_end_paddr - ui_p_regs.start;
    boot_state.ui_info.p_reg.end   -= mods_end_paddr - ui_p_regs.start;
    boot_state.ui_info.pv_offset   -= mods_end_paddr - ui_p_regs.start;

    /* ==== following code corresponds to abstract specification after "select" ==== */

    if (!platAddDevices()) {
        return false;
    }

    /* Total number of cores we intend to boot */
    ksNumCPUs = boot_state.num_cpus;

    printf("Starting node #0 with APIC ID %lu\n", boot_state.cpus[0]);
    if (!try_boot_sys_node(boot_state.cpus[0])) {
        return false;
    }

    if (config_set(CONFIG_IRQ_IOAPIC)) {
        ioapic_init(1, boot_state.cpus, boot_state.num_ioapic);
    }

    /* initialize BKL before booting up APs */
    SMP_COND_STATEMENT(clh_lock_init());
    SMP_COND_STATEMENT(start_boot_aps());

    /* grab BKL before leaving the kernel */
    NODE_LOCK_SYS;

    printf("Booting all finished, dropped to user space\n");

    return true;
}
511
512static BOOT_CODE bool_t try_boot_sys_mbi1(
513    multiboot_info_t *mbi
514)
515{
516    word_t i;
517    multiboot_module_t *modules = (multiboot_module_t *)(word_t)mbi->part1.mod_list;
518
519    cmdline_parse((const char *)(word_t)mbi->part1.cmdline, &cmdline_opt);
520
521    if ((mbi->part1.flags & MULTIBOOT_INFO_MEM_FLAG) == 0) {
522        printf("Boot loader did not provide information about physical memory size\n");
523        return false;
524    }
525
526    if (!(mbi->part1.flags & MULTIBOOT_INFO_MODS_FLAG)) {
527        printf("Boot loader did not provide information about boot modules\n");
528        return false;
529    }
530
531    printf("Detected %d boot module(s):\n", mbi->part1.mod_count);
532
533    if (mbi->part1.mod_count < 1) {
534        printf("Expect at least one boot module (containing a userland image)\n");
535        return false;
536    }
537
538    for (i = 0; i < mbi->part1.mod_count; i++) {
539        printf(
540            "  module #%ld: start=0x%x end=0x%x size=0x%x name='%s'\n",
541            i,
542            modules[i].start,
543            modules[i].end,
544            modules[i].end - modules[i].start,
545            (char *)(long)modules[i].name
546        );
547        if ((sword_t)(modules[i].end - modules[i].start) <= 0) {
548            printf("Invalid boot module size! Possible cause: boot module file not found by QEMU\n");
549            return false;
550        }
551        if (boot_state.mods_end_paddr < modules[i].end) {
552            boot_state.mods_end_paddr = modules[i].end;
553        }
554    }
555
556    /* initialize the memory. We track two kinds of memory regions. Physical memory
557     * that we will use for the kernel, and physical memory regions that we must
558     * not give to the user. Memory regions that must not be given to the user
559     * include all the physical memory in the kernel window, but also includes any
560     * important or kernel devices. */
561    boot_state.mem_p_regs.count = 0;
562    if (mbi->part1.flags & MULTIBOOT_INFO_MMAP_FLAG) {
563        if (!parse_mem_map(mbi->part2.mmap_length, mbi->part2.mmap_addr)) {
564            return false;
565        }
566        uint32_t multiboot_mmap_length = mbi->part2.mmap_length;
567        if (multiboot_mmap_length > (SEL4_MULTIBOOT_MAX_MMAP_ENTRIES * sizeof(seL4_X86_mb_mmap_t))) {
568            multiboot_mmap_length = SEL4_MULTIBOOT_MAX_MMAP_ENTRIES * sizeof(seL4_X86_mb_mmap_t);
569            printf("Warning: Multiboot has reported more memory map entries, %zd, "
570                   "than the max amount that will be passed in the bootinfo, %d. "
571                   "These extra regions will still be turned into untyped caps.",
572                   multiboot_mmap_length / sizeof(seL4_X86_mb_mmap_t), SEL4_MULTIBOOT_MAX_MMAP_ENTRIES);
573        }
574        memcpy(&boot_state.mb_mmap_info.mmap, (void *)(word_t)mbi->part2.mmap_addr, multiboot_mmap_length);
575        boot_state.mb_mmap_info.mmap_length = multiboot_mmap_length;
576    } else {
577        /* calculate memory the old way */
578        p_region_t avail;
579        avail.start = HIGHMEM_PADDR;
580        avail.end = ROUND_DOWN(avail.start + (mbi->part1.mem_upper << 10), PAGE_BITS);
581        if (!add_mem_p_regs(avail)) {
582            return false;
583        }
584    }
585
586    /* copy VESA information from multiboot header */
587    if ((mbi->part1.flags & MULTIBOOT_INFO_GRAPHICS_FLAG) == 0) {
588        boot_state.vbe_info.vbeMode = -1;
589        printf("Multiboot gave us no video information\n");
590    } else {
591        boot_state.vbe_info.vbeInfoBlock = *(seL4_VBEInfoBlock_t *)(seL4_Word)mbi->part2.vbe_control_info;
592        boot_state.vbe_info.vbeModeInfoBlock = *(seL4_VBEModeInfoBlock_t *)(seL4_Word)mbi->part2.vbe_mode_info;
593        boot_state.vbe_info.vbeMode = mbi->part2.vbe_mode;
594        printf("Got VBE info in multiboot. Current video mode is %d\n", mbi->part2.vbe_mode);
595        boot_state.vbe_info.vbeInterfaceSeg = mbi->part2.vbe_interface_seg;
596        boot_state.vbe_info.vbeInterfaceOff = mbi->part2.vbe_interface_off;
597        boot_state.vbe_info.vbeInterfaceLen = mbi->part2.vbe_interface_len;
598    }
599
600    boot_state.mem_lower = mbi->part1.mem_lower;
601    boot_state.boot_module_start = modules->start;
602
603    /* Initialize ACPI */
604    if (!acpi_init(&boot_state.acpi_rsdp)) {
605        return false;
606    }
607
608    return true;
609}
610
/*
 * Parse a multiboot2 information structure into boot_state by walking its
 * tag list: command line, ACPI RSDP (v1 or v2), boot modules, memory map
 * and framebuffer info.
 *
 * @param mbi2  pointer to the multiboot2 info structure from the loader
 * @return true on success, false if no valid boot module was found
 */
static BOOT_CODE bool_t try_boot_sys_mbi2(
    multiboot2_header_t *mbi2
)
{
    int mod_count                  = 0;
    /* Tags start immediately after the fixed header and end at total_size. */
    multiboot2_tag_t const *tag   = (multiboot2_tag_t *)(mbi2 + 1);
    multiboot2_tag_t const *tag_e = (multiboot2_tag_t *)((word_t)mbi2 + mbi2->total_size);

    /* initialize the memory. We track two kinds of memory regions. Physical memory
     * that we will use for the kernel, and physical memory regions that we must
     * not give to the user. Memory regions that must not be given to the user
     * include all the physical memory in the kernel window, but also includes any
     * important or kernel devices. */
    boot_state.mem_p_regs.count = 0;
    boot_state.mb_mmap_info.mmap_length = 0;
    boot_state.vbe_info.vbeMode = -1;

    while (tag < tag_e && tag->type != MULTIBOOT2_TAG_END) {
        /* Tag payload starts right after the tag header. */
        word_t const behind_tag = (word_t)tag + sizeof(*tag);

        if (tag->type == MULTIBOOT2_TAG_CMDLINE) {
            char const *const cmdline = (char const * const)(behind_tag);
            cmdline_parse(cmdline, &cmdline_opt);
        } else if (tag->type == MULTIBOOT2_TAG_ACPI_1) {
            if (ACPI_V1_SIZE == tag->size - sizeof(*tag)) {
                memcpy(&boot_state.acpi_rsdp, (void *)behind_tag, tag->size - sizeof(*tag));
            }
        } else if (tag->type == MULTIBOOT2_TAG_ACPI_2) {
            if (sizeof(boot_state.acpi_rsdp) == tag->size - sizeof(*tag)) {
                memcpy(&boot_state.acpi_rsdp, (void *)behind_tag, sizeof(boot_state.acpi_rsdp));
            }
        } else if (tag->type == MULTIBOOT2_TAG_MODULE) {
            multiboot2_module_t const *module = (multiboot2_module_t const *)behind_tag;
            printf(
                "  module #%d: start=0x%x end=0x%x size=0x%x name='%s'\n",
                mod_count,
                module->start,
                module->end,
                module->end - module->start,
                module->string
            );

            /* Only the first module is treated as the userland image. */
            if (mod_count == 0) {
                boot_state.boot_module_start = module->start;
            }

            mod_count ++;
            if ((sword_t)(module->end - module->start) <= 0) {
                printf("Invalid boot module size! Possible cause: boot module file not found\n");
                return false;
            }
            if (boot_state.mods_end_paddr < module->end) {
                boot_state.mods_end_paddr = module->end;
            }
        } else if (tag->type == MULTIBOOT2_TAG_MEMORY) {
            /* The +8 skips the tag's entry_size and entry_version words
             * before the array of entries — presumably both are uint32_t;
             * TODO confirm against the multiboot2 layout used here. */
            multiboot2_memory_t const *s = (multiboot2_memory_t *)(behind_tag + 8);
            multiboot2_memory_t const *e = (multiboot2_memory_t *)((word_t)tag + tag->size);

            for (multiboot2_memory_t const *m = s; m < e; m++) {
                if (!m->addr) {
                    boot_state.mem_lower = m->size;
                }

                printf("\tPhysical Memory Region from %llx size %llx type %u\n", m->addr, m->size, m->type);
                /* NOTE(review): unlike the MBI1 path (parse_mem_map), a
                 * region flagged unaddressable here is still passed to
                 * add_mem_p_regs below — confirm this is intentional. */
                if (m->addr != (uint64_t)(word_t)m->addr) {
                    printf("\t\tPhysical memory region not addressable\n");
                }

                if (m->type == MULTIBOOT_MMAP_USEABLE_TYPE && m->addr >= HIGHMEM_PADDR) {
                    if (!add_mem_p_regs((p_region_t) {
                    m->addr, m->addr + m->size
                }))
                    return false;
                }
            }
        } else if (tag->type == MULTIBOOT2_TAG_FB) {
            multiboot2_fb_t const *fb = (multiboot2_fb_t const *)behind_tag;
            printf("Got framebuffer info in multiboot2. Current video mode is at physical address=%llx pitch=%u resolution=%ux%u@%u type=%u\n",
                   fb->addr, fb->pitch, fb->width, fb->height, fb->bpp, fb->type);
            boot_state.fb_info = *fb;
        }

        /* ROUND_UP's second argument is a power-of-two exponent (see its use
         * with PAGE_BITS above), so this advances to the next 8-byte-aligned
         * tag as required by multiboot2. */
        tag = (multiboot2_tag_t const *)((word_t)tag + ROUND_UP(tag->size, 3));
    }

    printf("Detected %d boot module(s):\n", mod_count);

    if (mod_count < 1) {
        printf("Expect at least one boot module (containing a userland image)\n");
        return false;
    }

    return true;
}
705
706BOOT_CODE VISIBLE void boot_sys(
707    unsigned long multiboot_magic,
708    void *mbi)
709{
710    bool_t result = false;
711
712    if (multiboot_magic == MULTIBOOT_MAGIC) {
713        result = try_boot_sys_mbi1(mbi);
714    } else if (multiboot_magic == MULTIBOOT2_MAGIC) {
715        result = try_boot_sys_mbi2(mbi);
716    } else {
717        printf("Boot loader is not multiboot 1 or 2 compliant %lx\n", multiboot_magic);
718    }
719
720    if (result) {
721        result = try_boot_sys();
722    }
723
724    if (!result) {
725        fail("boot_sys failed for some reason :(\n");
726    }
727
728    ARCH_NODE_STATE(x86KScurInterrupt) = int_invalid;
729    ARCH_NODE_STATE(x86KSPendingInterrupt) = int_invalid;
730
731#ifdef CONFIG_KERNEL_MCS
732    NODE_STATE(ksCurTime) = getCurrentTime();
733    NODE_STATE(ksConsumed) = 0;
734#endif
735
736    schedule();
737    activateThread();
738}
739
740