/**
 * \file
 * \brief Boot driver arch specific parts for x86 CPUs
 */
/*
 * Copyright (c) 2013,2017 ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include "../../coreboot.h"

#include <target/x86/barrelfish_kpi/coredata_target.h>
#include <target/x86_32/barrelfish_kpi/paging_target.h>
#include <target/x86_64/barrelfish_kpi/paging_target.h>
#include <barrelfish/deferred.h>

#ifdef __k1om__
#include <barrelfish_kpi/asm_inlines_arch.h>
#endif
#include <arch/x86/start_aps.h>
#include <target/x86_64/offsets_target.h>
#include <target/x86_32/offsets_target.h>
#include <if/acpi_defs.h>
#include <if/monitor_blocking_defs.h>

#define MON_URPC_CHANNEL_LEN  (32 * UMP_MSG_BYTES)

/**
 * x86_64_start_ap and x86_64_start_ap_end (and their x86_32 counterparts)
 * mark the start and end of the assembly startup code to be copied.
 */
extern uint64_t x86_64_start_ap;
extern uint64_t x86_64_start_ap_end;
extern uint64_t x86_64_init_ap_absolute_entry;
extern uint64_t x86_64_init_ap_wait;
extern uint64_t x86_64_init_ap_lock;
extern uint64_t x86_64_start;
extern uint64_t x86_64_init_ap_global;

extern uint64_t x86_32_start_ap;
extern uint64_t x86_32_start_ap_end;
extern uint64_t x86_32_init_ap_absolute_entry;
extern uint64_t x86_32_init_ap_wait;
extern uint64_t x86_32_init_ap_lock;
extern uint64_t x86_32_start;
extern uint64_t x86_32_init_ap_global;


volatile uint64_t *ap_dispatch;
extern coreid_t my_arch_id;
extern struct capref ipi_cap;

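/**
 * \brief Look up the hardware (APIC) ID and CPU type for a logical core ID.
 *
 * On the Xeon Phi (k1om) the APIC ID is derived directly from the core ID;
 * on other x86 platforms it is read from the octopus "hw.processor" record.
 */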
errval_t get_core_info(coreid_t core_id, hwid_t* apic_id, enum cpu_type* cpu_type)
{
#if  defined(__k1om__)
    size_t step = 4;
    assert(step == 1 || step == 2 || step == 4);

    *apic_id = (core_id * step);
    if (*apic_id == my_arch_id) {
        *apic_id += step;
    }
    *cpu_type = CPU_K1OM;
    return SYS_ERR_OK;
#else
    char* record = NULL;
    errval_t err = oct_get(&record, "hw.processor.%"PRIuCOREID"", core_id);
    if (err_is_fail(err)) {
        goto out;
    }

    uint64_t enabled, type;
    err = oct_read(record, "_ { apic_id: %d, enabled: %d, type: %d}",
                   apic_id, &enabled, &type);
    if (err_is_fail(err)) {
        goto out;
    }
    assert (enabled);

    *cpu_type = (enum cpu_type) type;
out:
    return err;
#endif
}


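/**
 * \brief Return the base page size and the default monitor/CPU driver binary
 *        paths for the given CPU type, honouring the cmd_kernel_binary and
 *        cmd_monitor_binary command-line overrides.
 */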
errval_t get_architecture_config(enum cpu_type type,
                                 genpaddr_t *arch_page_size,
                                 const char **monitor_binary,
                                 const char **cpu_binary)
{
    extern char* cmd_kernel_binary;
    extern char* cmd_monitor_binary;

    switch (type) {
    case CPU_X86_64:
    {
        *arch_page_size = X86_64_BASE_PAGE_SIZE;
        *monitor_binary = (cmd_monitor_binary == NULL) ?
                        "/" BF_BINARY_PREFIX "x86_64/sbin/monitor" :
                        get_binary_path("/" BF_BINARY_PREFIX "x86_64/sbin/%s",
                                        cmd_monitor_binary);
        *cpu_binary = (cmd_kernel_binary == NULL) ?
                        "/" BF_BINARY_PREFIX "x86_64/sbin/cpu" :
                        get_binary_path("/" BF_BINARY_PREFIX "x86_64/sbin/%s",
                                        cmd_kernel_binary);
    }
    break;

    case CPU_X86_32:
    {
        *arch_page_size = X86_32_BASE_PAGE_SIZE;
        *monitor_binary = (cmd_monitor_binary == NULL) ?
                        "/" BF_BINARY_PREFIX "x86_32/sbin/monitor" :
                        get_binary_path("/" BF_BINARY_PREFIX "x86_32/sbin/%s",
                                        cmd_monitor_binary);
        *cpu_binary = (cmd_kernel_binary == NULL) ?
                        "/" BF_BINARY_PREFIX "x86_32/sbin/cpu" :
                        get_binary_path("/" BF_BINARY_PREFIX "x86_32/sbin/%s",
                                        cmd_kernel_binary);
    }
    break;

    case CPU_K1OM:
    {
        *arch_page_size = X86_64_BASE_PAGE_SIZE;
        *monitor_binary = (cmd_monitor_binary == NULL) ?
                        "/" BF_BINARY_PREFIX "k1om/sbin/monitor" :
                        get_binary_path("/" BF_BINARY_PREFIX "k1om/sbin/%s",
                                        cmd_monitor_binary);
        *cpu_binary = (cmd_kernel_binary == NULL) ?
                        "/" BF_BINARY_PREFIX "k1om/sbin/cpu" :
                        get_binary_path("/" BF_BINARY_PREFIX "k1om/sbin/%s",
                                        cmd_kernel_binary);
    }
    break;

    default:
        return SPAWN_ERR_UNKNOWN_TARGET_ARCH;
    }

    return SYS_ERR_OK;
}

/**
 * \brief Boot an app core of x86_64 type
 *
 * The processors are started by a sequence of INIT and STARTUP IPIs
 * sent by this function.
 * CMOS writes to the shutdown status byte are used to execute
 * different memory locations.
 *
 * \param core_id   APIC ID of the core to try booting
 * \param entry     Entry address for the new kernel in the destination
 *                  architecture's lvaddr_t, given as a genvaddr_t
 *
 * \returns Zero on successful boot, non-zero (error code) on failure
 */
int start_aps_x86_64_start(uint8_t core_id, genvaddr_t entry)
{
    DEBUG("%s:%d: start_aps_x86_64_start\n", __FILE__, __LINE__);

    errval_t err;

    // Copy the startup code to the real-mode address
    uint8_t *real_src = (uint8_t *) &x86_64_start_ap;
    uint8_t *real_end = (uint8_t *) &x86_64_start_ap_end;

    struct capref bootcap;

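    // On the Xeon Phi (k1om) the low-memory trampoline frame is handed to us
    // via the argument CNode; on other x86 platforms we ask ACPI to reserve
    // the real-mode page for us.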
#ifdef __k1om__
    struct capref realmodecap;

    realmodecap.cnode.croot = CPTR_ROOTCN;
    realmodecap.cnode.cnode = ROOTCN_SLOT_ADDR(ROOTCN_SLOT_ARGCN);
    realmodecap.cnode.level = CNODE_TYPE_OTHER;
    realmodecap.slot        = 0;
    err = slot_alloc(&bootcap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "Allocating a new slot");
    }

    err = cap_copy(bootcap, realmodecap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "Copying capability");
    }


#else
    struct acpi_binding* acl = get_acpi_binding();
    err = slot_alloc(&bootcap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "slot_alloc for mm_realloc_range_proxy");
    }
    errval_t error_code;
    err = acl->rpc_tx_vtbl.mm_realloc_range_proxy(acl, 16, 0x0,
                                           &bootcap, &error_code);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "mm_realloc_range_proxy failed.");
    }
    if (err_is_fail(error_code)) {
        USER_PANIC_ERR(error_code, "mm_realloc_range_proxy return failed.");
    }
#endif

    void* real_base;
    err = vspace_map_one_frame(&real_base, 1<<16, bootcap, NULL, NULL);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "vspace_map_one_frame for real-mode page failed");
    }
    uint8_t* real_dest = (uint8_t*)real_base + X86_64_REAL_MODE_LINEAR_OFFSET;

    memcpy(real_dest, real_src, real_end - real_src);

    /* Pointer to the entry point called from init_ap.S */
    volatile uint64_t *absolute_entry_ptr = (volatile uint64_t *)
                                            ((
                                             (lpaddr_t) &x86_64_init_ap_absolute_entry -
                                             (lpaddr_t) &x86_64_start_ap
                                            )
                                            +
                                            real_dest);
    //copy the address of the function start (in boot.S) to the long-mode
    //assembler code to be able to perform an absolute jump
    *absolute_entry_ptr = entry;

    // pointer to the shared global variable amongst all kernels
    volatile uint64_t *ap_global = (volatile uint64_t *)
                                   ((
                                    (lpaddr_t) &x86_64_init_ap_global -
                                    (lpaddr_t) &x86_64_start_ap
                                   )
                                   + real_dest);


    genpaddr_t global;
    struct monitor_blocking_binding *mc =
        get_monitor_blocking_binding();
    err = mc->rpc_tx_vtbl.get_global_paddr(mc, &global);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke spawn core");
        return err_push(err, MON_ERR_SPAWN_CORE);
    }
    *ap_global = (uint64_t)(genpaddr_t)global;

    // pointer to the pseudo-lock used to detect boot up of new core
    volatile uint32_t *ap_wait = (volatile uint32_t *)
                                         ((lpaddr_t) &x86_64_init_ap_wait -
                                         ((lpaddr_t) &x86_64_start_ap) +
                                         real_dest);

    // Pointer to the lock variable in the realmode code
    volatile uint8_t *ap_lock = (volatile uint8_t *)
                                        ((lpaddr_t) &x86_64_init_ap_lock -
                                        ((lpaddr_t) &x86_64_start_ap) +
                                        real_dest);

    *ap_wait = AP_STARTING_UP;

#if  defined(__k1om__)
    delay_ms(10);
#endif
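    // Wake the AP with the standard INIT/SIPI sequence: the INIT IPI resets
    // it into wait-for-SIPI state, then the STARTUP IPI makes it execute the
    // real-mode trampoline copied above.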
    err = invoke_send_init_ipi(ipi_cap, core_id);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke send init ipi");
        return err;
    }

#if  defined(__k1om__)
    delay_ms(200);
#endif

    // the x86 protocol actually would like us to do this twice
    err = invoke_send_start_ipi(ipi_cap, core_id, entry);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke sipi");
        return err;
    }

    // Give the new core a bit of time to start up and set the lock
    for (uint64_t i = 0; i < STARTUP_TIMEOUT; i++) {
        if (*ap_lock != 0) {
            break;
        }
    }

    // If the lock is set, the core has been started; otherwise assume that
    // a core with this APIC ID doesn't exist.
    if (*ap_lock != 0) {
        while (*ap_wait != AP_STARTED);
        trace_event(TRACE_SUBSYS_KERNEL, TRACE_EVENT_KERNEL_CORE_START_REQUEST_ACK, core_id);
        *ap_lock = 0;
        return 0;
    }

    assert(!"badness");
    return -1;
}

#ifndef __k1om__
int start_aps_x86_32_start(uint8_t core_id, genvaddr_t entry)
{
    DEBUG("%s:%d: start_aps_x86_32_start\n", __FILE__, __LINE__);

    // Copy the startup code to the real-mode address
    uint8_t *real_src = (uint8_t *) &x86_32_start_ap;
    uint8_t *real_end = (uint8_t *) &x86_32_start_ap_end;

    struct capref bootcap;
    struct acpi_binding* acl = get_acpi_binding();
    errval_t err, error_code;

    err = slot_alloc(&bootcap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "slot_alloc for mm_realloc_range_proxy");
    }
    err = acl->rpc_tx_vtbl.mm_realloc_range_proxy(acl, 16, 0x0,
                                                    &bootcap, &error_code);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "mm_realloc_range_proxy failed.");
    }
    if (err_is_fail(error_code)) {
        USER_PANIC_ERR(error_code, "mm_realloc_range_proxy return failed.");
    }

    void* real_base;
    err = vspace_map_one_frame(&real_base, 1<<16, bootcap, NULL, NULL);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "vspace_map_one_frame for real-mode page failed");
    }
    uint8_t* real_dest = (uint8_t*)real_base + X86_32_REAL_MODE_LINEAR_OFFSET;

    memcpy(real_dest, real_src, real_end - real_src);

    /* Pointer to the entry point called from init_ap.S */
    volatile uint64_t *absolute_entry_ptr = (volatile uint64_t *)
                                            ((
                                             (lpaddr_t) &x86_32_init_ap_absolute_entry -
                                             (lpaddr_t) &x86_32_start_ap
                                            )
                                            +
                                            real_dest);
    //copy the address of the function start (in boot.S) to the protected-mode
    //assembler code to be able to perform an absolute jump
    *absolute_entry_ptr = entry;

    // pointer to the shared global variable amongst all kernels
    volatile uint64_t *ap_global = (volatile uint64_t *)
                                   ((
                                    (lpaddr_t) &x86_32_init_ap_global -
                                    (lpaddr_t) &x86_32_start_ap
                                   )
                                   + real_dest);


    genpaddr_t global;
    struct monitor_blocking_binding *mc =
        get_monitor_blocking_binding();
    err = mc->rpc_tx_vtbl.get_global_paddr(mc, &global);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke spawn core");
        return err_push(err, MON_ERR_SPAWN_CORE);
    }
    *ap_global = (uint64_t)(genpaddr_t)global;

    // pointer to the pseudo-lock used to detect boot up of new core
    volatile uint32_t *ap_wait = (volatile uint32_t *)
                                         ((lpaddr_t) &x86_32_init_ap_wait -
                                         ((lpaddr_t) &x86_32_start_ap) +
                                         real_dest);

    // Pointer to the lock variable in the realmode code
    volatile uint8_t *ap_lock = (volatile uint8_t *)
                                        ((lpaddr_t) &x86_32_init_ap_lock -
                                        ((lpaddr_t) &x86_32_start_ap) +
                                        real_dest);

    *ap_wait = AP_STARTING_UP;

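    // Same INIT/SIPI sequence as in the x86_64 path above.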
    err = invoke_send_init_ipi(ipi_cap, core_id);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke send init ipi");
        return err;
    }

    err = invoke_send_start_ipi(ipi_cap, core_id, entry);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "invoke sipi");
        return err;
    }

    // give the new core a bit of time to start up and set the lock
    for (uint64_t i = 0; i < STARTUP_TIMEOUT; i++) {
        if (*ap_lock != 0) {
            break;
        }
    }

    // If the lock is set, the core has been started; otherwise assume that
    // a core with this APIC ID doesn't exist.
    if (*ap_lock != 0) {
        while (*ap_wait != AP_STARTED);
        trace_event(TRACE_SUBSYS_KERNEL, TRACE_EVENT_KERNEL_CORE_START_REQUEST_ACK, core_id);
        *ap_lock = 0;
        return 0;
    }

    assert(!"badness");
    return -1;
}
#endif

/**
 * Allocates memory for the kernel binary.
 *
 * For x86, the app kernel can only be loaded in the first 4 GiB of
 * memory, and it must not cross a 1 GiB boundary, i.e. it has to fit
 * entirely within one of the ranges 0-1, 1-2, 2-3, or 3-4 GiB.
 *
 * Probably because we identity map this region during the boot phase,
 * so we can't access anything higher. Not sure about the boundary
 * restriction, though.
 */
static errval_t allocate_kernel_memory(lvaddr_t cpu_binary, genpaddr_t page_size,
                                       struct capref* cpu_memory_cap, size_t* cpu_memory,
                                       struct frame_identity* id)
{
    errval_t err;
    *cpu_memory = elf_virtual_size(cpu_binary) + page_size;

    uint64_t old_minbase;
    uint64_t old_maxlimit;
    ram_get_affinity(&old_minbase, &old_maxlimit);
    DEBUG("%s:%d: \n", __FILE__, __LINE__);
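    // Try each 1 GiB window below 4 GiB in turn: restricting the RAM
    // allocation affinity to one window ensures the frame does not cross
    // a 1 GiB boundary.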
    for (uint64_t minbase = 0, maxlimit = (uint64_t)1 << 30;
            minbase < (uint64_t)4 << 30;
            minbase += (uint64_t)1 << 30, maxlimit += (uint64_t)1 << 30) {

        ram_set_affinity(minbase, maxlimit);
        err = frame_alloc_identify(cpu_memory_cap, *cpu_memory, cpu_memory, id);
        if (err_is_fail(err)) {
            continue;
        } else {
            goto done;
        }
    }

    USER_PANIC("No memory in the first 4GB, cannot continue booting cores");

done:
    ram_set_affinity(old_minbase, old_maxlimit);

    return SYS_ERR_OK;
}

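/**
 * Apply the dynamic relocations of the already-loaded CPU driver image so
 * that it can run from its final physical location (frameid.base plus one
 * page reserved for the core_data struct).
 */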
static errval_t relocate_cpu_binary(lvaddr_t cpu_binary,
                                    struct Elf64_Ehdr *cpu_head,
                                    struct elf_allocate_state state,
                                    struct frame_identity frameid,
                                    genpaddr_t arch_page_size)
{
    switch (cpu_head->e_machine) {
    case EM_X86_64:
    case EM_K1OM: {
        struct Elf64_Shdr *rela, *symtab, *symhead =
            (struct Elf64_Shdr *)(cpu_binary + (uintptr_t)cpu_head->e_shoff);

        assert(cpu_head->e_shoff != 0);
        rela = elf64_find_section_header_type(symhead, cpu_head->e_shnum, SHT_RELA);
        assert(rela != NULL);
        symtab = elf64_find_section_header_type(symhead, cpu_head->e_shnum, SHT_DYNSYM);
        assert(symtab != NULL);
        elf64_relocate(frameid.base + arch_page_size, state.elfbase,
                       (struct Elf64_Rela *)(uintptr_t)(cpu_binary + rela->sh_offset),
                       rela->sh_size,
                       (struct Elf64_Sym *)(uintptr_t)(cpu_binary + symtab->sh_offset),
                       symtab->sh_size,
                       state.elfbase, state.vbase);
        break;
    }
    case EM_386: {
        struct Elf32_Ehdr *head32 = (struct Elf32_Ehdr *)cpu_binary;

        struct Elf32_Shdr *rel, *symtab, *symhead =
            (struct Elf32_Shdr *)(cpu_binary + (uintptr_t)head32->e_shoff);

        rel = elf32_find_section_header_type(symhead, head32->e_shnum, SHT_REL);
        assert(rel != NULL);
        symtab = elf32_find_section_header_type(symhead, head32->e_shnum,
                                                SHT_DYNSYM);
        assert(symtab != NULL);
        elf32_relocate(frameid.base + arch_page_size, state.elfbase,
                       (struct Elf32_Rel *)(uintptr_t)(cpu_binary + rel->sh_offset),
                       rel->sh_size,
                       (struct Elf32_Sym *)(uintptr_t)(cpu_binary + symtab->sh_offset),
                       symtab->sh_size,
                       state.elfbase, state.vbase);
        break;
    }
    default:
        return SPAWN_ERR_UNKNOWN_TARGET_ARCH;
    }

    return SYS_ERR_OK;
}

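/**
 * \brief Spawn a monitor and CPU driver on another core.
 *
 * Loads and relocates the CPU driver, fills in its x86_core_data page
 * (the first page of the kernel frame), and finally boots the core via
 * the INIT/SIPI path above.
 */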
errval_t spawn_xcore_monitor(coreid_t coreid, hwid_t hwid,
                             enum cpu_type cpu_type,
                             const char *cmdline,
                             struct frame_identity urpc_frame_id,
                             struct capref kcb)
{
    uint64_t start = 0;
    const char *monitorname = NULL, *cpuname = NULL;
    genpaddr_t arch_page_size;
    errval_t err;

    err = get_architecture_config(cpu_type, &arch_page_size,
                                  &monitorname, &cpuname);
    assert(err_is_ok(err));

    DEBUG("loading kernel: %s\n", cpuname);
    DEBUG("loading 1st app: %s\n", monitorname);

    // compute size of frame needed and allocate it
    DEBUG("%s:%s:%d: urpc_frame_id.base=%"PRIxGENPADDR"\n",
           __FILE__, __FUNCTION__, __LINE__, urpc_frame_id.base);
    DEBUG("%s:%s:%d: urpc_frame_id.size=0x%" PRIuGENSIZE "\n",
           __FILE__, __FUNCTION__, __LINE__, urpc_frame_id.bytes);

    if (benchmark_flag) {
        start = bench_tsc();
    }
    static size_t cpu_binary_size;
    static lvaddr_t cpu_binary = 0;
    static genpaddr_t cpu_binary_phys;
    static const char* cached_cpuname = NULL;
    if (cpu_binary == 0) {
        cached_cpuname = cpuname;
        // XXX: Caching these for now, until we have unmap
        err = lookup_module(cpuname, &cpu_binary, &cpu_binary_phys,
                            &cpu_binary_size);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "Can not lookup module");
            return err;
        }
    }
    // Ensure caching actually works and we're always loading the same
    // binary. If this starts to fail, get rid of caching.
    assert (strcmp(cached_cpuname, cpuname) == 0);

    static size_t monitor_binary_size;
    static lvaddr_t monitor_binary = 0;
    static genpaddr_t monitor_binary_phys;
    static const char* cached_monitorname = NULL;
    if (monitor_binary == 0) {
        cached_monitorname = monitorname;
        // XXX: Caching these for now, until we have unmap
        err = lookup_module(monitorname, &monitor_binary,
                            &monitor_binary_phys, &monitor_binary_size);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "Can not lookup module");
            return err;
        }
    }
    // Again, ensure caching actually worked (see above)
    assert (strcmp(cached_monitorname, monitorname) == 0);

    if (benchmark_flag) {
        bench_data->load = bench_tsc() - start;
        start = bench_tsc();
    }

    struct capref cpu_memory_cap;
    struct frame_identity frameid;
    size_t cpu_memory;
    err = allocate_kernel_memory(cpu_binary, arch_page_size,
                                 &cpu_memory_cap, &cpu_memory, &frameid);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not allocate space for new app kernel.");
        return err;
    }

    err = cap_mark_remote(cpu_memory_cap);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not mark cap remote.");
        return err;
    }

    void *cpu_buf_memory;
    err = vspace_map_one_frame(&cpu_buf_memory, cpu_memory, cpu_memory_cap,
                               NULL, NULL);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VSPACE_MAP);
    }
    if (benchmark_flag) {
        bench_data->alloc_cpu = bench_tsc() - start;
        start = bench_tsc();
    }

    /* Chunk of memory to load monitor on the app core */
    struct capref spawn_memory_cap;
    struct frame_identity spawn_memory_identity;

    err = frame_alloc_identify(&spawn_memory_cap,
                               X86_CORE_DATA_PAGES * arch_page_size,
                               NULL, &spawn_memory_identity);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_FRAME_ALLOC);
    }

    err = cap_mark_remote(spawn_memory_cap);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not mark cap remote.");
        return err;
    }
    if (benchmark_flag) {
        bench_data->alloc_mon = bench_tsc() - start;
        start = bench_tsc();
    }

    /* Load cpu */
    struct elf_allocate_state state;
    state.vbase = (char *)cpu_buf_memory + arch_page_size;
    assert(sizeof(struct x86_core_data) <= arch_page_size);
    state.elfbase = elf_virtual_base(cpu_binary);

    struct Elf64_Ehdr *cpu_head = (struct Elf64_Ehdr *)cpu_binary;
    genvaddr_t cpu_entry;

    err = elf_load(cpu_head->e_machine, elfload_allocate, &state,
                   cpu_binary, cpu_binary_size, &cpu_entry);
    if (err_is_fail(err)) {
        return err;
    }
    if (benchmark_flag) {
        bench_data->elf_load = bench_tsc() - start;
        start = bench_tsc();
    }

    err = relocate_cpu_binary(cpu_binary, cpu_head, state, frameid, arch_page_size);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "Can not relocate new kernel.");
        return err;
    }
    if (benchmark_flag) {
        bench_data->elf_reloc = bench_tsc() - start;
    }

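    // The CPU driver is linked at state.elfbase but will execute from
    // frameid.base + arch_page_size (the first page of the frame holds the
    // core_data struct), so shift the ELF entry point accordingly.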
    genvaddr_t cpu_reloc_entry = cpu_entry - state.elfbase
                                 + frameid.base + arch_page_size;
    /* Compute entry point in the foreign address space */
    forvaddr_t foreign_cpu_reloc_entry = (forvaddr_t)cpu_reloc_entry;

    /* Setup the core_data struct in the new kernel */
    struct x86_core_data *core_data = (struct x86_core_data *)cpu_buf_memory;
    switch (cpu_head->e_machine) {
    case EM_X86_64:
    case EM_K1OM:
        core_data->elf.size = sizeof(struct Elf64_Shdr);
        core_data->elf.addr = cpu_binary_phys + (uintptr_t)cpu_head->e_shoff;
        core_data->elf.num  = cpu_head->e_shnum;
        break;
    case EM_386:
        core_data->elf.size = sizeof(struct Elf32_Shdr);
        struct Elf32_Ehdr *head32 = (struct Elf32_Ehdr *)cpu_binary;
        core_data->elf.addr = cpu_binary_phys + (uintptr_t)head32->e_shoff;
        core_data->elf.num  = head32->e_shnum;
        break;
    default:
        return SPAWN_ERR_UNKNOWN_TARGET_ARCH;
    }
    core_data->module_start = cpu_binary_phys;
    core_data->module_end   = cpu_binary_phys + cpu_binary_size;
    core_data->urpc_frame_base = urpc_frame_id.base;
    assert((1UL << log2ceil(urpc_frame_id.bytes)) == urpc_frame_id.bytes);
    core_data->urpc_frame_bits = log2ceil(urpc_frame_id.bytes);
    core_data->monitor_binary   = monitor_binary_phys;
    core_data->monitor_binary_size = monitor_binary_size;
    core_data->memory_base_start = spawn_memory_identity.base;
    assert((1UL << log2ceil(spawn_memory_identity.bytes)) == spawn_memory_identity.bytes);
    core_data->memory_bits       = log2ceil(spawn_memory_identity.bytes);
    core_data->src_core_id       = disp_get_core_id();
    core_data->src_arch_id       = my_arch_id;
    core_data->dst_core_id       = coreid;


    struct frame_identity fid;
    err = invoke_kcb_identify(kcb, &fid);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "Invoke frame identity for KCB failed. "
                            "Did you add the syscall handler for that architecture?");
    }
    DEBUG("%s:%s:%d: fid.base is 0x%"PRIxGENPADDR"\n",
           __FILE__, __FUNCTION__, __LINE__, fid.base);
    core_data->kcb = (genpaddr_t) fid.base;
#ifdef CONFIG_FLOUNDER_BACKEND_UMP_IPI
    core_data->chan_id           = chanid;
#endif

    if (cmdline != NULL) {
        // copy as much of command line as will fit
        snprintf(core_data->kernel_cmdline, sizeof(core_data->kernel_cmdline),
                "%s %s", cpuname, cmdline);
        // ensure termination
        core_data->kernel_cmdline[sizeof(core_data->kernel_cmdline) - 1] = '\0';

        DEBUG("%s:%s:%d: %s\n", __FILE__, __FUNCTION__, __LINE__, core_data->kernel_cmdline);
    }

    /* Invoke kernel capability to boot new core */
    if (cpu_type == CPU_X86_64 || cpu_type == CPU_K1OM) {
        start_aps_x86_64_start(hwid, foreign_cpu_reloc_entry);
    }

#ifndef __k1om__
    else if (cpu_type == CPU_X86_32) {
        start_aps_x86_32_start(hwid, foreign_cpu_reloc_entry);
    }
#endif

    /* Clean up */
    // XXX: Should not delete the remote caps?
    err = cap_destroy(spawn_memory_cap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cap_destroy failed");
    }
    err = vspace_unmap(cpu_buf_memory);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "vspace unmap CPU driver memory failed");
    }
    err = cap_destroy(cpu_memory_cap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cap_destroy failed");
    }

    return SYS_ERR_OK;
}