1177867Sjfv// SPDX-License-Identifier: GPL-2.0+ 2169240Sjfv/* 3190872Sjfv * Copyright (C) 2015 Google, Inc 4169240Sjfv * 5169240Sjfv * Based on code from the coreboot file of the same name 6169240Sjfv */ 7169240Sjfv 8169240Sjfv#include <common.h> 9169240Sjfv#include <cpu.h> 10169240Sjfv#include <dm.h> 11169240Sjfv#include <errno.h> 12169240Sjfv#include <log.h> 13169240Sjfv#include <malloc.h> 14169240Sjfv#include <qfw.h> 15169240Sjfv#include <asm/atomic.h> 16169240Sjfv#include <asm/cpu.h> 17169240Sjfv#include <asm/global_data.h> 18169240Sjfv#include <asm/interrupt.h> 19169240Sjfv#include <asm/io.h> 20169240Sjfv#include <asm/lapic.h> 21169240Sjfv#include <asm/microcode.h> 22169240Sjfv#include <asm/mp.h> 23169240Sjfv#include <asm/msr.h> 24169240Sjfv#include <asm/mtrr.h> 25169240Sjfv#include <asm/processor.h> 26169240Sjfv#include <asm/sipi.h> 27169240Sjfv#include <dm/device-internal.h> 28169240Sjfv#include <dm/uclass-internal.h> 29169240Sjfv#include <dm/lists.h> 30169240Sjfv#include <dm/root.h> 31169240Sjfv#include <linux/delay.h> 32177867Sjfv#include <linux/linkage.h> 33177867Sjfv 34169240SjfvDECLARE_GLOBAL_DATA_PTR; 35185353Sjfv 36185353Sjfv/* 37185353Sjfv * Setting up multiprocessing 38185353Sjfv * 39185353Sjfv * See https://www.intel.com/content/www/us/en/intelligent-systems/intel-boot-loader-development-kit/minimal-intel-architecture-boot-loader-paper.html 40185353Sjfv * 41185353Sjfv * Note that this file refers to the boot CPU (the one U-Boot is running on) as 42185353Sjfv * the BSP (BootStrap Processor) and the others as APs (Application Processors). 43185353Sjfv * 44185353Sjfv * This module works by loading some setup code into RAM at AP_DEFAULT_BASE and 45185353Sjfv * telling each AP to execute it. The code that each AP runs is in 46185353Sjfv * sipi_vector.S (see ap_start16) which includes a struct sipi_params at the 47185353Sjfv * end of it. Those parameters are set up by the C code. 48185353Sjfv 49190872Sjfv * Setting up is handled by load_sipi_vector(). 
 * It inits the common block of
 * parameters (sipi_params) which tell the APs what to do. This block includes
 * microcode and the MTRRs (Memory-Type-Range Registers) from the main CPU.
 * There is also an ap_count which each AP increments as it starts up, so the
 * BSP can tell how many checked in.
 *
 * The APs are started with a SIPI (Startup Inter-Processor Interrupt) which
 * tells an AP to start executing at a particular address, in this case
 * AP_DEFAULT_BASE which contains the code copied from ap_start16. This protocol
 * is handled by start_aps().
 *
 * After being started, each AP runs the code in ap_start16, switches to 32-bit
 * mode, runs the code at ap_start, then jumps to c_handler which is ap_init().
 * This runs a very simple 'flight plan' described in mp_steps[]. This sets up
 * the CPU and waits for further instructions by looking at its entry in
 * ap_callbacks[]. Note that the flight plan is only actually run for each CPU
 * in bsp_do_flight_plan(): once the BSP completes each flight record, it sets
 * mp_flight_record->barrier to 1 to allow the APs to execute the record one
 * by one.
 *
 * CPUs are numbered sequentially from 0 using the device tree:
 *
 *	cpus {
 *		bootph-all;
 *		#address-cells = <1>;
 *		#size-cells = <0>;
 *
 *		cpu@0 {
 *			bootph-all;
 *			device_type = "cpu";
 *			compatible = "intel,apl-cpu";
 *			reg = <0>;
 *			intel,apic-id = <0>;
 *		};
 *
 *		cpu@1 {
 *			device_type = "cpu";
 *			compatible = "intel,apl-cpu";
 *			reg = <1>;
 *			intel,apic-id = <2>;
 *		};
 *	};
 *
 * Here the 'reg' property is the CPU number, which is then placed in
 * dev_seq(cpu) so that we can index into ap_callbacks[] using that. The APIC ID
 * is different and may not be sequential (it typically is if hyperthreading is
 * supported).
 *
 * Once APs are inited they wait in ap_wait_for_instruction() for instructions.
 * Instructions come in the form of a function to run. This logic is in
 * mp_run_on_cpus() which supports running on any one AP, all APs, just the BSP
 * or all CPUs. The BSP logic is handled directly in mp_run_on_cpus(), by
 * calling the function. For the APs, callback information is stored in a
 * single, common struct mp_callback and a pointer to this is written to each
 * AP's slot in ap_callbacks[] by run_ap_work(). All APs get the message even
 * if it is only for one of them. When an AP notices a message it checks whether
 * it should call the function (see check in ap_wait_for_instruction()) and then
 * does so if needed. After that it sets its slot to NULL to indicate it is
 * done.
 *
 * While U-Boot is running it can use mp_run_on_cpus() to run code on the APs.
 * An example of this is the 'mtrr' command which allows reading and changing
 * the MTRRs on all CPUs.
 *
 * Before U-Boot exits it calls mp_park_aps() which tells all APs to halt by
 * executing a 'hlt' instruction. That allows them to be used by Linux when it
 * starts up.
 */

/*
 * One saved MSR: its index plus the low and high 32-bit halves of its value.
 *
 * This also needs to match the sipi.S assembly code for saved MSR encoding
 */
struct __packed saved_msr {
	uint32_t index;
	uint32_t lo;
	uint32_t hi;
};

/**
 * struct mp_flight_plan - Holds the flight plan
 *
 * @num_records: Number of flight records
 * @records: Pointer to the first record of an array of @num_records records
 */
struct mp_flight_plan {
	int num_records;
	struct mp_flight_record *records;
};

/**
 * struct mp_callback - Callback information for APs
 *
 * @func: Function to run
 * @arg: Argument to pass to the function
 * @logical_cpu_number: Either a CPU number (i.e. dev_seq(cpu)) or a special
 *	value like MP_SELECT_BSP. It tells the AP whether it should process this
 *	callback
 */
struct mp_callback {
	mp_run_func func;
	void *arg;
	int logical_cpu_number;
};

/* Stores the flight plan so that APs can find it */
static struct mp_flight_plan mp_info;

/*
 * ap_callbacks - Callback mailbox array
 *
 * Array of callback, one entry for each available CPU, indexed by the CPU
 * number, which is dev_seq(cpu). The entry for the main CPU is never used.
157178523Sjfv * When this is NULL, there is no pending work for the CPU to run. When 158178523Sjfv * non-NULL it points to the mp_callback structure. This is shared between all 159178523Sjfv * CPUs, so should only be written by the main CPU. 160178523Sjfv */ 161178523Sjfvstatic struct mp_callback **ap_callbacks; 162178523Sjfv 163178523Sjfvstatic inline void barrier_wait(atomic_t *b) 164178523Sjfv{ 165178523Sjfv while (atomic_read(b) == 0) 166178523Sjfv asm("pause"); 167178523Sjfv mfence(); 168178523Sjfv} 169178523Sjfv 170185353Sjfvstatic inline void release_barrier(atomic_t *b) 171178523Sjfv{ 172178523Sjfv mfence(); 173178523Sjfv atomic_set(b, 1); 174178523Sjfv} 175178523Sjfv 176169240Sjfvstatic inline void stop_this_cpu(void) 177169240Sjfv{ 178169240Sjfv /* Called by an AP when it is ready to halt and wait for a new task */ 179169240Sjfv for (;;) 180169240Sjfv cpu_hlt(); 181169240Sjfv} 182169240Sjfv 183169240Sjfv/* Returns 1 if timeout waiting for APs. 0 if target APs found */ 184169240Sjfvstatic int wait_for_aps(atomic_t *val, int target, int total_delay, 185169240Sjfv int delay_step) 186169240Sjfv{ 187169240Sjfv int timeout = 0; 188169589Sjfv int delayed = 0; 189169240Sjfv 190177867Sjfv while (atomic_read(val) != target) { 191169240Sjfv udelay(delay_step); 192169240Sjfv delayed += delay_step; 193169240Sjfv if (delayed >= total_delay) { 194169240Sjfv timeout = 1; 195169240Sjfv break; 196169240Sjfv } 197169240Sjfv } 198185353Sjfv 199185353Sjfv return timeout; 200169240Sjfv} 201169240Sjfv 202185353Sjfvstatic void ap_do_flight_plan(struct udevice *cpu) 203169240Sjfv{ 204169240Sjfv int i; 205169240Sjfv 206185353Sjfv for (i = 0; i < mp_info.num_records; i++) { 207169240Sjfv struct mp_flight_record *rec = &mp_info.records[i]; 208169240Sjfv 209169240Sjfv atomic_inc(&rec->cpus_entered); 210185353Sjfv barrier_wait(&rec->barrier); 211169240Sjfv 212169240Sjfv if (rec->ap_call != NULL) 213169240Sjfv rec->ap_call(cpu, rec->ap_arg); 214169240Sjfv } 215169240Sjfv} 216169240Sjfv 
217178523Sjfvstatic int find_cpu_by_apic_id(int apic_id, struct udevice **devp) 218169240Sjfv{ 219169240Sjfv struct udevice *dev; 220169240Sjfv 221173788Sjfv *devp = NULL; 222173788Sjfv for (uclass_find_first_device(UCLASS_CPU, &dev); 223169240Sjfv dev; 224169240Sjfv uclass_find_next_device(&dev)) { 225169240Sjfv struct cpu_plat *plat = dev_get_parent_plat(dev); 226169240Sjfv 227169240Sjfv if (plat->cpu_id == apic_id) { 228169240Sjfv *devp = dev; 229169240Sjfv return 0; 230169240Sjfv } 231185353Sjfv } 232169240Sjfv 233169240Sjfv return -ENOENT; 234173788Sjfv} 235173788Sjfv 236169240Sjfv/* 237169240Sjfv * By the time APs call ap_init() caching has been setup, and microcode has 238169240Sjfv * been loaded 239173788Sjfv */ 240173788Sjfvstatic void ap_init(unsigned int cpu_index) 241173788Sjfv{ 242173788Sjfv struct udevice *dev; 243169240Sjfv int apic_id; 244169240Sjfv int ret; 245169240Sjfv 246169240Sjfv /* Ensure the local apic is enabled */ 247169240Sjfv enable_lapic(); 248177867Sjfv 249177867Sjfv apic_id = lapicid(); 250177867Sjfv ret = find_cpu_by_apic_id(apic_id, &dev); 251177867Sjfv if (ret) { 252177867Sjfv debug("Unknown CPU apic_id %x\n", apic_id); 253177867Sjfv goto done; 254177867Sjfv } 255169240Sjfv 256169240Sjfv debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id, 257169240Sjfv dev ? 
dev->name : "(apic_id not found)"); 258169240Sjfv 259169240Sjfv /* 260169240Sjfv * Walk the flight plan, which only returns if CONFIG_SMP_AP_WORK is not 261169589Sjfv * enabled 262169240Sjfv */ 263177867Sjfv ap_do_flight_plan(dev); 264169240Sjfv 265169240Sjfvdone: 266169240Sjfv stop_this_cpu(); 267169240Sjfv} 268169240Sjfv 269169240Sjfvstatic const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = { 270169240Sjfv MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR, 271169240Sjfv MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR, 272169240Sjfv MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR, 273169240Sjfv MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR, 274169240Sjfv}; 275173788Sjfv 276169240Sjfvstatic inline struct saved_msr *save_msr(int index, struct saved_msr *entry) 277169240Sjfv{ 278169589Sjfv msr_t msr; 279169589Sjfv 280169240Sjfv msr = msr_read(index); 281173788Sjfv entry->index = index; 282169240Sjfv entry->lo = msr.lo; 283169240Sjfv entry->hi = msr.hi; 284173788Sjfv 285169240Sjfv /* Return the next entry */ 286169240Sjfv entry++; 287169240Sjfv return entry; 288169240Sjfv} 289169240Sjfv 290169240Sjfvstatic int save_bsp_msrs(char *start, int size) 291169240Sjfv{ 292169240Sjfv int msr_count; 293169240Sjfv int num_var_mtrrs; 294169240Sjfv struct saved_msr *msr_entry; 295169240Sjfv int i; 296169240Sjfv msr_t msr; 297169240Sjfv 298169240Sjfv /* Determine number of MTRRs need to be saved */ 299169240Sjfv msr = msr_read(MTRR_CAP_MSR); 300169240Sjfv num_var_mtrrs = msr.lo & 0xff; 301169240Sjfv 302177867Sjfv /* 2 * num_var_mtrrs for base and mask. 
+1 for IA32_MTRR_DEF_TYPE */ 303185353Sjfv msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1; 304185353Sjfv 305185353Sjfv if ((msr_count * sizeof(struct saved_msr)) > size) { 306185353Sjfv printf("Cannot mirror all %d msrs\n", msr_count); 307185353Sjfv return -ENOSPC; 308185353Sjfv } 309185353Sjfv 310185353Sjfv msr_entry = (void *)start; 311185353Sjfv for (i = 0; i < NUM_FIXED_MTRRS; i++) 312169240Sjfv msr_entry = save_msr(fixed_mtrrs[i], msr_entry); 313177867Sjfv 314169240Sjfv for (i = 0; i < num_var_mtrrs; i++) { 315177867Sjfv msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry); 316169240Sjfv msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry); 317177867Sjfv } 318169240Sjfv 319177867Sjfv msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry); 320173788Sjfv 321169240Sjfv return msr_count; 322169240Sjfv} 323169240Sjfv 324173788Sjfvstatic int load_sipi_vector(atomic_t **ap_countp, int num_cpus) 325169240Sjfv{ 326177867Sjfv struct sipi_params_16bit *params16; 327169240Sjfv struct sipi_params *params; 328169240Sjfv static char msr_save[512]; 329177867Sjfv char *stack; 330169240Sjfv ulong addr; 331169240Sjfv int code_len; 332190872Sjfv int size; 333169240Sjfv int ret; 334169240Sjfv 335169240Sjfv /* Copy in the code */ 336169240Sjfv code_len = ap_start16_code_end - ap_start16; 337169240Sjfv debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE, 338169240Sjfv code_len); 339169240Sjfv memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len); 340181027Sjfv 341181027Sjfv addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16; 342181027Sjfv params16 = (struct sipi_params_16bit *)addr; 343181027Sjfv params16->ap_start = (uint32_t)ap_start; 344181027Sjfv params16->gdt = (uint32_t)gd->arch.gdt; 345181027Sjfv params16->gdt_limit = X86_GDT_SIZE - 1; 346181027Sjfv debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit); 347181027Sjfv 348169240Sjfv params = (struct sipi_params *)sipi_params; 349190872Sjfv debug("SIPI 32-bit params at %p\n", 
params); 350169240Sjfv params->idt_ptr = (uint32_t)x86_get_idt(); 351177867Sjfv 352169240Sjfv params->stack_size = CONFIG_AP_STACK_SIZE; 353177867Sjfv size = params->stack_size * num_cpus; 354169240Sjfv stack = memalign(4096, size); 355177867Sjfv if (!stack) 356173788Sjfv return -ENOMEM; 357177867Sjfv params->stack_top = (u32)(stack + size); 358190872Sjfv#if !defined(CONFIG_QEMU) && !defined(CONFIG_HAVE_FSP) && \ 359190872Sjfv !defined(CONFIG_INTEL_MID) 360169240Sjfv params->microcode_ptr = ucode_base; 361177867Sjfv debug("Microcode at %x\n", params->microcode_ptr); 362169240Sjfv#endif 363177867Sjfv params->msr_table_ptr = (u32)msr_save; 364169240Sjfv ret = save_bsp_msrs(msr_save, sizeof(msr_save)); 365177867Sjfv if (ret < 0) 366169240Sjfv return ret; 367181027Sjfv params->msr_count = ret; 368181027Sjfv 369181027Sjfv params->c_handler = (uint32_t)&ap_init; 370181027Sjfv 371181027Sjfv *ap_countp = ¶ms->ap_count; 372181027Sjfv atomic_set(*ap_countp, 0); 373181027Sjfv debug("SIPI vector is ready\n"); 374181027Sjfv 375177867Sjfv return 0; 376169240Sjfv} 377177867Sjfv 378169240Sjfvstatic int check_cpu_devices(int expected_cpus) 379177867Sjfv{ 380173788Sjfv int i; 381169240Sjfv 382169240Sjfv for (i = 0; i < expected_cpus; i++) { 383169240Sjfv struct udevice *dev; 384169240Sjfv int ret; 385169240Sjfv 386169240Sjfv ret = uclass_find_device(UCLASS_CPU, i, &dev); 387169240Sjfv if (ret) { 388169240Sjfv debug("Cannot find CPU %d in device tree\n", i); 389169240Sjfv return ret; 390169589Sjfv } 391169240Sjfv } 392185353Sjfv 393169240Sjfv return 0; 394173788Sjfv} 395169240Sjfv 396169240Sjfv/* Returns 1 for timeout. 
0 on success */ 397169240Sjfvstatic int apic_wait_timeout(int total_delay, const char *msg) 398177867Sjfv{ 399177867Sjfv int total = 0; 400177867Sjfv 401169240Sjfv if (!(lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) 402169240Sjfv return 0; 403169240Sjfv 404169240Sjfv debug("Waiting for %s...", msg); 405169589Sjfv while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) { 406169240Sjfv udelay(50); 407169240Sjfv total += 50; 408169240Sjfv if (total >= total_delay) { 409169240Sjfv debug("timed out: aborting\n"); 410173788Sjfv return -ETIMEDOUT; 411169240Sjfv } 412169240Sjfv } 413169240Sjfv debug("done\n"); 414178523Sjfv 415169240Sjfv return 0; 416169240Sjfv} 417169240Sjfv 418169240Sjfv/** 419169240Sjfv * start_aps() - Start up the APs and count how many we find 420169240Sjfv * 421173788Sjfv * This is called on the boot processor to start up all the other processors 422173788Sjfv * (here called APs). 423169240Sjfv * 424169240Sjfv * @num_aps: Number of APs we expect to find 425173788Sjfv * @ap_count: Initially zero. Incremented by this function for each AP found 426173788Sjfv * Return: 0 if all APs were set up correctly or there are none to set up, 427169240Sjfv * -ENOSPC if the SIPI vector is too high in memory, 428169240Sjfv * -ETIMEDOUT if the ICR is busy or the second SIPI fails to complete 429169240Sjfv * -EIO if not all APs check in correctly 430169240Sjfv */ 431169240Sjfvstatic int start_aps(int num_aps, atomic_t *ap_count) 432178523Sjfv{ 433178523Sjfv int sipi_vector; 434178523Sjfv /* Max location is 4KiB below 1MiB */ 435178523Sjfv const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12; 436178523Sjfv 437178523Sjfv if (num_aps == 0) 438178523Sjfv return 0; 439178523Sjfv 440178523Sjfv /* The vector is sent as a 4k aligned address in one byte */ 441178523Sjfv sipi_vector = AP_DEFAULT_BASE >> 12; 442178523Sjfv 443178523Sjfv if (sipi_vector > max_vector_loc) { 444178523Sjfv printf("SIPI vector too large! 
0x%08x\n", 445178523Sjfv sipi_vector); 446169240Sjfv return -ENOSPC; 447169240Sjfv } 448169240Sjfv 449169240Sjfv debug("Attempting to start %d APs\n", num_aps); 450169240Sjfv 451178523Sjfv if (apic_wait_timeout(1000, "ICR not to be busy")) 452169240Sjfv return -ETIMEDOUT; 453169240Sjfv 454169240Sjfv /* Send INIT IPI to all but self */ 455169240Sjfv lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 456169589Sjfv lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 457169589Sjfv LAPIC_DM_INIT); 458169589Sjfv debug("Waiting for 10ms after sending INIT\n"); 459169589Sjfv mdelay(10); 460169589Sjfv 461176667Sjfv /* Send 1st SIPI */ 462169589Sjfv if (apic_wait_timeout(1000, "ICR not to be busy")) 463169589Sjfv return -ETIMEDOUT; 464169589Sjfv 465169589Sjfv lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 466169589Sjfv lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 467169589Sjfv LAPIC_DM_STARTUP | sipi_vector); 468169589Sjfv if (apic_wait_timeout(10000, "first SIPI to complete")) 469169589Sjfv return -ETIMEDOUT; 470169589Sjfv 471169589Sjfv /* Wait for CPUs to check in up to 200 us */ 472169589Sjfv wait_for_aps(ap_count, num_aps, 200, 15); 473169589Sjfv 474169589Sjfv /* Send 2nd SIPI */ 475169589Sjfv if (apic_wait_timeout(1000, "ICR not to be busy")) 476169589Sjfv return -ETIMEDOUT; 477169589Sjfv 478169589Sjfv lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 479169589Sjfv lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 480169589Sjfv LAPIC_DM_STARTUP | sipi_vector); 481169589Sjfv if (apic_wait_timeout(10000, "second SIPI to complete")) 482169589Sjfv return -ETIMEDOUT; 483169589Sjfv 484169589Sjfv /* Wait for CPUs to check in */ 485169589Sjfv if (wait_for_aps(ap_count, num_aps, 10000, 50)) { 486169589Sjfv debug("Not all APs checked in: %d/%d\n", 487169589Sjfv atomic_read(ap_count), num_aps); 488169589Sjfv return -EIO; 489169589Sjfv } 490169589Sjfv 491169589Sjfv return 0; 492169589Sjfv} 493169589Sjfv 494169589Sjfv/** 495169589Sjfv * 
bsp_do_flight_plan() - Do the flight plan on the BSP 496169589Sjfv * 497169589Sjfv * This runs the flight plan on the main CPU used to boot U-Boot 498169589Sjfv * 499169589Sjfv * @cpu: Device for the main CPU 500176667Sjfv * @plan: Flight plan to run 501169589Sjfv * @num_aps: Number of APs (CPUs other than the BSP) 502169589Sjfv * @returns 0 on success, -ETIMEDOUT if an AP failed to come up 503169589Sjfv */ 504169589Sjfvstatic int bsp_do_flight_plan(struct udevice *cpu, struct mp_flight_plan *plan, 505169589Sjfv int num_aps) 506169589Sjfv{ 507169589Sjfv int i; 508169589Sjfv int ret = 0; 509169589Sjfv const int timeout_us = 100000; 510169589Sjfv const int step_us = 100; 511169589Sjfv 512169589Sjfv for (i = 0; i < plan->num_records; i++) { 513169589Sjfv struct mp_flight_record *rec = &plan->records[i]; 514169240Sjfv 515169589Sjfv /* Wait for APs if the record is not released */ 516169240Sjfv if (atomic_read(&rec->barrier) == 0) { 517169240Sjfv /* Wait for the APs to check in */ 518169240Sjfv if (wait_for_aps(&rec->cpus_entered, num_aps, 519169240Sjfv timeout_us, step_us)) { 520169240Sjfv debug("MP record %d timeout\n", i); 521169240Sjfv ret = -ETIMEDOUT; 522177867Sjfv } 523169240Sjfv } 524169240Sjfv 525169240Sjfv if (rec->bsp_call != NULL) 526169240Sjfv rec->bsp_call(cpu, rec->bsp_arg); 527169240Sjfv 528169589Sjfv release_barrier(&rec->barrier); 529169240Sjfv } 530169240Sjfv 531169240Sjfv return ret; 532190872Sjfv} 533190872Sjfv 534190872Sjfv/** 535190872Sjfv * get_bsp() - Get information about the bootstrap processor 536190872Sjfv * 537169240Sjfv * @devp: If non-NULL, returns CPU device corresponding to the BSP 538190872Sjfv * @cpu_countp: If non-NULL, returns the total number of CPUs 539190872Sjfv * Return: CPU number of the BSP, or -ve on error. 
If multiprocessing is not 540169240Sjfv * enabled, returns 0 541169240Sjfv */ 542169589Sjfvstatic int get_bsp(struct udevice **devp, int *cpu_countp) 543169240Sjfv{ 544169240Sjfv char processor_name[CPU_MAX_NAME_LEN]; 545169240Sjfv struct udevice *dev; 546169240Sjfv int apic_id; 547169240Sjfv int ret; 548169240Sjfv 549169240Sjfv cpu_get_name(processor_name); 550169589Sjfv debug("CPU: %s\n", processor_name); 551169240Sjfv 552169240Sjfv apic_id = lapicid(); 553169240Sjfv ret = find_cpu_by_apic_id(apic_id, &dev); 554177867Sjfv if (ret < 0) { 555169240Sjfv printf("Cannot find boot CPU, APIC ID %d\n", apic_id); 556169240Sjfv return ret; 557169240Sjfv } 558169240Sjfv ret = cpu_get_count(dev); 559169589Sjfv if (ret < 0) 560169240Sjfv return log_msg_ret("count", ret); 561169240Sjfv if (devp) 562169240Sjfv *devp = dev; 563169240Sjfv if (cpu_countp) 564169589Sjfv *cpu_countp = ret; 565169589Sjfv 566169589Sjfv return dev_seq(dev) >= 0 ? dev_seq(dev) : 0; 567169589Sjfv} 568169240Sjfv 569169240Sjfv/** 570169240Sjfv * read_callback() - Read the pointer in a callback slot 571169240Sjfv * 572176667Sjfv * This is called by APs to read their callback slot to see if there is a 573169240Sjfv * pointer to new instructions 574177867Sjfv * 575173788Sjfv * @slot: Pointer to the AP's callback slot 576169240Sjfv * Return: value of that pointer 577169240Sjfv */ 578169240Sjfvstatic struct mp_callback *read_callback(struct mp_callback **slot) 579169240Sjfv{ 580169240Sjfv dmb(); 581169240Sjfv 582169240Sjfv return *slot; 583178523Sjfv} 584169240Sjfv 585169240Sjfv/** 586169240Sjfv * store_callback() - Store a pointer to the callback slot 587169240Sjfv * 588169240Sjfv * This is called by APs to write NULL into the callback slot when they have 589169240Sjfv * finished the work requested by the BSP. 590169240Sjfv * 591169240Sjfv * @slot: Pointer to the AP's callback slot 592169240Sjfv * @val: Value to write (e.g. 
NULL) 593169240Sjfv */ 594169240Sjfvstatic void store_callback(struct mp_callback **slot, struct mp_callback *val) 595169240Sjfv{ 596169240Sjfv *slot = val; 597169240Sjfv dmb(); 598169240Sjfv} 599169240Sjfv 600169589Sjfv/** 601169240Sjfv * run_ap_work() - Run a callback on selected APs 602169240Sjfv * 603169240Sjfv * This writes @callback to all APs and waits for them all to acknowledge it, 604169240Sjfv * Note that whether each AP actually calls the callback depends on the value 605169240Sjfv * of logical_cpu_number (see struct mp_callback). The logical CPU number is 606177867Sjfv * the CPU device's req->seq value. 607169240Sjfv * 608169240Sjfv * @callback: Callback information to pass to all APs 609169240Sjfv * @bsp: CPU device for the BSP 610169240Sjfv * @num_cpus: The number of CPUs in the system (= number of APs + 1) 611169240Sjfv * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for 612169240Sjfv * no timeout 613169240Sjfv * Return: 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time 614169240Sjfv */ 615169240Sjfvstatic int run_ap_work(struct mp_callback *callback, struct udevice *bsp, 616169240Sjfv int num_cpus, uint expire_ms) 617169240Sjfv{ 618173788Sjfv int cur_cpu = dev_seq(bsp); 619173788Sjfv int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */ 620173788Sjfv int cpus_accepted; 621173788Sjfv ulong start; 622169240Sjfv int i; 623169240Sjfv 624169240Sjfv if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) { 625169240Sjfv printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n"); 626169240Sjfv return -ENOTSUPP; 627169240Sjfv } 628169240Sjfv 629169240Sjfv /* Signal to all the APs to run the func. */ 630169240Sjfv for (i = 0; i < num_cpus; i++) { 631169240Sjfv if (cur_cpu != i) 632169240Sjfv store_callback(&ap_callbacks[i], callback); 633169240Sjfv } 634169240Sjfv mfence(); 635169240Sjfv 636169240Sjfv /* Wait for all the APs to signal back that call has been accepted. 
*/ 637169240Sjfv start = get_timer(0); 638169240Sjfv 639173788Sjfv do { 640173788Sjfv mdelay(1); 641169240Sjfv cpus_accepted = 0; 642169240Sjfv 643169240Sjfv for (i = 0; i < num_cpus; i++) { 644169240Sjfv if (cur_cpu == i) 645169240Sjfv continue; 646169240Sjfv if (!read_callback(&ap_callbacks[i])) 647169240Sjfv cpus_accepted++; 648169240Sjfv } 649169240Sjfv 650169240Sjfv if (expire_ms && get_timer(start) >= expire_ms) { 651169240Sjfv log(UCLASS_CPU, LOGL_CRIT, 652169240Sjfv "AP call expired; %d/%d CPUs accepted\n", 653169240Sjfv cpus_accepted, num_aps); 654169240Sjfv return -ETIMEDOUT; 655169240Sjfv } 656169240Sjfv } while (cpus_accepted != num_aps); 657169240Sjfv 658169240Sjfv /* Make sure we can see any data written by the APs */ 659169240Sjfv mfence(); 660169240Sjfv 661169240Sjfv return 0; 662169240Sjfv} 663169240Sjfv 664169240Sjfv/** 665169240Sjfv * ap_wait_for_instruction() - Wait for and process requests from the main CPU 666169240Sjfv * 667169240Sjfv * This is called by APs (here, everything other than the main boot CPU) to 668169240Sjfv * await instructions. They arrive in the form of a function call and argument, 669169589Sjfv * which is then called. 
This uses a simple mailbox with atomic read/set 670169240Sjfv * 671169240Sjfv * @cpu: CPU that is waiting 672169240Sjfv * @unused: Optional argument provided by struct mp_flight_record, not used here 673169240Sjfv * Return: Does not return 674177867Sjfv */ 675169240Sjfvstatic int ap_wait_for_instruction(struct udevice *cpu, void *unused) 676169240Sjfv{ 677169240Sjfv struct mp_callback lcb; 678169240Sjfv struct mp_callback **per_cpu_slot; 679169240Sjfv 680169240Sjfv if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) 681169240Sjfv return 0; 682169240Sjfv 683169240Sjfv per_cpu_slot = &ap_callbacks[dev_seq(cpu)]; 684169240Sjfv 685169240Sjfv while (1) { 686169589Sjfv struct mp_callback *cb = read_callback(per_cpu_slot); 687169589Sjfv 688169589Sjfv if (!cb) { 689169589Sjfv asm ("pause"); 690169240Sjfv continue; 691169240Sjfv } 692169240Sjfv 693169240Sjfv /* Copy to local variable before using the value */ 694169240Sjfv memcpy(&lcb, cb, sizeof(lcb)); 695169240Sjfv mfence(); 696176667Sjfv if (lcb.logical_cpu_number == MP_SELECT_ALL || 697169240Sjfv lcb.logical_cpu_number == MP_SELECT_APS || 698173788Sjfv dev_seq(cpu) == lcb.logical_cpu_number) 699173788Sjfv lcb.func(lcb.arg); 700169240Sjfv 701169240Sjfv /* Indicate we are finished */ 702169240Sjfv store_callback(per_cpu_slot, NULL); 703169240Sjfv } 704169240Sjfv 705169240Sjfv return 0; 706169240Sjfv} 707173788Sjfv 708173788Sjfvstatic int mp_init_cpu(struct udevice *cpu, void *unused) 709173788Sjfv{ 710173788Sjfv struct cpu_plat *plat = dev_get_parent_plat(cpu); 711169240Sjfv 712169240Sjfv plat->ucode_version = microcode_read_rev(); 713169240Sjfv plat->device_id = gd->arch.x86_device; 714169240Sjfv 715169240Sjfv return device_probe(cpu); 716169240Sjfv} 717169240Sjfv 718169240Sjfvstatic struct mp_flight_record mp_steps[] = { 719169240Sjfv MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL), 720169240Sjfv MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL), 721169240Sjfv}; 722169240Sjfv 723169240Sjfvint mp_run_on_cpus(int 
cpu_select, mp_run_func func, void *arg) 724169240Sjfv{ 725169240Sjfv struct mp_callback lcb = { 726169240Sjfv .func = func, 727169240Sjfv .arg = arg, 728169240Sjfv .logical_cpu_number = cpu_select, 729169240Sjfv }; 730169240Sjfv struct udevice *dev; 731169240Sjfv int num_cpus; 732169240Sjfv int ret; 733169240Sjfv 734169240Sjfv ret = get_bsp(&dev, &num_cpus); 735169240Sjfv if (ret < 0) 736169240Sjfv return log_msg_ret("bsp", ret); 737169240Sjfv if (cpu_select == MP_SELECT_ALL || cpu_select == MP_SELECT_BSP || 738169240Sjfv cpu_select == ret) { 739169240Sjfv /* Run on BSP first */ 740169589Sjfv func(arg); 741169240Sjfv } 742169240Sjfv 743169240Sjfv if (!IS_ENABLED(CONFIG_SMP_AP_WORK) || 744177867Sjfv !(gd->flags & GD_FLG_SMP_READY)) { 745169240Sjfv /* Allow use of this function on the BSP only */ 746169240Sjfv if (cpu_select == MP_SELECT_BSP || !cpu_select) 747169240Sjfv return 0; 748169240Sjfv return -ENOTSUPP; 749169240Sjfv } 750169240Sjfv 751169240Sjfv /* Allow up to 1 second for all APs to finish */ 752169240Sjfv ret = run_ap_work(&lcb, dev, num_cpus, 1000 /* ms */); 753169240Sjfv if (ret) 754169240Sjfv return log_msg_ret("aps", ret); 755169240Sjfv 756169240Sjfv return 0; 757169240Sjfv} 758169240Sjfv 759169240Sjfvstatic void park_this_cpu(void *unused) 760169240Sjfv{ 761169240Sjfv stop_this_cpu(); 762169240Sjfv} 763169240Sjfv 764169240Sjfvint mp_park_aps(void) 765169240Sjfv{ 766169240Sjfv int ret; 767169240Sjfv 768169240Sjfv ret = mp_run_on_cpus(MP_SELECT_APS, park_this_cpu, NULL); 769169589Sjfv if (ret) 770169589Sjfv return log_ret(ret); 771169240Sjfv 772169240Sjfv return 0; 773169240Sjfv} 774169240Sjfv 775169240Sjfvint mp_first_cpu(int cpu_select) 776169240Sjfv{ 777169240Sjfv struct udevice *dev; 778177867Sjfv int num_cpus; 779169240Sjfv int ret; 780169240Sjfv 781177867Sjfv /* 782169240Sjfv * This assumes that CPUs are numbered from 0. 
This function tries to 783169240Sjfv * avoid assuming the CPU 0 is the boot CPU 784169240Sjfv */ 785169240Sjfv if (cpu_select == MP_SELECT_ALL) 786177867Sjfv return 0; /* start with the first one */ 787177867Sjfv 788177867Sjfv ret = get_bsp(&dev, &num_cpus); 789177867Sjfv if (ret < 0) 790169240Sjfv return log_msg_ret("bsp", ret); 791169240Sjfv 792169240Sjfv /* Return boot CPU if requested */ 793169240Sjfv if (cpu_select == MP_SELECT_BSP) 794169240Sjfv return ret; 795177867Sjfv 796177867Sjfv /* Return something other than the boot CPU, if APs requested */ 797169240Sjfv if (cpu_select == MP_SELECT_APS && num_cpus > 1) 798169240Sjfv return ret == 0 ? 1 : 0; 799169240Sjfv 800169240Sjfv /* Try to check for an invalid value */ 801177867Sjfv if (cpu_select < 0 || cpu_select >= num_cpus) 802177867Sjfv return -EINVAL; 803169240Sjfv 804177867Sjfv return cpu_select; /* return the only selected one */ 805177867Sjfv} 806169240Sjfv 807169240Sjfvint mp_next_cpu(int cpu_select, int prev_cpu) 808169240Sjfv{ 809169240Sjfv struct udevice *dev; 810177867Sjfv int num_cpus; 811177867Sjfv int ret; 812173788Sjfv int bsp; 813173788Sjfv 814169240Sjfv /* If we selected the BSP or a particular single CPU, we are done */ 815169240Sjfv if (!IS_ENABLED(CONFIG_SMP_AP_WORK) || cpu_select == MP_SELECT_BSP || 816173788Sjfv cpu_select >= 0) 817173788Sjfv return -EFBIG; 818169240Sjfv 819177867Sjfv /* Must be doing MP_SELECT_ALL or MP_SELECT_APS; return the next CPU */ 820177867Sjfv ret = get_bsp(&dev, &num_cpus); 821177867Sjfv if (ret < 0) 822169240Sjfv return log_msg_ret("bsp", ret); 823169240Sjfv bsp = ret; 824169240Sjfv 825169240Sjfv /* Move to the next CPU */ 826177867Sjfv assert(prev_cpu >= 0); 827169240Sjfv ret = prev_cpu + 1; 828169240Sjfv 829169240Sjfv /* Skip the BSP if needed */ 830169240Sjfv if (cpu_select == MP_SELECT_APS && ret == bsp) 831169240Sjfv ret++; 832177867Sjfv if (ret >= num_cpus) 833177867Sjfv return -EFBIG; 834177867Sjfv 835169240Sjfv return ret; 836169240Sjfv} 837169240Sjfv 
838169240Sjfvint mp_init(void) 839177867Sjfv{ 840169240Sjfv int num_aps, num_cpus; 841169240Sjfv atomic_t *ap_count; 842169240Sjfv struct udevice *cpu; 843169240Sjfv int ret; 844169240Sjfv 845169240Sjfv if (IS_ENABLED(CONFIG_QFW)) { 846169240Sjfv ret = qemu_cpu_fixup(); 847169240Sjfv if (ret) 848169240Sjfv return ret; 849169240Sjfv } 850169240Sjfv 851169240Sjfv ret = get_bsp(&cpu, &num_cpus); 852169240Sjfv if (ret < 0) { 853169589Sjfv debug("Cannot init boot CPU: err=%d\n", ret); 854169240Sjfv return ret; 855185353Sjfv } 856169240Sjfv 857177867Sjfv if (num_cpus < 2) 858169240Sjfv debug("Warning: Only 1 CPU is detected\n"); 859169240Sjfv 860169240Sjfv ret = check_cpu_devices(num_cpus); 861169240Sjfv if (ret) 862169240Sjfv log_warning("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n"); 863169240Sjfv 864169240Sjfv ap_callbacks = calloc(num_cpus, sizeof(struct mp_callback *)); 865173788Sjfv if (!ap_callbacks) 866173788Sjfv return -ENOMEM; 867169240Sjfv 868169240Sjfv /* Copy needed parameters so that APs have a reference to the plan */ 869169240Sjfv mp_info.num_records = ARRAY_SIZE(mp_steps); 870185353Sjfv mp_info.records = mp_steps; 871169240Sjfv 872169240Sjfv /* Load the SIPI vector */ 873169240Sjfv ret = load_sipi_vector(&ap_count, num_cpus); 874169240Sjfv if (ap_count == NULL) 875169240Sjfv return -ENOENT; 876169240Sjfv 877169240Sjfv /* 878169240Sjfv * Make sure SIPI data hits RAM so the APs that come up will see 879169240Sjfv * the startup code even if the caches are disabled 880169240Sjfv */ 881169240Sjfv wbinvd(); 882173788Sjfv 883173788Sjfv /* Start the APs providing number of APs and the cpus_entered field */ 884173788Sjfv num_aps = num_cpus - 1; 885173788Sjfv ret = start_aps(num_aps, ap_count); 886190872Sjfv if (ret) { 887190872Sjfv mdelay(1000); 888190872Sjfv debug("%d/%d eventually checked in?\n", atomic_read(ap_count), 889169240Sjfv num_aps); 890169240Sjfv return ret; 891169240Sjfv } 892169240Sjfv 893169240Sjfv /* 
Walk the flight plan for the BSP */ 894169240Sjfv ret = bsp_do_flight_plan(cpu, &mp_info, num_aps); 895169240Sjfv if (ret) { 896169240Sjfv debug("CPU init failed: err=%d\n", ret); 897169240Sjfv return ret; 898169240Sjfv } 899169240Sjfv gd->flags |= GD_FLG_SMP_READY; 900169240Sjfv 901169240Sjfv return 0; 902169240Sjfv} 903169240Sjfv