1 /* ********************************************************************* 2 * Broadcom Common Firmware Environment (CFE) 3 * 4 * Intel (X86) processor startup File: altcpu.S 5 * 6 * This module contains code to start the other CPU(s). The basic 7 * procedure used here is outlined in the Intel System Software 8 * Writer's Guide (SSWG) section 7.5. 9 * 10 * Author: Mitch Lichtenberg 11 * 12 ********************************************************************* 13 * 14 * Copyright 2000,2001,2003 15 * Broadcom Corporation. All rights reserved. 16 * 17 * This software is furnished under license and may be used and 18 * copied only in accordance with the following terms and 19 * conditions. Subject to these conditions, you may download, 20 * copy, install, use, modify and distribute modified or unmodified 21 * copies of this software in source and/or binary form. No title 22 * or ownership is transferred hereby. 23 * 24 * 1) Any source code used, modified or distributed must reproduce 25 * and retain this copyright notice and list of conditions 26 * as they appear in the source file. 27 * 28 * 2) No right is granted to use any trade name, trademark, or 29 * logo of Broadcom Corporation. The "Broadcom Corporation" 30 * name may not be used to endorse or promote products derived 31 * from this software without the prior written permission of 32 * Broadcom Corporation. 33 * 34 * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR 35 * IMPLIED WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED 36 * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 37 * PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED. 
IN NO EVENT
 *    SHALL BROADCOM BE LIABLE FOR ANY DAMAGES WHATSOEVER, AND IN
 *    PARTICULAR, BROADCOM SHALL NOT BE LIABLE FOR DIRECT, INDIRECT,
 *    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 *    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 *    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 *    OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 *    TORT (INCLUDING NEGLIGENCE OR OTHERWISE), EVEN IF ADVISED OF
 *    THE POSSIBILITY OF SUCH DAMAGE.
 ********************************************************************* */

#include "protmode.h"
#include "x86macros.h"


/* *********************************************************************
 *  Constants
 ********************************************************************* */

/*
 * Memory-mapped local APIC registers (default APIC base 0xFEE00000).
 */
#define APIC_ID		0xFEE00020	/* local APIC ID register */
#define APIC_ICR_LOW	0xFEE00300	/* interrupt command register, low dword */
#define APIC_SVR	0xFEE000F0	/* spurious-interrupt vector register */
#define APIC_ENABLED	0x00000100	/* SVR bit 8: software-enable the APIC */
#define APIC_LVT3	0xFEE00370	/* LVT error-interrupt entry */

#define ALTCPU_MAX_CPUS		8	/* size of cpu_ids table / stack pool */
#define ALTCPU_STACK_SIZE	256	/* bytes of stack per secondary CPU */

/*
 * If you change the vector below, you also need to change the
 * linker script's load address for the ALTCPU_TEXT section
 * to be whatever the (vector << 12) value is.
 *
 * (A SIPI vector V makes the AP begin real-mode execution at
 * physical address V << 12.)
 */

#define AP_START_VECTOR	0x22


/* *********************************************************************
 *  Part of this code lives in a special segment, since we
 *  need to move it into the low 1MB of memory where it can
 *  be run in real-mode.  After this part runs, we can go back
 *  to the real text segment again.
 ********************************************************************* */

	.data

	.extern	ram_gdt_descr		/* GDT descriptor for the relocated (RAM) image */

	/* One small startup stack per potential CPU */
	.comm	altcpu_stacks,(ALTCPU_MAX_CPUS*ALTCPU_STACK_SIZE)

	.section .altcpu_text,"xr"


/* *********************************************************************
 *  16-bit startup code.  This code switches us into
 *  protected mode.  It's kind of fragile, due to GCC's
 *  limited support for 16-bit code segments.  If this
 *  code is modified, pay particular attention to any
 *  32-bit constants or offsets you use and be sure that
 *  the OP32 prefix is applied to get the correct CPU
 *  addressing mode from a 16-bit segment.
 ********************************************************************* */

/* Fixed offset of the ROM GDT descriptor within this segment (see .org below) */
#define ALTCPU_GDT_OFFSET 0x40


	.align	16			/* Align on paragraph boundary for 16-bit code */

	.code16

	.global	altcpu_initvec
altcpu_initvec:

	/*
	 * Set up the Global Descriptor Table (GDT).  The descriptor
	 * lives at a known offset inside this same (real-mode) code
	 * segment, so address it via %cs.
	 */

	xorl	%esi,%esi
	movw	$ALTCPU_GDT_OFFSET,%si
	lgdt	%cs:(%si)


	/*
	 * Debug marker: write 0xFF to POST port 0x80, then switch on
	 * the PE bit in CR0 to enable protected mode.
	 */

	movb	$0xFF,%al
	outb	%al,$0x80

	movl	%cr0,%eax
	orb	$CR0_PE,%al		/* CR0_PE from protmode.h */
	movl	%eax,%cr0

	/*
	 * Do a FAR jump to the next instruction to load the
	 * new code segment selector into CS, then a near
	 * jump to flush the prefetch queue.
	 */

	JMPFAR32(1f)
1:	jmp	2f
2:


	/*
	 * Okay, we're in protected mode.  Set up the
	 * data and stack segments and jump into the new world.
	 */


	.code32

	movl	$SEG_DATA,%eax		/* flat data selector (protmode.h) */
	movw	%ax,%ds
	movw	%ax,%es
	movw	%ax,%fs
	movw	%ax,%gs
	movw	%ax,%ss

	jmp	altcpu_init32		/* continue in the relocated .text copy */




/* *********************************************************************
 *  ALTCPU ROM GDT
 *
 *  It is located at a _magic_ place (ALTCPU_GDT_OFFSET above),
 *  so we can predict where it is relative to our current
 *  code segment.
 *
 *  Unfortunately, the actual location of this module
 *  lives in the linker script, and is also dependent on
 *  the vector we choose to start up our CPUs (currently 0x22)
 ********************************************************************* */

	.org	ALTCPU_GDT_OFFSET	/* pin the GDT at the offset the 16-bit code expects */

/*
 * Descriptor for GDT (limit + base, loaded by lgdt above)
 */

altcpurom_gdt_descr:
	GDT_GDT_DESCR(altcpurom_gdt,SEG_COUNT)	/* descriptor for GDT itself */

/*
 * GDT itself
 */

	.align	16

altcpurom_gdt:
	GDT_NULL_DESCR()		/* null descriptor */
	GDT_CODE_DESCR()		/* Code segment (CS=0x0008) */
	GDT_DATA_DESCR()		/* Data/stack/extra (DS/ES/SS=0x0010) */


	.byte	0xEE,0xAA,0xEE,0xAA	/* signature: stuff to find in the S-records */

/* *********************************************************************
 *  Data used by our startup code
 ********************************************************************* */

	.data

cpu_sem:
	.long	0			/* Startup semaphore (0 = free, 1 = held) */

cpu_count:
	.long	1			/* number of CPUs found; start with ourselves */

cpu_curstack:
	.long	altcpu_stacks		/* next free per-AP stack area */

	.globl	cpu_ids
cpu_ids:
	.long	0			/* Filled in with APIC IDs, one slot per CPU */
	.long	0
	.long	0
	.long	0
	.long	0
	.long	0
	.long	0
	.long	0			/* ALTCPU_MAX_CPUS entries total */

/* *********************************************************************
 *  Here is the rest of the startup for the other CPUs,
 *  this part goes in with the rest of the relocated CFE code.
 ********************************************************************* */

	.text


altcpu_init32:

	/*
	 * Reload the GDT from the "real" place (the relocated RAM
	 * copy, not the low-memory trampoline's copy).
	 *
	 * Also, load the IDT so we can take exceptions.
	 */

	lgdt	%ds:ram_gdt_descr
	lidt	%ds:ram_idt_ptr

	/*
	 * Wait on the startup semaphore so we can be the only
	 * one to update the CPU table.  xchg on memory is an
	 * atomic test-and-set.
	 */

altcpu_wait:
	movl	$1,%eax			/* Test-and-set semaphore */
	xchgl	%eax,cpu_sem		/* See if we got it */
	or	%eax,%eax		/* Was old value zero? */
	jne	altcpu_wait		/* Nope, go back and keep waiting */

	/*
	 * Initialize MTRRs, but only once per CPU cluster.  The
	 * threads on an HT Xeon share the MTRRs, but the
	 * individual chips do not (of course).
	 *
	 * (is this check necessary? --- yes, it seems to be.  Boot
	 * of secondary CPUs is unreliable without it)
	 */

	movl	$APIC_ID,%esi
	movl	(%esi),%eax		/* get our APIC ID */
	movl	(%esi),%eax		/* NOTE(review): duplicated load — second read looks
					   redundant (same register, no intervening use);
					   likely copy/paste leftover — confirm before removing */
	shrl	$24,%eax		/* APIC ID lives in bits 31:24; shift to low byte */
	test	$1,%eax			/* odd ID => second logical CPU on an HT chip? */
	jnz	is_ht_cpu1		/* Don't bother with MTRR init */

	BAL_ESP(cpu_init)		/* branch-and-link helper: per-package CPU init */

	/*
	 * Load processor microcode (belongs somewhere else).  The
	 * processor microcode lives at a fixed place in the flash so
	 * we can stuff it in *after* CFE is built, this way we don't
	 * have to distribute the microcode with CFE itself, avoiding
	 * possible Intel proprietary stuff (vendor must supply
	 * microcode themselves)
	 */

	movl	$0xFFFF0000,%esi	/* fixed flash location of microcode blob */
	movl	$(16*2048),%eax		/* 16 blocks max */
	BAL_ESP(cpu_load_ucode)


is_ht_cpu1:

	/*
	 * Init the caches.  We do this on both threads of HT
	 * gear, since once the CPU is out of reset, both have
	 * to say that the cache is enabled for it to really
	 * be enabled.
	 */

#if CFG_INIT_L1
	movl	%cr0,%eax
	andl	$(~(CR0_CD|CR0_NW)),%eax	/* clear cache-disable / not-writethrough */
	movl	%eax,%cr0
	invd					/* discard (not write back) stale cache contents */

#endif



	/*
	 * Okay, we have the lock.  Add us to the table.
	 */

	movl	$APIC_ID,%esi
	movl	(%esi),%eax
	andl	$0xFF000000,%eax	/* keep only the APIC ID field (bits 31:24) */

	movl	cpu_count,%ebx		/* Get current count */
	movl	$cpu_ids,%esi
	movl	%eax,(%esi,%ebx,4)	/* Write at current offset */
	incl	%ebx
	movl	%ebx,cpu_count		/* Write back new index */

	/*
	 * Initialize local APIC
	 * (placeholder — not done here)
	 */

	/*
	 * Initialize SMI environment
	 * (placeholder — not done here)
	 */

	/*
	 * Initialize stack - we will wake up via
	 * an NMI and need somewhere to push the
	 * return address.  Carve the next ALTCPU_STACK_SIZE
	 * chunk out of the altcpu_stacks pool.
	 */

	addl	$ALTCPU_STACK_SIZE,cpu_curstack
	movl	cpu_curstack,%esp

	/*
	 * Release semaphore.
	 */

	movl	$0,%eax
	xchgl	%eax,cpu_sem

	/*
	 * Now, wait for someone to hit us with an INIT IPI or an NMI.
	 * Interrupts are disabled, so only NMI/INIT/SMI can wake us;
	 * loop back to hlt in case of a spurious wakeup.
	 */

1:	cli
	hlt
	nop
	nop
	jmp	1b



/* *********************************************************************
 *  altcpu_startup
 *
 *  This routine kicks off the CPU startup.  It is called (by the
 *  boot processor) after DRAM is initialized and the stack set up,
 *  so we can do fancy stuff like push and pop :-).
 *
 *  Input parameters:
 *  	   nothing
 *
 *  Return value:
 *  	   eax - number of CPUs detected
 ********************************************************************* */

	.globl	altcpu_startup

altcpu_startup:

	pushl	%esi			/* save caller's registers */
	pushl	%edi
	pushl	%ebx
	pushl	%ecx
	pushl	%edx

	movl	$1,cpu_count		/* reset table: BSP counts as CPU #0 */
	movl	$altcpu_stacks,%eax
	movl	%eax,cpu_curstack

	/*
	 * Copy the real-mode trampoline (altcpu_initvec) from its
	 * flash location into lower DRAM at the SIPI start address
	 * (AP_START_VECTOR << 12).
	 */

	movl	$0xFFFFD000,%esi	/* flash copy of .altcpu_text — keep in sync w/ linker script */
	movl	$(AP_START_VECTOR << 12),%edi
	movl	$256,%ecx
	cld
	rep movsb

	/*
	 * Flush cache to ensure code is in memory before the APs
	 * fetch it.
	 */

	wbinvd

	/*
	 * Enable the local APIC (set the software-enable bit in
	 * the spurious-interrupt vector register).
	 */

	movl	$APIC_SVR,%esi
	movl	(%esi),%eax
	orl	$APIC_ENABLED,%eax
	movl	%eax,(%esi)

	movl	$APIC_LVT3,%esi		/* set error-interrupt vector to 0x66 */
	movl	(%esi),%eax
	andl	$0xFFFFFF00,%eax
	orl	$0x66,%eax
	movl	%eax,(%esi)


	/*
	 * Whack the APIC to get the APs going: broadcast an INIT
	 * IPI to all-excluding-self.
	 */

	movl	$APIC_ICR_LOW,%esi
	movl	$0x000C4500,%eax	/* INIT, level assert, all-excluding-self */
	movl	%eax,(%esi)

	/*
	 * Delay (comment said 200ms; argument is cfe_sleep ticks —
	 * actual duration depends on CFE tick rate, verify)
	 */

	pushl	$2
	call	cfe_sleep
	addl	$4,%esp

	/*
	 * Send out first SIPI (startup IPI); vector selects the
	 * trampoline page copied above.
	 */

	movl	$APIC_ICR_LOW,%esi
	movl	$(0x000C4600 | AP_START_VECTOR),%eax
	movl	%eax,(%esi)

	pushl	$1
	call	cfe_sleep
	addl	$4,%esp

	/*
	 * Send out second SIPI (the INIT-SIPI-SIPI sequence).
	 */

	movl	$APIC_ICR_LOW,%esi
	movl	$(0x000C4600 | AP_START_VECTOR),%eax
	movl	%eax,(%esi)

	/*
	 * Wait for the APs to come up and register themselves
	 * in cpu_ids (5 cfe_sleep ticks).
	 */

	pushl	$5
	call	cfe_sleep
	addl	$4,%esp

	popl	%edx			/* restore caller's registers */
	popl	%ecx
	popl	%ebx
	popl	%edi
	popl	%esi

	movl	cpu_count,%eax		/* return # of CPUs found (incl. BSP) */

	ret

/*
 * Make a reference to the altcpu_text segment, to force the
 * linker to bring it in.  (Unreachable — reference only.)
 */

	movl	$altcpurom_gdt,%eax

/* *********************************************************************
 *  End
 ********************************************************************* */