1/* 2 * trampoline.S: Jump start slave processors on sparc64. 3 * 4 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) 5 */ 6 7#include <linux/init.h> 8 9#include <asm/head.h> 10#include <asm/asi.h> 11#include <asm/lsu.h> 12#include <asm/dcr.h> 13#include <asm/dcu.h> 14#include <asm/pstate.h> 15#include <asm/page.h> 16#include <asm/pgtable.h> 17#include <asm/spitfire.h> 18#include <asm/processor.h> 19#include <asm/thread_info.h> 20#include <asm/mmu.h> 21#include <asm/hypervisor.h> 22#include <asm/cpudata.h> 23 24 .data 25 .align 8 26call_method: 27 .asciz "call-method" 28 .align 8 29itlb_load: 30 .asciz "SUNW,itlb-load" 31 .align 8 32dtlb_load: 33 .asciz "SUNW,dtlb-load" 34 35#define TRAMP_STACK_SIZE 1024 36 .align 16 37tramp_stack: 38 .skip TRAMP_STACK_SIZE 39 40 __CPUINIT 41 .align 8 42 .globl sparc64_cpu_startup, sparc64_cpu_startup_end 43sparc64_cpu_startup: 44 BRANCH_IF_SUN4V(g1, niagara_startup) 45 BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup) 46 BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup) 47 48 ba,pt %xcc, spitfire_startup 49 nop 50 51cheetah_plus_startup: 52 /* Preserve OBP chosen DCU and DCR register settings. */ 53 ba,pt %xcc, cheetah_generic_startup 54 nop 55 56cheetah_startup: 57 mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1 58 wr %g1, %asr18 59 60 sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 61 or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 62 sllx %g5, 32, %g5 63 or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 64 stxa %g5, [%g0] ASI_DCU_CONTROL_REG 65 membar #Sync 66 /* fallthru */ 67 68cheetah_generic_startup: 69 mov TSB_EXTENSION_P, %g3 70 stxa %g0, [%g3] ASI_DMMU 71 stxa %g0, [%g3] ASI_IMMU 72 membar #Sync 73 74 mov TSB_EXTENSION_S, %g3 75 stxa %g0, [%g3] ASI_DMMU 76 membar #Sync 77 78 mov TSB_EXTENSION_N, %g3 79 stxa %g0, [%g3] ASI_DMMU 80 stxa %g0, [%g3] ASI_IMMU 81 membar #Sync 82 /* fallthru */ 83 84niagara_startup: 85 /* Disable STICK_INT interrupts. */ 86 sethi %hi(0x80000000), %g5 87 sllx %g5, 32, %g5 88 wr %g5, %asr25 89 90 ba,pt %xcc, startup_continue 91 nop 92 93spitfire_startup: 94 mov (LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1 95 stxa %g1, [%g0] ASI_LSU_CONTROL 96 membar #Sync 97 98startup_continue: 99 mov %o0, %l0 100 BRANCH_IF_SUN4V(g1, niagara_lock_tlb) 101 102 sethi %hi(0x80000000), %g2 103 sllx %g2, 32, %g2 104 wr %g2, 0, %tick_cmpr 105 106 /* Call OBP by hand to lock KERNBASE into i/d tlbs. 107 * We lock 'num_kernel_image_mappings' consequetive entries. 108 */ 109 sethi %hi(prom_entry_lock), %g2 1101: ldstub [%g2 + %lo(prom_entry_lock)], %g1 111 brnz,pn %g1, 1b 112 nop 113 114 sethi %hi(p1275buf), %g2 115 or %g2, %lo(p1275buf), %g2 116 ldx [%g2 + 0x10], %l2 117 add %l2, -(192 + 128), %sp 118 flushw 119 120 /* Setup the loop variables: 121 * %l3: VADDR base 122 * %l4: TTE base 123 * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings' 124 * %l6: Number of TTE entries to map 125 * %l7: Highest TTE entry number, we count down 126 */ 127 sethi %hi(KERNBASE), %l3 128 sethi %hi(kern_locked_tte_data), %l4 129 ldx [%l4 + %lo(kern_locked_tte_data)], %l4 130 clr %l5 131 sethi %hi(num_kernel_image_mappings), %l6 132 lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 133 add %l6, 1, %l6 134 135 mov 15, %l7 136 BRANCH_IF_ANY_CHEETAH(g1,g5,2f) 137 138 mov 63, %l7 1392: 140 1413: 142 /* Lock into I-MMU */ 143 sethi %hi(call_method), %g2 144 or %g2, %lo(call_method), %g2 145 stx %g2, [%sp + 2047 + 128 + 0x00] 146 mov 5, %g2 147 stx %g2, [%sp + 2047 + 128 + 0x08] 148 mov 1, %g2 149 stx %g2, [%sp + 2047 + 128 + 0x10] 150 sethi %hi(itlb_load), %g2 151 or %g2, %lo(itlb_load), %g2 152 stx %g2, [%sp + 2047 + 128 + 0x18] 153 sethi %hi(prom_mmu_ihandle_cache), %g2 154 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 155 stx %g2, [%sp + 2047 + 128 + 0x20] 156 157 /* Each TTE maps 4MB, convert index to offset. */ 158 sllx %l5, 22, %g1 159 160 add %l3, %g1, %g2 161 stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR 162 add %l4, %g1, %g2 163 stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE 164 165 /* TTE index is highest minus loop index. */ 166 sub %l7, %l5, %g2 167 stx %g2, [%sp + 2047 + 128 + 0x38] 168 169 sethi %hi(p1275buf), %g2 170 or %g2, %lo(p1275buf), %g2 171 ldx [%g2 + 0x08], %o1 172 call %o1 173 add %sp, (2047 + 128), %o0 174 175 /* Lock into D-MMU */ 176 sethi %hi(call_method), %g2 177 or %g2, %lo(call_method), %g2 178 stx %g2, [%sp + 2047 + 128 + 0x00] 179 mov 5, %g2 180 stx %g2, [%sp + 2047 + 128 + 0x08] 181 mov 1, %g2 182 stx %g2, [%sp + 2047 + 128 + 0x10] 183 sethi %hi(dtlb_load), %g2 184 or %g2, %lo(dtlb_load), %g2 185 stx %g2, [%sp + 2047 + 128 + 0x18] 186 sethi %hi(prom_mmu_ihandle_cache), %g2 187 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 188 stx %g2, [%sp + 2047 + 128 + 0x20] 189 190 /* Each TTE maps 4MB, convert index to offset. */ 191 sllx %l5, 22, %g1 192 193 add %l3, %g1, %g2 194 stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR 195 add %l4, %g1, %g2 196 stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE 197 198 /* TTE index is highest minus loop index. */ 199 sub %l7, %l5, %g2 200 stx %g2, [%sp + 2047 + 128 + 0x38] 201 202 sethi %hi(p1275buf), %g2 203 or %g2, %lo(p1275buf), %g2 204 ldx [%g2 + 0x08], %o1 205 call %o1 206 add %sp, (2047 + 128), %o0 207 208 add %l5, 1, %l5 209 cmp %l5, %l6 210 bne,pt %xcc, 3b 211 nop 212 213 sethi %hi(prom_entry_lock), %g2 214 stb %g0, [%g2 + %lo(prom_entry_lock)] 215 216 ba,pt %xcc, after_lock_tlb 217 nop 218 219niagara_lock_tlb: 220 sethi %hi(KERNBASE), %l3 221 sethi %hi(kern_locked_tte_data), %l4 222 ldx [%l4 + %lo(kern_locked_tte_data)], %l4 223 clr %l5 224 sethi %hi(num_kernel_image_mappings), %l6 225 lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 226 add %l6, 1, %l6 227 2281: 229 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 230 sllx %l5, 22, %g2 231 add %l3, %g2, %o0 232 clr %o1 233 add %l4, %g2, %o2 234 mov HV_MMU_IMMU, %o3 235 ta HV_FAST_TRAP 236 237 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 238 sllx %l5, 22, %g2 239 add %l3, %g2, %o0 240 clr %o1 241 add %l4, %g2, %o2 242 mov HV_MMU_DMMU, %o3 243 ta HV_FAST_TRAP 244 245 add %l5, 1, %l5 246 cmp %l5, %l6 247 bne,pt %xcc, 1b 248 nop 249 250after_lock_tlb: 251 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate 252 wr %g0, 0, %fprs 253 254 wr %g0, ASI_P, %asi 255 256 mov PRIMARY_CONTEXT, %g7 257 258661: stxa %g0, [%g7] ASI_DMMU 259 .section .sun4v_1insn_patch, "ax" 260 .word 661b 261 stxa %g0, [%g7] ASI_MMU 262 .previous 263 264 membar #Sync 265 mov SECONDARY_CONTEXT, %g7 266 267661: stxa %g0, [%g7] ASI_DMMU 268 .section .sun4v_1insn_patch, "ax" 269 .word 661b 270 stxa %g0, [%g7] ASI_MMU 271 .previous 272 273 membar #Sync 274 275 /* Everything we do here, until we properly take over the 276 * trap table, must be done with extreme care. We cannot 277 * make any references to %g6 (current thread pointer), 278 * %g4 (current task pointer), or %g5 (base of current cpu's 279 * per-cpu area) until we properly take over the trap table 280 * from the firmware and hypervisor. 281 * 282 * Get onto temporary stack which is in the locked kernel image. 283 */ 284 sethi %hi(tramp_stack), %g1 285 or %g1, %lo(tramp_stack), %g1 286 add %g1, TRAMP_STACK_SIZE, %g1 287 sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp 288 mov 0, %fp 289 290 /* Put garbage in these registers to trap any access to them. */ 291 set 0xdeadbeef, %g4 292 set 0xdeadbeef, %g5 293 set 0xdeadbeef, %g6 294 295 call init_irqwork_curcpu 296 nop 297 298 sethi %hi(tlb_type), %g3 299 lduw [%g3 + %lo(tlb_type)], %g2 300 cmp %g2, 3 301 bne,pt %icc, 1f 302 nop 303 304 call hard_smp_processor_id 305 nop 306 307 call sun4v_register_mondo_queues 308 nop 309 3101: call init_cur_cpu_trap 311 ldx [%l0], %o0 312 313 /* Start using proper page size encodings in ctx register. */ 314 sethi %hi(sparc64_kern_pri_context), %g3 315 ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 316 mov PRIMARY_CONTEXT, %g1 317 318661: stxa %g2, [%g1] ASI_DMMU 319 .section .sun4v_1insn_patch, "ax" 320 .word 661b 321 stxa %g2, [%g1] ASI_MMU 322 .previous 323 324 membar #Sync 325 326 wrpr %g0, 0, %wstate 327 328 sethi %hi(prom_entry_lock), %g2 3291: ldstub [%g2 + %lo(prom_entry_lock)], %g1 330 brnz,pn %g1, 1b 331 nop 332 333 /* As a hack, put &init_thread_union into %g6. 334 * prom_world() loads from here to restore the %asi 335 * register. 336 */ 337 sethi %hi(init_thread_union), %g6 338 or %g6, %lo(init_thread_union), %g6 339 340 sethi %hi(is_sun4v), %o0 341 lduw [%o0 + %lo(is_sun4v)], %o0 342 brz,pt %o0, 2f 343 nop 344 345 TRAP_LOAD_TRAP_BLOCK(%g2, %g3) 346 add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 347 stxa %g2, [%g0] ASI_SCRATCHPAD 348 349 /* Compute physical address: 350 * 351 * paddr = kern_base + (mmfsa_vaddr - KERNBASE) 352 */ 353 sethi %hi(KERNBASE), %g3 354 sub %g2, %g3, %g2 355 sethi %hi(kern_base), %g3 356 ldx [%g3 + %lo(kern_base)], %g3 357 add %g2, %g3, %o1 358 sethi %hi(sparc64_ttable_tl0), %o0 359 360 set prom_set_trap_table_name, %g2 361 stx %g2, [%sp + 2047 + 128 + 0x00] 362 mov 2, %g2 363 stx %g2, [%sp + 2047 + 128 + 0x08] 364 mov 0, %g2 365 stx %g2, [%sp + 2047 + 128 + 0x10] 366 stx %o0, [%sp + 2047 + 128 + 0x18] 367 stx %o1, [%sp + 2047 + 128 + 0x20] 368 sethi %hi(p1275buf), %g2 369 or %g2, %lo(p1275buf), %g2 370 ldx [%g2 + 0x08], %o1 371 call %o1 372 add %sp, (2047 + 128), %o0 373 374 ba,pt %xcc, 3f 375 nop 376 3772: sethi %hi(sparc64_ttable_tl0), %o0 378 set prom_set_trap_table_name, %g2 379 stx %g2, [%sp + 2047 + 128 + 0x00] 380 mov 1, %g2 381 stx %g2, [%sp + 2047 + 128 + 0x08] 382 mov 0, %g2 383 stx %g2, [%sp + 2047 + 128 + 0x10] 384 stx %o0, [%sp + 2047 + 128 + 0x18] 385 sethi %hi(p1275buf), %g2 386 or %g2, %lo(p1275buf), %g2 387 ldx [%g2 + 0x08], %o1 388 call %o1 389 add %sp, (2047 + 128), %o0 390 3913: sethi %hi(prom_entry_lock), %g2 392 stb %g0, [%g2 + %lo(prom_entry_lock)] 393 394 ldx [%l0], %g6 395 ldx [%g6 + TI_TASK], %g4 396 397 mov 1, %g5 398 sllx %g5, THREAD_SHIFT, %g5 399 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 400 add %g6, %g5, %sp 401 mov 0, %fp 402 403 rdpr %pstate, %o1 404 or %o1, PSTATE_IE, %o1 405 wrpr %o1, 0, %pstate 406 407 call smp_callin 408 nop 409 call cpu_idle 410 mov 0, %o0 411 call cpu_panic 412 nop 4131: b,a,pt %xcc, 1b 414 415 .align 8 416sparc64_cpu_startup_end: 417