/*
 * linux/arch/arm26/boot/compressed/head.S
 *
 * Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
		.endm

		.macro	debug_reloc_end
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ save r0
		teqp	pc, #0x0c000003		@ turn off interrupts

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

		teq	r0, #0			@ if delta is zero, we're
		beq	not_relocated		@ running at the address we
						@ were linked at.

		add	r2, r2, r0		@ different address, so we
		add	r3, r3, r0		@ need to fix up various
		add	r5, r5, r0		@ pointers.
		add	r6, r6, r0
		add	ip, ip, r0
		add	sp, sp, r0

1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2                 -> OK
 *   r4 + image length <= r5  -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
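 * The kernel is decompressed straight to its execution address (r4) and
 * then entered via call_kernel.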
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_load_addr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		orr	r0, r0, #0x0030
		b	__common_cache_on

__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, r12

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
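 * reloc_start copies the decompressed image from r5 down to the execution
 * address held in r4; call_kernel then flushes the caches and jumps to it.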
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *  - CPU ID match
 *  - CPU ID mask
 *  - 'cache on' method instruction
 *  - 'cache off' method instruction
 *  - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x41129200		@ ARM920T
		.word	0xff00fff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x69050000		@ xscale
		.word	0xffff0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
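 * The ARMv3-style routines below therefore load a fixed control register
 * value (0x30 for ARM6, 0x70 for ARM7) rather than doing a read-modify-write.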
 *
 * On entry, r6 = processor ID
 * On exit, r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_cache_flush:
		bic	r1, pc, #31
		add	r2, r1, #65536		@ 2x the largest dcache size
1:		ldr	r12, [r1], #32		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c7, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "aw"
user_stack:	.space	4096