/*
 *  linux/arch/arm26/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
		.endm

		.macro	debug_reloc_end
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
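/*
 * The eight "mov r0, r0" no-ops below give boot loaders that enter a
 * few instructions into the image a safe landing pad, and the
 * 0x016f2818 magic word lets a loader recognise the zImage header
 * before using the load/run and end addresses that follow it.
 */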
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ save r0
		teqp	pc, #0x0c000003		@ turn off interrupts

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

		teq	r0, #0			@ if delta is zero, we're
		beq	not_relocated		@ running at the address we
						@ were linked at.

		add	r2, r2, r0		@ different address, so we
		add	r3, r3, r0		@ need to fix up various
		add	r5, r5, r0		@ pointers.
		add	r6, r6, r0
		add	ip, ip, r0
		add	sp, sp, r0

1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
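@ For example (illustrative numbers only): with r4 = 0x8000 and an
@ image running at r5 = 0x4000000, the first test fails (r4 < r2),
@ but r4 + 4MB = 0x408000 <= r5, so "bls wont_overwrite" is taken.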
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
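@ The loop below copies the position-independent stub between
@ reloc_start and reloc_end (LC1 holds its length) to just past the
@ decompressed image, 48 bytes per pass, then branches to it so the
@ kernel can be moved onto its execution address by code that cannot
@ be overwritten during the move.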
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_load_addr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

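/*
 * A first-level page table on these CPUs holds 4096 one-word section
 * entries, each mapping 1MB, hence the 16384-byte allocation and
 * alignment below.
 */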
__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
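@ Each entry is a 1MB section descriptor: 0x12 marks a section
@ mapping, #3 << 10 sets the AP bits for full access, and bits 2-3
@ (0x0c) are the bufferable and cacheable bits toggled in the loop.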
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for compressed kernels up to 1MB.
 * If the execution is in RAM then we would only be duplicating the above.
 */
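@ 0x1e is a section descriptor with the cacheable and bufferable bits
@ already set; the two stores below map the 1MB section containing
@ the current pc, and the one after it, 1:1.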
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		orr	r0, r0, #0x0030
		b	__common_cache_on

__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, r12

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b
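@ Each proc_types entry is five words: ID match, ID mask, then one
@ branch each for the 'on', 'off' and 'flush' methods, so the r3
@ offsets passed in are 8 (cache_on), 12 (cache_off) and 16
@ (cache_clean_flush).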

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
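/*
 * For example (illustrative ID value): a part reporting ID
 * 0x4401a113 selects the sa110/sa1100 entry below, since
 * (0x4401a113 ^ 0x4401a100) & 0xffffffe0 == 0.
 */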
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x41129200		@ ARM920T
		.word	0xff00fff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x69050000		@ xscale
		.word	0xffff0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

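@ Software clean of the D cache: reading one word per 32-byte line
@ over a 64k window (assumed to be at least twice the largest dcache)
@ forces any dirty lines to be written back via line allocation.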
__armv4_cache_flush:
		bic	r1, pc, #31
		add	r2, r1, #65536		@ 2x the largest dcache size
1:		ldr	r12, [r1], #32		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c7, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

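@ phex prints r0 as a hex number of r1 digits: it NUL-terminates
@ phexbuf, converts one low nibble per pass (adding #7 to map values
@ above 9 onto 'A'-'F'), shifts r0 right four bits, and hands the
@ finished buffer to puts once r1 goes negative.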
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

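@ memdump prints 64 words starting at the address in r0, eight words
@ per line, each line prefixed with its address; lr is preserved in
@ r10 across the putc/phex calls.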
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "aw"
user_stack:	.space	4096