1/*
2 *  linux/arch/arm/lib/copy_template.s
3 *
4 *  Code template for optimized memory copy functions
5 *
6 *  Author:	Nicolas Pitre
7 *  Created:	Sep 28, 2005
8 *  Copyright:	MontaVista Software, Inc.
9 *
10 *  This program is free software; you can redistribute it and/or modify
11 *  it under the terms of the GNU General Public License version 2 as
12 *  published by the Free Software Foundation.
13 */
14
15/*
16 * This can be used to enable code to cacheline align the source pointer.
17 * Experiments on tested architectures (StrongARM and XScale) didn't show
18 * this to be a worthwhile thing to do.  That might be different in the future.
 *
 * Lines wrapped in CALGN() are emitted only with the first (commented
 * out) definition below; with the active, empty definition they are
 * dropped by the preprocessor.
19 */
20//#define CALGN(code...)	code
21#define CALGN(code...)
22
23/*
24 * Theory of operation
25 * -------------------
26 *
27 * This file provides the core code for a forward memory copy used in
28 * the implementation of memcpy(), copy_to_user() and copy_from_user().
29 *
30 * The including file must define the following accessor macros
31 * according to the needs of the given function:
32 *
33 * ldr1w ptr reg abort
34 *
35 *	This loads one word from 'ptr', stores it in 'reg' and increments
36 *	'ptr' to the next word. The 'abort' argument is used for fixup tables.
37 *
38 * ldr4w ptr reg1 reg2 reg3 reg4 abort
39 * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
40 *
41 *	This loads four or eight words starting from 'ptr', stores them
42 *	in provided registers and increments 'ptr' past those words.
43 *	The 'abort' argument is used for fixup tables.
44 *
45 * ldr1b ptr reg cond abort
46 *
47 *	Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
48 *	It also must apply the condition code if provided, otherwise the
49 *	"al" condition is assumed by default.
50 *
51 * str1w ptr reg abort
52 * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
53 * str1b ptr reg cond abort
54 *
55 *	Same as their ldr* counterparts, but data is stored to 'ptr' location
56 *	rather than being loaded.
57 *
58 * enter reg1 reg2
59 *
60 *	Preserve the provided registers on the stack plus any additional
61 *	data as needed by the implementation including this code. Called
62 *	upon code entry.
63 *
64 * exit reg1 reg2
65 *
66 *	Restore registers with the values previously saved with the
67 *	'enter' macro. Called upon code termination.
68 */
69
70
/*
 * Body of the forward copy.
 *
 * Register usage as established by the code below:
 *   r0            - destination pointer (every str* accessor uses it)
 *   r1            - source pointer (every ldr* accessor uses it)
 *   r2            - remaining byte count, kept biased: 4 is subtracted on
 *                   entry and a further 28 at label 1, so that its sign
 *                   serves as the loop test while its low bits still match
 *                   those of the true remainder
 *   r3-r8, ip, lr - data / scratch
 *
 * NOTE(review): r0/r1/r2 being dst/src/length on entry is inferred from
 * how they are used here; the calling convention is set by the including
 * file - confirm there.
 */
71		enter	r4, lr
72
/* Fewer than 4 bytes in total: only the byte tail at label 8 is needed. */
73		subs	r2, r2, #4
74		blt	8f
/* Destination not word aligned: copy the 1-3 leading bytes at label 9. */
75		ands	ip, r0, #3
76	PLD(	pld	[r1, #0]		)
77		bne	9f
/* Destination aligned but source is not: shifted copy from label 10. */
78		ands	ip, r1, #3
79		bne	10f
80
/*
 * Both pointers are word aligned.  r2 becomes (remaining - 32).
 * r5-r8 are pushed here and popped again at label 7; the abort label 20
 * pops the same set.
 */
811:		subs	r2, r2, #(28)
82		stmfd	sp!, {r5 - r8}
83		blt	5f
84
/* CALGN() is defined empty in this file, so this group emits nothing. */
85	CALGN(	ands	ip, r1, #31		)
86	CALGN(	rsb	r3, ip, #32		)
87	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
88	CALGN(	bcs	2f			)
89	CALGN(	adr	r4, 6f			)
90	CALGN(	subs	r2, r2, r3		)  @ C gets set
91	CALGN(	add	pc, r4, ip		)
92
/*
 * PLD() is not defined in this file.
 * NOTE(review): presumably it emits its argument only on cores that
 * implement pld - confirm in the including headers.
 * When it is active, r2 gets an extra bias of 96: the preloading
 * iterations (label 3) end once r2 goes negative, and the cmn/bge pair
 * after the loop keeps iterating through label 4, without further
 * preloads, for as long as at least 32 bytes remain.
 */
93	PLD(	pld	[r1, #0]		)
942:	PLD(	subs	r2, r2, #96		)
95	PLD(	pld	[r1, #28]		)
96	PLD(	blt	4f			)
97	PLD(	pld	[r1, #60]		)
98	PLD(	pld	[r1, #92]		)
99
/* Main loop: move 32 bytes per iteration through r3-r8, ip and lr. */
1003:	PLD(	pld	[r1, #124]		)
1014:		ldr8w	r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
102		subs	r2, r2, #32
103		str8w	r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
104		bge	3b
105	PLD(	cmn	r2, #96			)
106	PLD(	bge	4b			)
107
/*
 * 0-7 whole words are left.  With none left the ands result is zero, the
 * addne is skipped and control goes to label 7.  Otherwise
 * ip = 32 - 4 * (words left) is added to pc to enter the load ladder part
 * way down, and again to enter the store ladder at the matching slot, so
 * that only the last (words left) loads and stores execute.  A read of pc
 * yields the address of the current instruction plus 8, which is what the
 * padding nops account for.
 * NOTE(review): this relies on ARM (not Thumb) encoding and on every
 * ldr1w/str1w expanding to exactly one 4-byte instruction; the accessors
 * are defined by the including file - confirm there.
 */
1085:		ands	ip, r2, #28
109		rsb	ip, ip, #32
110		addne	pc, pc, ip		@ C is always clear here
111		b	7f
1126:		nop
113		ldr1w	r1, r3, abort=20f
114		ldr1w	r1, r4, abort=20f
115		ldr1w	r1, r5, abort=20f
116		ldr1w	r1, r6, abort=20f
117		ldr1w	r1, r7, abort=20f
118		ldr1w	r1, r8, abort=20f
119		ldr1w	r1, lr, abort=20f
120
121		add	pc, pc, ip
122		nop
123		nop
124		str1w	r0, r3, abort=20f
125		str1w	r0, r4, abort=20f
126		str1w	r0, r5, abort=20f
127		str1w	r0, r6, abort=20f
128		str1w	r0, r7, abort=20f
129		str1w	r0, r8, abort=20f
130		str1w	r0, lr, abort=20f
131
132	CALGN(	bcs	2b			)
133
/* Undo the push done at label 1. */
1347:		ldmfd	sp!, {r5 - r8}
135
/*
 * Tail of 0-3 bytes.  The shift by 31 leaves bit 0 of r2 as the only bit
 * of the result (ne = one byte to copy) and moves bit 1 of r2 into the
 * carry flag (cs = two more bytes to copy).
 */
1368:		movs	r2, r2, lsl #31
137		ldr1b	r1, r3, ne, abort=21f
138		ldr1b	r1, r4, cs, abort=21f
139		ldr1b	r1, ip, cs, abort=21f
140		str1b	r0, r3, ne, abort=21f
141		str1b	r0, r4, cs, abort=21f
142		str1b	r0, ip, cs, abort=21f
143
144		exit	r4, pc
145
/*
 * Destination misaligned.  On entry ip = r0 & 3 (non-zero); after the rsb
 * it holds the 1-3 bytes needed to reach a word boundary.  The first byte
 * is copied only when 3 are needed (gt), the second when at least 2 are
 * (ge), the third always.  This path is reached only after the "blt 8f"
 * check at entry, i.e. with at least 4 bytes to copy.
 * The subs sets the flags for the blt below: fewer than 4 bytes left
 * goes to the byte tail.
 * NOTE(review): the last str1b sits between that subs and the blt, so
 * the accessor is expected to leave the flags untouched - confirm.
 * If the source is now word aligned as well, continue at label 1;
 * otherwise fall through to label 10 with ip = r1 & 3.
 */
1469:		rsb	ip, ip, #4
147		cmp	ip, #2
148		ldr1b	r1, r3, gt, abort=21f
149		ldr1b	r1, r4, ge, abort=21f
150		ldr1b	r1, lr, abort=21f
151		str1b	r0, r3, gt, abort=21f
152		str1b	r0, r4, ge, abort=21f
153		subs	r2, r2, ip
154		str1b	r0, lr, abort=21f
155		blt	8b
156		ands	ip, r1, #3
157		beq	1b
158
/*
 * Source misaligned by ip = r1 & 3 (1-3) while the destination is word
 * aligned.  r1 is rounded down to a word boundary and the first word is
 * fetched into lr.  The misalignment then selects one of the three
 * forward_copy_shift expansions: 1 falls through into the first one,
 * 2 branches to label 17 and 3 to label 18.
 * NOTE(review): the ldr1w sits between the cmp and the branches, so it is
 * expected to leave the flags untouched - confirm.
 */
15910:		bic	r1, r1, #3
160		cmp	ip, #2
161		ldr1w	r1, lr, abort=21f
162		beq	17f
163		bgt	18f
164
165
/*
 * forward_copy_shift pull push
 *
 * Copy loop for a word-aligned destination and a source that is not word
 * aligned.  The source is read as whole words through r1 and every stored
 * word is assembled as
 *	(previous source word, pull by 'pull' bits) orr
 *	(next source word, push by 'push' bits).
 *
 *   pull, push - shift amounts in bits; the three expansions in this file
 *                use the pairs 8/24, 16/16 and 24/8.
 *
 * On entry lr holds the most recently loaded source word (loaded at
 * label 10) and r2 is the remaining count biased by -4.  The macro always
 * leaves through "b 8b", the byte tail.
 *
 * NOTE(review): 'pull' and 'push' used as shift operators are not defined
 * in this file; presumably they map to lsr/lsl according to endianness -
 * confirm in the including headers.
 */
166		.macro	forward_copy_shift pull push
167
/* Fewer than 32 bytes left: skip the block loop. */
168		subs	r2, r2, #28
169		blt	14f
170
/* CALGN() is defined empty in this file, so this group emits nothing. */
171	CALGN(	ands	ip, r1, #31		)
172	CALGN(	rsb	ip, ip, #32		)
173	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
174	CALGN(	subcc	r2, r2, ip		)
175	CALGN(	bcc	15f			)
176
/* r5-r9 are popped again after the loop; abort label 19 pops the same set. */
17711:		stmfd	sp!, {r5 - r9}
178
/* Same preload scheme (extra bias of 96 on r2) as in the aligned loop. */
179	PLD(	pld	[r1, #0]		)
180	PLD(	subs	r2, r2, #96		)
181	PLD(	pld	[r1, #28]		)
182	PLD(	blt	13f			)
183	PLD(	pld	[r1, #60]		)
184	PLD(	pld	[r1, #92]		)
185
/*
 * Block loop: 8 new source words go into r4-r9, ip and lr; 8 output words
 * are merged in place into r3-r9 and ip and stored.  lr keeps the last
 * source word as the carry-over for the next iteration.  The flags set by
 * the subs are consumed by the bge after the str8w.
 */
18612:	PLD(	pld	[r1, #124]		)
18713:		ldr4w	r1, r4, r5, r6, r7, abort=19f
188		mov	r3, lr, pull #\pull
189		subs	r2, r2, #32
190		ldr4w	r1, r8, r9, ip, lr, abort=19f
191		orr	r3, r3, r4, push #\push
192		mov	r4, r4, pull #\pull
193		orr	r4, r4, r5, push #\push
194		mov	r5, r5, pull #\pull
195		orr	r5, r5, r6, push #\push
196		mov	r6, r6, pull #\pull
197		orr	r6, r6, r7, push #\push
198		mov	r7, r7, pull #\pull
199		orr	r7, r7, r8, push #\push
200		mov	r8, r8, pull #\pull
201		orr	r8, r8, r9, push #\push
202		mov	r9, r9, pull #\pull
203		orr	r9, r9, ip, push #\push
204		mov	ip, ip, pull #\pull
205		orr	ip, ip, lr, push #\push
206		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
207		bge	12b
208	PLD(	cmn	r2, #96			)
209	PLD(	bge	13b			)
210
211		ldmfd	sp!, {r5 - r9}
212
/* ip = bytes left in whole words (0-28); none left skips to label 16. */
21314:		ands	ip, r2, #28
214		beq	16f
215
/* Word-at-a-time version of the merge above, run ip / 4 times. */
21615:		mov	r3, lr, pull #\pull
217		ldr1w	r1, lr, abort=21f
218		subs	ip, ip, #4
219		orr	r3, r3, lr, push #\push
220		str1w	r0, r3, abort=21f
221		bgt	15b
222	CALGN(	cmp	r2, #0			)
223	CALGN(	bge	11b			)
224
/*
 * Step r1 back by push / 8 bytes before handing over to the byte tail.
 * NOTE(review): presumably these are the bytes already fetched into lr
 * but not yet stored, so that r1 is again the true source position -
 * confirm.
 */
22516:		sub	r1, r1, #(\push / 8)
226		b	8b
227
228		.endm
229
230
/*
 * One expansion per source misalignment (r1 & 3 as tested at label 10):
 * 1 falls through into the first expansion, 2 enters at label 17 and
 * 3 at label 18.  The order matters because of that fall-through.
 */
231		forward_copy_shift	pull=8	push=24
232
23317:		forward_copy_shift	pull=16	push=16
234
23518:		forward_copy_shift	pull=24	push=8
236
237
238/*
239 * Abort preamble and completion macros.
240 * If a fixup handler is required then those macros must surround it.
241 * It is assumed that the fixup code will handle the private part of
242 * the exit macro.
243 */
244
/*
 * copy_abort_preamble - entry labels used as 'abort' targets by the
 * accessors in the copy code.
 *
 *   19: used inside the shifted block loop, where r5-r9 are on the stack
 *   20: used in the aligned copy, where r5-r8 are on the stack
 *   21: used where nothing has been pushed beyond what 'enter' saved
 *
 * Labels 19 and 20 pop their register set and all three paths end up at
 * label 21, which is the last thing this macro emits.
 */
245	.macro	copy_abort_preamble
24619:	ldmfd	sp!, {r5 - r9}
247	b	21f
24820:	ldmfd	sp!, {r5 - r8}
24921:
250	.endm
251
/*
 * copy_abort_end - leave the copy routine from the abort path: pops r4
 * and loads the next stacked word into pc.
 * NOTE(review): this mirrors the "enter r4, lr" at the top of the copy
 * body; what 'enter' actually pushes is decided by the including file -
 * confirm that the two match.
 */
252	.macro	copy_abort_end
253	ldmfd	sp!, {r4, pc}
254	.endm
255