1/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 *   - contributed to gcc-3.4 on Sep 30, 2003
6 *   - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file.  (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING.  If not, write to
32the Free Software Foundation, 59 Temple Place - Suite 330,
33Boston, MA 02111-1307, USA.  */
34
35
36#include <linux/linkage.h>
37#include <asm/assembler.h>
38#include <asm/unwind.h>
39
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Core of the unsigned 32-bit shift-and-subtract division.  All four
 * arguments are registers and all four are overwritten:
 *   dividend - in: value to divide; out: clobbered
 *   divisor  - in: value to divide by; out: clobbered
 *   result   - out: quotient bits accumulated by the division loop
 *   curbit   - scratch: quotient bit matching the current divisor shift
 *
 * The two expansions in this file (__udivsi3 and __divsi3) only reach it
 * with dividend > divisor and a divisor that is not 0, 1 or a power of 2.
 * The macro uses the numeric local label 1 and changes the condition flags.
 *
 * NOTE(review): the value left in dividend does not look like a usable
 * remainder (steps where curbit has already shifted out still subtract a
 * truncated divisor); the callers in this file ignore it.
 */
40.macro ARM_DIV_BODY dividend, divisor, result, curbit
41
42#if __LINUX_ARM_ARCH__ >= 5
43
	@ CLZ is available: line the divisor up with the dividend in one
	@ step instead of looping.
44	clz	\curbit, \divisor		@ leading zeros of the divisor
45	clz	\result, \dividend		@ leading zeros of the dividend
46	sub	\result, \curbit, \result	@ shift that aligns the top set bits
47	mov	\curbit, #1
48	mov	\divisor, \divisor, lsl \result	@ divisor moved up by that shift
49	mov	\curbit, \curbit, lsl \result	@ quotient bit for the shifted divisor
50	mov	\result, #0			@ quotient accumulator starts empty
51
52#else
53
54	@ Initially shift the divisor left 3 bits if possible,
55	@ set curbit accordingly.  This allows for curbit to be located
56	@ at the left end of each 4 bit nibbles in the division loop
57	@ to save one loop in most cases.
58	tst	\divisor, #0xe0000000		@ top three bits clear?
59	moveq	\divisor, \divisor, lsl #3
60	moveq	\curbit, #8
61	movne	\curbit, #1
62
63	@ Unless the divisor is very big, shift it up in multiples of
64	@ four bits, since this is the amount of unwinding in the main
65	@ division loop.  Continue shifting until the divisor is
66	@ larger than the dividend.
671:	cmp	\divisor, #0x10000000
68	cmplo	\divisor, \dividend		@ only compared while a 4 bit shift still fits
69	movlo	\divisor, \divisor, lsl #4
70	movlo	\curbit, \curbit, lsl #4	@ keep curbit in step with the divisor
71	blo	1b
72
73	@ For very big divisors, we must shift it a bit at a time, or
74	@ we will be in danger of overflowing.
751:	cmp	\divisor, #0x80000000
76	cmplo	\divisor, \dividend		@ only compared while a 1 bit shift still fits
77	movlo	\divisor, \divisor, lsl #1
78	movlo	\curbit, \curbit, lsl #1
79	blo	1b
80
81	mov	\result, #0			@ quotient accumulator starts empty
82
83#endif
84
85	@ Division loop
	@ Each pass tries the divisor at shifts 0, 1, 2 and 3.  Whenever the
	@ shifted divisor fits it is subtracted from the dividend and the
	@ equally shifted curbit is ORed into the result.
861:	cmp	\dividend, \divisor
87	subhs	\dividend, \dividend, \divisor
88	orrhs	\result,   \result,   \curbit
89	cmp	\dividend, \divisor,  lsr #1
90	subhs	\dividend, \dividend, \divisor, lsr #1
91	orrhs	\result,   \result,   \curbit,  lsr #1
92	cmp	\dividend, \divisor,  lsr #2
93	subhs	\dividend, \dividend, \divisor, lsr #2
94	orrhs	\result,   \result,   \curbit,  lsr #2
95	cmp	\dividend, \divisor,  lsr #3
96	subhs	\dividend, \dividend, \divisor, lsr #3
97	orrhs	\result,   \result,   \curbit,  lsr #3
	@ The loop ends when the dividend reaches zero, or when shifting
	@ curbit right by 4 leaves zero (the flag-setting move then sets Z).
98	cmp	\dividend, #0			@ Early termination?
99	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
100	movne	\divisor,  \divisor, lsr #4
101	bne	1b
102
103.endm
104
105
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Writes to the register order the bit position of the set bit in the
 * register divisor, i.e. the right-shift count that divides by it.  Both
 * expansions in this file are reached only after the caller has tested
 * that the divisor is a power of two.
 *
 * The CLZ variant leaves divisor untouched; the variant for older
 * architectures shifts divisor down as it narrows the search, so the
 * register is clobbered there.  Condition flags are changed by the
 * latter variant.
 */
106.macro ARM_DIV2_ORDER divisor, order
107
108#if __LINUX_ARM_ARCH__ >= 5
109
110	clz	\order, \divisor		@ leading zeros of the divisor
111	rsb	\order, \order, #31		@ order = 31 - leading zeros
112
113#else
114
	@ Binary search: test the upper half of the remaining range, and
	@ when the bit is up there shift it down and credit the shift.
115	cmp	\divisor, #(1 << 16)
116	movhs	\divisor, \divisor, lsr #16
117	movhs	\order, #16
118	movlo	\order, #0
119
120	cmp	\divisor, #(1 << 8)
121	movhs	\divisor, \divisor, lsr #8
122	addhs	\order, \order, #8
123
124	cmp	\divisor, #(1 << 4)
125	movhs	\divisor, \divisor, lsr #4
126	addhs	\order, \order, #4
127
	@ The divisor is now below 16.  For a single set bit that means
	@ 1, 2, 4 or 8: 8 adds 3, the others add divisor >> 1 (0, 1 or 2).
128	cmp	\divisor, #(1 << 2)
129	addhi	\order, \order, #3
130	addls	\order, \order, \divisor, lsr #1
131
132#endif
133
134.endm
135
136
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Core of the unsigned 32-bit remainder computation.  The divisor is
 * shifted up towards the dividend, then subtracted back out one bit
 * position at a time; what is left in the register dividend is the
 * remainder.
 *   dividend - in: value to reduce; out: remainder
 *   divisor  - in: modulus; out: clobbered
 *   order    - scratch: number of bit positions the divisor was shifted
 *   spare    - scratch, written only by the CLZ variant
 *
 * The two expansions in this file (__umodsi3 and __modsi3) only reach it
 * with dividend > divisor and a divisor that is not 0, 1 or a power of 2.
 * The macro uses the numeric local labels 1 to 5 and changes the flags.
 */
137.macro ARM_MOD_BODY dividend, divisor, order, spare
138
139#if __LINUX_ARM_ARCH__ >= 5
140
141	clz	\order, \divisor		@ leading zeros of the divisor
142	clz	\spare, \dividend		@ leading zeros of the dividend
143	sub	\order, \order, \spare		@ shift that aligns the top set bits
144	mov	\divisor, \divisor, lsl \order
145
146#else
147
148	mov	\order, #0			@ no shift applied yet
149
150	@ Unless the divisor is very big, shift it up in multiples of
151	@ four bits, since this is the amount of unwinding in the main
152	@ division loop.  Continue shifting until the divisor is
153	@ larger than the dividend.
1541:	cmp	\divisor, #0x10000000
155	cmplo	\divisor, \dividend
156	movlo	\divisor, \divisor, lsl #4
157	addlo	\order, \order, #4		@ record the 4 bit shift
158	blo	1b
159
160	@ For very big divisors, we must shift it a bit at a time, or
161	@ we will be in danger of overflowing.
1621:	cmp	\divisor, #0x80000000
163	cmplo	\divisor, \dividend
164	movlo	\divisor, \divisor, lsl #1
165	addlo	\order, \order, #1		@ record the 1 bit shift
166	blo	1b
167
168#endif
169
170	@ Perform all needed subtractions to keep only the remainder.
171	@ Do comparisons in batch of 4 first.
	@ A shift of order needs order + 1 compare/subtract steps, so a
	@ full batch of four is possible only while order is at least 3.
172	subs	\order, \order, #3		@ yes, 3 is intended here
173	blt	2f
174
1751:	cmp	\dividend, \divisor
176	subhs	\dividend, \dividend, \divisor
177	cmp	\dividend, \divisor,  lsr #1
178	subhs	\dividend, \dividend, \divisor, lsr #1
179	cmp	\dividend, \divisor,  lsr #2
180	subhs	\dividend, \dividend, \divisor, lsr #2
181	cmp	\dividend, \divisor,  lsr #3
182	subhs	\dividend, \dividend, \divisor, lsr #3
183	cmp	\dividend, #1			@ anything left to reduce?
184	mov	\divisor, \divisor, lsr #4	@ flags are not affected by this move
185	subsge	\order, \order, #4		@ if so, account for the batch just done
186	bge	1b				@ and repeat while a full batch remains
187
	@ Finished when no steps are left over (low two bits of order are
	@ clear) or when the dividend has already reached zero.
188	tst	\order, #3
189	teqne	\dividend, #0
190	beq	5f
191
192	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is -3, -2 or -1 here, for 1, 2 or 3 remaining steps.
1932:	cmn	\order, #2
194	blt	4f				@ order == -3: one step
195	beq	3f				@ order == -2: two steps
196	cmp	\dividend, \divisor
197	subhs	\dividend, \dividend, \divisor
198	mov	\divisor,  \divisor,  lsr #1
1993:	cmp	\dividend, \divisor
200	subhs	\dividend, \dividend, \divisor
201	mov	\divisor,  \divisor,  lsr #1
2024:	cmp	\dividend, \divisor
203	subhs	\dividend, \dividend, \divisor
2045:
205.endm
206
207
/* NOTE(review): the reason for the 8 byte alignment under
 * CONFIG_ARM_PATCH_IDIV is not visible in this file; presumably the entry
 * point is rewritten at run time - confirm against the patching code. */
208#ifdef CONFIG_ARM_PATCH_IDIV
209	.align	3
210#endif
211
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient
 * Clobbers: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0, which calls __div0 and then returns
 * with r0 = 0.
 */
212ENTRY(__udivsi3)
213ENTRY(__aeabi_uidiv)
214UNWIND(.fnstart)
215
216	subs	r2, r1, #1			@ r2 = divisor - 1; Z if divisor is 1, C clear if it is 0
217	reteq	lr				@ divide by 1: r0 already holds the quotient
218	bcc	Ldiv0				@ divide by 0
219	cmp	r0, r1
220	bls	11f				@ dividend <= divisor: quotient is 0 or 1
221	tst	r1, r2				@ divisor & (divisor - 1)
222	beq	12f				@ zero means a power of 2: plain shift
223
224	ARM_DIV_BODY r0, r1, r2, r3
225
226	mov	r0, r2				@ quotient was accumulated in r2
227	ret	lr
228
	@ Flags still come from the cmp r0, r1 above.
22911:	moveq	r0, #1				@ dividend == divisor
230	movne	r0, #0				@ dividend <  divisor
231	ret	lr
232
23312:	ARM_DIV2_ORDER r1, r2
234
235	mov	r0, r0, lsr r2			@ shift right by the bit position of the divisor
236	ret	lr
237
238UNWIND(.fnend)
239ENDPROC(__udivsi3)
240ENDPROC(__aeabi_uidiv)
241
/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = dividend modulo divisor
 * Clobbers: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0, which calls __div0 and then returns
 * with r0 = 0.  The easy cases are all resolved by one chain of
 * conditional instructions that carry the flags forward, so the order of
 * the first seven instructions matters.
 */
242ENTRY(__umodsi3)
243UNWIND(.fnstart)
244
245	subs	r2, r1, #1			@ compare divisor with 1
246	bcc	Ldiv0				@ borrow: divisor was 0
247	cmpne	r0, r1				@ compare dividend with divisor
248	moveq   r0, #0				@ divisor is 1 or equals dividend: remainder 0
249	tsthi	r1, r2				@ see if divisor is power of 2
250	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
251	retls	lr				@ done unless dividend > divisor and not a power of 2
252
253	ARM_MOD_BODY r0, r1, r2, r3
254
255	ret	lr
256
257UNWIND(.fnend)
258ENDPROC(__umodsi3)
259
/* NOTE(review): the reason for the 8 byte alignment under
 * CONFIG_ARM_PATCH_IDIV is not visible in this file; presumably the entry
 * point is rewritten at run time - confirm against the patching code. */
260#ifdef CONFIG_ARM_PATCH_IDIV
261	.align 3
262#endif
263
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient
 * Clobbers: r1, r2, r3, ip and the condition flags
 *
 * Both operands are made non-negative, divided as unsigned values, and
 * the quotient is negated when the operand signs differ (bit 31 of ip).
 * A zero divisor branches to Ldiv0, which calls __div0 and then returns
 * with r0 = 0.
 */
264ENTRY(__divsi3)
265ENTRY(__aeabi_idiv)
266UNWIND(.fnstart)
267
268	cmp	r1, #0
269	eor	ip, r0, r1			@ save the sign of the result.
270	beq	Ldiv0				@ flags are still those of the cmp above
271	rsbmi	r1, r1, #0			@ loops below use unsigned.
272	subs	r2, r1, #1			@ division by 1 or -1 ?
273	beq	10f
274	movs	r3, r0
275	rsbmi	r3, r0, #0			@ positive dividend value
276	cmp	r3, r1
277	bls	11f				@ |dividend| <= |divisor|: quotient is 0, 1 or -1
278	tst	r1, r2				@ divisor is power of 2 ?
279	beq	12f
280
281	ARM_DIV_BODY r3, r1, r0, r2
282
283	cmp	ip, #0				@ operand signs differ?
284	rsbmi	r0, r0, #0			@ then negate the quotient
285	ret	lr
286
	@ ip is r0 ^ r1, so ip ^ r0 carries the sign of the original divisor:
	@ dividing by -1 negates the dividend, dividing by 1 leaves it alone.
28710:	teq	ip, r0				@ same sign ?
288	rsbmi	r0, r0, #0
289	ret	lr
290
	@ Flags still come from the cmp r3, r1 above.
29111:	movlo	r0, #0				@ |dividend| < |divisor|
292	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 from the result sign
293	orreq	r0, r0, #1			@ ... turned into 1 or -1
294	ret	lr
295
29612:	ARM_DIV2_ORDER r1, r2
297
298	cmp	ip, #0
299	mov	r0, r3, lsr r2			@ |dividend| shifted by the bit position of |divisor|
300	rsbmi	r0, r0, #0			@ negate when the operand signs differ
301	ret	lr
302
303UNWIND(.fnend)
304ENDPROC(__divsi3)
305ENDPROC(__aeabi_idiv)
306
/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = remainder, carrying the sign of the dividend
 * Clobbers: r1, r2, r3, ip and the condition flags
 *
 * Both operands are made non-negative, reduced as unsigned values, and
 * the result is negated when the original dividend (kept in ip) was
 * negative.  A zero divisor branches to Ldiv0, which calls __div0 and
 * then returns with r0 = 0.
 */
307ENTRY(__modsi3)
308UNWIND(.fnstart)
309
310	cmp	r1, #0
311	beq	Ldiv0
312	rsbmi	r1, r1, #0			@ loops below use unsigned.
313	movs	ip, r0				@ preserve sign of dividend
314	rsbmi	r0, r0, #0			@ if negative make positive
	@ Same conditional chain as in __umodsi3, except that the easy cases
	@ branch to the sign fix-up at 10 instead of returning directly.
315	subs	r2, r1, #1			@ compare divisor with 1
316	cmpne	r0, r1				@ compare dividend with divisor
317	moveq	r0, #0				@ divisor is 1 or equals dividend: remainder 0
318	tsthi	r1, r2				@ see if divisor is power of 2
319	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
320	bls	10f
321
322	ARM_MOD_BODY r0, r1, r2, r3
323
32410:	cmp	ip, #0				@ was the dividend negative?
325	rsbmi	r0, r0, #0			@ then negate the remainder
326	ret	lr
327
328UNWIND(.fnend)
329ENDPROC(__modsi3)
330
331#ifdef CONFIG_AEABI
332
/*
 * __aeabi_uidivmod: unsigned 32-bit division returning both results.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient, r1 = remainder
 * Clobbers: r2, r3 and whatever __aeabi_uidiv clobbers
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then rebuilt
 * as dividend - quotient * divisor from the operands saved on the stack.
 * NOTE(review): ip is pushed and popped with the operands although this
 * routine does not use it - presumably to keep the frame at four words;
 * confirm.
 */
333ENTRY(__aeabi_uidivmod)
334UNWIND(.fnstart)
335UNWIND(.save {r0, r1, ip, lr}	)
336
337	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
338	bl	__aeabi_uidiv			@ r0 = quotient
339	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
340	mul	r3, r0, r2			@ r3 = quotient * divisor
341	sub	r1, r1, r3			@ r1 = remainder
342	ret	lr
343
344UNWIND(.fnend)
345ENDPROC(__aeabi_uidivmod)
346
/*
 * __aeabi_idivmod: signed 32-bit division returning both results.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient, r1 = remainder
 * Clobbers: r2, r3 and whatever __aeabi_idiv clobbers (ip is saved and
 *           restored here)
 *
 * The quotient comes from __aeabi_idiv; the remainder is then rebuilt
 * as dividend - quotient * divisor from the operands saved on the stack.
 */
347ENTRY(__aeabi_idivmod)
348UNWIND(.fnstart)
349UNWIND(.save {r0, r1, ip, lr}	)
350	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
351	bl	__aeabi_idiv			@ r0 = quotient
352	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
353	mul	r3, r0, r2			@ r3 = quotient * divisor
354	sub	r1, r1, r3			@ r1 = remainder
355	ret	lr
356
357UNWIND(.fnend)
358ENDPROC(__aeabi_idivmod)
359
360#endif
361
/*
 * Ldiv0: common division-by-zero exit, branched to (not called) by the
 * division and modulo entry points in this file, so lr still holds the
 * return address of their caller.
 *
 * Calls __div0 (defined outside this file), then returns to that caller
 * with r0 = 0.  The frame is 8 bytes: the saved lr plus 4 bytes of
 * padding, matching the .pad and .save unwind annotations.
 */
362Ldiv0:
363UNWIND(.fnstart)
364UNWIND(.pad #4)
365UNWIND(.save {lr})
366	str	lr, [sp, #-8]!		@ sp -= 8, lr stored at the new sp
367	bl	__div0
368	mov	r0, #0			@ About as wrong as it could be.
369	ldr	pc, [sp], #8		@ return through the saved lr, sp += 8
370UNWIND(.fnend)
371ENDPROC(Ldiv0)
372