dnl  ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
dnl  result from a second limb vector.

dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C            cycles/limb
C StrongARM:  7.75-9.75  (dependent on vl value)
C XScale:        8-9     (dependent on vl value, estimated)

C Register roles used by mpn_submul_1 below.
C
C   name  reg  role
C   rp    r0   limb vector that is read, reduced and written back in place
C   up    r1   limb vector whose limbs are multiplied by vl
C   n     r2   limb count
C   vl    r3   the single multiplier limb
C   rl    r12  limb most recently loaded through rp
C   ul    r6   limb most recently loaded through up
C   r     lr   scratch: difference limb about to be stored, and the
C              throwaway destination of the bics loop tests
C
C rp, up, n and vl are read on entry without being initialised, so they
C are the incoming arguments.  r4 and r5 are used directly by name as the
C low/high halves of the running product.
define(`rp',`r0')
define(`up',`r1')
define(`n',`r2')
define(`vl',`r3')
define(`rl',`r12')
define(`ul',`r6')
define(`r',`lr')


ASM_START()
PROLOGUE(mpn_submul_1)
C Computes rp[i] -= up[i] * vl for the n limbs at rp and up, lowest limb
C first, and returns in r0 the amount still owed by the limb just past the
C end: the high half of the last product plus the final borrow.
C
C In:    rp, up, n, vl  (r0, r1, r2, r3 -- see the defines at the top)
C Out:   r0
C Saves: r4-r6 and lr are pushed on entry and popped on exit; r12 is
C        overwritten.
C
C The borrow is kept in the carry flag for the whole routine (C set means
C no borrow).  Between two sbcs steps only instructions that leave C alone
C are used: loads, stores, mov, umlal, sub without the s suffix, and tst or
C bics with an unrotated immediate.  Keep it that way when editing.
C
C n is split by its low bits: bit 0 selects a one-limb step, bit 1 a
C two-limb step, and the remaining multiple of four is done by the loop.
C With n = 0 every step is skipped, no memory is touched and 0 is returned.
C
C Product bookkeeping: umlal lo, hi, ul, vl with hi preset to 0 leaves
C hi:lo = ul * vl + lo, so the previous high half is folded into the new
C low half.  r4 and r5 swap the lo and hi roles on every limb.  At
C L(skip1), L(skip2) and L(return) the pending high half is in r4.
	stmfd	sp!, { r4-r6, lr }
	subs	r4, r0, r0		C clear r4, set cy

C --- n bit 0: one limb ---------------------------------------------------
	tst	n, #1
	beq	L(skip1)
	ldr	ul, [up], #4
	ldr	rl, [rp, #0]
	umull	r5, r4, ul, vl		C r5 = low half, r4 = high half
	subs	r, rl, r5		C first subtraction, starts the borrow chain
	str	r, [rp], #4
L(skip1):

C --- n bit 1: two limbs --------------------------------------------------
	tst	n, #2
	beq	L(skip2)
	ldr	ul, [up], #4
	ldr	rl, [rp, #0]
	mov	r5, #0
	umlal	r4, r5, ul, vl		C r5:r4 = ul * vl + r4
	ldr	ul, [up], #4
	sbcs	r, rl, r4
	ldr	rl, [rp, #4]		C next limb; rp is not advanced until the str
	mov	r4, #0
	umlal	r5, r4, ul, vl		C r4:r5 = ul * vl + r5
	str	r, [rp], #4
	sbcs	r, rl, r5
	str	r, [rp], #4
L(skip2):

C --- remaining multiple of four ------------------------------------------
	bics	r, n, #3		C anything left once bits 0-1 are masked off?
	beq	L(return)

C Prime the loop with the first limb, then enter it one step in.
	ldr	ul, [up], #4
	ldr	rl, [rp, #0]
	mov	r5, #0
	umlal	r4, r5, ul, vl
	b	L(in)

C Four limbs per pass.  The loop is skewed by one limb: each step
C subtracts and stores the low half finished by the previous umlal while
C the next product is being formed.
C   at L(loop): rl = rp limb awaiting r5 (low half), r4 = high half
C   at L(in):   rl = rp limb awaiting r4 (low half), r5 = high half
L(loop):
	ldr	ul, [up], #4
	sbcs	r, rl, r5
	ldr	rl, [rp, #4]
	mov	r5, #0
	umlal	r4, r5, ul, vl
	str	r, [rp], #4
L(in):	ldr	ul, [up], #4
	sbcs	r, rl, r4
	ldr	rl, [rp, #4]
	mov	r4, #0
	umlal	r5, r4, ul, vl
	str	r, [rp], #4
	ldr	ul, [up], #4
	sbcs	r, rl, r5
	ldr	rl, [rp, #4]
	mov	r5, #0
	umlal	r4, r5, ul, vl
	str	r, [rp], #4
	ldr	ul, [up], #4
	sbcs	r, rl, r4
	ldr	rl, [rp, #4]
	mov	r4, #0
	umlal	r5, r4, ul, vl
	str	r, [rp], #4
	sub	n, n, #4		C no s suffix: the borrow in C must survive
	bics	r, n, #3		C low two bits of n were never cleared, mask them
	bne	L(loop)

C Drain the skew: the last low half is still waiting in r5.
	sbcs	r, rl, r5
	str	r, [rp], #4
L(return):
	sbc	r0, r0, r0		C r0 = 0 if no borrow, -1 if borrow
	sub	r0, r4, r0		C r0 = high half + borrow
	ldmfd	sp!, { r4-r6, pc }
EPILOGUE(mpn_submul_1)