1; 29000 mpn_add_n -- Add two limb vectors of the same length > 0 and store
2; sum in a third limb vector.
3
4; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
5
6; This file is part of the GNU MP Library.
7
8; The GNU MP Library is free software; you can redistribute it and/or modify
9; it under the terms of the GNU Lesser General Public License as published by
10; the Free Software Foundation; either version 3 of the License, or (at your
11; option) any later version.
12
13; The GNU MP Library is distributed in the hope that it will be useful, but
14; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16; License for more details.
17
18; You should have received a copy of the GNU Lesser General Public License
19; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21
22; INPUT PARAMETERS
23; res_ptr	lr2
24; s1_ptr	lr3
25; s2_ptr	lr4
26; size		lr5
27
; We use the loadm/storem instructions and operate on chunks of 8
; limbs per iteration, until fewer than 8 limbs remain.
30
; The 29k has no addition or subtraction instructions that leave the
; carry unaffected, so we need to save the carry before we adjust the
; pointers and restore it afterwards.  gr116 is used for this purpose.
; Note that gr116==0 means that carry should be set.
35
; ___gmpn_add_n -- add two limb vectors of equal length, store the sum in
; a third limb vector, and return the carry out of the last limb.
;
; In:   lr2 = res_ptr, lr3 = s1_ptr, lr4 = s2_ptr, lr5 = size (in limbs)
;       lr0 = return address
; Out:  gr96 = carry out (0 or 1)
; Uses: gr96-gr103   s1 limbs, overwritten in place with the sum limbs
;       gr104-gr111  s2 limbs
;       gr116        saved carry, inverted: -1 = carry clear, 0 = carry set
;       gr117        jmpfdec counter (main loop, then tail dispatch)
;       gr118        chunk-count test, then tail transfer count for CR
;       lr2, lr3, lr4 and lr5 may be modified.
;
; An instruction indented by one extra space sits in the delay slot of
; the jump on the line above it.

	.sect .lit,lit
	.text
	.align	4
	.global	___gmpn_add_n
	.word	0x60000			; NOTE(review): presumably the procedure tag word -- confirm
___gmpn_add_n:
	srl	gr117,lr5,3		; gr117 = number of full 8-limb chunks
	sub	gr118,gr117,1		; negative iff there is no full chunk
	jmpt	gr118,Ltail		; no full chunk: go straight to the tail
	 constn	gr116,-1		; init cy reg (-1 = carry clear)
	sub	gr117,gr117,2		; count for jmpfdec

; Main loop working 8 limbs/iteration.
; CR is reloaded before every loadm/storem.
Loop:	mtsrim	cr,(8-1)
	loadm	0,0,gr96,lr3		; gr96..gr103 = next 8 limbs of s1
	add	lr3,lr3,32		; advance s1_ptr (this add clobbers carry)
	mtsrim	cr,(8-1)
	loadm	0,0,gr104,lr4		; gr104..gr111 = next 8 limbs of s2
	add	lr4,lr4,32		; advance s2_ptr (this add clobbers carry)

	subr	gr116,gr116,0		; restore carry
	addc	gr96,gr96,gr104
	addc	gr97,gr97,gr105
	addc	gr98,gr98,gr106
	addc	gr99,gr99,gr107
	addc	gr100,gr100,gr108
	addc	gr101,gr101,gr109
	addc	gr102,gr102,gr110
	addc	gr103,gr103,gr111
	subc	gr116,gr116,gr116	; gr116 = not(cy)

	mtsrim	cr,(8-1)
	storem	0,0,gr96,lr2		; store the 8 sum limbs
	jmpfdec	gr117,Loop
	 add	lr2,lr2,32		; advance res_ptr; carry is already in gr116

; Code for the last up-to-7 limbs.
; This code might look very strange, but it's hard to write it
; differently without major slowdown.
;
; All remaining limbs are loaded at once, then a chain of jmpfdec steps
; performs exactly as many addc instructions as there are limbs.  Each
; exit from the chain reloads CR so that storem writes the same number
; of limbs that were loaded.

	and	lr5,lr5,(8-1)		; lr5 = limbs left after the full chunks
Ltail:	sub	gr118,lr5,1		; count for CR
	jmpt	gr118,Lend		; no limbs left
	 sub	gr117,lr5,2		; count for jmpfdec

	mtsr	cr,gr118
	loadm	0,0,gr96,lr3		; remaining limbs of s1
	mtsr	cr,gr118
	loadm	0,0,gr104,lr4		; remaining limbs of s2

	subr	gr116,gr116,0		; restore carry

	jmpfdec	gr117,L1
	 addc	gr96,gr96,gr104
	jmp	Lstore			; 1 limb
	 mtsr	cr,gr118
L1:	jmpfdec	gr117,L2
	 addc	gr97,gr97,gr105
	jmp	Lstore			; 2 limbs
	 mtsr	cr,gr118
L2:	jmpfdec	gr117,L3
	 addc	gr98,gr98,gr106
	jmp	Lstore			; 3 limbs
	 mtsr	cr,gr118
L3:	jmpfdec	gr117,L4
	 addc	gr99,gr99,gr107
	jmp	Lstore			; 4 limbs
	 mtsr	cr,gr118
L4:	jmpfdec	gr117,L5
	 addc	gr100,gr100,gr108
	jmp	Lstore			; 5 limbs
	 mtsr	cr,gr118
L5:	jmpfdec	gr117,L6
	 addc	gr101,gr101,gr109
	jmp	Lstore			; 6 limbs
	 mtsr	cr,gr118
L6:	addc	gr102,gr102,gr110	; 7 limbs
; This path falls through into Lstore instead of jumping, so it has no
; delay slot to reload CR in.  Reload it explicitly, as every other path
; does, rather than rely on whatever the loadm above left in CR.  The
; carry from the addc is still live here, exactly as it is across the
; mtsr in the delay slots above.
	mtsr	cr,gr118

Lstore:	storem	0,0,gr96,lr2		; store the remaining sum limbs
	subc	gr116,gr116,gr116	; gr116 = not(cy)

Lend:	jmpi	lr0			; return to caller
	 add	gr96,gr116,1		; return value: gr116 -1 -> 0, gr116 0 -> 1
119