1dnl  S/390-32 mpn_copyd
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31
32include(`../config.m4')
33
34C            cycles/limb
36C z900		 1.65
37C z990           1.125
38C z9		 ?
39C z10		 ?
40C z196		 ?
41
42C FIXME:
43C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
44C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
45C    We could then use r3...r10 in main loop.
46
47C INPUT PARAMETERS
C  Registers holding the three arguments on entry.  They are read only during
C  setup; the lm instructions that move the data later overwrite them.
48define(`rp_param',	`%r2')	C destination base address
49define(`up_param',	`%r3')	C source base address
50define(`n',		`%r4')	C number of 4-byte limbs to copy
51
C  Working pointers, set up in the routine as param + 4*n - 32.  They live in
C  %r8/%r9, which the routine saves on entry and reloads at L(end).
52define(`rp',	`%r8')
53define(`up',	`%r9')
54
55ASM_START()
C mpn_copyd(rp_param, up_param, n)
C
C Copy n 4-byte limbs from up_param to rp_param, starting with the limbs at
C the highest addresses and working downwards.
C
C Method: the n mod 8 highest limbs are moved by one of the blocks
C L(b1)..L(b7), then L(top) moves 8 limbs per pass with one lm/stm pair.
C
C Register use once setup is complete:
C   rp, up   (%r8, %r9) lowest address of the next 8-limb group; the residue
C            limbs sit just above it, ending at byte offset 28
C   %r10     loop count, floor(n/8) + 1; brct decrements before testing, so
C            L(top) runs floor(n/8) times
C   %r11     constant -32, the byte step for one 8-limb group
C   %r0-%r7  copy data.  This reuses %r1 (byte offset), %r2-%r4 (the
C            parameters) and %r7 (n mod 8), all of which are no longer
C            needed by the time they are overwritten.
56PROLOGUE(mpn_copyd)
57	stm	%r6, %r11, 24(%r15)	C save %r6-%r11, reloaded at L(end)
58
59	lr	%r1, n
60	sll	%r1, 2			C r1 = 4*n, operand size in bytes
61	la	%r10, 8(n)		C r10 = n + 8
62	ahi	%r1, -32		C r1 = 4*n - 32, offset of the top 8 limbs
63	srl	%r10, 3			C r10 = (n + 8) >> 3 = floor(n/8) + 1
64	lhi	%r11, -32		C pointer step used in L(top)
65
66	la	rp, 0(%r1,rp_param)	C FIXME use lay on z990 and later
67	la	up, 0(%r1,up_param)	C FIXME use lay on z990 and later
68
C Dispatch on n mod 8.  Residue 0 falls through to L(b0).
69	lhi	%r7, 7
70	nr	%r7, n			C n mod 8
71	chi	%r7, 2
72	jh	L(b34567)		C residue 3..7
73	chi	%r7, 1
74	je	L(b1)			C residue 1
75	jh	L(b2)			C residue 2
76
C Residue 0: nothing to copy ahead of the loop.  For n = 0 the count is 1,
C so brct does not branch and the routine returns without copying.
77L(b0):	brct	%r10, L(top)
78	j	L(end)
79
C Residue 1: copy the single top limb at byte offset 28, then step the
C pointers down by 4 so they address the next 8-limb group.
80L(b1):	l	%r0, 28(up)
81	ahi	up, -4
82	st	%r0, 28(rp)
83	ahi	rp, -4
84	brct	%r10, L(top)
85	j	L(end)
86
C Residue 2: top two limbs, byte offsets 24..31.
87L(b2):	lm	%r0, %r1, 24(up)
88	ahi	up, -8
89	stm	%r0, %r1, 24(rp)
90	ahi	rp, -8
91	brct	%r10, L(top)
92	j	L(end)
93
C Second-level dispatch for residues 3..7; residue 5 falls through to L(b5).
94L(b34567):
95	chi	%r7, 4
96	jl	L(b3)			C residue 3
97	je	L(b4)			C residue 4
98	chi	%r7, 6
99	je	L(b6)			C residue 6
100	jh	L(b7)			C residue 7
101
C Residue 5: top five limbs, byte offsets 12..31.
102L(b5):	lm	%r0, %r4, 12(up)
103	ahi	up, -20
104	stm	%r0, %r4, 12(rp)
105	ahi	rp, -20
106	brct	%r10, L(top)
107	j	L(end)
108
C Residue 3: top three limbs, byte offsets 20..31.
109L(b3):	lm	%r0, %r2, 20(up)
110	ahi	up, -12
111	stm	%r0, %r2, 20(rp)
112	ahi	rp, -12
113	brct	%r10, L(top)
114	j	L(end)
115
C Residue 4: top four limbs, byte offsets 16..31.
116L(b4):	lm	%r0, %r3, 16(up)
117	ahi	up, -16
118	stm	%r0, %r3, 16(rp)
119	ahi	rp, -16
120	brct	%r10, L(top)
121	j	L(end)
122
C Residue 6: top six limbs, byte offsets 8..31.
123L(b6):	lm	%r0, %r5, 8(up)
124	ahi	up, -24
125	stm	%r0, %r5, 8(rp)
126	ahi	rp, -24
127	brct	%r10, L(top)
128	j	L(end)
129
C Residue 7: top seven limbs, byte offsets 4..31.  Uses %r6 as data, which
C is why it is among the registers saved on entry.
130L(b7):	lm	%r0, %r6, 4(up)
131	ahi	up, -28
132	stm	%r0, %r6, 4(rp)
133	ahi	rp, -28
134	brct	%r10, L(top)
135	j	L(end)
136
C Main loop: move 8 limbs (32 bytes) per pass through %r0-%r7, stepping both
C pointers down by 32 via the -32 held in %r11.
137L(top):	lm	%r0, %r7, 0(up)
138	la	up, 0(%r11,up)
139	stm	%r0, %r7, 0(rp)
140	la	rp, 0(%r11,rp)
141	brct	%r10, L(top)
142
143L(end):	lm	%r6, %r11, 24(%r15)	C restore the registers saved on entry
144	br	%r14			C branch to the address in %r14
145EPILOGUE()
146