1dnl  S/390-64 mpn_copyd
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31
32include(`../config.m4')
33
34C            cycles/limb
35C z900		 2.67
36C z990           1.5
37C z9		 ?
38C z10		 1.8
39C z196		 ?
40
41C FIXME:
42C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
43C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
44C    We could then use r3...r10 in main loop.
45C  * Could we use some EX trick, modifying lmg/stmg, for the feed-in code?
46
47C INPUT PARAMETERS
C The three arguments are read from %r2, %r3 and %r4.  In the routine below,
C rp_param is the base of the area that gets stored to, up_param is the base
C of the area that gets loaded from, and n is a count of 8-byte limbs (it is
C shifted left by 3 to form a byte offset).
48define(`rp_param',	`%r2')
49define(`up_param',	`%r3')
50define(`n',		`%r4')
51
C Working pointers.  The routine derives these from rp_param and up_param
C before it starts reusing %r2-%r4 as data registers.
52define(`rp',	`%r8')
53define(`up',	`%r9')
54
55ASM_START()
C mpn_copyd: copy n limbs of 8 bytes each from up_param to rp_param, starting
C with the limb at the highest address and working down.
C
C Structure: a feed-in block selected by n mod 8 moves the top 0..7 limbs,
C then L(top) moves the remaining limbs eight at a time.
C
C Register use:
C   rp, up    running pointers, initialised to param + 8*n - 64 so that
C             offsets 0..56 address the eight highest limbs not yet copied
C   %r10      n/8 + 1; brctg decrements before testing, so L(top) runs
C             n/8 times and is skipped entirely when n < 8
C   %r11      -64, the pointer step per L(top) iteration
C   %r7       n mod 8 while dispatching, afterwards a data register
C   %r0-%r7   data registers for lmg/stmg; the feed-in blocks for 3 or more
C             limbs overwrite %r2-%r4, which is harmless because rp, up, %r7
C             and %r10 have already been computed from them
56PROLOGUE(mpn_copyd)
57	stmg	%r6, %r11, 48(%r15)	C save the callee-saves regs used below
58
59	sllg	%r1, n, 3		C r1 = 8*n, the size in bytes
60	la	%r10, 8(n)		C r10 = n + 8
61	aghi	%r1, -64		C r1 = 8*n - 64, bias for rp and up
62	srlg	%r10, %r10, 3		C r10 = n/8 + 1, count for brctg
63	lghi	%r11, -64		C step of one 8-limb group, downwards
64
65	la	rp, 0(%r1,rp_param)	C FIXME use lay on z990 and later
66	la	up, 0(%r1,up_param)	C FIXME use lay on z990 and later
67
C Dispatch on n mod 8.  Each cghi result is reused by the branches that
C follow it, since the branches do not change the condition code.
68	lghi	%r7, 7
69	ngr	%r7, n			C n mod 8
70	cghi	%r7, 2
71	jh	L(b34567)		C 3..7 are sorted out further down
72	cghi	%r7, 1
73	je	L(b1)
74	jh	L(b2)			C falls through to L(b0) when n mod 8 = 0
75
C n mod 8 = 0: nothing to feed in.  Enter the loop unless the count drops to
C zero, which happens when n = 0.
76L(b0):	brctg	%r10, L(top)
77	j	L(end)
78
C n mod 8 = 1: move the single top limb, then step both pointers down by 8.
79L(b1):	lg	%r0, 56(up)
80	aghi	up, -8
81	stg	%r0, 56(rp)
82	aghi	rp, -8
83	brctg	%r10, L(top)		C loop only if full 8-limb groups remain
84	j	L(end)
85
C n mod 8 = 2: move the top two limbs.
86L(b2):	lmg	%r0, %r1, 48(up)
87	aghi	up, -16
88	stmg	%r0, %r1, 48(rp)
89	aghi	rp, -16
90	brctg	%r10, L(top)
91	j	L(end)
92
C Second level of the dispatch, for n mod 8 in 3..7.
93L(b34567):
94	cghi	%r7, 4
95	jl	L(b3)
96	je	L(b4)
97	cghi	%r7, 6
98	je	L(b6)
99	jh	L(b7)			C falls through to L(b5) when n mod 8 = 5
100
C n mod 8 = 5: move the top five limbs.  This overwrites %r2-%r4.
101L(b5):	lmg	%r0, %r4, 24(up)
102	aghi	up, -40
103	stmg	%r0, %r4, 24(rp)
104	aghi	rp, -40
105	brctg	%r10, L(top)
106	j	L(end)
107
C n mod 8 = 3: move the top three limbs.  This overwrites %r2.
108L(b3):	lmg	%r0, %r2, 40(up)
109	aghi	up, -24
110	stmg	%r0, %r2, 40(rp)
111	aghi	rp, -24
112	brctg	%r10, L(top)
113	j	L(end)
114
C n mod 8 = 4: move the top four limbs.  This overwrites %r2 and %r3.
115L(b4):	lmg	%r0, %r3, 32(up)
116	aghi	up, -32
117	stmg	%r0, %r3, 32(rp)
118	aghi	rp, -32
119	brctg	%r10, L(top)
120	j	L(end)
121
C n mod 8 = 6: move the top six limbs.  This overwrites %r2-%r5.
122L(b6):	lmg	%r0, %r5, 16(up)
123	aghi	up, -48
124	stmg	%r0, %r5, 16(rp)
125	aghi	rp, -48
126	brctg	%r10, L(top)
127	j	L(end)
128
C n mod 8 = 7: move the top seven limbs.  This also uses %r6, which was saved
C on entry.
129L(b7):	lmg	%r0, %r6, 8(up)
130	aghi	up, -56
131	stmg	%r0, %r6, 8(rp)
132	aghi	rp, -56
133	brctg	%r10, L(top)
134	j	L(end)
135
C Main loop: move eight limbs (64 bytes) per iteration and step both pointers
C down by 64 through %r11.
136L(top):	lmg	%r0, %r7, 0(up)
137	la	up, 0(%r11,up)		C up -= 64
138	stmg	%r0, %r7, 0(rp)
139	la	rp, 0(%r11,rp)		C rp -= 64
140	brctg	%r10, L(top)
141
142L(end):	lmg	%r6, %r11, 48(%r15)	C reload the registers saved on entry
143	br	%r14			C return through the address in %r14
144EPILOGUE()
145