1dnl  PowerPC-64 mpn_copyi.
2
3dnl  Copyright 2004, 2005, 2013 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C                  cycles/limb
34C POWER3/PPC630          ?
35C POWER4/PPC970          ?
36C POWER5                 ?
37C POWER6                 1.25
38C POWER7                 1.09
39
40C INPUT PARAMETERS
41define(`rp',	`r3')	C destination pointer; written with std and advanced upward
42define(`up',	`r4')	C source pointer; read with ld and advanced upward
43define(`n',	`r5')	C number of 8-byte limbs to copy
44
45C TODO
46C  * Try rotating the loop so that its two leading std instructions come last,
47C    which would allow the code to handle n = 2 as well.
48C  * Consider using 4 pointers, schedule ptr update early wrt use.
49
50ASM_START()
C mpn_copyi -- copy n 8-byte limbs from up to rp, walking both pointers upward.
C
C In:    rp (r3) = destination, up (r4) = source, n (r5) = limb count
C Uses:  r0 and r6-r12 as scratch, plus cr0, cr6, cr7 and ctr; rp and up are
C        advanced in place.
C
C n < 4 (n = 0 included) takes the one-limb-per-iteration path at L(sml).
C Otherwise an odd limb, if any, is copied first and the remaining even count
C is fed into the 8-limb loop at L(top).  That loop has four entry points,
C L(lo0), L(lo2), L(lo4) and L(lo6), selected from bits 1 and 2 of n.  Before
C jumping in, up and/or rp are biased so that the fixed displacements at the
C chosen entry point address the first limbs still to be copied.  Loads run
C ahead of stores, so four limbs are still held in r6-r9 when the loop exits;
C L(end) stores them.
51PROLOGUE(mpn_copyi)
52
C When HAVE_ABI_mode32 is defined, clear the upper 32 bits of n.
C NOTE(review): presumably the upper half of the register is not guaranteed to
C be clean under that ABI -- confirm.
53ifdef(`HAVE_ABI_mode32',
54`	rldicl	n, n, 0,32')
55
56	cmpdi	cr0, n, 4
57	blt	L(sml)		C n < 4: use the simple loop
58
59	addi	r10, n, 4
60	srdi	r10, r10, 3
61	mtctr	r10		C ctr = (n + 4) / 8 = times the bdnz at the loop bottom runs
62
63	andi.	r0, n, 1	C cr0 eq when n is even
64	rlwinm	r11, n, 0,30,30	C r11 = n & 2
65	rlwinm	r12, n, 0,29,29	C r12 = n & 4
66	cmpdi	cr6, r11, 0	C cr6 ne when (n & 2) is set
67	cmpdi	cr7, r12, 0	C cr7 ne when (n & 4) is set
68
69	beq	cr0, L(xx0)	C even n: no single limb to peel off
C Odd n: copy one limb so that the remaining count is even.
70L(xx1):	ld	r6, 0(up)
71	addi	up, up, 8
72	std	r6, 0(rp)
73	addi	rp, rp, 8
74
C Dispatch on the remaining count mod 8.  Limb numbers below are relative to
C the current up/rp, i.e. after any odd limb has been copied.
75L(xx0):	bne	cr6, L(x10)
76L(x00):	ld	r6, 0(up)	C (n & 2) clear: preload limbs 0 and 1 into r6/r7
77	ld	r7, 8(up)
78	bne	cr7, L(100)
79L(000):	addi	rp, rp, -32	C count = 0 mod 8: the 32(rp) store in L(lo0) hits limb 0
80	b	L(lo0)
81L(100):	addi	up, up, -32	C count = 4 mod 8: the 48(up) load in L(lo4) reads limb 2
82	b	L(lo4)
83L(x10):	ld	r8, 0(up)	C (n & 2) set: preload limbs 0 and 1 into r8/r9
84	ld	r9, 8(up)
85	bne	cr7, L(110)
86L(010):	addi	up, up, 16	C count = 2 mod 8: the 0(up) load in L(lo2) reads limb 2
87	addi	rp, rp, -16	C and the 16(rp) store there hits limb 0
88	b	L(lo2)
89L(110):	addi	up, up, -16	C count = 6 mod 8: the 32(up) load in L(lo6) reads limb 2
90	addi	rp, rp, -48	C and the 48(rp) store there hits limb 0
91	b	L(lo6)
92
C Small case, n < 4: return at once for n = 0, else copy one limb per pass.
93L(sml):	cmpdi	cr0, n, 0
94	beqlr-	cr0		C n = 0: return; the - suffix hints not-taken
95	mtctr	n		C ctr = n
96L(t):	ld	r6, 0(up)
97	addi	up, up, 8
98	std	r6, 0(rp)
99	addi	rp, rp, 8
100	bdnz	L(t)		C decrement ctr, loop while nonzero
101	blr
102
C Main loop: 8 limbs per full pass.  r6/r7 and r8/r9 alternate as load targets,
C and each pair is stored only after the other pair has been stored and
C reloaded.  rp is bumped just before L(lo4), up at the very bottom.
103	ALIGN(32)
104L(top):	std	r6, 0(rp)	C store the pair loaded at L(lo6)
105	std	r7, 8(rp)
106L(lo2):	ld	r6, 0(up)
107	ld	r7, 8(up)
108	std	r8, 16(rp)	C store the pair loaded at L(lo4) or preloaded at L(x10)
109	std	r9, 24(rp)
110L(lo0):	ld	r8, 16(up)
111	ld	r9, 24(up)
112	std	r6, 32(rp)	C store the pair loaded at L(lo2) or preloaded at L(x00)
113	std	r7, 40(rp)
114L(lo6):	ld	r6, 32(up)
115	ld	r7, 40(up)
116	std	r8, 48(rp)	C store the pair loaded at L(lo0) or preloaded at L(x10)
117	std	r9, 56(rp)
118	addi	rp, rp, 64
119L(lo4):	ld	r8, 48(up)
120	ld	r9, 56(up)
121	addi	up, up, 64
122	bdnz	L(top)		C decrement ctr, loop while nonzero
123
C Drain: the last four limbs loaded are still in r6-r9.
124L(end):	std	r6, 0(rp)
125	std	r7, 8(rp)
126	std	r8, 16(rp)
127	std	r9, 24(rp)
128	blr
129EPILOGUE()
130