1dnl  PowerPC-64 mpn_mod_34lsub1 -- modulo 2^48-1.
2
3dnl  Copyright 2005 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C		cycles/limb
23C POWER3/PPC630:     1.33
24C POWER4/PPC970:     1.5
25C POWER5:	     1.57
26
27C INPUT PARAMETERS
C up (r3)  base address of the limb array; the routine reads it with 8-byte
C          loads and advances it as limbs are consumed.
C n  (r4)  number of limbs to process; it is divided by 3 for the main loop
C          and reduced to n mod 3 for the tail.
28define(`up',`r3')
29define(`n',`r4')
30
31ASM_START()
C mpn_mod_34lsub1 (up, n)
C
C Adds up the n 64-bit limbs at up and leaves in r3 a value congruent to that
C number modulo 2^48-1.  The result is a plain sum of folded pieces; no final
C reduction into the range 0..2^48-2 is performed here.  For n = 0 no memory
C is read and the result is 0.
C
C Method: 2^192 = (2^48)^4 is congruent to 1 mod 2^48-1, so limbs can be
C accumulated by index mod 3:
C   r8  += limbs 0, 3, 6, ...   (weight 2^0)
C   r9  += limbs 1, 4, 7, ...   (weight 2^64,  congruent to 2^16)
C   r10 += limbs 2, 5, 8, ...   (weight 2^128, congruent to 2^32)
C   r11 += carries out of r10   (weight 2^192, congruent to 1)
C with the carry bit chained r8 -> r9 -> r10 -> r11 within each group of 3.
C
C Registers written: r0, r3-r11, cr6, ctr and the carry bit.
32PROLOGUE(mpn_mod_34lsub1)
C Clear the three limb accumulators and the carry accumulator.
33	li	r8, 0
34	li	r9, 0
35	li	r10, 0
36	li	r11, 0
37
C Fewer than 3 limbs: skip the grouped loop and go straight to the tail.
38	cmpdi	cr6, n, 3
39	blt	cr6, L(lt3)
40
C Build the constant (2^65+1)/3 in r0, then compute n/3 without a divide:
C take the high 64 bits of the product and shift right one more bit.
41	li	r0, -0x5556		C 0xFFFFFFFFFFFFAAAA
42	rldimi	r0, r0, 16, 32		C 0xFFFFFFFFAAAAAAAA
43	rldimi	r0, r0, 32, 63		C 0xAAAAAAAAAAAAAAAB
44	mulhdu	r0, r0, n		C high 64 bits of constant * n
45	srdi	r0, r0, 1		C r0 = [n / 3]
46	mtctr	r0			C ctr = number of 3-limb groups
47
C Preload the first group.  The loop below always adds the group loaded
C previously while loading the next one.
48	ld	r5, 0(up)
49	ld	r6, 8(up)
50	ld	r7, 16(up)
51	addi	up, up, 24
52	bdz	L(end)			C only one group: nothing to loop over
53
C Main loop, unrolled to handle two groups per pass.  The ld and addi
C instructions between the adds leave the carry bit untouched, so the
C addc/adde/adde/addze chain stays intact across the interleaved loads.
C NOTE(review): the nop instructions have no architectural effect; they are
C presumably there for instruction scheduling -- confirm before removing.
54	ALIGN(16)
55L(top):	addc	r8, r8, r5
56	nop
57	ld	r5, 0(up)
58	adde	r9, r9, r6
59	ld	r6, 8(up)
60	adde	r10, r10, r7
61	ld	r7, 16(up)
62	addi	up, up, 48		C advance two groups; 2nd half uses negative offsets
63	addze	r11, r11		C collect the carry out of r10
64	bdz	L(endx)			C last group is loaded but not yet added
65	addc	r8, r8, r5
66	nop
67	ld	r5, -24(up)
68	adde	r9, r9, r6
69	ld	r6, -16(up)
70	adde	r10, r10, r7
71	ld	r7, -8(up)
72	addze	r11, r11		C collect the carry out of r10
73	bdnz	L(top)
74
C Leaving after the second half: up already points just past the last loaded
C group, so pre-compensate for the adjustment at L(endx).
75	addi	up, up, 24
C Leaving after the first half: up is one group (24 bytes) too far.
76L(endx):
77	addi	up, up, -24
78
C Add the final group that was loaded but not yet accumulated.
79L(end):	addc	r8, r8, r5
80	adde	r9, r9, r6
81	adde	r10, r10, r7
82	addze	r11, r11
83
84	sldi	r5, r0, 1
85	add	r5, r5, r0		C r5 = n / 3 * 3
86	sub	n, n, r5		C n = n mod 3
C Tail: n is now 0, 1 or 2 remaining limbs at up.
87L(lt3):	cmpdi	cr6, n, 1
88	blt	cr6, L(2)		C n = 0: nothing left to add
89
90	ld	r5, 0(up)
91	addc	r8, r8, r5
92	li	r6, 0			C second limb is 0 when n = 1; li keeps the carry bit
93	beq	cr6, L(1)
94
95	ld	r6, 8(up)
C Propagate the carry through the remaining accumulators.
96L(1):	adde	r9, r9, r6
97	addze	r10, r10
98	addze	r11, r11
99
C Fold the accumulators modulo 2^48-1.  Each register is split at the bit
C position where its weight reaches 2^48; the upper part wraps around to
C weight 1 and the lower part is shifted to its weight within 48 bits.
100L(2):	rldicl	r0, r8, 0, 16		C r0 = r8 mod 2^48
101	srdi	r3, r8, 48		C r3 = r8 div 2^48
102	rldic	r4, r9, 16, 16		C r4 = (r9 mod 2^32) << 16
103	srdi	r5, r9, 32		C r5 = r9 div 2^32
104	rldic	r6, r10, 32, 16		C r6 = (r10 mod 2^16) << 32
105	srdi	r7, r10, 16		C r7 = r10 div 2^16
106
C Sum all pieces; r11 has weight 1 and is added as is.
107	add	r0, r0, r3
108	add	r4, r4, r5
109	add	r6, r6, r7
110
111	add	r0, r0, r4
112	add	r6, r6, r11
113
114	add	r3, r0, r6		C r3 = result (unreduced sum)
115	blr
116EPILOGUE()
117
118C |__r10__|__r9___|__r8___|
119C |-----|-----|-----|-----|
120