1dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
2
3dnl  Copyright 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C		cycles/limb
23C POWER3/PPC630:     ?
24C POWER4/PPC970:     75 (including call+ret)
25
26C TODO:
27C   * Pair multiply instructions.
28
29ASM_START()
30PROLOGUE(mpn_invert_limb)	C in: r3 = d, the limb to invert (normalized per the file header); out: r3. Scratch: r0, r8-r12, CA
31	LEAL(	r12, approx_tab)	C r12 = table base for the lhzx below (LEAL is defined outside this file; presumably loads the address of approx_tab)
32
33	srdi	r11, r3, 32		C r11 = d >> 32
34	rlwinm  r9, r11, 10, 23, 30	C r9 = ((d >> 55) & 0xff) << 1
35	lhzx	r0, r12, r9		C load initial approximation (16-bit entry, zero-extended)
36	rldic	r10, r0, 6, 42		C r10 = r0 << 6, masked to bits 6..21 (LSB numbering)
37	mulld	r8, r10, r10		C r8 = r10^2 (low 64 bits)
38	sldi	r9, r10, 17		C r9 = r10 << 17
39	mulld	r0, r8, r11		C r0 = r10^2 * (d >> 32) (low 64 bits)
40	srdi	r0, r0, 31		C r0 = r0 >> 31
41	subf	r10, r0, r9		C r10 = r9 - r0: 1st refinement; has the shape of a Newton reciprocal step 2v - v^2*d (scaled)
42	mulld	r8, r10, r10		C r8 = r10^2 (low 64 bits)
43	sldi	r11, r10, 33		C r11 = r10 << 33 (mod 2^64)
44	mulhdu	r0, r8, r3		C r0 = high 64 bits of r8 * d (unsigned)
45	sldi	r9, r0, 1		C r9 = 2 * r0
46	subf	r10, r9, r11		C r10 = r11 - r9 (mod 2^64): 2nd refinement, same shape as above
47	sldi	r11, r10, 2		C r11 = r10 << 2 (mod 2^64), minuend for the subfe below
48	mulhdu	r0, r10, r10		C r0 = high 64 bits of r10^2
49	mulld	r8, r10, r10		C r8 = low 64 bits of r10^2
50	mulhdu	r10, r8, r3		C r10 = high 64 bits of (low(r10^2) * d); the old r10 is dead from here
51	mulld	r9, r0, r3		C r9 = low 64 bits of (high(r10^2) * d)
52	mulhdu	r0, r0, r3		C r0 = high 64 bits of (high(r10^2) * d)
53	addc	r8, r9, r10		C r8 = r9 + r10, carry out in CA
54	addze	r10, r0			C r10 = r0 + CA; r10:r8 = upper 128 bits of the 192-bit product (2nd refinement)^2 * d
55	srdi	r0, r8, 62		C r0 = top 2 bits of r8
56	rldimi	r0, r10, 2, 0		C r0 = (r10 << 2) | (r8 >> 62): high limb of (r10:r8) << 2
57	sldi	r9, r8, 2		C r9 = r8 << 2: low limb of (r10:r8) << 2
58	subfic	r10, r9, 0		C r10 = 0 - r9; only CA (the borrow state) is used, r10 is overwritten by the next mulhdu
59	subfe	r8, r0, r11		C r8 = r11 - r0 - borrow (mod 2^64): 3rd refinement
60	mulhdu	r10, r3, r8		C r10 = high 64 bits of d * r8
61	add	r10, r10, r3		C r10 = r10 + d (mod 2^64)
62	mulld	r9, r3, r8		C r9 = low 64 bits of d * r8
63	subf	r11, r10, r8		C r11 = r8 - r10
64	addi	r0, r10, 1		C r0 = r10 + 1
65	addi	r8, r11, -1		C r8 = r11 - 1
66	and	r0, r3, r0		C r0 = d & (r10 + 1)
67	addc	r11, r9, r0		C r11 = r9 + r0, carry out in CA
68	addze	r10, r10		C r10 = r10 + CA
69	addc	r0, r11, r3		C r11 + d; only CA is used, r0 is not read again
70	addze	r10, r10		C r10 = r10 + CA
71	subf	r3, r10, r8		C r3 = r8 - r10: value returned. NOTE(review): lines 60-71 look like a branch-free final correction of the 3rd refinement; presumably r3 = floor((2^128-1)/d) - 2^64 -- confirm against the caller contract
72	blr				C return to caller
73EPILOGUE()
74
75DEF_OBJECT(approx_tab)
76	.short	1023,1020,1016,1012,1008,1004,1000,996	C 256 halfword entries (32 rows of 8); entry 0 = 1023
77	.short	992,989,985,981,978,974,970,967	C mpn_invert_limb reads one entry at byte offset ((d >> 55) & 0xff) << 1
78	.short	963,960,956,953,949,946,942,939	C NOTE(review): spot-checked entries match floor((2^18-1)/(256+i)); not verified for every i
79	.short	936,932,929,926,923,919,916,913
80	.short	910,907,903,900,897,894,891,888
81	.short	885,882,879,876,873,870,868,865
82	.short	862,859,856,853,851,848,845,842
83	.short	840,837,834,832,829,826,824,821
84	.short	819,816,814,811,809,806,804,801
85	.short	799,796,794,791,789,787,784,782
86	.short	780,777,775,773,771,768,766,764
87	.short	762,759,757,755,753,751,748,746
88	.short	744,742,740,738,736,734,732,730
89	.short	728,726,724,722,720,718,716,714
90	.short	712,710,708,706,704,702,700,699
91	.short	697,695,693,691,689,688,686,684
92	.short	682,680,679,677,675,673,672,670
93	.short	668,667,665,663,661,660,658,657
94	.short	655,653,652,650,648,647,645,644
95	.short	642,640,639,637,636,634,633,631
96	.short	630,628,627,625,624,622,621,619
97	.short	618,616,615,613,612,611,609,608
98	.short	606,605,604,602,601,599,598,597
99	.short	595,594,593,591,590,589,587,586
100	.short	585,583,582,581,579,578,577,576
101	.short	574,573,572,571,569,568,567,566
102	.short	564,563,562,561,560,558,557,556
103	.short	555,554,553,551,550,549,548,547
104	.short	546,544,543,542,541,540,539,538
105	.short	537,536,534,533,532,531,530,529
106	.short	528,527,526,525,524,523,522,521
107	.short	520,519,518,517,516,515,514,513	C last row: entries 248..255, entry 255 = 513
108END_OBJECT(approx_tab)
109ASM_END()
110