dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.

dnl  Copyright 2004, 2005, 2006, 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                cycles/limb
C POWER3/PPC630:     ?
C POWER4/PPC970:     75 (including call+ret)

C TODO:
C   * Pair multiply instructions.

C ---------------------------------------------------------------------------
C mpn_invert_limb
C
C Purpose:  invert a normalized limb d.  The result is refined in several
C           steps from a 16-bit seed looked up in approx_tab.
C           NOTE(review): presumably the usual GMP reciprocal
C           floor((2^128 - 1) / d) - 2^64 -- confirm against the C
C           definition of invert_limb.
C In:       r3 = d.  The table index uses bits 55..62 only, so bit 63 is
C           taken to be set (d normalized).
C Out:      r3 = result.
C Scratch:  r0, r8, r9, r10, r11, r12 and the carry bit (written by
C           addc/addze/subfic/subfe).  No stack and no other memory is
C           touched apart from one halfword load from approx_tab.
C           NOTE(review): assumes these are all volatile registers under
C           the target ABI -- confirm.
C Control:  straight-line code, no branches before the final blr.
C ---------------------------------------------------------------------------

ASM_START()
PROLOGUE(mpn_invert_limb)
	C NOTE(review): LEAL is defined outside this file; presumably it
	C loads the address of approx_tab into r12, which the lhzx below
	C uses as its base register.
	LEAL(	r12, approx_tab)

	C Seed: 16-bit table value selected by the 8 bits below the top bit.
	srdi	r11, r3, 32		C r11 = d >> 32
	rlwinm	r9, r11, 10, 23, 30	C r9 = ((d >> 55) & 0xff) << 1, byte offset of a 2-byte entry
	lhzx	r0, r12, r9		C load initial approximation (zero-extended halfword)
	rldic	r10, r0, 6, 42		C r10 = r0 << 6

	C Refinement step 1, using only the upper half of d.
	mulld	r8, r10, r10		C r8 = low64(r10 * r10)
	sldi	r9, r10, 17		C r9 = r10 << 17
	mulld	r0, r8, r11		C r0 = low64(r8 * (d >> 32))
	srdi	r0, r0, 31		C r0 >>= 31
	subf	r10, r0, r9		C r10 = r9 - r0

	C Refinement step 2, using the full d.
	mulld	r8, r10, r10		C r8 = low64(r10 * r10)
	sldi	r11, r10, 33		C r11 = r10 << 33
	mulhdu	r0, r8, r3		C r0 = high64(r8 * d)
	sldi	r9, r0, 1		C r9 = r0 << 1
	subf	r10, r9, r11		C r10 = r11 - r9

	C Refinement step 3: 128-bit square of r10 times d, keeping the
	C upper two limbs of the 192-bit product.
	sldi	r11, r10, 2		C r11 = r10 << 2
	mulhdu	r0, r10, r10		C r0 = high64(r10 * r10)
	mulld	r8, r10, r10		C r8 = low64(r10 * r10)
	mulhdu	r10, r8, r3		C r10 = high64(low(sq) * d)
	mulld	r9, r0, r3		C r9 = low64(high(sq) * d)
	mulhdu	r0, r0, r3		C r0 = high64(high(sq) * d)
	addc	r8, r9, r10		C r8 = r9 + r10, carry out
	addze	r10, r0			C r10 = r0 + carry; r10:r8 = (sq * d) >> 64
	srdi	r0, r8, 62		C r0 = top 2 bits of r8
	rldimi	r0, r10, 2, 0		C r0 = (r10 << 2) | (r8 >> 62)
	sldi	r9, r8, 2		C r9 = r8 << 2; r0:r9 = (r10:r8) << 2
	subfic	r10, r9, 0		C 0 - r9; only the carry (borrow) is used, r10 is overwritten below
	subfe	r8, r0, r11		C r8 = r11 - r0 - borrow, i.e. high limb of (r11:0) - (r0:r9)

	C Final adjustment of the candidate in r8.
	mulhdu	r10, r3, r8		C r10 = high64(d * r8)
	add	r10, r10, r3		C r10 += d
	mulld	r9, r3, r8		C r9 = low64(d * r8)
	subf	r11, r10, r8		C r11 = r8 - r10
	addi	r0, r10, 1		C r0 = r10 + 1
	addi	r8, r11, -1		C r8 = r11 - 1
	and	r0, r3, r0		C r0 = d & (r10 + 1)
	addc	r11, r9, r0		C r11 = r9 + r0, carry out
	addze	r10, r10		C r10 += carry
	addc	r0, r11, r3		C r11 + d; only the carry is used
	addze	r10, r10		C r10 += carry
	subf	r3, r10, r8		C result: r3 = r8 - r10
	blr
EPILOGUE()

C approx_tab: 256 halfword seeds (32 rows of 8), decreasing from 1023 to 513.
C Indexed by (d >> 55) & 0xff; the code scales the index by 2 to get a byte
C offset.
DEF_OBJECT(approx_tab)
	.short	1023,1020,1016,1012,1008,1004,1000,996
	.short	992,989,985,981,978,974,970,967
	.short	963,960,956,953,949,946,942,939
	.short	936,932,929,926,923,919,916,913
	.short	910,907,903,900,897,894,891,888
	.short	885,882,879,876,873,870,868,865
	.short	862,859,856,853,851,848,845,842
	.short	840,837,834,832,829,826,824,821
	.short	819,816,814,811,809,806,804,801
	.short	799,796,794,791,789,787,784,782
	.short	780,777,775,773,771,768,766,764
	.short	762,759,757,755,753,751,748,746
	.short	744,742,740,738,736,734,732,730
	.short	728,726,724,722,720,718,716,714
	.short	712,710,708,706,704,702,700,699
	.short	697,695,693,691,689,688,686,684
	.short	682,680,679,677,675,673,672,670
	.short	668,667,665,663,661,660,658,657
	.short	655,653,652,650,648,647,645,644
	.short	642,640,639,637,636,634,633,631
	.short	630,628,627,625,624,622,621,619
	.short	618,616,615,613,612,611,609,608
	.short	606,605,604,602,601,599,598,597
	.short	595,594,593,591,590,589,587,586
	.short	585,583,582,581,579,578,577,576
	.short	574,573,572,571,569,568,567,566
	.short	564,563,562,561,560,558,557,556
	.short	555,554,553,551,550,549,548,547
	.short	546,544,543,542,541,540,539,538
	.short	537,536,534,533,532,531,530,529
	.short	528,527,526,525,524,523,522,521
	.short	520,519,518,517,516,515,514,513
END_OBJECT(approx_tab)
ASM_END()