dnl  addaddmul_1msb0.asm  (revision 1.1.1.1)
dnl  AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.

dnl  Copyright 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C K8:		 2.167
C P4:		12.0
C P6-15:	 4.0

C TODO
C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.
C INPUT PARAMETERS (SysV AMD64 argument registers)
define(`rp', `%rdi')		C result limb pointer
define(`ap', `%rsi')		C source A limb pointer
define(`bp_param', `%rdx')	C source B limb pointer (relocated below)
define(`n', `%rcx')		C limb count
define(`u0', `%r8')		C multiplier u, u < 2^63 (top bit clear)
define(`v0', `%r9')		C multiplier v, v < 2^63 (top bit clear)

C mul clobbers %rdx, so the B pointer lives in callee-saved %rbp instead.
define(`bp', `%rbp')

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_addaddmul_1msb0)
	C R = A*u + B*v.  With u,v < 2^63 the two per-limb partial products
	C accumulate through a single adc carry limb without overflowing.
	C Register roles in the steady state: %rax = next source limb,
	C %r10/%r11/%r12 rotate as sum/carry accumulators.
	push	%r12			C callee-saved scratch
	push	%rbp			C callee-saved, becomes bp

	C Point all pointers just past their blocks; run index n from
	C -count up towards zero so one register drives all three streams.
	lea	(ap,n,8), ap
	lea	(bp_param,n,8), bp
	lea	(rp,n,8), rp
	neg	n

	C Prime the pipeline: a[0]*u0 into %r10:%r12, preload b[0] in %rax.
	mov	(ap,n,8), %rax
	mul	%r8
	mov	%rax, %r12
	mov	(bp,n,8), %rax
	mov	%rdx, %r10
	add	$3, n			C main loop consumes 3 limbs/pass
	jns	L(end)			C count <= 3: straight to wind-down

	ALIGN(16)
C Main loop: produces 3 result limbs per iteration.  Each limb is
C a[i]*u0 + b[i]*v0 + carry; loads are hoisted ahead of the mul that
C needs them to hide latency, so statement order is load-critical.
L(top):	mul	%r9
	add	%rax, %r12
	mov	-16(ap,n,8), %rax
	adc	%rdx, %r10
	mov	%r12, -24(rp,n,8)	C store result limb i
	mul	%r8
	add	%rax, %r10
	mov	-16(bp,n,8), %rax
	mov	$0, %r11d		C fresh carry accumulator (mov keeps flags for adc)
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	mov	-8(ap,n,8), %rax
	adc	%rdx, %r11
	mov	%r10, -16(rp,n,8)	C store result limb i+1
	mul	%r8
	add	%rax, %r11
	mov	-8(bp,n,8), %rax
	mov	$0, %r12d		C fresh carry accumulator
	adc	%rdx, %r12
	mul	%r9
	add	%rax, %r11
	adc	%rdx, %r12
	mov	(ap,n,8), %rax
	mul	%r8
	add	%rax, %r12
	mov	%r11, -8(rp,n,8)	C store result limb i+2
	mov	(bp,n,8), %rax
	mov	$0, %r10d		C fresh carry accumulator
	adc	%rdx, %r10
	add	$3, n
	js	L(top)

C Wind-down.  Here R32(n) is 0, 1 or 2 and selects how many result
C limbs remain: 0 -> three, 1 -> two, 2 -> one.  A half-finished limb
C (a[i]*u0 added, b[i] preloaded in %rax) is always pending on entry.
L(end):	cmp	$1, R32(n)
	ja	2f			C n = 2: one limb left
	jz	1f			C n = 1: two limbs left

	C n = 0: finish three remaining limbs.
	mul	%r9
	add	%rax, %r12
	mov	-16(ap), %rax
	adc	%rdx, %r10
	mov	%r12, -24(rp)
	mul	%r8
	add	%rax, %r10
	mov	-16(bp), %rax
	mov	$0, %r11d
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	mov	-8(ap), %rax
	adc	%rdx, %r11
	mov	%r10, -16(rp)
	mul	%r8
	add	%rax, %r11
	mov	-8(bp), %rax
	mov	$0, %r12d
	adc	%rdx, %r12
	mul	%r9
	add	%rax, %r11
	adc	%rdx, %r12
	mov	%r11, -8(rp)
	mov	%r12, %rax		C return most significant (carry) limb
	pop	%rbp
	pop	%r12
	ret

C n = 1: finish two remaining limbs.
1:	mul	%r9
	add	%rax, %r12
	mov	-8(ap), %rax
	adc	%rdx, %r10
	mov	%r12, -16(rp)
	mul	%r8
	add	%rax, %r10
	mov	-8(bp), %rax
	mov	$0, %r11d
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	adc	%rdx, %r11
	mov	%r10, -8(rp)
	mov	%r11, %rax		C return most significant (carry) limb
	pop	%rbp
	pop	%r12
	ret

C n = 2: finish the single remaining limb.
2:	mul	%r9
	add	%rax, %r12
	mov	%r12, -8(rp)
	adc	%rdx, %r10
	mov	%r10, %rax		C return most significant (carry) limb
	pop	%rbp
	pop	%r12
	ret
EPILOGUE()