dnl  Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C P5: 2.375 cycles/limb


C Pick the carry-propagating instruction and the two entry point names
C for whichever operation this file is being built as.

ifdef(`OPERATION_add_n',`
	define(M4_inst,        adcl)
	define(M4_function_n,  mpn_add_n)
	define(M4_function_nc, mpn_add_nc)

',`ifdef(`OPERATION_sub_n',`
	define(M4_inst,        sbbl)
	define(M4_function_n,  mpn_sub_n)
	define(M4_function_nc, mpn_sub_nc)

',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)


C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_limb_t carry);
C
C Both entry points store src1 op src2 limb by limb into dst, chaining the
C carry flag through M4_inst, and return the final carry or borrow as 0 or
C 1 in %eax.  The nc entry seeds the carry flag from bit 0 of its carry
C argument and then joins the code of the plain entry.
C
C A final limb is always processed after the loops, so size >= 1 is
C required.
C
C Register roles in the shared code:
C	%edi	dst pointer
C	%esi	src1 pointer
C	%ebp	src2 pointer
C	%ebx	src2 limb, always loaded one limb ahead of its use
C	%ecx	count of 8-limb blocks, (size-1)/8
C	%edx	(size-1) mod 8, parked on the stack during the block loop
C	%eax	limb scratch, and the return value
C
C The carry flag is live from the entry sequence to the final M4_inst, so
C pointer and counter updates use only leal, decl and incl, none of which
C write the carry flag.

defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

	TEXT
	ALIGN(8)
PROLOGUE(M4_function_nc)

	C Preserve the four registers that are restored before ret.
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%ebp
	movl	PARAM_SIZE,%ecx

	movl	(%ebp),%ebx		C first src2 limb, read ahead

	C Split size-1 into whole 8-limb blocks and a 0..7 limb remainder.
	decl	%ecx
	movl	%ecx,%edx
	shrl	$3,%ecx			C %ecx = block count
	andl	$7,%edx			C %edx = remainder limbs
	testl	%ecx,%ecx		C any whole blocks?
	jz	L(nc_small)

	C At least one block: park the remainder count, load the incoming
	C carry into the carry flag and enter the shared block loop.
	pushl	%edx
FRAME_pushl()
	movl	PARAM_CARRY,%eax
	shrl	%eax			C bit 0 of carry argument -> carry flag
	jmp	L(top)

L(nc_small):
	C No whole blocks.  Nothing extra was pushed on this path, so the
	C frame size is the 16 bytes of saved registers again.
deflit(`FRAME',16)
	movl	PARAM_CARRY,%eax
	shrl	%eax			C bit 0 of carry argument -> carry flag
	jmp	L(rem)

EPILOGUE()


	ALIGN(8)
PROLOGUE(M4_function_n)

	C Same entry sequence as the nc entry above.
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%ebp
	movl	PARAM_SIZE,%ecx

	movl	(%ebp),%ebx		C first src2 limb, read ahead

	decl	%ecx
	movl	%ecx,%edx
	shrl	$3,%ecx			C %ecx = block count
	andl	$7,%edx			C %edx = remainder limbs
	testl	%ecx,%ecx		C clears the carry flag, sets ZF if no blocks
	jz	L(rem)
	pushl	%edx			C park remainder count; %edx is scratch below
FRAME_pushl()

	C Block loop: eight limbs per pass, handled as four pairs.  Also
	C entered from the nc entry with the carry flag already seeded.
	ALIGN(8)
L(top):
	movl	28(%edi),%eax		C fetch destination cache line (value unused)
	leal	32(%edi),%edi		C dst advanced early, stores use negative offsets

	C limbs 0 and 1
	movl	(%esi),%eax
	movl	4(%esi),%edx
	M4_inst	%ebx,%eax
	movl	4(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	8(%ebp),%ebx
	movl	%eax,-32(%edi)
	movl	%edx,-28(%edi)

	C limbs 2 and 3
	movl	8(%esi),%eax
	movl	12(%esi),%edx
	M4_inst	%ebx,%eax
	movl	12(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	16(%ebp),%ebx
	movl	%eax,-24(%edi)
	movl	%edx,-20(%edi)

	C limbs 4 and 5
	movl	16(%esi),%eax
	movl	20(%esi),%edx
	M4_inst	%ebx,%eax
	movl	20(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	24(%ebp),%ebx
	movl	%eax,-16(%edi)
	movl	%edx,-12(%edi)

	C limbs 6 and 7, plus the read-ahead of the next src2 limb
	movl	24(%esi),%eax
	movl	28(%esi),%edx
	M4_inst	%ebx,%eax
	movl	28(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	32(%ebp),%ebx
	movl	%eax,-8(%edi)
	movl	%edx,-4(%edi)

	leal	32(%esi),%esi
	leal	32(%ebp),%ebp
	decl	%ecx			C carry flag untouched by decl
	jnz	L(top)

	popl	%edx			C recover the remainder count
FRAME_popl()

L(rem):
	C Remainder handling, 0..7 limbs one at a time.
	decl	%edx			C test %edx w/o clobbering carry
	js	L(last)			C remainder was zero
	incl	%edx			C undo the probe
L(rem_top):
	leal	4(%edi),%edi
	movl	(%esi),%eax
	M4_inst	%ebx,%eax
	movl	4(%ebp),%ebx		C read ahead the next src2 limb
	movl	%eax,-4(%edi)
	leal	4(%esi),%esi
	leal	4(%ebp),%ebp
	decl	%edx
	jnz	L(rem_top)

L(last):
	C Final limb; its src2 value is already in %ebx.
	movl	(%esi),%eax
	M4_inst	%ebx,%eax
	movl	%eax,(%edi)

	C Turn the carry flag into the 0 or 1 return value.
	sbbl	%eax,%eax		C %eax = 0 or -1
	negl	%eax			C %eax = 0 or 1

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()