dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.

dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 1.66 cycles/limb


C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Fold the size-limb number at src down to a single 32-bit value that is
C congruent to it modulo 2^24-1.  The result is returned in eax.
C
C The result is only congruent, not fully reduced: it can be 2^24-1 or
C larger.  In particular a one-limb operand is returned unchanged.
C
C Limbs are read as 32-bit words at 4-byte spacing.  Since 2^24 == 1
C modulo 2^24-1, the limb weights repeat with period three:
C
C	limb index 0 mod 3	weight 2^0
C	limb index 1 mod 3	weight 2^32 == 2^8
C	limb index 2 mod 3	weight 2^64 == 2^16
C
C so for three or more limbs the code keeps three running sums, one per
C residue class, and combines them with shifts and masks at the end.
C
C size is treated as at least 1.  A size of 0 is not handled specially: it
C falls into the one-limb path and src[0] is read and returned.
C
C Sizes 1 and 2 use only eax, ecx and edx.  The path for 3 or more limbs
C also uses ebx, esi, edi and ebp, saving them on entry and restoring
C them before returning.
C
C NOTE(review): the blank lines group the instructions in twos, presumably
C to match P5 dual-issue pairing for the cycle count quoted above.  The
C carry flag is also live across many instructions.  Do not reorder or
C substitute instructions without rechecking both.
C
C defframe, deflit, FRAME_pushl, PROLOGUE, EPILOGUE, L, TEXT and ALIGN are
C not defined in this file; presumably they are supplied through the
C config.m4 include above.

defframe(PARAM_SIZE, 8)
defframe(PARAM_SRC,  4)

	TEXT
	ALIGN(16)
PROLOGUE(mpn_mod_34lsub1)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	PARAM_SRC, %edx

	subl	$2, %ecx		C ecx = size-2, flags used twice below
	ja	L(three_or_more)	C size > 2, unsigned

	movl	(%edx), %eax		C src[0], movl leaves the flags alone
	jne	L(one)			C size != 2, return src[0] as is


	C Exactly two limbs.  Fold src[0] with weight 1 and src[1] with
	C weight 2^8, moving any bits at or above 2^24 down by 24.

	movl	4(%edx), %ecx		C src[1]
	movl	%eax, %edx		C src[0]

	shrl	$24, %edx		C src[0] bits 24-31, 2^24 == 1
	andl	$0xFFFFFF, %eax		C src[0] bits 0-23

	addl	%edx, %eax
	movl	%ecx, %edx		C src[1]

	shrl	$16, %ecx		C src[1] bits 16-31, 2^(32+16) == 1
	andl	$0xFFFF, %edx		C src[1] bits 0-15

	shll	$8, %edx		C src[1] low half at weight 2^8
	addl	%ecx, %eax

	addl	%edx, %eax

L(one):
	ret


L(three_or_more):
	C eax
	C ebx
	C ecx	size-2
	C edx	src
	C esi
	C edi
	C ebp

	C Save the four registers used as scratch and accumulators.

	pushl	%ebx	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	pushl	%edi	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()

	xorl	%esi, %esi		C 0mod3
	xorl	%edi, %edi		C 1mod3

	xorl	%ebp, %ebp		C 2mod3, and clear carry

L(top):
	C eax	scratch
	C ebx	scratch
	C ecx	counter, limbs
	C edx	src
	C esi	0mod3
	C edi	1mod3
	C ebp	2mod3
	C
	C Three limbs per iteration, one into each accumulator.  The adcl
	C chain passes the carry esi -> edi -> ebp, and the carry out of
	C ebp goes back into esi on the next iteration.  This matches the
	C weights, since a carry out of ebp is worth 2^(16+32) == 1.
	C
	C Nothing between the adcl instructions may change the carry flag:
	C movl and leal do not touch flags, and decl leaves carry alone.

	movl	(%edx), %eax
	movl	4(%edx), %ebx

	adcl	%eax, %esi
	movl	8(%edx), %eax

	adcl	%ebx, %edi
	leal	12(%edx), %edx		C src += 3 limbs, flags untouched

	adcl	%eax, %ebp
	leal	-2(%ecx), %ecx		C counter -= 2, flags untouched

	decl	%ecx			C counter -= 1, carry preserved
	jg	L(top)			C loop while counter > 0, signed


	C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
	C
	C ebx is set to the negated weight of whatever carry is pending
	C when combine is reached.  The carry comes out of the accumulator
	C most recently added to:
	C
	C	0 more	out of ebp, weight 1,    mask 0xFFFFFFFF = -1
	C	1 more	out of esi, weight 2^8,  mask 0xFFFFFF00 = -2^8
	C	2 more	out of edi, weight 2^16, mask 0xFFFF0000 = -2^16
	C
	C incl and decl are used on ecx because they leave carry alone.

	movl	$0xFFFFFFFF, %ebx	C mask
	incl	%ecx

	js	L(combine)		C 0 more

	movl	(%edx), %eax
	movl	$0xFFFFFF00, %ebx

	adcl	%eax, %esi
	decl	%ecx

	js	L(combine)		C 1 more

	movl	4(%edx), %eax
	movl	$0xFFFF0000, %ebx

	adcl	%eax, %edi



L(combine):
	C eax
	C ebx	mask
	C ecx
	C edx
	C esi	0mod3
	C edi	1mod3
	C ebp	2mod3
	C
	C The carry flag still holds the carry out of the last adcl.
	C
	C Each accumulator is split where its weighted value crosses 2^24.
	C The part above wraps round to weight 1 and the part below is
	C shifted up to its weight.  The pieces summed into eax are less
	C than 2^24, 2^8, 2^16, 2^24, 2^24 and 2^24 respectively, plus a
	C carry of at most 2^16, so the total cannot overflow 32 bits.

	sbbl	%ecx, %ecx		C carry
	movl	%esi, %eax		C 0mod3

	andl	%ebx, %ecx		C masked for position
	andl	$0xFFFFFF, %eax		C 0mod3 low

	shrl	$24, %esi		C 0mod3 high
	subl	%ecx, %eax		C apply carry

	addl	%esi, %eax		C apply 0mod3
	movl	%edi, %ebx		C 1mod3

	shrl	$16, %edi		C 1mod3 high
	andl	$0x0000FFFF, %ebx

	shll	$8, %ebx		C 1mod3 low
	addl	%edi, %eax		C apply 1mod3 high

	addl	%ebx, %eax		C apply 1mod3 low
	movl	%ebp, %ebx		C 2mod3

	shrl	$8, %ebp		C 2mod3 high
	andl	$0xFF, %ebx

	shll	$16, %ebx		C 2mod3 low
	addl	%ebp, %eax		C apply 2mod3 high

	addl	%ebx, %eax		C apply 2mod3 low

	C Restore the saved registers in reverse order of the pushes.

	popl	%ebp
	popl	%edi

	popl	%esi
	popl	%ebx

	ret

EPILOGUE()