dnl  mod_34lsub1.asm revision 1.1.1.2
dnl
dnl  Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.

dnl  Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C     cycles/limb
C P5      3.0
C P6      3.66
C K6      3.0
C K7      1.3
C P4      9


C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Return a value congruent to {src,size} modulo 2^24-1.  The result is not
C fully reduced: it is a 32-bit sum of pieces of at most 24 bits each, and
C for size==1 it is simply src[0].
C
C Method: 2^24 == 1 mod 2^24-1, so the 32-bit limb weights 2^0, 2^32, 2^64
C are congruent to 2^0, 2^8, 2^16, and 2^96 is congruent to 2^0 again.
C Limbs are summed into three accumulators selected by limb index mod 3.
C The accumulators are chained with adcl, so a carry out of one accumulator
C is added into the next one, which is exactly where its weight belongs
C (the carry out of the third wraps round to the first).  At the end each
C accumulator is split at the bit position where its weight crosses a
C multiple of 24 bits and the pieces are added together.
C
C Requirements and register use, as visible in the code below:
C   - size must be at least 1.  The small-size path loads src[0] before it
C     distinguishes size 1 from size 2, so size 0 would read src[0] too.
C   - PARAM_SRC and PARAM_SIZE are the frame slots declared with defframe
C     at offsets 4 and 8.
C   - eax holds the result; ecx and edx are overwritten.
C   - ebx, esi and edi are used only when size is 3 or more, and are
C     restored before returning.  ebx is parked in the PARAM_SRC slot,
C     which is free once src has been loaded into edx.

defframe(PARAM_SIZE, 8)
defframe(PARAM_SRC,  4)

dnl  re-use parameter space
define(SAVE_EBX, `PARAM_SRC')

	TEXT
	ALIGN(16)
PROLOGUE(mpn_mod_34lsub1)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	PARAM_SRC, %edx

	C One subtract classifies the size: above means size>=3, below
	C means size==1, equal means size==2.  The movl in between does
	C not touch the flags, so jb still tests the subl result.
	subl	$2, %ecx
	ja	L(three_or_more)

	movl	(%edx), %eax
	jb	L(one)			C size==1, return src[0] as is


	C size==2
	C   src[0] = hi8 * 2^24 + lo24          ==  hi8 + lo24
	C   src[1] * 2^32 = hi16 * 2^48 + lo16 * 2^32  ==  hi16 + lo16 * 2^8

	movl	4(%edx), %ecx
	movl	%eax, %edx
	shrl	$24, %eax		C src[0] high

	andl	$0xFFFFFF, %edx		C src[0] low
	addl	%edx, %eax
	movl	%ecx, %edx

	andl	$0xFFFF, %ecx
	shrl	$16, %edx		C src[1] high
	addl	%edx, %eax

	shll	$8, %ecx		C src[1] low
	addl	%ecx, %eax

L(one):
	ret


L(three_or_more):
	C eax
	C ebx
	C ecx	size-2
	C edx	src
	C esi
	C edi
	C ebp

	movl	%ebx, SAVE_EBX		C and arrange 16-byte loop alignment
	xorl	%ebx, %ebx

	pushl	%esi	FRAME_pushl()
	xorl	%esi, %esi

	pushl	%edi	FRAME_pushl()
	xorl	%eax, %eax		C and clear carry flag


	C offset 0x40 here
L(top):
	C eax	acc 0mod3
	C ebx	acc 1mod3
	C ecx	counter, limbs
	C edx	src
	C esi	acc 2mod3
	C edi
	C ebp

	C The carry flag is live around the whole loop: the carry out of
	C the esi add must reach the eax add of the next iteration.  The
	C pointer and counter are therefore stepped with leal, which
	C leaves the flags alone, and the loop test uses decl, which
	C leaves the carry flag alone.  The counter drops by 3 per pass.

	leal	12(%edx), %edx
	leal	-2(%ecx), %ecx

	adcl	-12(%edx), %eax
	adcl	-8(%edx), %ebx
	adcl	-4(%edx), %esi

	decl	%ecx
	jg	L(top)


	C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
	C
	C edi is set to minus the weight of whichever carry is pending on
	C arrival at combine: -1 after the esi add, -2^8 after the eax
	C add, -2^16 after the ebx add.  movl, incl and decl all leave the
	C carry flag as the last adcl set it.

	movl	$0xFFFFFFFF, %edi
	incl	%ecx
	js	L(combine)

	adcl	(%edx), %eax
	movl	$0xFFFFFF00, %edi
	decl	%ecx
	js	L(combine)

	adcl	4(%edx), %ebx
	movl	$0xFFFF0000, %edi


L(combine):
	C eax	acc 0mod3
	C ebx	acc 1mod3
	C ecx
	C edx
	C esi	acc 2mod3
	C edi	mask
	C ebp

	C sbbl turns the pending carry into 0 or -1.  ANDing with the
	C mask gives 0 or minus the carry weight, and subtracting that
	C from eax adds the weight in.

	sbbl	%ecx, %ecx		C carry
	movl	%eax, %edx		C 0mod3

	shrl	$24, %eax		C 0mod3 high
	andl	%edi, %ecx		C carry masked

	subl	%ecx, %eax		C apply carry
	movl	%ebx, %edi		C 1mod3

	shrl	$16, %ebx		C 1mod3 high
	andl	$0x00FFFFFF, %edx	C 0mod3 low

	addl	%edx, %eax		C apply 0mod3 low
	andl	$0xFFFF, %edi

	shll	$8, %edi		C 1mod3 low
	addl	%ebx, %eax		C apply 1mod3 high

	addl	%edi, %eax		C apply 1mod3 low
	movl	%esi, %edx		C 2mod3

	shrl	$8, %esi		C 2mod3 high
	andl	$0xFF, %edx		C 2mod3 low

	shll	$16, %edx		C 2mod3 low
	addl	%esi, %eax		C apply 2mod3 high

	addl	%edx, %eax		C apply 2mod3 low
	popl	%edi	FRAME_popl()

	movl	SAVE_EBX, %ebx
	popl	%esi	FRAME_popl()

	ret

EPILOGUE()