dnl  Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.

dnl  Copyright 2000-2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C      cycles/limb
C P5	  3.0
C P6	  3.66
C K6	  3.0
C K7	  1.3
C P4	  9


C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Returns in eax a value congruent to the size-limb number at src modulo
C 2^24-1.  The result is a plain sum of pieces of at most 24 bits each and
C is not reduced any further here.
C
C Since 2^24 == 1 mod 2^24-1, a 32-bit limb at index i has weight
C 2^(32*i) == 2^(8*(i mod 3)).  Limbs are therefore summed into three
C accumulators selected by i mod 3 (weights 1, 2^8 and 2^16), and at
C L(combine) each accumulator is split at its 24-bit boundary and the pieces
C are added together.
C
C size is expected to be at least 1: size==0 takes the one-limb path below
C and reads src[0].

C Parameter slots, declared through defframe (presumably byte offsets from
C the stack pointer at entry -- confirm against the defframe definition).
defframe(PARAM_SIZE, 8)
defframe(PARAM_SRC,  4)

dnl  re-use parameter space
dnl  (PARAM_SRC has already been loaded into edx by the time ebx is stored)
define(SAVE_EBX, `PARAM_SRC')

	TEXT
	ALIGN(16)
PROLOGUE(mpn_mod_34lsub1)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	PARAM_SRC, %edx

	subl	$2, %ecx		C ecx = size-2, flags tested twice below
	ja	L(three_or_more)	C size >= 3

	movl	(%edx), %eax		C movl leaves the subl flags intact
	jb	L(one)			C size == 1, return src[0] as is

	C size == 2: fold src[0] at weight 1 and src[1] at weight 2^8

	movl	4(%edx), %ecx
	movl	%eax, %edx
	shrl	$24, %eax		C src[0] high

	andl	$0xFFFFFF, %edx		C src[0] low
	addl	%edx, %eax
	movl	%ecx, %edx

	andl	$0xFFFF, %ecx
	shrl	$16, %edx		C src[1] high
	addl	%edx, %eax

	shll	$8, %ecx		C src[1] low
	addl	%ecx, %eax

L(one):
	ret


L(three_or_more):
	C eax
	C ebx
	C ecx	size-2
	C edx	src
	C esi
	C edi
	C ebp

	C ebx is saved with a store to the spare parameter slot, esi and edi
	C are pushed.  The three accumulators eax, ebx, esi start at zero.

	movl	%ebx, SAVE_EBX		C and arrange 16-byte loop alignment
	xorl	%ebx, %ebx

	pushl	%esi	FRAME_pushl()
	xorl	%esi, %esi

	pushl	%edi	FRAME_pushl()
	xorl	%eax, %eax		C and clear carry flag


	C offset 0x40 here
L(top):
	C eax	acc 0mod3
	C ebx	acc 1mod3
	C ecx	counter, limbs
	C edx	src
	C esi	acc 2mod3
	C edi
	C ebp

	C Three limbs per pass.  The adcl chain passes the carry from acc
	C 0mod3 to 1mod3 to 2mod3, and the carry out of 2mod3 stays in the
	C carry flag to be added into 0mod3 on the next pass (its weight
	C 2^96 is == 1).  leal changes no flags and decl leaves the carry
	C flag alone, so the carry survives the pointer and counter updates.

	leal	12(%edx), %edx
	leal	-2(%ecx), %ecx

	adcl	-12(%edx), %eax
	adcl	-8(%edx), %ebx
	adcl	-4(%edx), %esi

	decl	%ecx			C counter has dropped by 3 in total
	jg	L(top)


	C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively

	C edi is set to minus the weight of whichever carry is still pending
	C in the carry flag when L(combine) is reached: -1 after acc 2mod3,
	C -2^8 after acc 0mod3, -2^16 after acc 1mod3.  movl, incl and decl
	C all leave the carry flag unchanged.

	movl	$0xFFFFFFFF, %edi
	incl	%ecx
	js	L(combine)		C no more limbs

	adcl	(%edx), %eax
	movl	$0xFFFFFF00, %edi
	decl	%ecx
	js	L(combine)		C that was the last limb

	adcl	4(%edx), %ebx
	movl	$0xFFFF0000, %edi


L(combine):
	C eax	acc 0mod3
	C ebx	acc 1mod3
	C ecx
	C edx
	C esi	acc 2mod3
	C edi	mask
	C ebp

	C sbbl gives ecx = -1 if a carry is pending, else 0.  Masking with
	C edi and subtracting from eax then adds the weight of that carry.

	sbbl	%ecx, %ecx		C carry
	movl	%eax, %edx		C 0mod3

	shrl	$24, %eax		C 0mod3 high
	andl	%edi, %ecx		C carry masked

	subl	%ecx, %eax		C apply carry
	movl	%ebx, %edi		C 1mod3

	shrl	$16, %ebx		C 1mod3 high
	andl	$0x00FFFFFF, %edx	C 0mod3 low

	addl	%edx, %eax		C apply 0mod3 low
	andl	$0xFFFF, %edi

	shll	$8, %edi		C 1mod3 low
	addl	%ebx, %eax		C apply 1mod3 high

	addl	%edi, %eax		C apply 1mod3 low
	movl	%esi, %edx		C 2mod3

	shrl	$8, %esi		C 2mod3 high
	andl	$0xFF, %edx		C 2mod3 low

	shll	$16, %edx		C 2mod3 low
	addl	%esi, %eax		C apply 2mod3 high

	addl	%edx, %eax		C apply 2mod3 low
	popl	%edi	FRAME_popl()

	movl	SAVE_EBX, %ebx		C restore ebx from the parameter slot
	popl	%esi	FRAME_popl()

	ret

EPILOGUE()