dnl  com.asm revision 1.1.1.1
dnl  Intel Pentium mpn_com -- mpn ones complement.

dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 1.75 cycles/limb


NAILS_SUPPORT(0-31)


C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Store into dst[0..size-1] each limb of src[0..size-1] XORed with
C GMP_NUMB_MASK.
C
C This code is similar to mpn_copyi, basically there's just some "xorl
C $GMP_NUMB_MASK"s inserted.
C
C Structure:
C
C   - An 8 limb unrolled main loop, entered only when size >= 8.
C   - A tail for the remaining 0 to 7 limbs, done as an optional group of
C     4, an optional group of 2, and an optional single limb.
C
C Register usage: the three arguments are read from the stack through the
C PARAM_* offsets defined below.  esi and edi are scratch and are pushed on
C entry and popped before returning.  eax, ecx and edx are overwritten; the
C prototype is void, so eax (left holding &src[size]) is not a result.
C
C The instruction order, nops included, is kept exactly as written.
C Presumably it is arranged for P5 instruction pairing to reach the speed
C quoted above, so re-measure before reordering anything.
C
C Alternatives:
C
C On P55 some MMX code could be 1.25 c/l (8 limb unrolled) if src and dst
C are the same alignment mod 8, but it doesn't seem worth the trouble for
C just that case (there'd need to be some plain integer available too for
C the unaligned case).

C Stack offsets of the arguments.  PARAM_DST is still read after the two
C pushes below, so FRAME_pushl() presumably keeps these offsets valid as
C the stack pointer moves -- see config.m4 to confirm.
defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_com)
deflit(`FRAME',0)

	movl	PARAM_SRC, %eax
	movl	PARAM_SIZE, %ecx

	pushl	%esi	FRAME_pushl()
	pushl	%edi	FRAME_pushl()

	C eax becomes &src[size]; src is then addressed with a negative
	C limb index in ecx counting up towards zero.
	leal	(%eax,%ecx,4), %eax
	xorl	$-1, %ecx		C -size-1

	movl	PARAM_DST, %edx
	addl	$8, %ecx		C -size+7

	C Non-negative here means size <= 7: skip the unrolled loop.
	jns	L(end)

	C Load used only to touch dst; esi is overwritten in the loop.
	movl	(%edx), %esi		C fetch destination cache line
	nop

L(top):
	C eax	&src[size]
	C ebx
	C ecx	counter, limbs, negative
	C edx	dst, incrementing
	C esi	scratch
	C edi	scratch
	C ebp
	C
	C Each pass complements 8 limbs, two at a time through esi/edi.
	C edx is advanced first, hence the negative store offsets.

	movl	28(%edx), %esi		C destination prefetch
	addl	$32, %edx

	movl	-28(%eax,%ecx,4), %esi
	movl	-24(%eax,%ecx,4), %edi
	xorl	$GMP_NUMB_MASK, %esi
	xorl	$GMP_NUMB_MASK, %edi
	movl	%esi, -32(%edx)
	movl	%edi, -28(%edx)

	movl	-20(%eax,%ecx,4), %esi
	movl	-16(%eax,%ecx,4), %edi
	xorl	$GMP_NUMB_MASK, %esi
	xorl	$GMP_NUMB_MASK, %edi
	movl	%esi, -24(%edx)
	movl	%edi, -20(%edx)

	movl	-12(%eax,%ecx,4), %esi
	movl	-8(%eax,%ecx,4), %edi
	xorl	$GMP_NUMB_MASK, %esi
	xorl	$GMP_NUMB_MASK, %edi
	movl	%esi, -16(%edx)
	movl	%edi, -12(%edx)

	movl	-4(%eax,%ecx,4), %esi
	movl	(%eax,%ecx,4), %edi
	xorl	$GMP_NUMB_MASK, %esi
	xorl	$GMP_NUMB_MASK, %edi
	movl	%esi, -8(%edx)
	movl	%edi, -4(%edx)

	C Still negative means at least 8 more limbs remain.
	addl	$8, %ecx
	js	L(top)


L(end):
	C eax	&src[size]
	C ecx	0 to 7, representing respectively 7 to 0 limbs remaining
	C edx	dst, next location to store

	C Negative after the subtract means 4 or more limbs remain.
	subl	$4, %ecx
	nop

	jns	L(no4)

	movl	-12(%eax,%ecx,4), %esi
	movl	-8(%eax,%ecx,4), %edi
	xorl	$GMP_NUMB_MASK, %esi
	xorl	$GMP_NUMB_MASK, %edi
	movl	%esi, (%edx)
	movl	%edi, 4(%edx)

	movl	-4(%eax,%ecx,4), %esi
	movl	(%eax,%ecx,4), %edi
	xorl	$GMP_NUMB_MASK, %esi
	xorl	$GMP_NUMB_MASK, %edi
	movl	%esi, 8(%edx)
	movl	%edi, 12(%edx)

	addl	$16, %edx
	addl	$4, %ecx		C undo the subl, ecx now 0 to 3
L(no4):
	C ecx	0 to 3, representing respectively 3 to 0 limbs remaining

	C Negative after the subtract means 2 or 3 limbs remain.
	subl	$2, %ecx
	nop

	jns	L(no2)

	movl	-4(%eax,%ecx,4), %esi
	movl	(%eax,%ecx,4), %edi
	xorl	$GMP_NUMB_MASK, %esi
	xorl	$GMP_NUMB_MASK, %edi
	movl	%esi, (%edx)
	movl	%edi, 4(%edx)

	addl	$8, %edx
	addl	$2, %ecx		C undo the subl, and set ZF for below
L(no2):
	C popl leaves the flags alone, so the jnz tests ZF from whichever
	C of "subl $2" or "addl $2" ran last.  ZF set means exactly one
	C limb is left, ZF clear means none.

	popl	%edi
	jnz	L(done)

	C Last limb is src[size-1]; ecx is free to use as scratch now.
	movl	-4(%eax), %ecx

	xorl	$GMP_NUMB_MASK, %ecx
	popl	%esi

	movl	%ecx, (%edx)
	ret

L(done):
	popl	%esi
	ret

EPILOGUE()