dnl  AMD64 logops.

dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C K8,K9:	 1.5
C K10:		 1.75-2 (fluctuating)
C P4:		 2.8/3.35/3.60 (variant1/variant2/variant3)
C P6-15:	 2.0

C Operation selection.
C
C Exactly one function is assembled from this file.  Each OPERATION_xxx
C symbol below picks three things:
C   func       the name passed to PROLOGUE
C   VARIANT_n  which of the three code bodies further down is emitted
C   LOGOP      the two-operand instruction applied to each pair of limbs
C
C The variants differ only in where a complement (notq) is inserted:
C   VARIANT_1  no complement            and_n,  ior_n,  xor_n
C   VARIANT_2  vp limb is complemented  andn_n, iorn_n, xnor_n
C              before LOGOP
C   VARIANT_3  result is complemented   nand_n, nior_n
C              after LOGOP
C
C xnor_n uses VARIANT_2 because complementing one xor operand gives the
C same value as complementing the xor result.
C
C NOTE(review): the OPERATION_xxx symbols are not defined in this file;
C presumably the build defines one of them per object - confirm against
C the build rules.

ifdef(`OPERATION_and_n',`
  define(`func',`mpn_and_n')
  define(`VARIANT_1')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_andn_n',`
  define(`func',`mpn_andn_n')
  define(`VARIANT_2')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_nand_n',`
  define(`func',`mpn_nand_n')
  define(`VARIANT_3')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_ior_n',`
  define(`func',`mpn_ior_n')
  define(`VARIANT_1')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_iorn_n',`
  define(`func',`mpn_iorn_n')
  define(`VARIANT_2')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_nior_n',`
  define(`func',`mpn_nior_n')
  define(`VARIANT_3')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_xor_n',`
  define(`func',`mpn_xor_n')
  define(`VARIANT_1')
  define(`LOGOP',`xorq')')
ifdef(`OPERATION_xnor_n',`
  define(`func',`mpn_xnor_n')
  define(`VARIANT_2')
  define(`LOGOP',`xorq')')


C List of every function this file can produce.
MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C INPUT PARAMETERS
C   rp  %rdi   destination: one 8-byte limb is stored per input limb
C   up  %rsi   first source, used as the memory operand of LOGOP
C   vp  %rdx   second source, loaded into %r8/%r9 before LOGOP
C   n   %rcx   number of limbs; the code requires n >= 1 (see below)
C
C These are the first four integer argument registers of the System V
C AMD64 calling convention.
C
C Registers written: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9 and the flags.
C No stack is used and nothing is returned in a register by this code.
C
C Common structure of all three variants
C   - vp[0] is loaded before anything else, so every entry path starts
C     with the first vp limb already in %r8.
C   - The three pointers are advanced past their last limb and n is
C     negated, so (ptr,n,8) addresses limb 0 and the index counts upwards
C     until it reaches zero.
C   - The main loop handles four limbs per pass.  n mod 4 selects one of
C     four entry points (b00, b01, b10, b11) so that the leftover limbs
C     are dealt with first.
C   - The loop ends when addq $4 carries out, i.e. when the negative
C     index wraps to zero.
C
C Precondition: n >= 1.  With n = 0 the code still reads (vp) and enters
C at b00, which loads and stores four limbs.
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n',`%rcx')


ASM_START()

ifdef(`VARIANT_1',`
C VARIANT_1:  rp[i] = up[i] LOGOP vp[i]
	TEXT
	ALIGN(32)
PROLOGUE(func)
	movq	(vp), %r8		C preload vp[0] for all entry paths
	movl	%ecx, %eax		C copy of n, only bits 0-1 are used
	leaq	(vp,n,8), vp		C move each pointer to just past
	leaq	(up,n,8), up		C   its last limb
	leaq	(rp,n,8), rp
	negq	n			C index now runs from -n up to 0
	andl	$3, %eax		C eax = n mod 4
	je	L(b00)			C n mod 4 = 0
	cmpl	$2, %eax
	jc	L(b01)			C n mod 4 = 1
	je	L(b10)			C n mod 4 = 2, otherwise fall into b11

C n mod 4 = 3: finish limb 0 here, then bias the index by -1 so that the
C offsets 16 and 24 at e11 address limbs 1 and 2.
L(b11):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	decq	n
	jmp	L(e11)
C n mod 4 = 2: bias the index by -2 so that the offsets 16 and 24 at e10
C address limbs 0 and 1.  %r8 already holds vp[0].
L(b10):	addq	$-2, n
	jmp	L(e10)
C n mod 4 = 1: finish limb 0 here; return if it was the only limb,
C otherwise a multiple of four limbs remains for the loop.
L(b01):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(ret)

C Main loop, four limbs per pass.  Entry at b00 skips the first load
C because %r8 already holds vp[0].
L(oop):	movq	(vp,n,8), %r8
L(b00):	movq	8(vp,n,8), %r9
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	nop				C NOTE(review): presumably padding or scheduling - confirm
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(e11):	movq	16(vp,n,8), %r8
L(e10):	movq	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n			C carry out means the index reached 0
	jnc	L(oop)
L(ret):	ret
EPILOGUE()
')

ifdef(`VARIANT_2',`
C VARIANT_2:  rp[i] = up[i] LOGOP (complement of vp[i])
C Same layout as VARIANT_1, with a notq after every load from vp.
	TEXT
	ALIGN(32)
PROLOGUE(func)
	movq	(vp), %r8		C preload vp[0] for all entry paths
	notq	%r8			C   and complement it right away
	movl	%ecx, %eax		C copy of n, only bits 0-1 are used
	leaq	(vp,n,8), vp		C move each pointer to just past
	leaq	(up,n,8), up		C   its last limb
	leaq	(rp,n,8), rp
	negq	n			C index now runs from -n up to 0
	andl	$3, %eax		C eax = n mod 4
	je	L(b00)			C n mod 4 = 0
	cmpl	$2, %eax
	jc	L(b01)			C n mod 4 = 1
	je	L(b10)			C n mod 4 = 2, otherwise fall into b11

C n mod 4 = 3: finish limb 0, then bias the index by -1 for entry at e11.
L(b11):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	decq	n
	jmp	L(e11)
C n mod 4 = 2: bias the index by -2 for entry at e10.  %r8 already holds
C the complemented vp[0], and e10 lies after the notq of %r8.
L(b10):	addq	$-2, n
	jmp	L(e10)
C Thirteen one-byte NOPs (0x90).  They follow an unconditional jmp, so
C they are never reached by fall-through.
C NOTE(review): presumably alignment padding for the code below - confirm.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C n mod 4 = 1: finish limb 0; return if it was the only limb.
L(b01):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(ret)

C Main loop, four limbs per pass.  Entry at b00 skips the load and notq
C of %r8 because it already holds the complemented vp[0].
L(oop):	movq	(vp,n,8), %r8
	notq	%r8
L(b00):	movq	8(vp,n,8), %r9
	notq	%r9
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(e11):	movq	16(vp,n,8), %r8
	notq	%r8
L(e10):	movq	24(vp,n,8), %r9
	notq	%r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n			C carry out means the index reached 0
	jnc	L(oop)
L(ret):	ret
EPILOGUE()
')

ifdef(`VARIANT_3',`
C VARIANT_3:  rp[i] = complement of (up[i] LOGOP vp[i])
C Same layout as VARIANT_1, with a notq after every LOGOP.
	TEXT
	ALIGN(32)
PROLOGUE(func)
	movq	(vp), %r8		C preload vp[0] for all entry paths
	movl	%ecx, %eax		C copy of n, only bits 0-1 are used
	leaq	(vp,n,8), vp		C move each pointer to just past
	leaq	(up,n,8), up		C   its last limb
	leaq	(rp,n,8), rp
	negq	n			C index now runs from -n up to 0
	andl	$3, %eax		C eax = n mod 4
	je	L(b00)			C n mod 4 = 0
	cmpl	$2, %eax
	jc	L(b01)			C n mod 4 = 1
	je	L(b10)			C n mod 4 = 2, otherwise fall into b11

C n mod 4 = 3: finish limb 0, then bias the index by -1 for entry at e11.
L(b11):	LOGOP	(up,n,8), %r8
	notq	%r8
	movq	%r8, (rp,n,8)
	decq	n
	jmp	L(e11)
C n mod 4 = 2: bias the index by -2 for entry at e10.  %r8 already holds
C vp[0].
L(b10):	addq	$-2, n
	jmp	L(e10)
C Ten one-byte NOPs (0x90).  They follow an unconditional jmp, so they
C are never reached by fall-through.
C NOTE(review): presumably alignment padding for the code below - confirm.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C n mod 4 = 1: finish limb 0; return if it was the only limb.
L(b01):	LOGOP	(up,n,8), %r8
	notq	%r8
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(ret)

C Main loop, four limbs per pass.  Entry at b00 skips the first load
C because %r8 already holds vp[0].
L(oop):	movq	(vp,n,8), %r8
L(b00):	movq	8(vp,n,8), %r9
	LOGOP	(up,n,8), %r8
	notq	%r8
	LOGOP	8(up,n,8), %r9
	notq	%r9
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(e11):	movq	16(vp,n,8), %r8
L(e10):	movq	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	notq	%r8
	LOGOP	24(up,n,8), %r9
	notq	%r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n			C carry out means the index reached 0
	jnc	L(oop)
L(ret):	ret
EPILOGUE()
')