dnl  AMD64 logops.

dnl  Copyright 2004-2017 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C		 c/l	 c/l	 c/l	 good
C		var-1	var-2	var-3	for cpu?
C AMD K8,K9	 1.5	 1.5	 1.5	 y
C AMD K10	 1.5	 1.5	 1.5	 y
C AMD bd1
C AMD bd2
C AMD bd3
C AMD bd4
C AMD bt1	 2.67	~2.79	~2.67
C AMD bt2	 2.0	 2.28	 2.28	 y
C AMD zen	 1.5	 1.5	 1.5	 =
C Intel P4	 2.8	 3.35	 3.6
C Intel PNR	 2.0	 2.0	 2.0	 =
C Intel NHM	 2.0	 2.0	 2.0	 =
C Intel SBR	 1.5	 1.75	 1.75	 n
C Intel IBR	 1.48	 1.71	 1.72	 n
C Intel HWL	 1.5	 1.5	 1.5	 n
C Intel BWL	 1.5	 1.5	 1.5	 n
C Intel SKL	 1.5	 1.5	 1.5	 n
C Intel atom	 3.82	 3.82	 3.82	 n
C Intel SLM	 3.0	 3.0	 3.0	 =
C VIA nano	 3.25

C OVERVIEW
C
C One function is generated per build of this file.  The OPERATION_* symbol
C that is defined picks the exported name (func), the two-operand instruction
C (LOGOP) and one of three code shapes.  Working on 8-byte words, for each
C index i below the count n:
C
C   VARIANT_1:  rp[i] = up[i] LOGOP vp[i]
C   VARIANT_2:  rp[i] = up[i] LOGOP ~vp[i]
C   VARIANT_3:  rp[i] = ~(up[i] LOGOP vp[i])
C
C All three shapes share the same skeleton:
C   - vp[0] is loaded into r8 before anything else.
C   - The three pointers are advanced by 8*n bytes and n is negated, so the
C     index register counts upward and the loop ends when it wraps to zero.
C   - The low two bits of the count select an entry point (b00, b01, b10,
C     b11) into a loop that handles four words per iteration.
C
C The count is assumed to be at least 1: vp[0] is read unconditionally, and a
C count of 0 would dispatch to L(b00) and process four words.
C
C Registers written: rax, r8, r9, the four parameter registers, and flags.

C Operation selection.  Each block takes effect only when its OPERATION_*
C symbol is defined.
ifdef(`OPERATION_and_n',`
  define(`func',`mpn_and_n')
  define(`VARIANT_1')
  define(`LOGOP',`and')')
ifdef(`OPERATION_andn_n',`
  define(`func',`mpn_andn_n')
  define(`VARIANT_2')
  define(`LOGOP',`and')')
ifdef(`OPERATION_nand_n',`
  define(`func',`mpn_nand_n')
  define(`VARIANT_3')
  define(`LOGOP',`and')')
ifdef(`OPERATION_ior_n',`
  define(`func',`mpn_ior_n')
  define(`VARIANT_1')
  define(`LOGOP',`or')')
ifdef(`OPERATION_iorn_n',`
  define(`func',`mpn_iorn_n')
  define(`VARIANT_2')
  define(`LOGOP',`or')')
ifdef(`OPERATION_nior_n',`
  define(`func',`mpn_nior_n')
  define(`VARIANT_3')
  define(`LOGOP',`or')')
ifdef(`OPERATION_xor_n',`
  define(`func',`mpn_xor_n')
  define(`VARIANT_1')
  define(`LOGOP',`xor')')
ifdef(`OPERATION_xnor_n',`
  define(`func',`mpn_xnor_n')
  define(`VARIANT_2')
  define(`LOGOP',`xor')')


MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C INPUT PARAMETERS
C   rp  result array, written
C   up  first source array, used as the memory operand of LOGOP
C   vp  second source array, loaded into r8/r9
C   n   word count; replaced by a negative running index inside the function
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n',`%rcx')

C NOTE(review): ABI_SUPPORT, FUNC_ENTRY and FUNC_EXIT are defined outside this
C file; presumably they adapt the DOS64 argument registers to the ones named
C above - confirm against their definitions.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()

C VARIANT_1: rp[i] = up[i] LOGOP vp[i]
ifdef(`VARIANT_1',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)
	mov	(vp), %r8		C r8 = vp[0], consumed by every entry path
	mov	R32(%rcx), R32(%rax)	C copy low 32 bits of the count (n is rcx)
	lea	(vp,n,8), vp		C point all three arrays just past their end
	lea	(up,n,8), up
	lea	(rp,n,8), rp
	neg	n			C n = -count, so (ptr,n,8) addresses word 0
	and	$3, R32(%rax)		C rax = count mod 4
	je	L(b00)			C residue 0
	cmp	$2, R32(%rax)
	jc	L(b01)			C residue 1
	je	L(b10)			C residue 2, otherwise fall into residue 3

C Residue 3: finish word 0, then enter mid-loop so that two more words are
C handled before the first full iteration.
L(b11):	LOGOP	(up,n,8), %r8
	mov	%r8, (rp,n,8)
	dec	n			C bias index so 16(..,n,8) is word 1
	jmp	L(e11)
C Residue 2: r8 still holds vp[0]; bias the index so 16(..,n,8) is word 0.
L(b10):	add	$-2, n
	jmp	L(e10)
C Residue 1: finish word 0; return if that was the only word.
L(b01):	LOGOP	(up,n,8), %r8
	mov	%r8, (rp,n,8)
	inc	n
	jz	L(ret)

C Main loop, four words per iteration.
L(top):	mov	(vp,n,8), %r8
L(b00):	mov	8(vp,n,8), %r9
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	nop				C K8/K9/K10 concession
	mov	%r8, (rp,n,8)
	mov	%r9, 8(rp,n,8)
L(e11):	mov	16(vp,n,8), %r8
L(e10):	mov	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	mov	%r8, 16(rp,n,8)
	mov	%r9, 24(rp,n,8)
	add	$4, n			C carry is set when the index wraps to 0
	jnc	L(top)

L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')

C VARIANT_2: rp[i] = up[i] LOGOP ~vp[i]
C Same skeleton as VARIANT_1, with each vp word complemented right after it
C is loaded and before LOGOP is applied.
ifdef(`VARIANT_2',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)
	mov	(vp), %r8		C r8 = vp[0]
	not	%r8			C r8 = ~vp[0], consumed by every entry path
	mov	R32(%rcx), R32(%rax)	C copy low 32 bits of the count (n is rcx)
	lea	(vp,n,8), vp		C point all three arrays just past their end
	lea	(up,n,8), up
	lea	(rp,n,8), rp
	neg	n			C n = -count, so (ptr,n,8) addresses word 0
	and	$3, R32(%rax)		C rax = count mod 4
	je	L(b00)			C residue 0
	cmp	$2, R32(%rax)
	jc	L(b01)			C residue 1
	je	L(b10)			C residue 2, otherwise fall into residue 3

C Residue 3: finish word 0, then enter mid-loop.
L(b11):	LOGOP	(up,n,8), %r8
	mov	%r8, (rp,n,8)
	dec	n			C bias index so 16(..,n,8) is word 1
	jmp	L(e11)
C Residue 2: r8 already holds ~vp[0], so enter after the load and not of r8.
L(b10):	add	$-2, n
	jmp	L(e10)
C Thirteen 0x90 bytes placed after an unconditional jmp; execution never
C falls through them.  NOTE(review): presumably padding that positions the
C code which follows - confirm before changing the count.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C Residue 1: finish word 0; return if that was the only word.
L(b01):	LOGOP	(up,n,8), %r8
	mov	%r8, (rp,n,8)
	inc	n
	jz	L(ret)

C Main loop, four words per iteration.
L(top):	mov	(vp,n,8), %r8
	not	%r8
L(b00):	mov	8(vp,n,8), %r9
	not	%r9
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	mov	%r8, (rp,n,8)
	mov	%r9, 8(rp,n,8)
L(e11):	mov	16(vp,n,8), %r8
	not	%r8
L(e10):	mov	24(vp,n,8), %r9
	not	%r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	mov	%r8, 16(rp,n,8)
	mov	%r9, 24(rp,n,8)
	add	$4, n			C carry is set when the index wraps to 0
	jnc	L(top)

L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')

C VARIANT_3: rp[i] = ~(up[i] LOGOP vp[i])
C Same skeleton as VARIANT_1, with each result complemented after LOGOP and
C before it is stored.
ifdef(`VARIANT_3',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)
	mov	(vp), %r8		C r8 = vp[0], consumed by every entry path
	mov	R32(%rcx), R32(%rax)	C copy low 32 bits of the count (n is rcx)
	lea	(vp,n,8), vp		C point all three arrays just past their end
	lea	(up,n,8), up
	lea	(rp,n,8), rp
	neg	n			C n = -count, so (ptr,n,8) addresses word 0
	and	$3, R32(%rax)		C rax = count mod 4
	je	L(b00)			C residue 0
	cmp	$2, R32(%rax)
	jc	L(b01)			C residue 1
	je	L(b10)			C residue 2, otherwise fall into residue 3

C Residue 3: finish word 0, then enter mid-loop.
L(b11):	LOGOP	(up,n,8), %r8
	not	%r8
	mov	%r8, (rp,n,8)
	dec	n			C bias index so 16(..,n,8) is word 1
	jmp	L(e11)
C Residue 2: r8 still holds vp[0]; bias the index so 16(..,n,8) is word 0.
L(b10):	add	$-2, n
	jmp	L(e10)
C Ten 0x90 bytes placed after an unconditional jmp; execution never falls
C through them.  NOTE(review): presumably padding that positions the code
C which follows - confirm before changing the count.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C Residue 1: finish word 0; return if that was the only word.
L(b01):	LOGOP	(up,n,8), %r8
	not	%r8
	mov	%r8, (rp,n,8)
	inc	n
	jz	L(ret)

C Main loop, four words per iteration.
L(top):	mov	(vp,n,8), %r8
L(b00):	mov	8(vp,n,8), %r9
	LOGOP	(up,n,8), %r8
	not	%r8
	LOGOP	8(up,n,8), %r9
	not	%r9
	mov	%r8, (rp,n,8)
	mov	%r9, 8(rp,n,8)
L(e11):	mov	16(vp,n,8), %r8
L(e10):	mov	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	not	%r8
	LOGOP	24(up,n,8), %r9
	not	%r9
	mov	%r8, 16(rp,n,8)
	mov	%r9, 24(rp,n,8)
	add	$4, n			C carry is set when the index wraps to 0
	jnc	L(top)

L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')