dnl  AMD64 logops.

dnl  Copyright 2004-2017 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C		c/l	c/l	c/l	good
C		var-1	var-2	var-3	for cpu?
C AMD K8,K9
C AMD K10	 1.52	 1.75	 1.75	 n
C AMD bd1
C AMD bd2
C AMD bd3
C AMD bd4
C AMD bt1	 2.67	~2.79	~2.79	 =
C AMD bt2	 2.15	 2.65	 2.65	 n
C AMD zen	 1.5	 1.5	 1.5	 =
C Intel P4
C Intel PNR	 2.0	 2.0	 2.0	 =
C Intel NHM	 2.0	 2.0	 2.0	 =
C Intel SBR	 1.5	 1.5	 1.5	 y
C Intel IBR	 1.47	 1.48	 1.48	 y
C Intel HWL	 1.11	 1.35	 1.35	 y
C Intel BWL	 1.09	 1.30	 1.30	 y
C Intel SKL	 1.21	 1.27	 1.27	 y
C Intel atom	 3.31	 3.57	 3.57	 y
C Intel SLM	 3.0	 3.0	 3.0	 =
C VIA nano

C Operation selection.  Exactly one OPERATION_xxx symbol is expected to be
C defined when this file is processed.  It picks the exported name (func), the
C machine instruction used to combine limbs (LOGOP) and one of three code
C shapes:
C
C   VARIANT_1	rp[i] = up[i] LOGOP vp[i]
C   VARIANT_2	rp[i] = up[i] LOGOP (NOT vp[i])
C   VARIANT_3	rp[i] = NOT (up[i] LOGOP vp[i])

ifdef(`OPERATION_and_n',`
  define(`func',`mpn_and_n')
  define(`VARIANT_1')
  define(`LOGOP',`and')')
ifdef(`OPERATION_andn_n',`
  define(`func',`mpn_andn_n')
  define(`VARIANT_2')
  define(`LOGOP',`and')')
ifdef(`OPERATION_nand_n',`
  define(`func',`mpn_nand_n')
  define(`VARIANT_3')
  define(`LOGOP',`and')')
ifdef(`OPERATION_ior_n',`
  define(`func',`mpn_ior_n')
  define(`VARIANT_1')
  define(`LOGOP',`or')')
ifdef(`OPERATION_iorn_n',`
  define(`func',`mpn_iorn_n')
  define(`VARIANT_2')
  define(`LOGOP',`or')')
ifdef(`OPERATION_nior_n',`
  define(`func',`mpn_nior_n')
  define(`VARIANT_3')
  define(`LOGOP',`or')')
ifdef(`OPERATION_xor_n',`
  define(`func',`mpn_xor_n')
  define(`VARIANT_1')
  define(`LOGOP',`xor')')
ifdef(`OPERATION_xnor_n',`
  define(`func',`mpn_xnor_n')
  define(`VARIANT_2')
  define(`LOGOP',`xor')')

C addptr(disp, reg) advances reg by disp bytes using lea, so the flags are
C left untouched.
define(`addptr', `lea	$1($2), $2')

MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C INPUT PARAMETERS
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n',`%rcx')

C Scratch registers used by all variants: %rax (n mod 4 at entry), %r8 and %r9
C (limb values).  Limbs are 8 bytes.
C
C All variants share the same structure: a loop handling four limbs per pass,
C entered at one of four points chosen from n mod 4.  vp[0] is read
C unconditionally before the dispatch, and the n mod 4 = 0 path goes straight
C into the loop, so n must be at least 1.

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()

C VARIANT_1:  rp[i] = up[i] LOGOP vp[i]

ifdef(`VARIANT_1',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)
	mov	(vp), %r8		C preload vp[0], used by every entry path
	mov	R32(%rcx), R32(%rax)
	and	$3, R32(%rax)		C rax = n mod 4
	je	L(b00)			C n mod 4 = 0
	cmp	$2, R32(%rax)
	jc	L(b01)			C n mod 4 = 1
	je	L(b10)			C n mod 4 = 2

C n mod 4 = 3: finish limb 0 here, then join the second half of the loop with
C the pointers biased by one limb and n rounded up to a multiple of 4.
L(b11):	LOGOP	(up), %r8
	mov	%r8, (rp)
	inc	n
	addptr(	-8, up)
	addptr(	-8, vp)
	addptr(	-8, rp)
	jmp	L(e11)
C n mod 4 = 2: r8 still holds vp[0]; bias the pointers by two limbs so that
C the second half of the loop processes limbs 0 and 1.
L(b10):	add	$2, n
	addptr(	-16, up)
	addptr(	-16, vp)
	addptr(	-16, rp)
	jmp	L(e10)
C n mod 4 = 1: finish limb 0 here; return if it was the only limb, otherwise
C step past it and fall into the loop.
L(b01):	LOGOP	(up), %r8
	mov	%r8, (rp)
	dec	n
	jz	L(ret)
	addptr(	8, up)
	addptr(	8, vp)
	addptr(	8, rp)

	ALIGN(16)
L(top):	mov	(vp), %r8
L(b00):	mov	8(vp), %r9
	LOGOP	(up), %r8
	LOGOP	8(up), %r9
	mov	%r8, (rp)
	mov	%r9, 8(rp)
L(e11):	mov	16(vp), %r8
L(e10):	mov	24(vp), %r9
	addptr(	32, vp)
	LOGOP	16(up), %r8
	LOGOP	24(up), %r9
	addptr(	32, up)
	mov	%r8, 16(rp)
	mov	%r9, 24(rp)
	addptr(	32, rp)
	sub	$4, n
	jnz	L(top)

L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')

C VARIANT_2:  rp[i] = up[i] LOGOP (NOT vp[i])
C
C Same layout as VARIANT_1, with each vp limb complemented right after it is
C loaded and before LOGOP is applied.

ifdef(`VARIANT_2',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)
	mov	(vp), %r8		C preload vp[0], used by every entry path
	not	%r8
	mov	R32(%rcx), R32(%rax)
	and	$3, R32(%rax)		C rax = n mod 4
	je	L(b00)			C n mod 4 = 0
	cmp	$2, R32(%rax)
	jc	L(b01)			C n mod 4 = 1
	je	L(b10)			C n mod 4 = 2

C n mod 4 = 3
L(b11):	LOGOP	(up), %r8
	mov	%r8, (rp)
	inc	n
	addptr(	-8, up)
	addptr(	-8, vp)
	addptr(	-8, rp)
	jmp	L(e11)
C n mod 4 = 2: r8 already holds the complemented vp[0]
L(b10):	add	$2, n
	addptr(	-16, up)
	addptr(	-16, vp)
	addptr(	-16, rp)
	jmp	L(e10)
C n mod 4 = 1
L(b01):	LOGOP	(up), %r8
	mov	%r8, (rp)
	dec	n
	jz	L(ret)
	addptr(	8, up)
	addptr(	8, vp)
	addptr(	8, rp)

	ALIGN(16)
L(top):	mov	(vp), %r8
	not	%r8
L(b00):	mov	8(vp), %r9
	not	%r9
	LOGOP	(up), %r8
	LOGOP	8(up), %r9
	mov	%r8, (rp)
	mov	%r9, 8(rp)
L(e11):	mov	16(vp), %r8
	not	%r8
L(e10):	mov	24(vp), %r9
	not	%r9
	addptr(	32, vp)
	LOGOP	16(up), %r8
	LOGOP	24(up), %r9
	addptr(	32, up)
	mov	%r8, 16(rp)
	mov	%r9, 24(rp)
	addptr(	32, rp)
	sub	$4, n
	jnz	L(top)

L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')

C VARIANT_3:  rp[i] = NOT (up[i] LOGOP vp[i])
C
C Same layout as VARIANT_1, with each result complemented after LOGOP and
C before it is stored.

ifdef(`VARIANT_3',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)
	mov	(vp), %r8		C preload vp[0], used by every entry path
	mov	R32(%rcx), R32(%rax)
	and	$3, R32(%rax)		C rax = n mod 4
	je	L(b00)			C n mod 4 = 0
	cmp	$2, R32(%rax)
	jc	L(b01)			C n mod 4 = 1
	je	L(b10)			C n mod 4 = 2

C n mod 4 = 3
L(b11):	LOGOP	(up), %r8
	not	%r8
	mov	%r8, (rp)
	inc	n
	addptr(	-8, up)
	addptr(	-8, vp)
	addptr(	-8, rp)
	jmp	L(e11)
C n mod 4 = 2: r8 still holds vp[0]
L(b10):	add	$2, n
	addptr(	-16, up)
	addptr(	-16, vp)
	addptr(	-16, rp)
	jmp	L(e10)
C n mod 4 = 1
L(b01):	LOGOP	(up), %r8
	not	%r8
	mov	%r8, (rp)
	dec	n
	jz	L(ret)
	addptr(	8, up)
	addptr(	8, vp)
	addptr(	8, rp)

	ALIGN(16)
L(top):	mov	(vp), %r8
L(b00):	mov	8(vp), %r9
	LOGOP	(up), %r8
	not	%r8
	LOGOP	8(up), %r9
	not	%r9
	mov	%r8, (rp)
	mov	%r9, 8(rp)
L(e11):	mov	16(vp), %r8
L(e10):	mov	24(vp), %r9
	addptr(	32, vp)
	LOGOP	16(up), %r8
	not	%r8
	LOGOP	24(up), %r9
	addptr(	32, up)
	not	%r9
	mov	%r8, 16(rp)
	mov	%r9, 24(rp)
	addptr(	32, rp)
	sub	$4, n
	jnz	L(top)

L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')