1dnl AMD64 mpn_mul_1 optimised for Intel Atom. 2 3dnl Copyright 2003-2005, 2007, 2008, 2012, 2013 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6dnl 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of either: 9dnl 10dnl * the GNU Lesser General Public License as published by the Free 11dnl Software Foundation; either version 3 of the License, or (at your 12dnl option) any later version. 13dnl 14dnl or 15dnl 16dnl * the GNU General Public License as published by the Free Software 17dnl Foundation; either version 2 of the License, or (at your option) any 18dnl later version. 19dnl 20dnl or both in parallel, as here. 21dnl 22dnl The GNU MP Library is distributed in the hope that it will be useful, but 23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25dnl for more details. 26dnl 27dnl You should have received copies of the GNU General Public License and the 28dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29dnl see https://www.gnu.org/licenses/. 30 31include(`../config.m4') 32 33C cycles/limb 34C AMD K8,K9 3.03 35C AMD K10 3.03 36C AMD bull 4.74 37C AMD pile 4.56 38C AMD steam 39C AMD excavator 40C AMD bobcat 5.56 6.04 41C AMD jaguar 5.55 5.84 42C Intel P4 13.05 43C Intel core2 4.03 44C Intel NHM 3.80 45C Intel SBR 2.75 46C Intel IBR 2.69 47C Intel HWL 2.50 48C Intel BWL 2.55 49C Intel SKL 2.57 50C Intel atom 17.3 51C Intel SLM 14.7 52C VIA nano 53 54C The loop of this code is the result of running a code generation and 55C optimisation tool suite written by David Harvey and Torbjorn Granlund. 
C Register roles.  rp, up, n_param and v0 are the first four STD64 integer
C argument registers.  The register named in each trailing comment is
C presumably the matching DOS64 argument register that FUNC_ENTRY remaps
C (FUNC_ENTRY comes from config.m4, not from this file -- confirm there).
C R8() and R32() are also external macros, presumably naming the 8-bit and
C 32-bit views of a register.
define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`v0',      `%rcx')   C r9

C Loop index: negative while limbs remain, stepped by 4 towards zero.
define(`n',       `%r11')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C mpn_mul_1  (rp, up, n_param, v0)
C mpn_mul_1c (rp, up, n_param, v0, carry-in)
C
C Multiply the n_param limbs at up by the single limb v0 and store the
C n_param low result limbs at rp.  The final high limb (last high product
C half plus carry) is left in %rax.  mpn_mul_1 starts from a zero carry-in
C held in %r8; mpn_mul_1c enters at L(com) with %r8 already holding the
C carry-in limb.
C
C Scratch registers written: %rax, %rdx, %r8, %r9, %r11, plus rp and up.
C
C NOTE(review): no path checks for n_param == 0; presumably callers
C guarantee n_param >= 1 -- confirm against the mpn interface contract.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_mul_1)
	FUNC_ENTRY(4)
	xor	%r8, %r8		C carry-in limb = 0
L(com):	mov	(up), %rax		C rax = first source limb, ready for mul
	lea	-16(up,n_param,8), up	C bias up and rp towards the operand ends so
	lea	-8(rp,n_param,8), rp	C that they can be indexed by the negative n
	test	$1, R8(n_param)		C dispatch on n_param mod 4, low bit first
	jnz	L(bx1)

C n_param even.  The lo0 and lo2 entry points add into r9, so the carry-in
C is moved there.
L(bx0):	mov	%r8, %r9
	test	$2, R8(n_param)
	jnz	L(b10)

C n_param = 0 mod 4
L(b00):	mov	$2, R32(n)
	sub	n_param, n		C n = 2 - n_param
	jmp	L(lo0)

C n_param odd.  The lo1 and lo3 entry points add into r8, where the
C carry-in already is.
L(bx1):	test	$2, R8(n_param)
	jnz	L(b11)

C n_param = 1 mod 4
L(b01):	mov	$3, R32(n)
	sub	n_param, n		C n = 3 - n_param
	mul	v0			C rdx:rax = first limb * v0
	cmp	$2, n			C n == 2 exactly when n_param == 1
	jnz	L(lo1)
	jmp	L(cj1)			C single limb: skip the loop, finish in tail

C n_param = 3 mod 4
L(b11):	mov	$1, R32(n)
	sub	n_param, n		C n = 1 - n_param
	jmp	L(lo3)

C n_param = 2 mod 4
L(b10):	xor	R32(n), R32(n)
	sub	n_param, n		C n = -n_param
	jmp	L(lo2)

C Main loop, four limbs per pass.  r8 and r9 alternate roles: one receives
C the low product half on top of the previous high half and is stored, the
C other is cleared and then collects the next high half plus carry.  Each
C xor precedes the add it belongs to, and only a mov sits between every add
C and its adc, so the carry flag produced by add reaches adc intact.
C
C On every entry path n is congruent to 2 mod 4, so js falls through when n
C reaches 2.
L(top):	mul	v0
	mov	%r9, -24(rp,n,8)
L(lo1):	xor	%r9d, %r9d
	add	%rax, %r8
	mov	(up,n,8), %rax		C fetch next source limb
	adc	%rdx, %r9
	mov	%r8, -16(rp,n,8)
L(lo0):	xor	%r8d, %r8d
	mul	v0
	add	%rax, %r9
	mov	8(up,n,8), %rax
	adc	%rdx, %r8
	mov	%r9, -8(rp,n,8)
L(lo3):	xor	%r9d, %r9d
	mul	v0
	add	%rax, %r8
	mov	16(up,n,8), %rax
	adc	%rdx, %r9
	mov	%r8, (rp,n,8)
L(lo2):	xor	%r8d, %r8d
	mul	v0
	add	%rax, %r9
	mov	24(up,n,8), %rax
	adc	%rdx, %r8
	add	$4, n
	js	L(top)			C loop while n is still negative

C Tail: multiply the last loaded limb and flush the two pending result limbs.
L(end):	mul	v0
	mov	%r9, -8(rp)		C next-to-last result limb
L(cj1):	add	%rax, %r8		C last result limb
	mov	$0, R32(%rax)		C zero rax with mov, not xor: CF must survive
	adc	%rdx, %rax		C rax = last high half + carry
	mov	%r8, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()

C mpn_mul_1c: same operation as mpn_mul_1, but with a caller-supplied
C carry-in limb in r8 instead of zero.  Under DOS64 the limb is first loaded
C from the stack; the 56(%rsp) offset presumably accounts for whatever
C FUNC_ENTRY(4) pushes -- verify in config.m4.  Control then joins the shared
C body at L(com) in mpn_mul_1.
PROLOGUE(mpn_mul_1c)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
	jmp	L(com)
EPILOGUE()
ASM_END()