dnl  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

dnl  Copyright 2007, 2008, 2010, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C		cycles/limb	best
C AMD K8,K9	18
C AMD K10	18
C AMD bull
C AMD pile
C AMD bobcat
C AMD jaguar
C Intel P4	68
C Intel core	34
C Intel NHM	30.25
C Intel SBR	21.3
C Intel IBR	21.4
C Intel HWL	20.6
C Intel BWL
C Intel atom	73
C VIA nano	33


C mpn_divrem_2 (qp, fn, up, un, dp)
C
C Divides the un-limb number at up, extended by fn zero limbs below it, by
C the two-limb divisor dp[1]:dp[0] (d1:d0).  The file title requires the
C divisor to be normalized (presumably: top bit of dp[1] set; this routine
C does not check it).
C
C As implemented below:
C   * un + fn - 2 quotient limbs are stored at qp[0] ... qp[un+fn-3], most
C     significant limb first.
C   * The two-limb remainder is written back over the dividend through
C     8(%r12) and 16(%r12); after the un - 2 limb fetches made by the loop
C     these addresses are up[0] and up[1].
C   * The return value (%rax) is 1 if the top two dividend limbs were
C     >= d1:d0 on entry, in which case the divisor is subtracted from them
C     once before the loop; otherwise it is 0.
C
C Calls mpn_invert_limb (defined outside this file) once, with d1 as its
C argument.  FUNC_ENTRY, FUNC_EXIT, IFDOS, IFSTD, ASSERT and CALL are glue
C macros that are also defined outside this file.

C INPUT PARAMETERS
define(`qp',		`%rdi')
define(`fn',		`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`dp',		`%r8')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_divrem_2)
	FUNC_ENTRY(4)
C Under DOS64 the fifth argument (dp) is fetched from the stack into %r8.
IFDOS(`	mov	56(%rsp), %r8	')
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	lea	-24(%rdx,%rcx,8), %r12	C r12 = &up[un-3]
	mov	%rsi, %r13		C r13 = fn
	push	%rbp
	mov	%rdi, %rbp		C rbp = qp
	push	%rbx
	mov	8(%r8), %r11		C d1
	mov	16(%r12), %rbx		C rbx = up[un-1]
	mov	(%r8), %r8		C d0
	mov	8(%r12), %r10		C r10 = up[un-2]

C Decide the return value: r15 = 1 and subtract d once if rbx:r10 >= d1:d0,
C else r15 = 0 and the top two limbs are left untouched.
	xor	R32(%r15), R32(%r15)
	cmp	%rbx, %r11
	ja	L(2)			C d1 > high limb: no subtraction
	setb	%dl			C dl = (d1 < high limb)
	cmp	%r10, %r8
	setbe	%al			C al = (d0 <= low limb)
	orb	%al, %dl		C "orb" form to placate Sun tools
	je	L(2)			C neither holds: rbx:r10 < d1:d0
	inc	R32(%r15)
	sub	%r8, %r10
	sbb	%r11, %rbx
L(2):
	lea	-3(%rcx,%r13), %r14	C un + fn - 3
	test	%r14, %r14
	js	L(end)			C no quotient limbs to produce

C d0, the low remainder limb and d1 live in caller-saved registers, so they
C are saved around the call.  These three pushes bring the total since the
C end of FUNC_ENTRY to nine.
	push	%r8
	push	%r10
	push	%r11
IFSTD(`	mov	%r11, %rdi	')
IFDOS(`	mov	%r11, %rcx	')
IFDOS(`	sub	$32, %rsp	')
C Stack-alignment sanity check; the exact semantics of ASSERT are not
C visible in this file.
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
IFDOS(`	add	$32, %rsp	')
	pop	%r11
	pop	%r10
	pop	%r8

C Fold d0 into the one-limb inverse of d1 that was returned in %rax.  The
C two-word accumulator rcx:r9 starts at low(d1 * di) + d0 + high(di * d0),
C biased by -1 in rcx; di is then decremented, subtracting d1 from the
C accumulator each time, until rcx is negative.
	mov	%r11, %rdx
	mov	%rax, %rdi		C rdi = di
	imul	%rax, %rdx		C rdx = low limb of d1 * di
	mov	%rdx, %r9
	mul	%r8			C rdx:rax = di * d0
	xor	R32(%rcx), R32(%rcx)
	add	%r8, %r9
	adc	$-1, %rcx
	add	%rdx, %r9
	adc	$0, %rcx
	js	2f
1:	dec	%rdi
	sub	%r11, %r9
	sbb	$0, %rcx
	jns	1b
2:

	lea	(%rbp,%r14,8), %rbp	C rbp = &qp[un+fn-3]
	mov	%r11, %rsi
	neg	%rsi			C -d1

C Register roles inside the loop:
C   %rbx	high limb of the running remainder (n2)
C   %r10	low limb of the running remainder
C   %rsi	-d1
C   %rdi	dinv
C   %rbp	qp, address where the next quotient limb is stored
C   %r8		d0
C   %r9		q, the quotient limb being formed
C   %r11	d1
C   %r12	up, address of the next dividend limb to fetch
C   %r13	fn
C   %r14	loop counter, counts down from un + fn - 3 to 0
C   %r15	msl, the value returned in %rax
C   %rax, %rcx, %rdx are scratch.
C
C The right-hand comment column holds the original scheduling annotations
C (presumably cycle positions along the dependency chain, with ncp
C presumably meaning not on the critical path).  The instruction order is
C deliberate and should not be changed without re-measuring.

	ALIGN(16)
L(top):	mov	%rdi, %rax		C di		ncp
	mul	%rbx			C		0, 17
	mov	%r10, %rcx		C
	add	%rax, %rcx		C		4
	adc	%rbx, %rdx		C		5
	mov	%rdx, %r9		C q		6
	imul	%rsi, %rdx		C		6
	mov	%r8, %rax		C		ncp
	lea	(%rdx, %r10), %rbx	C n1 -= ...	10
	xor	R32(%r10), R32(%r10)	C
	mul	%r9			C		7
	cmp	%r14, %r13		C
	jg	L(19)			C fn > counter: feed a zero limb
	mov	(%r12), %r10		C
	sub	$8, %r12		C
L(19):	sub	%r8, %r10		C		ncp
	sbb	%r11, %rbx		C		11
	sub	%rax, %r10		C		11
	sbb	%rdx, %rbx		C		12
	xor	R32(%rax), R32(%rax)	C
	xor	R32(%rdx), R32(%rdx)	C
	cmp	%rcx, %rbx		C		13
	cmovnc	%r8, %rax		C		14
	cmovnc	%r11, %rdx		C		14
	adc	$0, %r9			C adjust q	14
	nop				C presumably padding; purpose not stated
	add	%rax, %r10		C		15
	adc	%rdx, %rbx		C		16
	cmp	%r11, %rbx		C
	jae	L(fix)			C high limb >= d1: may need one more fix
L(bck):	mov	%r9, (%rbp)		C
	sub	$8, %rbp		C
	dec	%r14
	jns	L(top)

C Store the two-limb remainder and restore the callee-saved registers in the
C reverse of the order in which they were pushed.
L(end):	mov	%r10, 8(%r12)
	mov	%rbx, 16(%r12)
	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	pop	%r14
	mov	%r15, %rax
	pop	%r15
	FUNC_EXIT()
	ret

C Out-of-line path, entered with the flags of cmp %r11, %rbx still live.
C If rbx:r10 >= d1:d0, bump q and subtract the divisor once more.
L(fix):	seta	%dl			C dl = (high limb > d1)
	cmp	%r8, %r10
	setae	%al			C al = (low limb >= d0)
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)			C high == d1 and low < d0: no fix needed
	inc	%r9
	sub	%r8, %r10
	sbb	%r11, %rbx
	jmp	L(bck)
EPILOGUE()