// bignum_mul_4_8_alt.S revision 1.3
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// ----------------------------------------------------------------------------
// Multiply z := x * y
// Inputs x[4], y[4]; output z[8]
//
//    extern void bignum_mul_4_8_alt
//      (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]);
//
// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
// Microsoft x64 ABI:   RCX = z, RDX = x, R8 = y
//
// Method: column-by-column schoolbook multiplication. Result word k is the
// low word of the sum of all x[i] * y[j] with i + j = k plus the carry from
// the previous column, accumulated in a rotating 3-register window.
//
// Registers written: rax, rdx, rcx, r8, r9, r10 and the flags. Under
// WINDOWS_ABI rdi and rsi are also used, but are saved and restored.
// No other stack is used and no other function is called.
//
// Low words of z are stored while words of x and y are still being read,
// so z presumably must not overlap x or y -- confirm against the callers.
// ----------------------------------------------------------------------------

#include "s2n_bignum_internal.h"

        .intel_syntax noprefix
        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8_alt)
        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8_alt)
        .text

// These are already in the right registers on entry (standard ABI), or are
// moved there by the WINDOWS_ABI prologue below

#define z rdi
#define x rsi

// This is moved from rdx to free it for muls (mul writes rdx:rax)

#define y rcx

// Other variables used as a rotating 3-word window to add terms to

#define t0 r8
#define t1 r9
#define t2 r10

// Macro for the key "multiply and add to (c,h,l)" step:
// (c,h,l) := (c,h,l) + numa * numb, where numa and numb are memory operands.
// Uses rax and rdx as scratch and modifies the flags.

#define combadd(c,h,l,numa,numb)                \
        mov     rax, numa;                      \
        mul     QWORD PTR numb;                 \
        add     l, rax;                         \
        adc     h, rdx;                         \
        adc     c, 0

// A minutely shorter form for when c = 0 initially: with c = 0 the final
// "adc c, c" computes 0 + 0 + CF, i.e. just captures the carry

#define combadz(c,h,l,numa,numb)                \
        mov     rax, numa;                      \
        mul     QWORD PTR numb;                 \
        add     l, rax;                         \
        adc     h, rdx;                         \
        adc     c, c

// A short form where we don't expect a top carry, so only (h,l) is updated
// and any carry out of h is discarded

#define combads(h,l,numa,numb)                  \
        mov     rax, numa;                      \
        mul     QWORD PTR numb;                 \
        add     l, rax;                         \
        adc     h, rdx

S2N_BN_SYMBOL(bignum_mul_4_8_alt):

// Under the Microsoft x64 ABI, rdi and rsi are callee-saved and the
// arguments arrive in rcx, rdx, r8; save the former and shuffle the
// latter into the standard-ABI argument registers

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
#endif

// Copy y into a safe register to start with

        mov     y, rdx

// Result term 0: x[0] * y[0]; the high word seeds the window

        mov     rax, [x]
        mul     QWORD PTR [y]

        mov     [z], rax
        mov     t0, rdx
        xor     t1, t1

// Result term 1: window is (t2,t1,t0)

        xor     t2, t2
        combads(t1,t0,[x],[y+8])
        combadz(t2,t1,t0,[x+8],[y])
        mov     [z+8], t0

// Result term 2: window is (t0,t2,t1)

        xor     t0, t0
        combadz(t0,t2,t1,[x],[y+16])
        combadd(t0,t2,t1,[x+8],[y+8])
        combadd(t0,t2,t1,[x+16],[y])
        mov     [z+16], t1

// Result term 3: window is (t1,t0,t2)

        xor     t1, t1
        combadz(t1,t0,t2,[x],[y+24])
        combadd(t1,t0,t2,[x+8],[y+16])
        combadd(t1,t0,t2,[x+16],[y+8])
        combadd(t1,t0,t2,[x+24],[y])
        mov     [z+24], t2

// Result term 4: window is (t2,t1,t0)

        xor     t2, t2
        combadz(t2,t1,t0,[x+8],[y+24])
        combadd(t2,t1,t0,[x+16],[y+16])
        combadd(t2,t1,t0,[x+24],[y+8])
        mov     [z+32], t0

// Result term 5: window is (t0,t2,t1)

        xor     t0, t0
        combadz(t0,t2,t1,[x+16],[y+24])
        combadd(t0,t2,t1,[x+24],[y+16])
        mov     [z+40], t1

// Result term 6: only (t0,t2) is still live; t1 is zeroed here but is not
// read again before the return

        xor     t1, t1
        combads(t0,t2,[x+24],[y+24])
        mov     [z+48], t2

// Result term 7: the remaining top word

        mov     [z+56], t0

// Return

#if WINDOWS_ABI
        pop    rsi
        pop    rdi
#endif
        ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif