// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __ashldi3(di_int input, int count);
//
// Returns input << count (64-bit logical left shift) on i386.
//
// Stack layout on entry, as read by the code below:
//    4(%esp)  low  32 bits of input
//    8(%esp)  high 32 bits of input
//   12(%esp)  count
// The result is produced with its low half in %eax and its high half in %edx.
// Callers are assumed to pass 0 <= count < 64; the two variants below are not
// guaranteed to agree with each other outside that range.

// The SSE2 variant has some extra work on the load side: it fetches the 64-bit
// input via two 32-bit loads and merges the halves in a register (punpckldq)
// instead of issuing a single 64-bit load. This is to avoid a write-small,
// read-large stall when the caller wrote the argument as two 32-bit stores.
// However, if callers of this routine can be safely assumed to store the argument
// via a 64-bit store, this is unnecessary work, and should be avoided.
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.

#ifdef __i386__
#ifdef __SSE2__

.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
	movd	  12(%esp),		%xmm2	// Load count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
	movd	   4(%esp),		%xmm0	// Low half of input
	movd	   8(%esp),		%xmm1	// High half of input
	punpckldq	%xmm1,		%xmm0	// Load input (xmm0 = high:low)
#else
	movq	   4(%esp),		%xmm0	// Load input
#endif
	psllq		%xmm2,		%xmm0	// shift input by count
	movd		%xmm0,		%eax	// Return low half in eax
	psrlq		$32,		%xmm0	// Bring high half down to bits 31:0
	movd		%xmm0,		%edx	// Return high half in edx
	ret

#else // Use GPRs instead of SSE2 instructions, if they aren't available.

.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
	movl	  12(%esp),		%ecx	// Load count
	movl	   8(%esp),		%edx	// Load high
	movl	   4(%esp),		%eax	// Load low

	testl		$0x20,		%ecx	// If count >= 32 (bit 5 of count set)
	jnz			1f					//    goto 1
	shldl		%cl, %eax,	%edx	// left shift high by count, filling from low
	shll		%cl,		%eax	// left shift low by count
	ret

1:	movl		%eax,		%edx	// Move low to high
	xorl		%eax,		%eax	// clear low
	shll		%cl,		%edx	// shift high by count - 32 (shll uses only
									// the low 5 bits of %cl)
	ret

#endif // __SSE2__
#endif // __i386__

// Mark this object as not requiring an executable stack. Without this note,
// GNU-style ELF linkers may treat the object as needing one and make the stack
// executable for the whole linked image.
#if defined(__ELF__) && (defined(__linux__) || defined(__FreeBSD__) || defined(__GNU__))
.section .note.GNU-stack,"",%progbits
#endif