ashldi3.S revision 214152
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __ashldi3(di_int input, int count);
//
// Returns input shifted left by count bits.
//
// Arguments are read from the stack: the low 32 bits of input at 4(%esp),
// the high 32 bits at 8(%esp), and count at 12(%esp). The 64-bit result is
// returned with its low half in %eax and its high half in %edx.

// By default this routine does some extra work: it loads the 64-bit input via
// two 32-bit loads and merges the halves in a register, instead of using a
// single 64-bit load. This is to avoid a write-small, read-large stall.
// However, if callers of this routine can be safely assumed to store the argument
// via a 64-bit store, this extra work is unnecessary and should be avoided.
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.

#ifdef __i386__
#ifdef __SSE2__

.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
	movd		12(%esp),	%xmm2	// Load count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
	movd		4(%esp),	%xmm0	// Load low half of input
	movd		8(%esp),	%xmm1	// Load high half of input
	punpckldq	%xmm1,		%xmm0	// Merge halves: xmm0 = high:low
#else
	movq		4(%esp),	%xmm0	// Load input with one 64-bit load
#endif
	psllq		%xmm2,		%xmm0	// shift input by count
	movd		%xmm0,		%eax	// Return low half in eax
	psrlq		$32,		%xmm0	// Bring high half down to bits 31:0
	movd		%xmm0,		%edx	// Return high half in edx
	ret

#else // Use GPRs instead of SSE2 instructions, if they aren't available.

// General-purpose-register implementation of
//   di_int __ashldi3(di_int input, int count);
//
// Reads the low half of input from 4(%esp), the high half from 8(%esp) and
// count from 12(%esp). Returns the shifted value with its low half in %eax
// and its high half in %edx.
// NOTE(review): only bit 5 of count is tested to pick the path, so count is
// presumably expected to lie in [0, 63] -- confirm against callers.

.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
	movl		12(%esp),	%ecx	// Load count
	movl		8(%esp),	%edx	// Load high
	movl		4(%esp),	%eax	// Load low

	testl		$0x20,		%ecx	// If count >= 32
	jnz		1f			// goto 1
	shldl		%cl, %eax,	%edx	// left shift high by count, filling from low
	shll		%cl,		%eax	// left shift low by count
	ret

1:	movl		%eax,		%edx	// Move low to high
	xorl		%eax,		%eax	// clear low
	shll		%cl,		%edx	// shift high by count - 32
	ret

#endif // __SSE2__
#endif // __i386__