ashldi3.S revision 214152
148692Sjlemon// This file is distributed under the University of Illinois Open Source
248692Sjlemon// License. See LICENSE.TXT for details.
348692Sjlemon
448692Sjlemon#include "../assembly.h"
548692Sjlemon
648692Sjlemon// di_int __ashldi3(di_int input, int count);
748692Sjlemon
848692Sjlemon// This routine has some extra memory traffic, loading the 64-bit input via two
948692Sjlemon// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
1048692Sjlemon// store.  This is to avoid a write-small, read-large stall.
1148692Sjlemon// However, if callers of this routine can be safely assumed to store the argument
1248692Sjlemon// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
1348692Sjlemon// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
1448692Sjlemon
1548692Sjlemon#ifdef __i386__
1648692Sjlemon#ifdef __SSE2__
1748692Sjlemon
.text
.align 4
// di_int __ashldi3(di_int input, int count)
//
// SSE2 variant.  Arguments are read from the stack:
//    4(%esp)  low  32 bits of input
//    8(%esp)  high 32 bits of input
//   12(%esp)  count
// The shifted value is handed back with its low half in %eax and its high
// half in %edx.  Uses %xmm0 and %xmm2 as scratch, plus %xmm1 when
// TRUST_CALLERS_USE_64_BIT_STORES is not defined.
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
#ifdef TRUST_CALLERS_USE_64_BIT_STORES
	movq	   4(%esp),		%xmm0	// xmm0 = input, fetched with one 64-bit load
#else
	movd	   4(%esp),		%xmm0	// xmm0 = low half of input
	movd	   8(%esp),		%xmm1	// xmm1 = high half of input
	punpckldq	%xmm1,		%xmm0	// xmm0 = high:low, the full 64-bit input
#endif
	movd	  12(%esp),		%xmm2	// xmm2 = count
	psllq		%xmm2,		%xmm0	// xmm0 = input << count
	movd		%xmm0,		%eax	// eax = low half of the result
	psrlq		$32,		%xmm0	// bring the high half down to the low dword
	movd		%xmm0,		%edx	// edx = high half of the result
	ret
3448692Sjlemon
3548692Sjlemon#else // Use GPRs instead of SSE2 instructions, if they aren't available.
3648692Sjlemon
.text
.align 4
// di_int __ashldi3(di_int input, int count)
//
// General-purpose-register variant, used when SSE2 is not available.
// Arguments are read from the stack:
//    4(%esp)  low  32 bits of input
//    8(%esp)  high 32 bits of input
//   12(%esp)  count
// The shifted value is handed back with its low half in %eax and its high
// half in %edx.  %ecx is used as scratch for the count.
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
	movl	   4(%esp),		%eax	// eax = low half of input
	movl	   8(%esp),		%edx	// edx = high half of input
	movl	  12(%esp),		%ecx	// ecx = count

	testl		$32,		%ecx	// bit 5 of count set: count >= 32 (for counts in 0..63)
	jnz		1f			//    take the wide-shift path

	// count < 32: bits shifted out of the low half flow into the high half.
	shldl		%cl, %eax,	%edx	// high = high << count, filled from the top of low
	shll		%cl,		%eax	// low  = low << count
	ret

1:	// count >= 32: only bits from the low half survive, all in the high half.
	movl		%eax,		%edx	// high = old low half
	xorl		%eax,		%eax	// low  = 0
	shll		%cl,		%edx	// shifter uses cl mod 32, which is count - 32 here
	ret
54189282Skib
5587321Sdes#endif // __SSE2__
5676166Smarkm#endif // __i386__
5748692Sjlemon