//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");

static cl::opt<unsigned> GuardWideningWindow(
    "instcombine-guard-widening-window",
    cl::init(3),
    cl::desc("How wide an instruction window to bypass looking for "
             "another guard"));

/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// Return a constant boolean vector that has true elements in all positions
/// where the input constant data vector has an element with the sign bit set.
static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
  SmallVector<Constant *, 32> BoolVec;
  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
    Constant *Elt = V->getElementAsConstant(I);
    assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
           "Unexpected constant data vector element type");
    bool Sign = V->getElementType()->isIntegerTy()
                    ? cast<ConstantInt>(Elt)->isNegative()
                    : cast<ConstantFP>(Elt)->isNegative();
    BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
  }
  return ConstantVector::get(BoolVec);
}
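
// A sketch of the small-copy rewrite performed below, assuming a 4-byte
// non-atomic memcpy:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 4, i1 false)
// becomes (with the intrinsic's alignments preserved on the new instructions)
//   %s32 = bitcast i8* %s to i32*
//   %d32 = bitcast i8* %d to i32*
//   %v = load i32, i32* %s32
//   store i32 %v, i32* %d32
// and the memcpy's length is then set to zero so the dead intrinsic is
// removed on the next iteration.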

Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
  unsigned CopyDstAlign = MI->getDestAlignment();
  if (CopyDstAlign < DstAlign) {
    MI->setDestAlignment(DstAlign);
    return MI;
  }

  unsigned SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
  unsigned CopySrcAlign = MI->getSourceAlignment();
  if (CopySrcAlign < SrcAlign) {
    MI->setSourceAlignment(SrcAlign);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (AA->pointsToConstantMemory(MI->getDest())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If the MemCpyInst length is 1/2/4/8 bytes then replace the memcpy with
  // a load/store pair.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic.  See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transfers should be removed already.");

  if (Size > 8 || (Size & (Size - 1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // If the transfer is atomic and its alignment is less than its size, we
  // would introduce an unaligned memory access, which CodeGen would later
  // transform into a libcall. That is not an evident performance gain, so
  // disable the transform for now.
  if (isa<AtomicMemTransferInst>(MI))
    if (CopyDstAlign < Size || CopySrcAlign < Size)
      return nullptr;

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // If the memcpy has metadata describing the members, see if we can get the
  // TBAA tag describing our copy.
  MDNode *CopyMD = nullptr;
  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
    CopyMD = M;
  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
    if (M->getNumOperands() == 3 && M->getOperand(0) &&
        mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
        mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
        M->getOperand(1) &&
        mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
        mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
            Size &&
        M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
      CopyMD = cast<MDNode>(M->getOperand(2));
  }

  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder.CreateLoad(IntType, Src);
  // Alignment from the mem intrinsic will be better, so use it.
  L->setAlignment(
      MaybeAlign(CopySrcAlign)); // FIXME: Check if we can use Align instead.
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  MDNode *LoopMemParallelMD =
    MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
  if (AccessGroupMD)
    L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);

  StoreInst *S = Builder.CreateStore(L, Dest);
  // Alignment from the mem intrinsic will be better, so use it.
  S->setAlignment(
      MaybeAlign(CopyDstAlign)); // FIXME: Check if we can use Align instead.
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  if (AccessGroupMD)
    S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);

  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
    // non-atomics can be volatile
    L->setVolatile(MT->isVolatile());
    S->setVolatile(MT->isVolatile());
  }
  if (isa<AtomicMemTransferInst>(MI)) {
    // atomics have to be unordered
    L->setOrdering(AtomicOrdering::Unordered);
    S->setOrdering(AtomicOrdering::Unordered);
  }

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
  return MI;
}
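
// A sketch of the transform performed below, assuming a 4-byte non-atomic
// memset:
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 42, i64 4, i1 false)
// becomes a single store of the fill byte splatted across the access width:
//   %p32 = bitcast i8* %p to i32*
//   store i32 707406378, i32* %p32    ; 707406378 == 0x2A2A2A2A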

Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
  const unsigned KnownAlignment =
      getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
  if (MI->getDestAlignment() < KnownAlignment) {
    MI->setDestAlignment(KnownAlignment);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (AA->pointsToConstantMemory(MI->getDest())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  const uint64_t Len = LenC->getLimitedValue();
  assert(Len && "0-sized memory sets should be removed already.");
  const Align Alignment = assumeAligned(MI->getDestAlignment());

  // If the memset is atomic and its alignment is less than its size, we would
  // introduce an unaligned memory access, which CodeGen would later transform
  // into a libcall. That is not an evident performance gain, so disable the
  // transform for now.
  if (isa<AtomicMemSetInst>(MI))
    if (Alignment < Len)
      return nullptr;

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len * 8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue() * 0x0101010101010101ULL;
    StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                       MI->isVolatile());
    S->setAlignment(Alignment);
    if (isa<AtomicMemSetInst>(MI))
      S->setOrdering(AtomicOrdering::Unordered);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}

static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default: llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    LogicalShift = false; ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    LogicalShift = true; ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    LogicalShift = true; ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  // Simplify if the shift count is constant.
  auto Arg1 = II.getArgOperand(1);
  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
  auto CInt = dyn_cast<ConstantInt>(Arg1);
  if (!CAZ && !CDV && !CInt)
    return nullptr;

  APInt Count(64, 0);
  if (CDV) {
    // SSE2/AVX2 uses only the first 64 bits of the 128-bit vector
    // operand to compute the shift amount.
    auto VT = cast<VectorType>(CDV->getType());
    unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
    assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
    unsigned NumSubElts = 64 / BitWidth;

    // Concatenate the sub-elements to create the 64-bit value.
    for (unsigned i = 0; i != NumSubElts; ++i) {
      unsigned SubEltIdx = (NumSubElts - 1) - i;
      auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
      Count <<= BitWidth;
      Count |= SubElt->getValue().zextOrTrunc(64);
    }
  } else if (CInt)
    Count = CInt->getValue();

  auto Vec = II.getArgOperand(0);
  auto VT = cast<VectorType>(Vec->getType());
  auto SVT = VT->getElementType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If shift-by-zero then just return the original value.
  if (Count.isNullValue())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }
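
  // E.g. a psrai.w shift by 20 on <8 x i16> has been clamped to 15 here, so
  // each element is replaced by a splat of its own sign bit.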

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default: llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
  if (!CShift)
    return nullptr;

  auto Vec = II.getArgOperand(0);
  auto VT = cast<VectorType>(II.getType());
  auto SVT = VT->getVectorElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // Collect each element's shift amount.
  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
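  // E.g. for a v4i32 logical shift with constant mask <2, undef, 40, 8> we
  // collect ShiftAmts = {2, -1, 32, 8} and note that an out-of-range amount
  // was seen.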
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (CElt && isa<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements out of range or UNDEF, return vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (llvm::all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;

  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

static Value *simplifyX86pack(IntrinsicInst &II,
                              InstCombiner::BuilderTy &Builder, bool IsSigned) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  Type *ResTy = II.getType();

  // Fast all undef handling.
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
    return UndefValue::get(ResTy);

  Type *ArgTy = Arg0->getType();
  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
  unsigned NumSrcElts = ArgTy->getVectorNumElements();
  assert(ResTy->getVectorNumElements() == (2 * NumSrcElts) &&
         "Unexpected packing types");

  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");

  // Constant folding.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Clamp Values - signed/unsigned both use signed clamp values, but they
  // differ on the min/max values.
  APInt MinValue, MaxValue;
  if (IsSigned) {
    // PACKSS: Truncate signed value with signed saturation.
    // Source values less than dst minint are saturated to minint.
    // Source values greater than dst maxint are saturated to maxint.
    MinValue =
        APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
    MaxValue =
        APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
  } else {
    // PACKUS: Truncate signed value with unsigned saturation.
    // Source values less than zero are saturated to zero.
    // Source values greater than dst maxuint are saturated to maxuint.
    MinValue = APInt::getNullValue(SrcScalarSizeInBits);
    MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
  }
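
  // E.g. for packsswb (i16 -> i8, signed): MinValue = sext(i8 0x80) = 0xFF80
  // (-128) and MaxValue = sext(i8 0x7F) = 0x007F (127).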

  auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
  auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);

  // Shuffle clamped args together at the lane level.
  SmallVector<unsigned, 32> PackMask;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
  }
  auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);

  // Truncate to dst size.
  return Builder.CreateTrunc(Shuffle, ResTy);
}

static Value *simplifyX86movmsk(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Value *Arg = II.getArgOperand(0);
  Type *ResTy = II.getType();
  Type *ArgTy = Arg->getType();

  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
  if (isa<UndefValue>(Arg))
    return Constant::getNullValue(ResTy);

  // We can't easily peek through x86_mmx types.
  if (!ArgTy->isVectorTy())
    return nullptr;

  // Expand MOVMSK to compare/bitcast/zext:
  // e.g. PMOVMSKB(v16i8 x):
  // %cmp = icmp slt <16 x i8> %x, zeroinitializer
  // %int = bitcast <16 x i1> %cmp to i16
  // %res = zext i16 %int to i32
  unsigned NumElts = ArgTy->getVectorNumElements();
  Type *IntegerVecTy = VectorType::getInteger(cast<VectorType>(ArgTy));
  Type *IntegerTy = Builder.getIntNTy(NumElts);

  Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy);
  Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy));
  Res = Builder.CreateBitCast(Res, IntegerTy);
  Res = Builder.CreateZExtOrTrunc(Res, ResTy);
  return Res;
}

static Value *simplifyX86addcarry(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  Value *CarryIn = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *Op2 = II.getArgOperand(2);
  Type *RetTy = II.getType();
  Type *OpTy = Op1->getType();
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");

  // If carry-in is zero, this is just an unsigned add with overflow.
  if (match(CarryIn, m_ZeroInt())) {
    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
                                          { Op1, Op2 });
    // The types have to be adjusted to match the x86 call types.
    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
                                       Builder.getInt8Ty());
    Value *Res = UndefValue::get(RetTy);
    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
    return Builder.CreateInsertValue(Res, UAddResult, 1);
  }

  return nullptr;
}

static Value *simplifyX86insertps(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (!CInt)
    return nullptr;

  VectorType *VecTy = cast<VectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

  // The immediate permute control byte looks like this:
  //  [3:0] - zero mask for each 32-bit lane
  //  [5:4] - select one 32-bit destination lane
  //  [7:6] - select one 32-bit source lane
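  // E.g. an immediate of 0x60 (SourceLane = 1, DestLane = 2, ZMask = 0)
  // yields the shuffle mask <0, 1, 5, 3>: element 1 of the second source is
  // inserted into lane 2 of the first.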

  uint8_t Imm = CInt->getZExtValue();
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;

  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

  // If all zero mask bits are set, this was just a weird way to
  // generate a zero vector.
  if (ZMask == 0xf)
    return ZeroVector;

  // Initialize by passing all of the first source bits through.
  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };

  // We may replace the second operand with the zero vector.
  Value *V1 = II.getArgOperand(1);

  if (ZMask) {
    // If the zero mask is being used with a single input or the zero mask
    // overrides the destination lane, this is a shuffle with the zero vector.
    if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
        (ZMask & (1 << DestLane))) {
      V1 = ZeroVector;
      // We may still move 32-bits of the first source vector from one lane
      // to another.
      ShuffleMask[DestLane] = SourceLane;
      // The zero mask may override the previous insert operation.
      for (unsigned i = 0; i < 4; ++i)
        if ((ZMask >> i) & 0x1)
          ShuffleMask[i] = i + 4;
    } else {
      // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
      return nullptr;
    }
  } else {
    // Replace the selected destination lane with the selected source lane.
    ShuffleMask[DestLane] = SourceLane + 4;
  }

  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
}

/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
/// or conversion to a shuffle vector.
static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
                               ConstantInt *CILength, ConstantInt *CIIndex,
                               InstCombiner::BuilderTy &Builder) {
  auto LowConstantHighUndef = [&](uint64_t Val) {
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  };

  // See if we're dealing with constant values.
  Constant *C0 = dyn_cast<Constant>(Op0);
  ConstantInt *CI0 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;

  // Attempt to constant fold.
  if (CILength && CIIndex) {
    // From AMD documentation: "The bit index and field length are each six
    // bits in length; other bits of the field are ignored."
    APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
    APInt APLength = CILength->getValue().zextOrTrunc(6);

    unsigned Index = APIndex.getZExtValue();

    // From AMD documentation: "a value of zero in the field length is
    // defined as length of 64".
    unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

    // From AMD documentation: "If the sum of the bit index + length field
    // is greater than 64, the results are undefined".
    unsigned End = Index + Length;

    // Note that both field index and field length are 8-bit quantities.
    // Since variables 'Index' and 'Length' are unsigned values
    // obtained from zero-extending field index and field length
    // respectively, their sum should never wrap around.
    if (End > 64)
      return UndefValue::get(II.getType());

    // If we are extracting whole bytes, we can convert this to a shuffle.
    // Lowering can recognize EXTRQI shuffle masks.
    if ((Length % 8) == 0 && (Index % 8) == 0) {
      // Convert bit indices to byte indices.
      Length /= 8;
      Index /= 8;

      Type *IntTy8 = Type::getInt8Ty(II.getContext());
      Type *IntTy32 = Type::getInt32Ty(II.getContext());
      VectorType *ShufTy = VectorType::get(IntTy8, 16);

      SmallVector<Constant *, 16> ShuffleMask;
      for (int i = 0; i != (int)Length; ++i)
        ShuffleMask.push_back(
            Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
      for (int i = Length; i != 8; ++i)
        ShuffleMask.push_back(
            Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
      for (int i = 8; i != 16; ++i)
        ShuffleMask.push_back(UndefValue::get(IntTy32));

      Value *SV = Builder.CreateShuffleVector(
          Builder.CreateBitCast(Op0, ShufTy),
          ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
      return Builder.CreateBitCast(SV, II.getType());
    }

    // Constant Fold - shift Index'th bit to lowest position and mask off
    // Length bits.
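    // E.g. with a low element of 0x1234567812345678, Index = 8 and Length = 8
    // this extracts the byte 0x56, giving {i64 0x56, i64 undef}.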
    if (CI0) {
      APInt Elt = CI0->getValue();
      Elt.lshrInPlace(Index);
      Elt = Elt.zextOrTrunc(Length);
      return LowConstantHighUndef(Elt.getZExtValue());
    }

    // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
    if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
      Value *Args[] = {Op0, CILength, CIIndex};
      Module *M = II.getModule();
      Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
      return Builder.CreateCall(F, Args);
    }
  }

  // Constant Fold - extraction from zero is always {zero, undef}.
  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);

  return nullptr;
}

/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
/// folding or conversion to a shuffle vector.
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
                                 APInt APLength, APInt APIndex,
                                 InstCombiner::BuilderTy &Builder) {
  // From AMD documentation: "The bit index and field length are each six bits
  // in length; other bits of the field are ignored."
  APIndex = APIndex.zextOrTrunc(6);
  APLength = APLength.zextOrTrunc(6);

  // Attempt to constant fold.
  unsigned Index = APIndex.getZExtValue();

  // From AMD documentation: "a value of zero in the field length is
  // defined as length of 64".
  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  // From AMD documentation: "If the sum of the bit index + length field
  // is greater than 64, the results are undefined".
  unsigned End = Index + Length;

  // Note that both field index and field length are 8-bit quantities.
  // Since variables 'Index' and 'Length' are unsigned values
  // obtained from zero-extending field index and field length
  // respectively, their sum should never wrap around.
  if (End > 64)
    return UndefValue::get(II.getType());

  // If we are inserting whole bytes, we can convert this to a shuffle.
  // Lowering can recognize INSERTQI shuffle masks.
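  // E.g. Length = 16, Index = 8 produces the byte-shuffle mask
  // <0, 16, 17, 3, 4, 5, 6, 7, undef, ...>: two bytes of Op1 are inserted
  // starting at byte 1 of Op0.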
  if ((Length % 8) == 0 && (Index % 8) == 0) {
    // Convert bit indices to byte indices.
    Length /= 8;
    Index /= 8;

    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Type *IntTy32 = Type::getInt32Ty(II.getContext());
    VectorType *ShufTy = VectorType::get(IntTy8, 16);

    SmallVector<Constant *, 16> ShuffleMask;
    for (int i = 0; i != (int)Index; ++i)
      ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
    for (int i = 0; i != (int)Length; ++i)
      ShuffleMask.push_back(
          Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
    for (int i = Index + Length; i != 8; ++i)
      ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
    for (int i = 8; i != 16; ++i)
      ShuffleMask.push_back(UndefValue::get(IntTy32));

    Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
                                            Builder.CreateBitCast(Op1, ShufTy),
                                            ConstantVector::get(ShuffleMask));
    return Builder.CreateBitCast(SV, II.getType());
  }

  // See if we're dealing with constant values.
  Constant *C0 = dyn_cast<Constant>(Op0);
  Constant *C1 = dyn_cast<Constant>(Op1);
  ConstantInt *CI00 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;
  ConstantInt *CI10 =
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
         : nullptr;

  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
  if (CI00 && CI10) {
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
    V00 = V00 & ~Mask;
    V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
    APInt Val = V00 | V10;
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  }

  // If we were an INSERTQ call, we'll save demanded elements if we convert to
  // INSERTQI.
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Constant *CILength = ConstantInt::get(IntTy8, Length, false);
    Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
    Module *M = II.getModule();
    Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
    return Builder.CreateCall(F, Args);
  }

  return nullptr;
}

/// Attempt to convert pshufb* to shufflevector if the mask is constant.
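/// E.g. for a v16i8 mask, a control byte of 0x05 selects byte 5 of the
/// source, while any control byte with its top bit set (e.g. 0x80) selects
/// from the all-zeros vector instead.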
static Value *simplifyX86pshufb(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<VectorType>(II.getType());
  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");

  // Construct a shuffle mask from constant integers or UNDEFs.
  Constant *Indexes[64] = {nullptr};

  // Each byte in the shuffle control mask forms an index to permute the
  // corresponding byte in the destination operand.
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = UndefValue::get(MaskEltTy);
      continue;
    }

    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

    // If the most significant bit (bit[7]) of each byte of the shuffle
    // control mask is set, then zero is written in the result byte.
    // The zero vector is in the right-hand side of the resulting
    // shufflevector.

    // The value of each index for the high 128-bit lane is the least
    // significant 4 bits of the respective shuffle control byte.
    Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
  }

  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
  auto V1 = II.getArgOperand(0);
  auto V2 = Constant::getNullValue(VecTy);
  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}

/// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
                                    InstCombiner::BuilderTy &Builder) {
  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<VectorType>(II.getType());
  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
  unsigned NumElts = VecTy->getVectorNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

  // Construct a shuffle mask from constant integers or UNDEFs.
  Constant *Indexes[16] = {nullptr};

  // The intrinsics only read one or two bits, clear the rest.
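  // E.g. for vpermilvar.ps.256, a mask element of 7 at position 5 (lane 1)
  // becomes generic shuffle index (7 & 3) + 4 == 7, staying within its lane.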
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = UndefValue::get(MaskEltTy);
      continue;
    }

    APInt Index = cast<ConstantInt>(COp)->getValue();
    Index = Index.zextOrTrunc(32).getLoBits(2);

    // The PD variants use bit 1 to select the per-lane element index, so
    // shift down to convert to a generic shuffle mask index.
    if (IsPD)
      Index.lshrInPlace(1);

    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128-bit half. In order to convert to a generic
    // shuffle, we have to make that explicit.
    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
  }

  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
  auto V1 = II.getArgOperand(0);
  auto V2 = UndefValue::get(V1->getType());
  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}

/// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
static Value *simplifyX86vpermv(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<VectorType>(II.getType());
  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
  unsigned Size = VecTy->getNumElements();
  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
         "Unexpected shuffle mask size");

  // Construct a shuffle mask from constant integers or UNDEFs.
  Constant *Indexes[64] = {nullptr};

  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = UndefValue::get(MaskEltTy);
      continue;
    }

    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
    Index &= Size - 1;
    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
  }

  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
  auto V1 = II.getArgOperand(0);
  auto V2 = UndefValue::get(VecTy);
  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}

// TODO, Obvious Missing Transforms:
// * Narrow width by halves excluding zero/undef lanes
Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
  Value *LoadPtr = II.getArgOperand(0);
  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();

  // If the mask is all ones or undefs, this is a plain vector load of the 1st
  // argument.
  if (maskIsAllOneOrUndef(II.getArgOperand(2)))
    return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                     "unmaskedload");

  // If we can unconditionally load from this address, replace with a
  // load/select idiom. TODO: use DT for context sensitive query
  if (isDereferenceableAndAlignedPointer(
          LoadPtr, II.getType(), MaybeAlign(Alignment),
          II.getModule()->getDataLayout(), &II, nullptr)) {
    Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                          "unmaskedload");
    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
  }

  return nullptr;
}

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, this instruction does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // If the mask is all ones, this is a plain vector store of the 1st argument.
  if (ConstMask->isAllOnesValue()) {
    Value *StorePtr = II.getArgOperand(1);
    MaybeAlign Alignment(
        cast<ConstantInt>(II.getArgOperand(2))->getZExtValue());
    return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
  }

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
                                            DemandedElts, UndefElts)) {
    II.setOperand(0, V);
    return &II;
  }

  return nullptr;
}

// TODO, Obvious Missing Transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halves excluding zero/undef lanes
// * Vector splat address w/known mask -> scalar load
// * Vector incrementing address -> vector masked load
Instruction *InstCombiner::simplifyMaskedGather(IntrinsicInst &II) {
  return nullptr;
}

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halves excluding zero/undef lanes
// * Vector splat address w/known mask -> scalar store
// * Vector incrementing address -> vector masked store
Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, a scatter does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
                                            DemandedElts, UndefElts)) {
    II.setOperand(0, V);
    return &II;
  }
  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1),
                                            DemandedElts, UndefElts)) {
    II.setOperand(1, V);
    return &II;
  }

  return nullptr;
}

/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
///    launder(launder(%x)) -> launder(%x) (the result is not the argument)
///    launder(strip(%x)) -> launder(%x)
///    strip(strip(%x)) -> strip(%x) (the result is not the argument)
///    strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
///    launder(launder(%x)) -> launder(%x)  (the result is not the argument)
///    launder(strip(%x))   -> launder(%x)
///    strip(strip(%x))     -> strip(%x)    (the result is not the argument)
///    strip(launder(%x))   -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
                                                    InstCombiner &IC) {
  auto *Arg = II.getArgOperand(0);
  auto *StrippedArg = Arg->stripPointerCasts();
  auto *StrippedInvariantGroupsArg = Arg->stripPointerCastsAndInvariantGroups();
  if (StrippedArg == StrippedInvariantGroupsArg)
    return nullptr; // No launders/strips to remove.

  Value *Result = nullptr;

  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
    Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
    Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
  else
    llvm_unreachable(
        "simplifyInvariantGroupIntrinsic only handles launder and strip");
  if (Result->getType()->getPointerAddressSpace() !=
      II.getType()->getPointerAddressSpace())
    Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
  if (Result->getType() != II.getType())
    Result = IC.Builder.CreateBitCast(Result, II.getType());

  return cast<Instruction>(Result);
}
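// Illustrative IR sketch (not from the original source) of the collapse
// performed above:
//   %a = call i8* @llvm.launder.invariant.group.p0i8(i8* %x)
//   %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
// simplifies to a single
//   %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %x)
// since only the outermost launder/strip is relevant to the most recent
// invariant.group information.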
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
          II.getIntrinsicID() == Intrinsic::ctlz) &&
         "Expected cttz or ctlz intrinsic");
  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
  Value *Op0 = II.getArgOperand(0);
  Value *X;
  // ctlz(bitreverse(x)) -> cttz(x)
  // cttz(bitreverse(x)) -> ctlz(x)
  if (match(Op0, m_BitReverse(m_Value(X)))) {
    Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
    Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
    return CallInst::Create(F, {X, II.getArgOperand(1)});
  }

  if (IsTZ) {
    // cttz(-x) -> cttz(x)
    if (match(Op0, m_Neg(m_Value(X)))) {
      II.setOperand(0, X);
      return &II;
    }

    // cttz(abs(x)) -> cttz(x)
    // cttz(nabs(x)) -> cttz(x)
    Value *Y;
    SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
    if (SPF == SPF_ABS || SPF == SPF_NABS) {
      II.setOperand(0, X);
      return &II;
    }
  }

  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);

  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
                                : Known.countMaxLeadingZeros();
  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
                                : Known.countMinLeadingZeros();

  // If all bits above (ctlz) or below (cttz) the first known one are known
  // zero, this value is constant.
  // FIXME: This should be in InstSimplify because we're replacing an
  // instruction with a constant.
  if (PossibleZeros == DefiniteZeros) {
    auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
    return IC.replaceInstUsesWith(II, C);
  }

  // If the input to cttz/ctlz is known to be non-zero,
  // then change the 'ZeroIsUndef' parameter to 'true'
  // because we know the zero behavior can't affect the result.
  if (!Known.One.isNullValue() ||
      isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
                     &IC.getDominatorTree())) {
    if (!match(II.getArgOperand(1), m_One())) {
      II.setOperand(1, IC.Builder.getTrue());
      return &II;
    }
  }

  // Add range metadata since known bits can't completely reflect what we know.
  // TODO: Handle splat vectors.
  auto *IT = dyn_cast<IntegerType>(Op0->getType());
  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}

static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
         "Expected ctpop intrinsic");
  Value *Op0 = II.getArgOperand(0);
  Value *X;
  // ctpop(bitreverse(x)) -> ctpop(x)
  // ctpop(bswap(x)) -> ctpop(x)
  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X)))) {
    II.setOperand(0, X);
    return &II;
  }

  // FIXME: Try to simplify vectors of integers.
  auto *IT = dyn_cast<IntegerType>(Op0->getType());
  if (!IT)
    return nullptr;

  unsigned BitWidth = IT->getBitWidth();
  KnownBits Known(BitWidth);
  IC.computeKnownBits(Op0, Known, 0, &II);

  unsigned MinCount = Known.countMinPopulation();
  unsigned MaxCount = Known.countMaxPopulation();

  // Add range metadata since known bits can't completely reflect what we know.
  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}
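// Illustrative example (not from the original source) for the two helpers
// above: if known bits prove that the low bit of i32 %x is set, cttz folds
// to the constant 0; if %x is merely known non-zero, only the flag changes:
//   %c = call i32 @llvm.cttz.i32(i32 %x, i1 false)
//     --> %c = call i32 @llvm.cttz.i32(i32 %x, i1 true)
// Failing both, cttz/ctlz and ctpop still attach !range metadata that
// bounds the result using the known-bits analysis.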
// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Constant *ZeroVec = Constant::getNullValue(II.getType());

  // Special case a zero mask since that's not a ConstantDataVector.
  // This masked load instruction creates a zero vector.
  if (isa<ConstantAggregateZero>(Mask))
    return IC.replaceInstUsesWith(II, ZeroVec);

  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
  if (!ConstMask)
    return nullptr;

  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
  // to allow target-independent optimizations.

  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
  // the LLVM intrinsic definition for the pointer argument.
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");

  // Second, convert the x86 XMM integer vector mask to a vector of bools based
  // on each element's most significant bit (the sign bit).
  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);

  // The pass-through vector for an x86 masked load is a zero vector.
  CallInst *NewMaskedLoad =
      IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
  return IC.replaceInstUsesWith(II, NewMaskedLoad);
}
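// Illustrative IR sketch (not from the original source, and assuming the
// AVX maskload signature of this LLVM version):
//   %v = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %p,
//            <4 x i32> <i32 -1, i32 -1, i32 0, i32 0>)
// becomes, after the pointer cast and sign-bit mask conversion above,
//   %v = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(
//            <4 x float>* %pc, i32 1,
//            <4 x i1> <i1 true, i1 true, i1 false, i1 false>,
//            <4 x float> zeroinitializer)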
// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *Vec = II.getOperand(2);

  // Special case a zero mask since that's not a ConstantDataVector:
  // this masked store instruction does nothing.
  if (isa<ConstantAggregateZero>(Mask)) {
    IC.eraseInstFromFunction(II);
    return true;
  }

  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
  // anything else at this level.
  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
    return false;

  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
  if (!ConstMask)
    return false;

  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
  // to allow target-independent optimizations.

  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
  // the LLVM intrinsic definition for the pointer argument.
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");

  // Second, convert the x86 XMM integer vector mask to a vector of bools based
  // on each element's most significant bit (the sign bit).
  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);

  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);

  // 'Replace uses' doesn't work for stores. Erase the original masked store.
  IC.eraseInstFromFunction(II);
  return true;
}

// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
//
// A single NaN input is folded to minnum, so we rely on that folding for
// handling NaNs.
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}
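// Worked example (not from the original source): for
// fmed3AMDGCN(1.0, 3.0, 2.0), Max3 = maxnum(maxnum(1.0, 3.0), 2.0) = 3.0,
// which compares equal to Src1, so the result is
// maxnum(Src0, Src2) = maxnum(1.0, 2.0) = 2.0. Discarding the maximum of
// the three inputs and taking the maximum of the remaining pair is exactly
// the median for the NaN-free inputs handled here.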
/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
  if (!C)
    return nullptr;

  auto *VecTy = cast<VectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();

  // Only perform this transformation for <8 x i8> vector types.
  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
    return nullptr;

  uint32_t Indexes[8];

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = C->getAggregateElement(I);

    if (!COp || !isa<ConstantInt>(COp))
      return nullptr;

    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();

    // Make sure the mask indices are in range.
    if (Indexes[I] >= NumElts)
      return nullptr;
  }

  auto *ShuffleMask = ConstantDataVector::get(II.getContext(),
                                              makeArrayRef(Indexes));
  auto *V1 = II.getArgOperand(0);
  auto *V2 = Constant::getNullValue(V1->getType());
  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}

/// Convert a vector load intrinsic into a simple llvm load instruction.
/// This is beneficial when the underlying object being addressed comes
/// from a constant, since we get constant-folding for free.
static Value *simplifyNeonVld1(const IntrinsicInst &II,
                               unsigned MemAlign,
                               InstCombiner::BuilderTy &Builder) {
  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));

  if (!IntrAlign)
    return nullptr;

  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ?
                       MemAlign : IntrAlign->getLimitedValue();

  if (!isPowerOf2_32(Alignment))
    return nullptr;

  auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
                                          PointerType::get(II.getType(), 0));
  return Builder.CreateAlignedLoad(II.getType(), BCastInst, Alignment);
}
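// Illustrative IR sketch (not from the original source; the ARM variant is
// shown) of the tbl1 fold above with a constant byte-reverse mask:
//   %r = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %t,
//            <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
// becomes
//   %r = shufflevector <8 x i8> %t, <8 x i8> zeroinitializer,
//            <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1,
//                       i32 0>
// and simplifyNeonVld1 similarly turns a NEON vld1 intrinsic with adequate
// alignment into an ordinary aligned load.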
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
                             unsigned NumOperands) {
  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
  for (unsigned i = 0; i < NumOperands; i++)
    if (I.getArgOperand(i) != E.getArgOperand(i))
      return false;
  return true;
}

// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0) ; &I
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
                                      unsigned EndID, InstCombiner &IC) {
  assert(I.getIntrinsicID() == StartID &&
         "Start intrinsic does not have expected ID");
  BasicBlock::iterator BI(I), BE(I.getParent()->end());
  for (++BI; BI != BE; ++BI) {
    if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
      if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
        continue;
      if (E->getIntrinsicID() == EndID &&
          haveSameOperands(I, *E, E->getNumArgOperands())) {
        IC.eraseInstFromFunction(*E);
        IC.eraseInstFromFunction(I);
        return true;
      }
    }
    break;
  }

  return false;
}
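// Illustrative example (not from the original source): this lets
// visitVAStartInst/visitVACopyInst further below delete an adjacent pair
// such as
//   call void @llvm.va_start(i8* %ap)
//   call void @llvm.va_end(i8* %ap)
// when only debug intrinsics (or further start intrinsics) sit between the
// two calls.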
// Convert NVVM intrinsics to target-generic LLVM code where possible.
static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
  // Each NVVM intrinsic we can simplify can be replaced with one of:
  //
  //  * an LLVM intrinsic,
  //  * an LLVM cast operation,
  //  * an LLVM binary operation, or
  //  * ad-hoc LLVM IR for the particular operation.

  // Some transformations are only valid when the module's
  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
  // transformations are valid regardless of the module's ftz setting.
  enum FtzRequirementTy {
    FTZ_Any,       // Any ftz setting is ok.
    FTZ_MustBeOn,  // Transformation is valid only if ftz is on.
    FTZ_MustBeOff, // Transformation is valid only if ftz is off.
  };
  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
  // target-generic intrinsic, cast op, or binary op but that we can
  // nonetheless simplify.
  enum SpecialCase {
    SPC_Reciprocal,
  };

  // SimplifyAction is a poor-man's variant (plus an additional flag) that
  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
  struct SimplifyAction {
    // Invariant: At most one of these Optionals has a value.
    Optional<Intrinsic::ID> IID;
    Optional<Instruction::CastOps> CastOp;
    Optional<Instruction::BinaryOps> BinaryOp;
    Optional<SpecialCase> Special;

    FtzRequirementTy FtzRequirement = FTZ_Any;

    SimplifyAction() = default;

    SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
        : IID(IID), FtzRequirement(FtzReq) {}

    // Cast operations don't have anything to do with FTZ, so we skip that
    // argument.
    SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}

    SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
        : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}

    SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
        : Special(Special), FtzRequirement(FtzReq) {}
  };

  // Try to generate a SimplifyAction describing how to replace our
  // IntrinsicInstr with target-generic LLVM IR.
  const SimplifyAction Action = [II]() -> SimplifyAction {
    switch (II->getIntrinsicID()) {
    // NVVM intrinsics that map directly to LLVM intrinsics.
    case Intrinsic::nvvm_ceil_d:
      return {Intrinsic::ceil, FTZ_Any};
    case Intrinsic::nvvm_ceil_f:
      return {Intrinsic::ceil, FTZ_MustBeOff};
    case Intrinsic::nvvm_ceil_ftz_f:
      return {Intrinsic::ceil, FTZ_MustBeOn};
    case Intrinsic::nvvm_fabs_d:
      return {Intrinsic::fabs, FTZ_Any};
    case Intrinsic::nvvm_fabs_f:
      return {Intrinsic::fabs, FTZ_MustBeOff};
    case Intrinsic::nvvm_fabs_ftz_f:
      return {Intrinsic::fabs, FTZ_MustBeOn};
    case Intrinsic::nvvm_floor_d:
      return {Intrinsic::floor, FTZ_Any};
    case Intrinsic::nvvm_floor_f:
      return {Intrinsic::floor, FTZ_MustBeOff};
    case Intrinsic::nvvm_floor_ftz_f:
      return {Intrinsic::floor, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_d:
      return {Intrinsic::fma, FTZ_Any};
    case Intrinsic::nvvm_fma_rn_f:
      return {Intrinsic::fma, FTZ_MustBeOff};
    case Intrinsic::nvvm_fma_rn_ftz_f:
      return {Intrinsic::fma, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_d:
      return {Intrinsic::maxnum, FTZ_Any};
    case Intrinsic::nvvm_fmax_f:
      return {Intrinsic::maxnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_f:
      return {Intrinsic::maxnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_d:
      return {Intrinsic::minnum, FTZ_Any};
    case Intrinsic::nvvm_fmin_f:
      return {Intrinsic::minnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_f:
      return {Intrinsic::minnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_round_d:
      return {Intrinsic::round, FTZ_Any};
    case Intrinsic::nvvm_round_f:
      return {Intrinsic::round, FTZ_MustBeOff};
    case Intrinsic::nvvm_round_ftz_f:
      return {Intrinsic::round, FTZ_MustBeOn};
    case Intrinsic::nvvm_sqrt_rn_d:
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_f:
      // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
      // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
      // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
      // the versions with explicit ftz-ness.
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_rn_f:
      return {Intrinsic::sqrt, FTZ_MustBeOff};
    case Intrinsic::nvvm_sqrt_rn_ftz_f:
      return {Intrinsic::sqrt, FTZ_MustBeOn};
    case Intrinsic::nvvm_trunc_d:
      return {Intrinsic::trunc, FTZ_Any};
    case Intrinsic::nvvm_trunc_f:
      return {Intrinsic::trunc, FTZ_MustBeOff};
    case Intrinsic::nvvm_trunc_ftz_f:
      return {Intrinsic::trunc, FTZ_MustBeOn};

    // NVVM intrinsics that map to LLVM cast operations.
    //
    // Note that llvm's target-generic conversion operators correspond to the
    // rz (round to zero) versions of the nvvm conversion intrinsics, even
    // though almost everything else here uses the rn (round to nearest even)
    // nvvm ops.
    case Intrinsic::nvvm_d2i_rz:
    case Intrinsic::nvvm_f2i_rz:
    case Intrinsic::nvvm_d2ll_rz:
    case Intrinsic::nvvm_f2ll_rz:
      return {Instruction::FPToSI};
    case Intrinsic::nvvm_d2ui_rz:
    case Intrinsic::nvvm_f2ui_rz:
    case Intrinsic::nvvm_d2ull_rz:
    case Intrinsic::nvvm_f2ull_rz:
      return {Instruction::FPToUI};
    case Intrinsic::nvvm_i2d_rz:
    case Intrinsic::nvvm_i2f_rz:
    case Intrinsic::nvvm_ll2d_rz:
    case Intrinsic::nvvm_ll2f_rz:
      return {Instruction::SIToFP};
    case Intrinsic::nvvm_ui2d_rz:
    case Intrinsic::nvvm_ui2f_rz:
    case Intrinsic::nvvm_ull2d_rz:
    case Intrinsic::nvvm_ull2f_rz:
      return {Instruction::UIToFP};

    // NVVM intrinsics that map to LLVM binary ops.
    case Intrinsic::nvvm_add_rn_d:
      return {Instruction::FAdd, FTZ_Any};
    case Intrinsic::nvvm_add_rn_f:
      return {Instruction::FAdd, FTZ_MustBeOff};
    case Intrinsic::nvvm_add_rn_ftz_f:
      return {Instruction::FAdd, FTZ_MustBeOn};
    case Intrinsic::nvvm_mul_rn_d:
      return {Instruction::FMul, FTZ_Any};
    case Intrinsic::nvvm_mul_rn_f:
      return {Instruction::FMul, FTZ_MustBeOff};
    case Intrinsic::nvvm_mul_rn_ftz_f:
      return {Instruction::FMul, FTZ_MustBeOn};
    case Intrinsic::nvvm_div_rn_d:
      return {Instruction::FDiv, FTZ_Any};
    case Intrinsic::nvvm_div_rn_f:
      return {Instruction::FDiv, FTZ_MustBeOff};
    case Intrinsic::nvvm_div_rn_ftz_f:
      return {Instruction::FDiv, FTZ_MustBeOn};

    // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
    // need special handling.
    //
    // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is
    // just as well.
    case Intrinsic::nvvm_rcp_rn_d:
      return {SPC_Reciprocal, FTZ_Any};
    case Intrinsic::nvvm_rcp_rn_f:
      return {SPC_Reciprocal, FTZ_MustBeOff};
    case Intrinsic::nvvm_rcp_rn_ftz_f:
      return {SPC_Reciprocal, FTZ_MustBeOn};

    // We do not currently simplify intrinsics that give an approximate
    // answer.
    // These include:
    //
    //   - nvvm_cos_approx_{f,ftz_f}
    //   - nvvm_ex2_approx_{d,f,ftz_f}
    //   - nvvm_lg2_approx_{d,f,ftz_f}
    //   - nvvm_sin_approx_{f,ftz_f}
    //   - nvvm_sqrt_approx_{f,ftz_f}
    //   - nvvm_rsqrt_approx_{d,f,ftz_f}
    //   - nvvm_div_approx_{ftz_d,ftz_f,f}
    //   - nvvm_rcp_approx_ftz_d
    //
    // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
    // means that fastmath is enabled in the intrinsic. Unfortunately only
    // binary operators (currently) have a fastmath bit in SelectionDAG, so
    // this information gets lost and we can't select on it.
    //
    // TODO: div and rcp are lowered to a binary op, so we could in theory
    // lower them to "fast fdiv".

    default:
      return {};
    }
  }();

  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
  // can bail out now. (Notice that in the case that IID is not an NVVM
  // intrinsic, we don't have to look up any module metadata, as
  // FtzRequirementTy will be FTZ_Any.)
  if (Action.FtzRequirement != FTZ_Any) {
    bool FtzEnabled =
        II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
        "true";

    if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
      return nullptr;
  }

  // Simplify to target-generic intrinsic.
  if (Action.IID) {
    SmallVector<Value *, 4> Args(II->arg_operands());
    // All the target-generic intrinsics currently of interest to us have one
    // type argument, equal to that of the nvvm intrinsic's argument.
    Type *Tys[] = {II->getArgOperand(0)->getType()};
    return CallInst::Create(
        Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
  }

  // Simplify to target-generic binary op.
  if (Action.BinaryOp)
    return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
                                  II->getArgOperand(1), II->getName());

  // Simplify to target-generic cast op.
  if (Action.CastOp)
    return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
                            II->getName());

  // All that's left are the special cases.
  if (!Action.Special)
    return nullptr;

  switch (*Action.Special) {
  case SPC_Reciprocal:
    // Simplify reciprocal.
    return BinaryOperator::Create(
        Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
        II->getArgOperand(0), II->getName());
  }
  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}
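// Illustrative example (not from the original source): in a function whose
// "nvptx-f32ftz" attribute is "true",
//   %r = call float @llvm.nvvm.sqrt.rn.ftz.f(float %x)
// meets its FTZ_MustBeOn requirement and the Action.IID path rewrites it to
//   %r = call float @llvm.sqrt.f32(float %x)
// while the SPC_Reciprocal case turns an nvvm reciprocal into
//   fdiv double 1.0, %x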
Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
  return nullptr;
}

Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
  return nullptr;
}

static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
  assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap");
  Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
  if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
    Call.setArgOperand(0, Arg1);
    Call.setArgOperand(1, Arg0);
    return &Call;
  }
  return nullptr;
}

Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
  WithOverflowInst *WO = cast<WithOverflowInst>(II);
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
                            WO->getRHS(), *WO, OperationResult, OverflowResult))
    return CreateOverflowTuple(WO, OperationResult, OverflowResult);
  return nullptr;
}

/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
    return replaceInstUsesWith(CI, V);

  if (isFreeCall(&CI, &TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallBase(CI);

  // Intrinsics cannot occur in an invoke or a callbr, so handle them here
  // instead of in visitCallBase.
  if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return eraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations. We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (auto *M = dyn_cast<MemIntrinsic>(MI))
      if (M->isVolatile())
        return nullptr;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getModule();
          Intrinsic::ID MemCpyID =
              isa<AtomicMemMoveInst>(MMI)
                  ? Intrinsic::memcpy_element_unordered_atomic
                  : Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return eraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
      if (Instruction *I = SimplifyAnyMemTransfer(MTI))
        return I;
    } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
      if (Instruction *I = SimplifyAnyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }
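  // Illustrative IR sketch (not from the original source; @gconst is a
  // made-up constant global) of the mem-intrinsic folds above:
  //   call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* @gconst, i64 32,
  //                                         i1 false)
  // cannot alias its destination, so it is rewritten in place into the
  // corresponding @llvm.memcpy call; zero-length transfers and
  // memmove(x, x, size) are erased entirely.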
  // For vector result intrinsics, use the generic demanded vector support.
  if (II->getType()->isVectorTy()) {
    auto VWidth = II->getType()->getVectorNumElements();
    APInt UndefElts(VWidth, 0);
    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
      if (V != II)
        return replaceInstUsesWith(*II, V);
      return II;
    }
  }

  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
    return I;

  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
                                              unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };

  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  default: break;
  case Intrinsic::objectsize:
    if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
      return replaceInstUsesWith(CI, V);
    return nullptr;
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = nullptr;

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getPrimitiveSizeInBits() -
                   IIOperand->getType()->getPrimitiveSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder.CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }
  case Intrinsic::masked_load:
    if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
      return replaceInstUsesWith(CI, SimplifiedMaskedOp);
    break;
  case Intrinsic::masked_store:
    return simplifyMaskedStore(*II);
  case Intrinsic::masked_gather:
    return simplifyMaskedGather(*II);
  case Intrinsic::masked_scatter:
    return simplifyMaskedScatter(*II);
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
      return replaceInstUsesWith(*II, SkippedBarrier);
    break;
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // 0 and 1 are handled in instsimplify.

      // powi(x, -1) -> 1/x
      if (Power->isMinusOne())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
      // powi(x, 2) -> x*x
      if (Power->equalsInt(2))
        return BinaryOperator::CreateFMul(II->getArgOperand(0),
                                          II->getArgOperand(0));
    }
    break;

  case Intrinsic::cttz:
  case Intrinsic::ctlz:
    if (auto *I = foldCttzCtlz(*II, *this))
      return I;
    break;
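  // Illustrative IR sketch (not from the original source) of the bswap fold
  // above:
  //   %t = call i32 @llvm.bswap.i32(i32 %x)
  //   %s = trunc i32 %t to i16
  //   %r = call i16 @llvm.bswap.i16(i16 %s)
  // becomes "lshr i32 %x, 16" followed by a trunc to i16: the two byte swaps
  // cancel, leaving only the shift that selects the kept bytes. Similarly,
  // powi(x, -1) above becomes a single fdiv of 1.0 by x.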
  case Intrinsic::ctpop:
    if (auto *I = foldCtpop(*II, *this))
      return I;
    break;

  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
    Type *Ty = II->getType();
    unsigned BitWidth = Ty->getScalarSizeInBits();
    Constant *ShAmtC;
    if (match(II->getArgOperand(2), m_Constant(ShAmtC)) &&
        !isa<ConstantExpr>(ShAmtC) && !ShAmtC->containsConstantExpression()) {
      // Canonicalize a shift amount constant operand to modulo the bit-width.
      Constant *WidthC = ConstantInt::get(Ty, BitWidth);
      Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
      if (ModuloC != ShAmtC) {
        II->setArgOperand(2, ModuloC);
        return II;
      }
      assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
                 ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
             "Shift amount expected to be modulo bitwidth");

      // Canonicalize funnel shift right by constant to funnel shift left. This
      // is not entirely arbitrary. For historical reasons, the backend may
      // recognize rotate left patterns but miss rotate right patterns.
      if (IID == Intrinsic::fshr) {
        // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
        Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
        Module *Mod = II->getModule();
        Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
        return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
      }
      assert(IID == Intrinsic::fshl &&
             "All funnel shifts by simple constants should go left");

      // fshl(X, 0, C) --> shl X, C
      // fshl(X, undef, C) --> shl X, C
      if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
        return BinaryOperator::CreateShl(Op0, ShAmtC);

      // fshl(0, X, C) --> lshr X, (BW-C)
      // fshl(undef, X, C) --> lshr X, (BW-C)
      if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
        return BinaryOperator::CreateLShr(Op1,
                                          ConstantExpr::getSub(WidthC, ShAmtC));

      // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
      if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
        Module *Mod = II->getModule();
        Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
        return CallInst::Create(Bswap, { Op0 });
      }
    }
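    // Illustrative example (not from the original source) of the constant
    // funnel-shift canonicalizations above, for i32/i16 operands:
    //   call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5)
    //     --> call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 27)
    //   call i32 @llvm.fshl.i32(i32 %a, i32 0, i32 3)
    //     --> shl i32 %a, 3
    //   call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 8)
    //     --> call i16 @llvm.bswap.i16(i16 %a)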
    // Left or right might be masked.
    if (SimplifyDemandedInstructionBits(*II))
      return &CI;

    // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
    // so only the low bits of the shift amount are demanded if the bitwidth
    // is a power of 2.
    if (!isPowerOf2_32(BitWidth))
      break;
    APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
    KnownBits Op2Known(BitWidth);
    if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
      return &CI;
    break;
  }
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow: {
    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
      return I;
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;

    // Given 2 constant operands whose sum does not overflow:
    // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
    // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
    Value *X;
    const APInt *C0, *C1;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    bool IsSigned = IID == Intrinsic::sadd_with_overflow;
    bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
                             : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
    if (HasNWAdd && match(Arg1, m_APInt(C1))) {
      bool Overflow;
      APInt NewC =
          IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
      if (!Overflow)
        return replaceInstUsesWith(
            *II, Builder.CreateBinaryIntrinsic(
                     IID, X, ConstantInt::get(Arg1->getType(), NewC)));
    }
    break;
  }
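  // Illustrative example (not from the original source) of the fold above:
  //   %s = add nuw i32 %x, 5
  //   %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %s, i32 7)
  // merges into
  //   %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 12)
  // which is valid because 5 + 7 does not itself overflow and the inner add
  // was already known not to wrap.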
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
      return I;
    LLVM_FALLTHROUGH;

  case Intrinsic::usub_with_overflow:
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;
    break;

  case Intrinsic::ssub_with_overflow: {
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;

    Constant *C;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    // Given a constant C that is not the minimum signed value
    // for an integer of a given bit width:
    //
    // ssubo X, C -> saddo X, -C
    if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      // Build a saddo call that is equivalent to the discovered
      // ssubo call.
      return replaceInstUsesWith(
          *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
                                             Arg0, NegVal));
    }

    break;
  }

  case Intrinsic::uadd_sat:
  case Intrinsic::sadd_sat:
    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
      return I;
    LLVM_FALLTHROUGH;
  case Intrinsic::usub_sat:
  case Intrinsic::ssub_sat: {
    SaturatingInst *SI = cast<SaturatingInst>(II);
    Type *Ty = SI->getType();
    Value *Arg0 = SI->getLHS();
    Value *Arg1 = SI->getRHS();

    // Make use of known overflow information.
    OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
                                        Arg0, Arg1, SI);
    switch (OR) {
    case OverflowResult::MayOverflow:
      break;
    case OverflowResult::NeverOverflows:
      if (SI->isSigned())
        return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
      else
        return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
    case OverflowResult::AlwaysOverflowsLow: {
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
      return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
    }
    case OverflowResult::AlwaysOverflowsHigh: {
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
      return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
    }
    }

    // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
    Constant *C;
    if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
        C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      return replaceInstUsesWith(
          *II, Builder.CreateBinaryIntrinsic(
                   Intrinsic::sadd_sat, Arg0, NegVal));
    }
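    // Illustrative example (not from the original source): when the
    // overflow query proves an unsigned saturating add can never overflow
    // (say both i8 operands are known to be at most 0x3f),
    //   %r = call i8 @llvm.uadd.sat.i8(i8 %a, i8 %b)
    // becomes "add nuw i8 %a, %b", and the ssub.sat fold above flips a
    // non-minimum constant:
    //   @llvm.ssub.sat.i8(i8 %a, i8 7) --> @llvm.sadd.sat.i8(i8 %a, i8 -7)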
    // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
    // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
    // if Val and Val2 have the same sign
    if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
      Value *X;
      const APInt *Val, *Val2;
      APInt NewVal;
      bool IsUnsigned =
          IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
      if (Other->getIntrinsicID() == IID &&
          match(Arg1, m_APInt(Val)) &&
          match(Other->getArgOperand(0), m_Value(X)) &&
          match(Other->getArgOperand(1), m_APInt(Val2))) {
        if (IsUnsigned)
          NewVal = Val->uadd_sat(*Val2);
        else if (Val->isNonNegative() == Val2->isNonNegative()) {
          bool Overflow;
          NewVal = Val->sadd_ov(*Val2, Overflow);
          if (Overflow) {
            // Both adds together may add more than SignedMaxValue
            // without saturating the final result.
            break;
          }
        } else {
          // Cannot fold saturated addition with different signs.
          break;
        }

        return replaceInstUsesWith(
            *II, Builder.CreateBinaryIntrinsic(
                     IID, X, ConstantInt::get(II->getType(), NewVal)));
      }
    }
    break;
  }

  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum: {
    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
      return I;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
        (Arg0->hasOneUse() || Arg1->hasOneUse())) {
      // If both operands are negated, invert the call and negate the result:
      // min(-X, -Y) --> -(max(X, Y))
      // max(-X, -Y) --> -(min(X, Y))
      Intrinsic::ID NewIID;
      switch (IID) {
      case Intrinsic::maxnum:
        NewIID = Intrinsic::minnum;
        break;
      case Intrinsic::minnum:
        NewIID = Intrinsic::maxnum;
        break;
      case Intrinsic::maximum:
        NewIID = Intrinsic::minimum;
        break;
      case Intrinsic::minimum:
        NewIID = Intrinsic::maximum;
        break;
      default:
        llvm_unreachable("unexpected intrinsic ID");
      }
      Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
      Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall);
      FNeg->copyIRFlags(II);
      return FNeg;
    }

    // m(m(X, C2), C1) -> m(X, C)
    const APFloat *C1, *C2;
    if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
      if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
          ((match(M->getArgOperand(0), m_Value(X)) &&
            match(M->getArgOperand(1), m_APFloat(C2))) ||
           (match(M->getArgOperand(1), m_Value(X)) &&
            match(M->getArgOperand(0), m_APFloat(C2))))) {
        APFloat Res(0.0);
        switch (IID) {
        case Intrinsic::maxnum:
          Res = maxnum(*C1, *C2);
          break;
        case Intrinsic::minnum:
          Res = minnum(*C1, *C2);
          break;
        case Intrinsic::maximum:
          Res = maximum(*C1, *C2);
          break;
        case Intrinsic::minimum:
          Res = minimum(*C1, *C2);
          break;
        default:
          llvm_unreachable("unexpected intrinsic ID");
        }
        Instruction *NewCall = Builder.CreateBinaryIntrinsic(
            IID, X, ConstantFP::get(Arg0->getType(), Res));
        NewCall->copyIRFlags(II);
        return replaceInstUsesWith(*II, NewCall);
      }
    }

    break;
  }
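  // Illustrative example (not from the original source) of the two min/max
  // folds above, for floats:
  //   %m = call float @llvm.maxnum.f32(float %nx, float %ny)
  // with %nx = -%x and %ny = -%y becomes the negation of
  // @llvm.minnum.f32(%x, %y), and nested calls on the same operand fold
  // their constants: maxnum(maxnum(%x, 2.0), 4.0) --> maxnum(%x, 4.0).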
  case Intrinsic::fmuladd: {
    // Canonicalize fast fmuladd to the separate fmul + fadd.
    if (II->isFast()) {
      BuilderTy::FastMathFlagGuard Guard(Builder);
      Builder.setFastMathFlags(II->getFastMathFlags());
      Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
                                      II->getArgOperand(1));
      Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
      Add->takeName(II);
      return replaceInstUsesWith(*II, Add);
    }

    // Try to simplify the underlying FMul.
    if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
                                    II->getFastMathFlags(),
                                    SQ.getWithInstruction(II))) {
      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
      FAdd->copyFastMathFlags(II);
      return FAdd;
    }

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::fma: {
    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
      return I;

    // fma fneg(x), fneg(y), z -> fma x, y, z
    Value *Src0 = II->getArgOperand(0);
    Value *Src1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
      II->setArgOperand(0, X);
      II->setArgOperand(1, Y);
      return II;
    }

    // fma fabs(x), fabs(x), z -> fma x, x, z
    if (match(Src0, m_FAbs(m_Value(X))) &&
        match(Src1, m_FAbs(m_Specific(X)))) {
      II->setArgOperand(0, X);
      II->setArgOperand(1, X);
      return II;
    }

    // Try to simplify the underlying FMul. We can only apply simplifications
    // that do not require rounding.
    if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
                                   II->getFastMathFlags(),
                                   SQ.getWithInstruction(II))) {
      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
      FAdd->copyFastMathFlags(II);
      return FAdd;
    }

    break;
  }
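  // Illustrative example (not from the original source) of the fma operand
  // folds above:
  //   %r = call double @llvm.fma.f64(double %nx, double %ny, double %z)
  // with %nx = -%x and %ny = -%y is rewritten in place to
  // @llvm.fma.f64(%x, %y, %z) because the two negations cancel in the
  // product; the fabs/fabs variant cancels the same way since
  // |x| * |x| == x * x.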
  case Intrinsic::copysign: {
    if (SignBitMustBeZero(II->getArgOperand(1), &TLI)) {
      // If we know that the sign argument is positive, reduce to FABS:
      // copysign X, Pos --> fabs X
      Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
                                                 II->getArgOperand(0), II);
      return replaceInstUsesWith(*II, Fabs);
    }
    // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
    const APFloat *C;
    if (match(II->getArgOperand(1), m_APFloat(C)) && C->isNegative()) {
      // If we know that the sign argument is negative, reduce to FNABS:
      // copysign X, Neg --> fneg (fabs X)
      Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
                                                 II->getArgOperand(0), II);
      return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
    }

    // Propagate sign argument through nested calls:
    // copysign X, (copysign ?, SignArg) --> copysign X, SignArg
    Value *SignArg;
    if (match(II->getArgOperand(1),
              m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(SignArg)))) {
      II->setArgOperand(1, SignArg);
      return II;
    }

    break;
  }
  case Intrinsic::fabs: {
    Value *Cond;
    Constant *LHS, *RHS;
    if (match(II->getArgOperand(0),
              m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
      CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
      CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
      return SelectInst::Create(Cond, Call0, Call1);
    }

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::trunc: {
    Value *ExtSrc;
    if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
      // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
      Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
      return new FPExtInst(NarrowII, II->getType());
    }
    break;
  }
  case Intrinsic::cos:
  case Intrinsic::amdgcn_cos: {
    Value *X;
    Value *Src = II->getArgOperand(0);
    if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
      // cos(-x) -> cos(x)
      // cos(fabs(x)) -> cos(x)
      II->setArgOperand(0, X);
      return II;
    }
    break;
  }
  case Intrinsic::sin: {
    Value *X;
    if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
      // sin(-x) --> -sin(x)
      Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
      Instruction *FNeg = BinaryOperator::CreateFNeg(NewSin);
      FNeg->copyFastMathFlags(II);
      return FNeg;
    }
    break;
  }
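  // Illustrative IR sketch (not from the original source) of the narrowing
  // fold a few cases above: a double rounding of an extended float,
  //   %e = fpext float %x to double
  //   %t = call double @llvm.trunc.f64(double %e)
  // becomes
  //   %t1 = call float @llvm.trunc.f32(float %x)
  //   %t  = fpext float %t1 to double
  // since truncation commutes with the (exact) fpext. The sin case above
  // similarly hoists an fneg out of the call.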
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                         PointerType::getUnqual(II->getType()));
      return new LoadInst(II->getType(), Ptr);
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                       PointerType::getUnqual(II->getType()));
    return new LoadInst(II->getType(), Ptr, Twine(""), false, Align::None());
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
    Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
    return new StoreInst(II->getArgOperand(0), Ptr, false, Align::None());
  }
  case Intrinsic::ppc_qpx_qvlfs:
    // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Type *VTy = VectorType::get(Builder.getFloatTy(),
                                  II->getType()->getVectorNumElements());
      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                         PointerType::getUnqual(VTy));
      Value *Load = Builder.CreateLoad(VTy, Ptr);
      return new FPExtInst(Load, II->getType());
    }
    break;
  case Intrinsic::ppc_qpx_qvlfd:
    // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
                                   &DT) >= 32) {
      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                         PointerType::getUnqual(II->getType()));
      return new LoadInst(II->getType(), Ptr);
    }
    break;
  case Intrinsic::ppc_qpx_qvstfs:
    // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Type *VTy = VectorType::get(Builder.getFloatTy(),
          II->getArgOperand(0)->getType()->getVectorNumElements());
      Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
      Type *OpPtrTy = PointerType::getUnqual(VTy);
      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(TOp, Ptr);
    }
    break;
  case Intrinsic::ppc_qpx_qvstfd:
    // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
                                   &DT) >= 32) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;

  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    // If the RHS is a constant we can try some simplifications.
    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      uint64_t Shift = C->getZExtValue();
      uint64_t Length = (Shift >> 8) & 0xff;
      Shift &= 0xff;
      unsigned BitWidth = II->getType()->getIntegerBitWidth();
      // If the length is 0 or the shift is out of range, replace with zero.
      if (Length == 0 || Shift >= BitWidth)
        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
      // If the LHS is also a constant, we can completely constant fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        if (Length > BitWidth)
          Length = BitWidth;
        Result &= maskTrailingOnes<uint64_t>(Length);
        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
      }
      // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
      // are only masking bits that a shift already cleared?
    }
    break;
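
  // Worked example (commentary added here, not present upstream): the bextr
  // control byte packs the field length in bits 15:8 and the start position
  // in bits 7:0, so bextr(0x12345678, 0x0804) constant-folds to
  // (0x12345678 >> 4) & 0xff = 0x67.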
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    // If the RHS is a constant we can try some simplifications.
    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II->getType()->getIntegerBitWidth();
      if (Index >= BitWidth)
        return replaceInstUsesWith(CI, II->getArgOperand(0));
      if (Index == 0)
        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
      // If the LHS is also a constant, we can completely constant fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);
        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
      }
      // TODO should we convert this to an AND if the RHS is constant?
    }
    break;
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      if (MaskC->isNullValue())
        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
      if (MaskC->isAllOnesValue())
        return replaceInstUsesWith(CI, II->getArgOperand(0));

      if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToSet = 1;

        while (Mask) {
          // Isolate lowest set bit.
          uint64_t BitToTest = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;

          BitToSet <<= 1;
          // Clear lowest set bit.
          Mask &= Mask - 1;
        }

        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
      }
    }
    break;
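
  // Worked example (commentary added here, not present upstream):
  //   pext(src = 0b101101, mask = 0b011010)
  // gathers src bits 1, 3 and 4 (the set mask positions) into the low bits,
  // producing 0b010 = 2 via the constant-folding loop above.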
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      if (MaskC->isNullValue())
        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
      if (MaskC->isAllOnesValue())
        return replaceInstUsesWith(CI, II->getArgOperand(0));

      if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToTest = 1;

        while (Mask) {
          // Isolate lowest set bit.
          uint64_t BitToSet = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;

          BitToTest <<= 1;
          // Clear lowest set bit.
          Mask &= Mask - 1;
        }

        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
      }
    }
    break;

  case Intrinsic::x86_vcvtph2ps_128:
  case Intrinsic::x86_vcvtph2ps_256: {
    auto Arg = II->getArgOperand(0);
    auto ArgType = cast<VectorType>(Arg->getType());
    auto RetType = cast<VectorType>(II->getType());
    unsigned ArgWidth = ArgType->getNumElements();
    unsigned RetWidth = RetType->getNumElements();
    assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
    assert(ArgType->isIntOrIntVectorTy() &&
           ArgType->getScalarSizeInBits() == 16 &&
           "CVTPH2PS input type should be 16-bit integer vector");
    assert(RetType->getScalarType()->isFloatTy() &&
           "CVTPH2PS output type should be 32-bit float vector");

    // Constant folding: Convert to generic half to single conversion.
    if (isa<ConstantAggregateZero>(Arg))
      return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));

    if (isa<ConstantDataVector>(Arg)) {
      auto VectorHalfAsShorts = Arg;
      if (RetWidth < ArgWidth) {
        SmallVector<uint32_t, 8> SubVecMask;
        for (unsigned i = 0; i != RetWidth; ++i)
          SubVecMask.push_back((int)i);
        VectorHalfAsShorts = Builder.CreateShuffleVector(
            Arg, UndefValue::get(ArgType), SubVecMask);
      }

      auto VectorHalfType =
          VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
      auto VectorHalfs =
          Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
      auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
      return replaceInstUsesWith(*II, VectorFloats);
    }

    // We only use the lowest lanes of the argument.
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    Value *Arg = II->getArgOperand(0);
    unsigned VWidth = Arg->getType()->getVectorNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
    if (Value *V = simplifyX86movmsk(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;
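
  // Illustrative fold (commentary added here, not present upstream): movmsk
  // collects the sign bits, so a constant operand such as
  //   <4 x float> <float -1.0, float 1.0, float -2.0, float 3.0>
  // lets simplifyX86movmsk fold the call to 0b0101 = 5.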
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    bool MadeChange = false;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    unsigned VWidth = Arg0->getType()->getVectorNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      II->setArgOperand(0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      II->setArgOperand(1, V);
      MadeChange = true;
    }
    if (MadeChange)
      return II;
    break;
  }
  case Intrinsic::x86_avx512_cmp_pd_128:
  case Intrinsic::x86_avx512_cmp_pd_256:
  case Intrinsic::x86_avx512_cmp_pd_512:
  case Intrinsic::x86_avx512_cmp_ps_128:
  case Intrinsic::x86_avx512_cmp_ps_256:
  case Intrinsic::x86_avx512_cmp_ps_512: {
    // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    bool Arg0IsZero = match(Arg0, m_PosZeroFP());
    if (Arg0IsZero)
      std::swap(Arg0, Arg1);
    Value *A, *B;
    // This fold requires only the NINF (no infinities) flag, since inf minus
    // inf is NaN.
    // NSZ (no signed zeros) is not needed because zeros of any sign compare
    // equal for both compares.
    // NNAN is not needed because NaNs compare the same for both compares.
    // The compare intrinsic relies on the above assumptions and therefore
    // doesn't require additional flags.
    if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
         match(Arg1, m_PosZeroFP()) && isa<Instruction>(Arg0) &&
         cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
      if (Arg0IsZero)
        std::swap(A, B);
      II->setArgOperand(0, A);
      II->setArgOperand(1, B);
      return II;
    }
    break;
  }
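
  // Illustrative fold (commentary added here, not present upstream): given a
  // subtraction known to be inf-free,
  //   %d = fsub ninf <16 x float> %a, %b
  //   %c = cmp_ps(%d, zeroinitializer, ...)
  // the compare is rewritten to operate on %a and %b directly.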
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
    // IR operations.
    if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II->getArgOperand(0);
        Value *Arg1 = II->getArgOperand(1);

        Value *V;
        switch (IID) {
        default: llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = Builder.CreateFDiv(Arg0, Arg1);
          break;
        }

        return replaceInstUsesWith(*II, V);
      }
    }
    break;

  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
    // IR operations.
    if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the element as scalars.
        Value *Arg0 = II->getArgOperand(0);
        Value *Arg1 = II->getArgOperand(1);
        Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
        Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);

        Value *V;
        switch (IID) {
        default: llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = Builder.CreateFDiv(LHS, RHS);
          break;
        }

        // Handle the masking aspect of the intrinsic.
        Value *Mask = II->getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
          // Cast the mask to an i1 vector and then extract the lowest element.
          auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
              cast<IntegerType>(Mask->getType())->getBitWidth());
          Mask = Builder.CreateBitCast(Mask, MaskTy);
          Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
          // Extract the lowest element from the passthru operand.
          Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
                                                         (uint64_t)0);
          V = Builder.CreateSelect(Mask, V, Passthru);
        }

        // Insert the result back into the original argument 0.
        V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);

        return replaceInstUsesWith(*II, V);
      }
    }
    break;

  // Constant fold ashr( <A x Bi>, Ci ).
  // Constant fold lshr( <A x Bi>, Ci ).
  // Constant fold shl( <A x Bi>, Ci ).
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;
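
  // Illustrative fold (commentary added here, not present upstream): an
  // immediate shift such as
  //   %r = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 3)
  // is rewritten by simplifyX86immShift as a plain IR shift:
  //   %r = shl <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>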
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
    if (Value *V = simplifyX86immShift(*II, Builder))
      return replaceInstUsesWith(*II, V);

    // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
    // operand to compute the shift amount.
    Value *Arg1 = II->getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
           "Unexpected packed shift size");
    unsigned VWidth = Arg1->getType()->getVectorNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      II->setArgOperand(1, V);
      return II;
    }
    break;
  }

  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    if (Value *V = simplifyX86varShift(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;

  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
    if (Value *V = simplifyX86pack(*II, Builder, true))
      return replaceInstUsesWith(*II, V);
    break;
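
  // Illustrative semantics (commentary added here, not present upstream):
  // packssdw saturates each signed i32 lane to i16, so constant lanes
  // <i32 70000, i32 -70000, i32 1, i32 -1> pack to
  // <i16 32767, i16 -32768, i16 1, i16 -1>; simplifyX86pack performs such
  // constant folds as well as demanded-element simplifications.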
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
    if (Value *V = simplifyX86pack(*II, Builder, false))
      return replaceInstUsesWith(*II, V);
    break;

  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II->getArgOperand(0);
      Value *Arg1 = II->getArgOperand(1);
      unsigned VWidth = Arg0->getType()->getVectorNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 = APInt::getSplat(VWidth,
                                            APInt(2, (Imm & 0x01) ? 2 : 1));
      if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
                                                UndefElts1)) {
        II->setArgOperand(0, V);
        MadeChange = true;
      }

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 = APInt::getSplat(VWidth,
                                            APInt(2, (Imm & 0x10) ? 2 : 1));
      if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
                                                UndefElts2)) {
        II->setArgOperand(1, V);
        MadeChange = true;
      }

      // If the demanded elements of either input are undef, the result is
      // zero.
      if (DemandedElts1.isSubsetOf(UndefElts1) ||
          DemandedElts2.isSubsetOf(UndefElts2))
        return replaceInstUsesWith(*II,
                                   ConstantAggregateZero::get(II->getType()));

      if (MadeChange)
        return II;
    }
    break;
  }

  case Intrinsic::x86_sse41_insertps:
    if (Value *V = simplifyX86insertps(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;
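
  // Illustrative fold (commentary added here, not present upstream): insertps
  // with a constant control byte can become a shufflevector; copying lane 0
  // of %b into lane 2 of %a is
  //   shufflevector <4 x float> %a, <4 x float> %b,
  //                 <4 x i32> <i32 0, i32 1, i32 4, i32 3>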
  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II->getArgOperand(0);
    Value *Op1 = II->getArgOperand(1);
    unsigned VWidth0 = Op0->getType()->getVectorNumElements();
    unsigned VWidth1 = Op1->getType()->getVectorNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    Constant *C1 = dyn_cast<Constant>(Op1);
    ConstantInt *CILength =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
           : nullptr;
    ConstantInt *CIIndex =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
    if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
      return replaceInstUsesWith(*II, V);

    // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
    // operand and the lowest 16-bits of the second.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      II->setArgOperand(0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      II->setArgOperand(1, V);
      MadeChange = true;
    }
    if (MadeChange)
      return II;
    break;
  }

  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
    // bits of the lower 64-bits. The upper 64-bits are undefined.
    Value *Op0 = II->getArgOperand(0);
    unsigned VWidth = Op0->getType()->getVectorNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
    ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));

    // Attempt to simplify to a constant or shuffle vector.
    if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
      return replaceInstUsesWith(*II, V);

    // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
    // operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II->getArgOperand(0);
    Value *Op1 = II->getArgOperand(1);
    unsigned VWidth = Op0->getType()->getVectorNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           Op1->getType()->getVectorNumElements() == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    Constant *C1 = dyn_cast<Constant>(Op1);
    ConstantInt *CI11 =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
    if (CI11) {
      const APInt &V11 = CI11->getValue();
      APInt Len = V11.zextOrTrunc(6);
      APInt Idx = V11.lshr(8).zextOrTrunc(6);
      if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
        return replaceInstUsesWith(*II, V);
    }

    // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
    // operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: Extract lowest Length bits from lower half of second source
    // and insert over first source starting at Index bit. The upper 64-bits
    // are undefined.
    Value *Op0 = II->getArgOperand(0);
    Value *Op1 = II->getArgOperand(1);
    unsigned VWidth0 = Op0->getType()->getVectorNumElements();
    unsigned VWidth1 = Op1->getType()->getVectorNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
    ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));

    // Attempt to simplify to a constant or shuffle vector.
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
      if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
        return replaceInstUsesWith(*II, V);
    }

    // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
    // operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      II->setArgOperand(0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      II->setArgOperand(1, V);
      MadeChange = true;
    }
    if (MadeChange)
      return II;
    break;
  }
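
  // Worked example (commentary added here, not present upstream): insertqi
  // with Length = 16 and Index = 8 writes bits 15:0 of the second source over
  // bits 23:8 of the first source's low 64 bits; with constant inputs
  // simplifyX86insertq can express the result as a constant or shufflevector.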
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // fold (blend A, A, Mask) -> A
    Value *Op0 = II->getArgOperand(0);
    Value *Op1 = II->getArgOperand(1);
    Value *Mask = II->getArgOperand(2);
    if (Op0 == Op1)
      return replaceInstUsesWith(CI, Op0);

    // Zero Mask - select 1st argument.
    if (isa<ConstantAggregateZero>(Mask))
      return replaceInstUsesWith(CI, Op0);

    // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
      Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
      return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
    }

    // Convert to a vector select if we can bypass casts and find a boolean
    // vector condition value.
    Value *BoolVec;
    Mask = peekThroughBitcast(Mask);
    if (match(Mask, m_SExt(m_Value(BoolVec))) &&
        BoolVec->getType()->isVectorTy() &&
        BoolVec->getType()->getScalarSizeInBits() == 1) {
      assert(Mask->getType()->getPrimitiveSizeInBits() ==
             II->getType()->getPrimitiveSizeInBits() &&
             "Not expecting mask and operands with different sizes");

      unsigned NumMaskElts = Mask->getType()->getVectorNumElements();
      unsigned NumOperandElts = II->getType()->getVectorNumElements();
      if (NumMaskElts == NumOperandElts)
        return SelectInst::Create(BoolVec, Op1, Op0);

      // If the mask has fewer elements than the operands, each mask bit maps
      // to multiple elements of the operands. Bitcast back and forth.
      if (NumMaskElts < NumOperandElts) {
        Value *CastOp0 = Builder.CreateBitCast(Op0, Mask->getType());
        Value *CastOp1 = Builder.CreateBitCast(Op1, Mask->getType());
        Value *Sel = Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
        return new BitCastInst(Sel, II->getType());
      }
    }

    break;
  }

  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
    if (Value *V = simplifyX86pshufb(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
    if (Value *V = simplifyX86vpermilvar(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;

  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
    if (Value *V = simplifyX86vpermv(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;
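
  // Illustrative fold (commentary added here, not present upstream): a pshufb
  // whose byte mask is constant becomes one IR shufflevector of the source
  // against a zero vector, since mask bytes with the high bit set select a
  // zero lane and the others select the byte indexed by mask[3:0].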
  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:
    if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
      return I;
    break;

  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:
    if (simplifyX86MaskedStore(*II, *this))
      return nullptr;
    break;

  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:
    if (Value *V = simplifyX86addcarry(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vectorshuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
      assert(Mask->getType()->getVectorNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
                                           Mask->getType());
        Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
                                           Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
              cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.
          if (DL.isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] =
                Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
                                             Builder.getInt32(Idx&15));
          }

          // Insert this value into the result vector.
          Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
                                               Builder.getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;

  case Intrinsic::arm_neon_vld1: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0),
                                          DL, II, &AC, &DT);
    if (Value *V = simplifyNeonVld1(*II, MemAlign, Builder))
      return replaceInstUsesWith(*II, V);
    break;
  }

  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign =
        getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }

  case Intrinsic::arm_neon_vtbl1:
  case Intrinsic::aarch64_neon_tbl1:
    if (Value *V = simplifyNeonTbl1(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;
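
  // Illustrative fold (commentary added here, not present upstream): a tbl1
  // call whose index vector is a constant with all indices in range can be
  // rewritten by simplifyNeonTbl1 as an IR shufflevector of the table operand.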
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
                 IID == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
        CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);

        return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }
  case Intrinsic::arm_neon_aesd:
  case Intrinsic::arm_neon_aese:
  case Intrinsic::aarch64_crypto_aesd:
  case Intrinsic::aarch64_crypto_aese: {
    Value *DataArg = II->getArgOperand(0);
    Value *KeyArg = II->getArgOperand(1);

    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR.
    Value *Data, *Key;
    if (match(KeyArg, m_ZeroInt()) &&
        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
      II->setArgOperand(0, Data);
      II->setArgOperand(1, Key);
      return II;
    }
    break;
  }
  case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II->getArgOperand(0);
    Value *ArgArg;
    if (match(Arg, m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(m_Value(ArgArg))) &&
        II->getType() == ArgArg->getType())
      return replaceInstUsesWith(*II, ArgArg);
    Constant *XorMask;
    if (match(Arg,
              m_Xor(m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(m_Value(ArgArg)),
                    m_Constant(XorMask))) &&
        II->getType() == ArgArg->getType()) {
      if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
        if (CI->getValue().trunc(16).isAllOnesValue()) {
          auto TrueVector = Builder.CreateVectorSplat(
              II->getType()->getVectorNumElements(), Builder.getTrue());
          return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
        }
      }
    }
    KnownBits ScalarKnown(32);
    if (SimplifyDemandedBits(II, 0, APInt::getLowBitsSet(32, 16),
                             ScalarKnown, 0))
      return II;
    break;
  }
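
  // Illustrative fold (commentary added here, not present upstream): the MVE
  // predicate conversions cancel pairwise, e.g.
  //   %i  = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %p)
  //   %p2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)
  // allows %p2 to be replaced by %p.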
  case Intrinsic::arm_mve_pred_v2i: {
    Value *Arg = II->getArgOperand(0);
    Value *ArgArg;
    if (match(Arg, m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(m_Value(ArgArg))))
      return replaceInstUsesWith(*II, ArgArg);
    if (!II->getMetadata(LLVMContext::MD_range)) {
      Type *IntTy32 = Type::getInt32Ty(II->getContext());
      Metadata *M[] = {
          ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
          ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))
      };
      II->setMetadata(LLVMContext::MD_range, MDNode::get(II->getContext(), M));
      return II;
    }
    break;
  }
  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
    unsigned CarryOp =
        (II->getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
    assert(II->getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
           "Bad type for intrinsic!");

    KnownBits CarryKnown(32);
    if (SimplifyDemandedBits(II, CarryOp, APInt::getOneBitSet(32, 29),
                             CarryKnown))
      return II;
    break;
  }
  case Intrinsic::amdgcn_rcp: {
    Value *Src = II->getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src))
      return replaceInstUsesWith(CI, Src);

    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1);
      APFloat::opStatus Status = Val.divide(ArgVal,
                                            APFloat::rmNearestTiesToEven);
      // Only do this if it was exact and therefore not dependent on the
      // rounding mode.
      if (Status == APFloat::opOK)
        return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
    }

    break;
  }
  case Intrinsic::amdgcn_rsq: {
    Value *Src = II->getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src))
      return replaceInstUsesWith(CI, Src);
    break;
  }
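
  // Worked example (commentary added here, not present upstream): amdgcn.rcp
  // of the constant 2.0 folds to 0.5 because 1.0/2.0 is exact, while 1.0/3.0
  // is inexact under round-to-nearest and is therefore left alone.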
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp: {
    Value *Src = II->getArgOperand(0);
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      int Exp;
      APFloat Significand = frexp(C->getValueAPF(), Exp,
                                  APFloat::rmNearestTiesToEven);

      if (IID == Intrinsic::amdgcn_frexp_mant) {
        return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
                                                       Significand));
      }

      // Match instruction special case behavior.
      if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
        Exp = 0;

      return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
    }

    if (isa<UndefValue>(Src))
      return replaceInstUsesWith(CI, UndefValue::get(II->getType()));

    break;
  }
  case Intrinsic::amdgcn_class: {
    enum {
      S_NAN = 1 << 0,        // Signaling NaN
      Q_NAN = 1 << 1,        // Quiet NaN
      N_INFINITY = 1 << 2,   // Negative infinity
      N_NORMAL = 1 << 3,     // Negative normal
      N_SUBNORMAL = 1 << 4,  // Negative subnormal
      N_ZERO = 1 << 5,       // Negative zero
      P_ZERO = 1 << 6,       // Positive zero
      P_SUBNORMAL = 1 << 7,  // Positive subnormal
      P_NORMAL = 1 << 8,     // Positive normal
      P_INFINITY = 1 << 9    // Positive infinity
    };

    const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
        N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;

    Value *Src0 = II->getArgOperand(0);
    Value *Src1 = II->getArgOperand(1);
    const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (!CMask) {
      if (isa<UndefValue>(Src0))
        return replaceInstUsesWith(*II, UndefValue::get(II->getType()));

      if (isa<UndefValue>(Src1))
        return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
      break;
    }

    uint32_t Mask = CMask->getZExtValue();

    // If all tests are made, it doesn't matter what the value is.
    if ((Mask & FullMask) == FullMask)
      return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));

    if ((Mask & FullMask) == 0)
      return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));

    if (Mask == (S_NAN | Q_NAN)) {
      // Equivalent of isnan. Replace with standard fcmp.
      Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
      FCmp->takeName(II);
      return replaceInstUsesWith(*II, FCmp);
    }
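
    // Illustrative fold (commentary added here, not present upstream): with
    // Mask == (S_NAN | Q_NAN) == 3,
    //   %b = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
    // becomes the ordinary unordered compare
    //   %b = fcmp uno float %x, %x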
3558344779Sdim Value *FCmp = Builder.CreateFCmpOEQ( 3559344779Sdim Src0, ConstantFP::get(Src0->getType(), 0.0)); 3560344779Sdim 3561344779Sdim FCmp->takeName(II); 3562344779Sdim return replaceInstUsesWith(*II, FCmp); 3563344779Sdim } 3564344779Sdim 3565344779Sdim // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other 3566344779Sdim if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI)) { 3567344779Sdim II->setArgOperand(1, ConstantInt::get(Src1->getType(), 3568344779Sdim Mask & ~(S_NAN | Q_NAN))); 3569344779Sdim return II; 3570344779Sdim } 3571344779Sdim 3572314564Sdim const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0); 3573314564Sdim if (!CVal) { 3574314564Sdim if (isa<UndefValue>(Src0)) 3575314564Sdim return replaceInstUsesWith(*II, UndefValue::get(II->getType())); 3576314564Sdim 3577314564Sdim // Clamp mask to used bits 3578314564Sdim if ((Mask & FullMask) != Mask) { 3579321369Sdim CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(), 3580314564Sdim { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) } 3581314564Sdim ); 3582314564Sdim 3583314564Sdim NewCall->takeName(II); 3584314564Sdim return replaceInstUsesWith(*II, NewCall); 3585314564Sdim } 3586314564Sdim 3587314564Sdim break; 3588314564Sdim } 3589314564Sdim 3590314564Sdim const APFloat &Val = CVal->getValueAPF(); 3591314564Sdim 3592314564Sdim bool Result = 3593314564Sdim ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) || 3594314564Sdim ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) || 3595314564Sdim ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) || 3596314564Sdim ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) || 3597314564Sdim ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) || 3598314564Sdim ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) || 3599314564Sdim ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) || 3600314564Sdim ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) || 3601314564Sdim ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) || 3602314564Sdim ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative()); 3603314564Sdim 3604314564Sdim return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result)); 3605314564Sdim } 3606321369Sdim case Intrinsic::amdgcn_cvt_pkrtz: { 3607321369Sdim Value *Src0 = II->getArgOperand(0); 3608321369Sdim Value *Src1 = II->getArgOperand(1); 3609321369Sdim if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) { 3610321369Sdim if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) { 3611321369Sdim const fltSemantics &HalfSem 3612321369Sdim = II->getType()->getScalarType()->getFltSemantics(); 3613321369Sdim bool LosesInfo; 3614321369Sdim APFloat Val0 = C0->getValueAPF(); 3615321369Sdim APFloat Val1 = C1->getValueAPF(); 3616321369Sdim Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); 3617321369Sdim Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); 3618321369Sdim 3619321369Sdim Constant *Folded = ConstantVector::get({ 3620321369Sdim ConstantFP::get(II->getContext(), Val0), 3621321369Sdim ConstantFP::get(II->getContext(), Val1) }); 3622321369Sdim return replaceInstUsesWith(*II, Folded); 3623321369Sdim } 3624321369Sdim } 3625321369Sdim 3626321369Sdim if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) 3627321369Sdim return replaceInstUsesWith(*II, UndefValue::get(II->getType())); 3628321369Sdim 3629321369Sdim break; 3630321369Sdim } 3631329410Sdim case Intrinsic::amdgcn_cvt_pknorm_i16: 3632329410Sdim case Intrinsic::amdgcn_cvt_pknorm_u16: 
3633329410Sdim case Intrinsic::amdgcn_cvt_pk_i16:
3634329410Sdim case Intrinsic::amdgcn_cvt_pk_u16: {
3635329410Sdim Value *Src0 = II->getArgOperand(0);
3636329410Sdim Value *Src1 = II->getArgOperand(1);
3637329410Sdim
3638329410Sdim if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3639329410Sdim return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3640329410Sdim
3641329410Sdim break;
3642329410Sdim }
3643321369Sdim case Intrinsic::amdgcn_ubfe:
3644321369Sdim case Intrinsic::amdgcn_sbfe: {
3645321369Sdim // Decompose simple cases into standard shifts.
3646321369Sdim Value *Src = II->getArgOperand(0);
3647321369Sdim if (isa<UndefValue>(Src))
3648321369Sdim return replaceInstUsesWith(*II, Src);
3649321369Sdim
3650321369Sdim unsigned Width;
3651321369Sdim Type *Ty = II->getType();
3652321369Sdim unsigned IntSize = Ty->getIntegerBitWidth();
3653321369Sdim
3654321369Sdim ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3655321369Sdim if (CWidth) {
3656321369Sdim Width = CWidth->getZExtValue();
3657321369Sdim if ((Width & (IntSize - 1)) == 0)
3658321369Sdim return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3659321369Sdim
3660321369Sdim if (Width >= IntSize) {
3661321369Sdim // Hardware ignores high bits, so remove those.
3662321369Sdim II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3663321369Sdim Width & (IntSize - 1)));
3664321369Sdim return II;
3665321369Sdim }
3666321369Sdim }
3667321369Sdim
3668321369Sdim unsigned Offset;
3669321369Sdim ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3670321369Sdim if (COffset) {
3671321369Sdim Offset = COffset->getZExtValue();
3672321369Sdim if (Offset >= IntSize) {
3673321369Sdim II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3674321369Sdim Offset & (IntSize - 1)));
3675321369Sdim return II;
3676321369Sdim }
3677321369Sdim }
3678321369Sdim
3679353358Sdim bool Signed = IID == Intrinsic::amdgcn_sbfe;
3680321369Sdim
3681321369Sdim if (!CWidth || !COffset)
3682321369Sdim break;
3683321369Sdim
3684344779Sdim // The case of Width == 0 is handled above, which makes this transformation
3685344779Sdim // safe. If Width == 0, then the ashr and lshr instructions become poison
3686344779Sdim // values since the shift amount would be equal to the bit size.
3687344779Sdim assert(Width != 0);
3688344779Sdim
3689321369Sdim // TODO: This allows folding to undef when the hardware has specific
3690321369Sdim // behavior?
3691321369Sdim if (Offset + Width < IntSize) {
3692321369Sdim Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3693321369Sdim Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3694321369Sdim : Builder.CreateLShr(Shl, IntSize - Width);
3695321369Sdim RightShift->takeName(II);
3696321369Sdim return replaceInstUsesWith(*II, RightShift);
3697321369Sdim }
3698321369Sdim
3699321369Sdim Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3700321369Sdim : Builder.CreateLShr(Src, Offset);
3701321369Sdim
3702321369Sdim RightShift->takeName(II);
3703321369Sdim return replaceInstUsesWith(*II, RightShift);
3704321369Sdim }
3705321369Sdim case Intrinsic::amdgcn_exp:
3706321369Sdim case Intrinsic::amdgcn_exp_compr: {
3707353358Sdim ConstantInt *En = cast<ConstantInt>(II->getArgOperand(1));
3708321369Sdim unsigned EnBits = En->getZExtValue();
3709321369Sdim if (EnBits == 0xf)
3710321369Sdim break; // All inputs enabled.
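  // A sketch of the fold below, assuming the non-compressed argument layout
  // (tgt, en, src0..src3, done, vm): with en = 0x1 only src0 is read, so in
  //   call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %a, float %b,
  //                                  float %c, float %d, i1 true, i1 false)
  // the unread operands %b, %c and %d can be replaced by undef.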
3711321369Sdim 3712353358Sdim bool IsCompr = IID == Intrinsic::amdgcn_exp_compr; 3713321369Sdim bool Changed = false; 3714321369Sdim for (int I = 0; I < (IsCompr ? 2 : 4); ++I) { 3715321369Sdim if ((!IsCompr && (EnBits & (1 << I)) == 0) || 3716321369Sdim (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) { 3717321369Sdim Value *Src = II->getArgOperand(I + 2); 3718321369Sdim if (!isa<UndefValue>(Src)) { 3719321369Sdim II->setArgOperand(I + 2, UndefValue::get(Src->getType())); 3720321369Sdim Changed = true; 3721321369Sdim } 3722321369Sdim } 3723321369Sdim } 3724321369Sdim 3725321369Sdim if (Changed) 3726321369Sdim return II; 3727321369Sdim 3728321369Sdim break; 3729321369Sdim } 3730321369Sdim case Intrinsic::amdgcn_fmed3: { 3731321369Sdim // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled 3732321369Sdim // for the shader. 3733321369Sdim 3734321369Sdim Value *Src0 = II->getArgOperand(0); 3735321369Sdim Value *Src1 = II->getArgOperand(1); 3736321369Sdim Value *Src2 = II->getArgOperand(2); 3737321369Sdim 3738341825Sdim // Checking for NaN before canonicalization provides better fidelity when 3739341825Sdim // mapping other operations onto fmed3 since the order of operands is 3740341825Sdim // unchanged. 3741341825Sdim CallInst *NewCall = nullptr; 3742341825Sdim if (match(Src0, m_NaN()) || isa<UndefValue>(Src0)) { 3743341825Sdim NewCall = Builder.CreateMinNum(Src1, Src2); 3744341825Sdim } else if (match(Src1, m_NaN()) || isa<UndefValue>(Src1)) { 3745341825Sdim NewCall = Builder.CreateMinNum(Src0, Src2); 3746341825Sdim } else if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) { 3747341825Sdim NewCall = Builder.CreateMaxNum(Src0, Src1); 3748341825Sdim } 3749341825Sdim 3750341825Sdim if (NewCall) { 3751341825Sdim NewCall->copyFastMathFlags(II); 3752341825Sdim NewCall->takeName(II); 3753341825Sdim return replaceInstUsesWith(*II, NewCall); 3754341825Sdim } 3755341825Sdim 3756321369Sdim bool Swap = false; 3757321369Sdim // Canonicalize constants to RHS operands. 
3758321369Sdim // 3759321369Sdim // fmed3(c0, x, c1) -> fmed3(x, c0, c1) 3760321369Sdim if (isa<Constant>(Src0) && !isa<Constant>(Src1)) { 3761321369Sdim std::swap(Src0, Src1); 3762321369Sdim Swap = true; 3763321369Sdim } 3764321369Sdim 3765321369Sdim if (isa<Constant>(Src1) && !isa<Constant>(Src2)) { 3766321369Sdim std::swap(Src1, Src2); 3767321369Sdim Swap = true; 3768321369Sdim } 3769321369Sdim 3770321369Sdim if (isa<Constant>(Src0) && !isa<Constant>(Src1)) { 3771321369Sdim std::swap(Src0, Src1); 3772321369Sdim Swap = true; 3773321369Sdim } 3774321369Sdim 3775321369Sdim if (Swap) { 3776321369Sdim II->setArgOperand(0, Src0); 3777321369Sdim II->setArgOperand(1, Src1); 3778321369Sdim II->setArgOperand(2, Src2); 3779321369Sdim return II; 3780321369Sdim } 3781321369Sdim 3782321369Sdim if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) { 3783321369Sdim if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) { 3784321369Sdim if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) { 3785321369Sdim APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), 3786321369Sdim C2->getValueAPF()); 3787321369Sdim return replaceInstUsesWith(*II, 3788321369Sdim ConstantFP::get(Builder.getContext(), Result)); 3789321369Sdim } 3790321369Sdim } 3791321369Sdim } 3792321369Sdim 3793321369Sdim break; 3794321369Sdim } 3795321369Sdim case Intrinsic::amdgcn_icmp: 3796321369Sdim case Intrinsic::amdgcn_fcmp: { 3797353358Sdim const ConstantInt *CC = cast<ConstantInt>(II->getArgOperand(2)); 3798321369Sdim // Guard against invalid arguments. 3799321369Sdim int64_t CCVal = CC->getZExtValue(); 3800353358Sdim bool IsInteger = IID == Intrinsic::amdgcn_icmp; 3801321369Sdim if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE || 3802321369Sdim CCVal > CmpInst::LAST_ICMP_PREDICATE)) || 3803321369Sdim (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE || 3804321369Sdim CCVal > CmpInst::LAST_FCMP_PREDICATE))) 3805321369Sdim break; 3806321369Sdim 3807321369Sdim Value *Src0 = II->getArgOperand(0); 3808321369Sdim Value *Src1 = II->getArgOperand(1); 3809321369Sdim 3810321369Sdim if (auto *CSrc0 = dyn_cast<Constant>(Src0)) { 3811321369Sdim if (auto *CSrc1 = dyn_cast<Constant>(Src1)) { 3812321369Sdim Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); 3813321369Sdim if (CCmp->isNullValue()) { 3814321369Sdim return replaceInstUsesWith( 3815321369Sdim *II, ConstantExpr::getSExt(CCmp, II->getType())); 3816321369Sdim } 3817321369Sdim 3818321369Sdim // The result of V_ICMP/V_FCMP assembly instructions (which this 3819321369Sdim // intrinsic exposes) is one bit per thread, masked with the EXEC 3820321369Sdim // register (which contains the bitmask of live threads). So a 3821321369Sdim // comparison that always returns true is the same as a read of the 3822321369Sdim // EXEC register. 3823353358Sdim Function *NewF = Intrinsic::getDeclaration( 3824321369Sdim II->getModule(), Intrinsic::read_register, II->getType()); 3825321369Sdim Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; 3826321369Sdim MDNode *MD = MDNode::get(II->getContext(), MDArgs); 3827321369Sdim Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; 3828321369Sdim CallInst *NewCall = Builder.CreateCall(NewF, Args); 3829321369Sdim NewCall->addAttribute(AttributeList::FunctionIndex, 3830321369Sdim Attribute::Convergent); 3831321369Sdim NewCall->takeName(II); 3832321369Sdim return replaceInstUsesWith(*II, NewCall); 3833321369Sdim } 3834321369Sdim 3835321369Sdim // Canonicalize constants to RHS. 
3836321369Sdim CmpInst::Predicate SwapPred 3837321369Sdim = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal)); 3838321369Sdim II->setArgOperand(0, Src1); 3839321369Sdim II->setArgOperand(1, Src0); 3840321369Sdim II->setArgOperand(2, ConstantInt::get(CC->getType(), 3841321369Sdim static_cast<int>(SwapPred))); 3842321369Sdim return II; 3843321369Sdim } 3844321369Sdim 3845321369Sdim if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE) 3846321369Sdim break; 3847321369Sdim 3848321369Sdim // Canonicalize compare eq with true value to compare != 0 3849321369Sdim // llvm.amdgcn.icmp(zext (i1 x), 1, eq) 3850321369Sdim // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne) 3851321369Sdim // llvm.amdgcn.icmp(sext (i1 x), -1, eq) 3852321369Sdim // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne) 3853321369Sdim Value *ExtSrc; 3854321369Sdim if (CCVal == CmpInst::ICMP_EQ && 3855321369Sdim ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) || 3856321369Sdim (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) && 3857321369Sdim ExtSrc->getType()->isIntegerTy(1)) { 3858321369Sdim II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType())); 3859321369Sdim II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE)); 3860321369Sdim return II; 3861321369Sdim } 3862321369Sdim 3863321369Sdim CmpInst::Predicate SrcPred; 3864321369Sdim Value *SrcLHS; 3865321369Sdim Value *SrcRHS; 3866321369Sdim 3867321369Sdim // Fold compare eq/ne with 0 from a compare result as the predicate to the 3868321369Sdim // intrinsic. The typical use is a wave vote function in the library, which 3869321369Sdim // will be fed from a user code condition compared with 0. Fold in the 3870321369Sdim // redundant compare. 3871321369Sdim 3872321369Sdim // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne) 3873321369Sdim // -> llvm.amdgcn.[if]cmp(a, b, pred) 3874321369Sdim // 3875321369Sdim // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq) 3876321369Sdim // -> llvm.amdgcn.[if]cmp(a, b, inv pred) 3877321369Sdim if (match(Src1, m_Zero()) && 3878321369Sdim match(Src0, 3879321369Sdim m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) { 3880321369Sdim if (CCVal == CmpInst::ICMP_EQ) 3881321369Sdim SrcPred = CmpInst::getInversePredicate(SrcPred); 3882321369Sdim 3883321369Sdim Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ? 3884321369Sdim Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp; 3885321369Sdim 3886344779Sdim Type *Ty = SrcLHS->getType(); 3887344779Sdim if (auto *CmpType = dyn_cast<IntegerType>(Ty)) { 3888344779Sdim // Promote to next legal integer type. 3889344779Sdim unsigned Width = CmpType->getBitWidth(); 3890344779Sdim unsigned NewWidth = Width; 3891344779Sdim 3892344779Sdim // Don't do anything for i1 comparisons. 3893344779Sdim if (Width == 1) 3894344779Sdim break; 3895344779Sdim 3896344779Sdim if (Width <= 16) 3897344779Sdim NewWidth = 16; 3898344779Sdim else if (Width <= 32) 3899344779Sdim NewWidth = 32; 3900344779Sdim else if (Width <= 64) 3901344779Sdim NewWidth = 64; 3902344779Sdim else if (Width > 64) 3903344779Sdim break; // Can't handle this. 
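  // E.g. (hypothetical types): an i8 compare feeding llvm.amdgcn.icmp gets
  // widened below to the next legal width, i16; signed predicates widen the
  // operands with sext, all others with zext.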
3904344779Sdim 3905344779Sdim if (Width != NewWidth) { 3906344779Sdim IntegerType *CmpTy = Builder.getIntNTy(NewWidth); 3907344779Sdim if (CmpInst::isSigned(SrcPred)) { 3908344779Sdim SrcLHS = Builder.CreateSExt(SrcLHS, CmpTy); 3909344779Sdim SrcRHS = Builder.CreateSExt(SrcRHS, CmpTy); 3910344779Sdim } else { 3911344779Sdim SrcLHS = Builder.CreateZExt(SrcLHS, CmpTy); 3912344779Sdim SrcRHS = Builder.CreateZExt(SrcRHS, CmpTy); 3913344779Sdim } 3914344779Sdim } 3915344779Sdim } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy()) 3916344779Sdim break; 3917344779Sdim 3918353358Sdim Function *NewF = 3919353358Sdim Intrinsic::getDeclaration(II->getModule(), NewIID, 3920353358Sdim { II->getType(), 3921353358Sdim SrcLHS->getType() }); 3922321369Sdim Value *Args[] = { SrcLHS, SrcRHS, 3923321369Sdim ConstantInt::get(CC->getType(), SrcPred) }; 3924321369Sdim CallInst *NewCall = Builder.CreateCall(NewF, Args); 3925321369Sdim NewCall->takeName(II); 3926321369Sdim return replaceInstUsesWith(*II, NewCall); 3927321369Sdim } 3928321369Sdim 3929321369Sdim break; 3930321369Sdim } 3931327952Sdim case Intrinsic::amdgcn_wqm_vote: { 3932327952Sdim // wqm_vote is identity when the argument is constant. 3933327952Sdim if (!isa<Constant>(II->getArgOperand(0))) 3934327952Sdim break; 3935327952Sdim 3936327952Sdim return replaceInstUsesWith(*II, II->getArgOperand(0)); 3937327952Sdim } 3938327952Sdim case Intrinsic::amdgcn_kill: { 3939327952Sdim const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0)); 3940327952Sdim if (!C || !C->getZExtValue()) 3941327952Sdim break; 3942327952Sdim 3943327952Sdim // amdgcn.kill(i1 1) is a no-op 3944327952Sdim return eraseInstFromFunction(CI); 3945327952Sdim } 3946341825Sdim case Intrinsic::amdgcn_update_dpp: { 3947341825Sdim Value *Old = II->getArgOperand(0); 3948341825Sdim 3949353358Sdim auto BC = cast<ConstantInt>(II->getArgOperand(5)); 3950353358Sdim auto RM = cast<ConstantInt>(II->getArgOperand(3)); 3951353358Sdim auto BM = cast<ConstantInt>(II->getArgOperand(4)); 3952353358Sdim if (BC->isZeroValue() || 3953341825Sdim RM->getZExtValue() != 0xF || 3954341825Sdim BM->getZExtValue() != 0xF || 3955341825Sdim isa<UndefValue>(Old)) 3956341825Sdim break; 3957341825Sdim 3958341825Sdim // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value. 3959341825Sdim II->setOperand(0, UndefValue::get(Old->getType())); 3960341825Sdim return II; 3961341825Sdim } 3962353358Sdim case Intrinsic::amdgcn_readfirstlane: 3963353358Sdim case Intrinsic::amdgcn_readlane: { 3964353358Sdim // A constant value is trivially uniform. 3965353358Sdim if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0))) 3966353358Sdim return replaceInstUsesWith(*II, C); 3967353358Sdim 3968353358Sdim // The rest of these may not be safe if the exec may not be the same between 3969353358Sdim // the def and use. 
3970353358Sdim Value *Src = II->getArgOperand(0); 3971353358Sdim Instruction *SrcInst = dyn_cast<Instruction>(Src); 3972353358Sdim if (SrcInst && SrcInst->getParent() != II->getParent()) 3973353358Sdim break; 3974353358Sdim 3975353358Sdim // readfirstlane (readfirstlane x) -> readfirstlane x 3976353358Sdim // readlane (readfirstlane x), y -> readfirstlane x 3977353358Sdim if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) 3978353358Sdim return replaceInstUsesWith(*II, Src); 3979353358Sdim 3980353358Sdim if (IID == Intrinsic::amdgcn_readfirstlane) { 3981353358Sdim // readfirstlane (readlane x, y) -> readlane x, y 3982353358Sdim if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>())) 3983353358Sdim return replaceInstUsesWith(*II, Src); 3984353358Sdim } else { 3985353358Sdim // readlane (readlane x, y), y -> readlane x, y 3986353358Sdim if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>( 3987353358Sdim m_Value(), m_Specific(II->getArgOperand(1))))) 3988353358Sdim return replaceInstUsesWith(*II, Src); 3989353358Sdim } 3990353358Sdim 3991353358Sdim break; 3992353358Sdim } 3993202375Srdivacky case Intrinsic::stackrestore: { 3994202375Srdivacky // If the save is right next to the restore, remove the restore. This can 3995202375Srdivacky // happen when variable allocas are DCE'd. 3996210299Sed if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { 3997202375Srdivacky if (SS->getIntrinsicID() == Intrinsic::stacksave) { 3998341825Sdim // Skip over debug info. 3999341825Sdim if (SS->getNextNonDebugInstruction() == II) { 4000309124Sdim return eraseInstFromFunction(CI); 4001341825Sdim } 4002202375Srdivacky } 4003202375Srdivacky } 4004234353Sdim 4005202375Srdivacky // Scan down this block to see if there is another stack restore in the 4006202375Srdivacky // same block without an intervening call/alloca. 4007296417Sdim BasicBlock::iterator BI(II); 4008344779Sdim Instruction *TI = II->getParent()->getTerminator(); 4009202375Srdivacky bool CannotRemove = false; 4010202375Srdivacky for (++BI; &*BI != TI; ++BI) { 4011239462Sdim if (isa<AllocaInst>(BI)) { 4012202375Srdivacky CannotRemove = true; 4013202375Srdivacky break; 4014202375Srdivacky } 4015202375Srdivacky if (CallInst *BCI = dyn_cast<CallInst>(BI)) { 4016353358Sdim if (auto *II2 = dyn_cast<IntrinsicInst>(BCI)) { 4017202375Srdivacky // If there is a stackrestore below this one, remove this one. 4018353358Sdim if (II2->getIntrinsicID() == Intrinsic::stackrestore) 4019309124Sdim return eraseInstFromFunction(CI); 4020309124Sdim 4021309124Sdim // Bail if we cross over an intrinsic with side effects, such as 4022360784Sdim // llvm.stacksave, or llvm.read_register. 4023353358Sdim if (II2->mayHaveSideEffects()) { 4024309124Sdim CannotRemove = true; 4025309124Sdim break; 4026309124Sdim } 4027202375Srdivacky } else { 4028202375Srdivacky // If we found a non-intrinsic call, we can't remove the stack 4029202375Srdivacky // restore. 4030202375Srdivacky CannotRemove = true; 4031202375Srdivacky break; 4032202375Srdivacky } 4033202375Srdivacky } 4034202375Srdivacky } 4035234353Sdim 4036226633Sdim // If the stack restore is in a return, resume, or unwind block and if there 4037226633Sdim // are no allocas or calls between the restore and the return, nuke the 4038226633Sdim // restore. 
4039234353Sdim if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI))) 4040309124Sdim return eraseInstFromFunction(CI); 4041202375Srdivacky break; 4042202375Srdivacky } 4043309124Sdim case Intrinsic::lifetime_start: 4044314564Sdim // Asan needs to poison memory to detect invalid access which is possible 4045314564Sdim // even for empty lifetime range. 4046327952Sdim if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || 4047360784Sdim II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) || 4048327952Sdim II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) 4049314564Sdim break; 4050314564Sdim 4051309124Sdim if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start, 4052309124Sdim Intrinsic::lifetime_end, *this)) 4053309124Sdim return nullptr; 4054296417Sdim break; 4055280031Sdim case Intrinsic::assume: { 4056309124Sdim Value *IIOperand = II->getArgOperand(0); 4057341825Sdim // Remove an assume if it is followed by an identical assume. 4058341825Sdim // TODO: Do we need this? Unless there are conflicting assumptions, the 4059341825Sdim // computeKnownBits(IIOperand) below here eliminates redundant assumes. 4060341825Sdim Instruction *Next = II->getNextNonDebugInstruction(); 4061341825Sdim if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand)))) 4062309124Sdim return eraseInstFromFunction(CI); 4063309124Sdim 4064280031Sdim // Canonicalize assume(a && b) -> assume(a); assume(b); 4065280031Sdim // Note: New assumption intrinsics created here are registered by 4066280031Sdim // the InstCombineIRInserter object. 4067353358Sdim FunctionType *AssumeIntrinsicTy = II->getFunctionType(); 4068353358Sdim Value *AssumeIntrinsic = II->getCalledValue(); 4069353358Sdim Value *A, *B; 4070280031Sdim if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { 4071353358Sdim Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName()); 4072353358Sdim Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName()); 4073309124Sdim return eraseInstFromFunction(*II); 4074280031Sdim } 4075280031Sdim // assume(!(a || b)) -> assume(!a); assume(!b); 4076280031Sdim if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) { 4077353358Sdim Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, 4078353358Sdim Builder.CreateNot(A), II->getName()); 4079353358Sdim Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, 4080353358Sdim Builder.CreateNot(B), II->getName()); 4081309124Sdim return eraseInstFromFunction(*II); 4082280031Sdim } 4083280031Sdim 4084280031Sdim // assume( (load addr) != null ) -> add 'nonnull' metadata to load 4085280031Sdim // (if assume is valid at the load) 4086314564Sdim CmpInst::Predicate Pred; 4087314564Sdim Instruction *LHS; 4088314564Sdim if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) && 4089314564Sdim Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && 4090314564Sdim LHS->getType()->isPointerTy() && 4091314564Sdim isValidAssumeForContext(II, LHS, &DT)) { 4092314564Sdim MDNode *MD = MDNode::get(II->getContext(), None); 4093314564Sdim LHS->setMetadata(LLVMContext::MD_nonnull, MD); 4094314564Sdim return eraseInstFromFunction(*II); 4095314564Sdim 4096280031Sdim // TODO: apply nonnull return attributes to calls and invokes 4097280031Sdim // TODO: apply range metadata for range check patterns? 4098280031Sdim } 4099314564Sdim 4100280031Sdim // If there is a dominating assume with the same condition as this one, 4101280031Sdim // then this one is redundant, and should be removed. 
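  // (computeKnownBits consults the assumption cache with this instruction as
  // context, so if an earlier assume of the same condition dominates this
  // one, the condition is known all-ones here and this assume is erased.)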
4102321369Sdim KnownBits Known(1); 4103321369Sdim computeKnownBits(IIOperand, Known, 0, II); 4104321369Sdim if (Known.isAllOnes()) 4105309124Sdim return eraseInstFromFunction(*II); 4106280031Sdim 4107314564Sdim // Update the cache of affected values for this assumption (we might be 4108314564Sdim // here because we just simplified the condition). 4109314564Sdim AC.updateAffectedValues(II); 4110280031Sdim break; 4111202375Srdivacky } 4112280031Sdim case Intrinsic::experimental_gc_relocate: { 4113360784Sdim auto &GCR = *cast<GCRelocateInst>(II); 4114360784Sdim 4115360784Sdim // If we have two copies of the same pointer in the statepoint argument 4116360784Sdim // list, canonicalize to one. This may let us common gc.relocates. 4117360784Sdim if (GCR.getBasePtr() == GCR.getDerivedPtr() && 4118360784Sdim GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { 4119360784Sdim auto *OpIntTy = GCR.getOperand(2)->getType(); 4120360784Sdim II->setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); 4121360784Sdim return II; 4122360784Sdim } 4123360784Sdim 4124280031Sdim // Translate facts known about a pointer before relocating into 4125280031Sdim // facts about the relocate value, while being careful to 4126280031Sdim // preserve relocation semantics. 4127360784Sdim Value *DerivedPtr = GCR.getDerivedPtr(); 4128202375Srdivacky 4129280031Sdim // Remove the relocation if unused, note that this check is required 4130280031Sdim // to prevent the cases below from looping forever. 4131280031Sdim if (II->use_empty()) 4132309124Sdim return eraseInstFromFunction(*II); 4133280031Sdim 4134280031Sdim // Undef is undef, even after relocation. 4135280031Sdim // TODO: provide a hook for this in GCStrategy. This is clearly legal for 4136280031Sdim // most practical collectors, but there was discussion in the review thread 4137280031Sdim // about whether it was legal for all possible collectors. 4138309124Sdim if (isa<UndefValue>(DerivedPtr)) 4139309124Sdim // Use undef of gc_relocate's type to replace it. 4140309124Sdim return replaceInstUsesWith(*II, UndefValue::get(II->getType())); 4141280031Sdim 4142309124Sdim if (auto *PT = dyn_cast<PointerType>(II->getType())) { 4143309124Sdim // The relocation of null will be null for most any collector. 4144309124Sdim // TODO: provide a hook for this in GCStrategy. There might be some 4145309124Sdim // weird collector this property does not hold for. 4146309124Sdim if (isa<ConstantPointerNull>(DerivedPtr)) 4147309124Sdim // Use null-pointer of gc_relocate's type to replace it. 4148309124Sdim return replaceInstUsesWith(*II, ConstantPointerNull::get(PT)); 4149280031Sdim 4150309124Sdim // isKnownNonNull -> nonnull attribute 4151344779Sdim if (!II->hasRetAttr(Attribute::NonNull) && 4152344779Sdim isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) { 4153321369Sdim II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); 4154344779Sdim return II; 4155344779Sdim } 4156288943Sdim } 4157280031Sdim 4158280031Sdim // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) 4159280031Sdim // Canonicalize on the type from the uses to the defs 4160288943Sdim 4161280031Sdim // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) 4162309124Sdim break; 4163280031Sdim } 4164321369Sdim 4165321369Sdim case Intrinsic::experimental_guard: { 4166341825Sdim // Is this guard followed by another guard? We scan forward over a small 4167341825Sdim // fixed window of instructions to handle common cases with conditions 4168341825Sdim // computed between guards. 
4169360784Sdim Instruction *NextInst = II->getNextNonDebugInstruction();
4170341825Sdim for (unsigned i = 0; i < GuardWideningWindow; i++) {
4171341825Sdim // Note: Using context-free form to avoid compile time blow up
4172341825Sdim if (!isSafeToSpeculativelyExecute(NextInst))
4173341825Sdim break;
4174360784Sdim NextInst = NextInst->getNextNonDebugInstruction();
4175341825Sdim }
4176321369Sdim Value *NextCond = nullptr;
4177321369Sdim if (match(NextInst,
4178321369Sdim m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
4179321369Sdim Value *CurrCond = II->getArgOperand(0);
4180321369Sdim
4181321369Sdim // Remove a guard if it is immediately preceded by an identical guard.
4182321369Sdim // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
4183360784Sdim if (CurrCond != NextCond) {
4184360784Sdim Instruction *MoveI = II->getNextNonDebugInstruction();
4185360784Sdim while (MoveI != NextInst) {
4186360784Sdim auto *Temp = MoveI;
4187360784Sdim MoveI = MoveI->getNextNonDebugInstruction();
4188360784Sdim Temp->moveBefore(II);
4189360784Sdim }
4190360784Sdim II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
4191341825Sdim }
4192360784Sdim eraseInstFromFunction(*NextInst);
4193360784Sdim return II;
4194321369Sdim }
4195321369Sdim break;
4196280031Sdim }
4197321369Sdim }
4198353358Sdim return visitCallBase(*II);
4199202375Srdivacky}
4200202375Srdivacky
4201321369Sdim// Fence instruction simplification
4202321369SdimInstruction *InstCombiner::visitFenceInst(FenceInst &FI) {
4203321369Sdim // Remove identical consecutive fences.
4204341825Sdim Instruction *Next = FI.getNextNonDebugInstruction();
4205341825Sdim if (auto *NFI = dyn_cast<FenceInst>(Next))
4206321369Sdim if (FI.isIdenticalTo(NFI))
4207321369Sdim return eraseInstFromFunction(FI);
4208321369Sdim return nullptr;
4209321369Sdim}
4210321369Sdim
4211202375Srdivacky// InvokeInst simplification
4212202375SrdivackyInstruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
4213353358Sdim return visitCallBase(II);
4214202375Srdivacky}
4215202375Srdivacky
4216353358Sdim// CallBrInst simplification
4217353358SdimInstruction *InstCombiner::visitCallBrInst(CallBrInst &CBI) {
4218353358Sdim return visitCallBase(CBI);
4219353358Sdim}
4220353358Sdim
4221309124Sdim/// If this cast does not affect the value passed through the varargs area, we
4222309124Sdim/// can eliminate the use of the cast.
4223353358Sdimstatic bool isSafeToEliminateVarargsCast(const CallBase &Call,
4224288943Sdim const DataLayout &DL,
4225288943Sdim const CastInst *const CI,
4226202375Srdivacky const int ix) {
4227202375Srdivacky if (!CI->isLosslessCast())
4228202375Srdivacky return false;
4229202375Srdivacky
4230280031Sdim // If this is a GC intrinsic, avoid munging types. We need types for
4231280031Sdim // statepoint reconstruction in SelectionDAG.
4232280031Sdim // TODO: This is probably something which should be expanded to all
4233280031Sdim // intrinsics since the entire point of intrinsics is that
4234280031Sdim // they are understandable by the optimizer.
4235353358Sdim if (isStatepoint(&Call) || isGCRelocate(&Call) || isGCResult(&Call))
4236280031Sdim return false;
4237280031Sdim
4238276479Sdim // The size of ByVal or InAlloca arguments is derived from the type, so we
4239202375Srdivacky // can't change to a type with a different size. If the size were
4240202375Srdivacky // passed explicitly we could avoid this check.
4241353358Sdim if (!Call.isByValOrInAllocaArgument(ix)) 4242202375Srdivacky return true; 4243202375Srdivacky 4244234353Sdim Type* SrcTy = 4245202375Srdivacky cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); 4246353358Sdim Type *DstTy = Call.isByValArgument(ix) 4247353358Sdim ? Call.getParamByValType(ix) 4248353358Sdim : cast<PointerType>(CI->getType())->getElementType(); 4249202375Srdivacky if (!SrcTy->isSized() || !DstTy->isSized()) 4250202375Srdivacky return false; 4251288943Sdim if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy)) 4252202375Srdivacky return false; 4253202375Srdivacky return true; 4254202375Srdivacky} 4255202375Srdivacky 4256288943SdimInstruction *InstCombiner::tryOptimizeCall(CallInst *CI) { 4257276479Sdim if (!CI->getCalledFunction()) return nullptr; 4258204961Srdivacky 4259288943Sdim auto InstCombineRAUW = [this](Instruction *From, Value *With) { 4260309124Sdim replaceInstUsesWith(*From, With); 4261288943Sdim }; 4262344779Sdim auto InstCombineErase = [this](Instruction *I) { 4263344779Sdim eraseInstFromFunction(*I); 4264344779Sdim }; 4265353358Sdim LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW, 4266344779Sdim InstCombineErase); 4267288943Sdim if (Value *With = Simplifier.optimizeCall(CI)) { 4268249423Sdim ++NumSimplified; 4269309124Sdim return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With); 4270249423Sdim } 4271243830Sdim 4272276479Sdim return nullptr; 4273204961Srdivacky} 4274204961Srdivacky 4275309124Sdimstatic IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) { 4276226633Sdim // Strip off at most one level of pointer casts, looking for an alloca. This 4277226633Sdim // is good enough in practice and simpler than handling any number of casts. 4278226633Sdim Value *Underlying = TrampMem->stripPointerCasts(); 4279226633Sdim if (Underlying != TrampMem && 4280276479Sdim (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) 4281276479Sdim return nullptr; 4282226633Sdim if (!isa<AllocaInst>(Underlying)) 4283276479Sdim return nullptr; 4284226633Sdim 4285276479Sdim IntrinsicInst *InitTrampoline = nullptr; 4286276479Sdim for (User *U : TrampMem->users()) { 4287276479Sdim IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 4288226633Sdim if (!II) 4289276479Sdim return nullptr; 4290226633Sdim if (II->getIntrinsicID() == Intrinsic::init_trampoline) { 4291226633Sdim if (InitTrampoline) 4292226633Sdim // More than one init_trampoline writes to this value. Give up. 4293276479Sdim return nullptr; 4294226633Sdim InitTrampoline = II; 4295226633Sdim continue; 4296226633Sdim } 4297226633Sdim if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) 4298226633Sdim // Allow any number of calls to adjust.trampoline. 4299226633Sdim continue; 4300276479Sdim return nullptr; 4301226633Sdim } 4302226633Sdim 4303226633Sdim // No call to init.trampoline found. 4304226633Sdim if (!InitTrampoline) 4305276479Sdim return nullptr; 4306226633Sdim 4307226633Sdim // Check that the alloca is being used in the expected way. 4308226633Sdim if (InitTrampoline->getOperand(0) != TrampMem) 4309276479Sdim return nullptr; 4310226633Sdim 4311226633Sdim return InitTrampoline; 4312226633Sdim} 4313226633Sdim 4314309124Sdimstatic IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, 4315226633Sdim Value *TrampMem) { 4316226633Sdim // Visit all the previous instructions in the basic block, and try to find a 4317226633Sdim // init.trampoline which has a direct path to the adjust.trampoline. 
4318296417Sdim for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4319296417Sdim E = AdjustTramp->getParent()->begin();
4320296417Sdim I != E;) {
4321296417Sdim Instruction *Inst = &*--I;
4322226633Sdim if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
4323226633Sdim if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4324226633Sdim II->getOperand(0) == TrampMem)
4325226633Sdim return II;
4326226633Sdim if (Inst->mayWriteToMemory())
4327276479Sdim return nullptr;
4328226633Sdim }
4329276479Sdim return nullptr;
4330226633Sdim}
4331226633Sdim
4332226633Sdim// Given a call to llvm.adjust.trampoline, find and return the corresponding
4333226633Sdim// call to llvm.init.trampoline if the call to the trampoline can be optimized
4334226633Sdim// to a direct call to a function. Otherwise return NULL.
4335309124Sdimstatic IntrinsicInst *findInitTrampoline(Value *Callee) {
4336226633Sdim Callee = Callee->stripPointerCasts();
4337226633Sdim IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4338226633Sdim if (!AdjustTramp ||
4339226633Sdim AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4340276479Sdim return nullptr;
4341226633Sdim
4342226633Sdim Value *TrampMem = AdjustTramp->getOperand(0);
4343226633Sdim
4344309124Sdim if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4345226633Sdim return IT;
4346309124Sdim if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4347226633Sdim return IT;
4348276479Sdim return nullptr;
4349226633Sdim}
4350226633Sdim
4351360784Sdimstatic void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
4352360784Sdim unsigned NumArgs = Call.getNumArgOperands();
4353360784Sdim ConstantInt *Op0C = dyn_cast<ConstantInt>(Call.getOperand(0));
4354360784Sdim ConstantInt *Op1C =
4355360784Sdim (NumArgs == 1) ? nullptr : dyn_cast<ConstantInt>(Call.getOperand(1));
4356360784Sdim // Bail out if the allocation size is zero.
4357360784Sdim if ((Op0C && Op0C->isNullValue()) || (Op1C && Op1C->isNullValue())) 4358360784Sdim return; 4359360784Sdim 4360360784Sdim if (isMallocLikeFn(&Call, TLI) && Op0C) { 4361360784Sdim if (isOpNewLikeFn(&Call, TLI)) 4362360784Sdim Call.addAttribute(AttributeList::ReturnIndex, 4363360784Sdim Attribute::getWithDereferenceableBytes( 4364360784Sdim Call.getContext(), Op0C->getZExtValue())); 4365360784Sdim else 4366360784Sdim Call.addAttribute(AttributeList::ReturnIndex, 4367360784Sdim Attribute::getWithDereferenceableOrNullBytes( 4368360784Sdim Call.getContext(), Op0C->getZExtValue())); 4369360784Sdim } else if (isReallocLikeFn(&Call, TLI) && Op1C) { 4370360784Sdim Call.addAttribute(AttributeList::ReturnIndex, 4371360784Sdim Attribute::getWithDereferenceableOrNullBytes( 4372360784Sdim Call.getContext(), Op1C->getZExtValue())); 4373360784Sdim } else if (isCallocLikeFn(&Call, TLI) && Op0C && Op1C) { 4374360784Sdim bool Overflow; 4375360784Sdim const APInt &N = Op0C->getValue(); 4376360784Sdim APInt Size = N.umul_ov(Op1C->getValue(), Overflow); 4377360784Sdim if (!Overflow) 4378360784Sdim Call.addAttribute(AttributeList::ReturnIndex, 4379360784Sdim Attribute::getWithDereferenceableOrNullBytes( 4380360784Sdim Call.getContext(), Size.getZExtValue())); 4381360784Sdim } else if (isStrdupLikeFn(&Call, TLI)) { 4382360784Sdim uint64_t Len = GetStringLength(Call.getOperand(0)); 4383360784Sdim if (Len) { 4384360784Sdim // strdup 4385360784Sdim if (NumArgs == 1) 4386360784Sdim Call.addAttribute(AttributeList::ReturnIndex, 4387360784Sdim Attribute::getWithDereferenceableOrNullBytes( 4388360784Sdim Call.getContext(), Len)); 4389360784Sdim // strndup 4390360784Sdim else if (NumArgs == 2 && Op1C) 4391360784Sdim Call.addAttribute( 4392360784Sdim AttributeList::ReturnIndex, 4393360784Sdim Attribute::getWithDereferenceableOrNullBytes( 4394360784Sdim Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1))); 4395360784Sdim } 4396360784Sdim } 4397360784Sdim} 4398360784Sdim 4399353358Sdim/// Improvements for call, callbr and invoke instructions. 4400353358SdimInstruction *InstCombiner::visitCallBase(CallBase &Call) { 4401360784Sdim if (isAllocationFn(&Call, &TLI)) 4402360784Sdim annotateAnyAllocSite(Call, &TLI); 4403239462Sdim 4404202375Srdivacky bool Changed = false; 4405202375Srdivacky 4406288943Sdim // Mark any parameters that are known to be non-null with the nonnull 4407288943Sdim // attribute. This is helpful for inlining calls to functions with null 4408288943Sdim // checks on their arguments. 4409321369Sdim SmallVector<unsigned, 4> ArgNos; 4410288943Sdim unsigned ArgNo = 0; 4411296417Sdim 4412353358Sdim for (Value *V : Call.args()) { 4413309124Sdim if (V->getType()->isPointerTy() && 4414353358Sdim !Call.paramHasAttr(ArgNo, Attribute::NonNull) && 4415353358Sdim isKnownNonZero(V, DL, 0, &AC, &Call, &DT)) 4416321369Sdim ArgNos.push_back(ArgNo); 4417288943Sdim ArgNo++; 4418288943Sdim } 4419296417Sdim 4420353358Sdim assert(ArgNo == Call.arg_size() && "sanity check"); 4421288943Sdim 4422321369Sdim if (!ArgNos.empty()) { 4423353358Sdim AttributeList AS = Call.getAttributes(); 4424353358Sdim LLVMContext &Ctx = Call.getContext(); 4425321369Sdim AS = AS.addParamAttribute(Ctx, ArgNos, 4426321369Sdim Attribute::get(Ctx, Attribute::NonNull)); 4427353358Sdim Call.setAttributes(AS); 4428296417Sdim Changed = true; 4429296417Sdim } 4430296417Sdim 4431218893Sdim // If the callee is a pointer to a function, attempt to move any casts to the 4432353358Sdim // arguments of the call/callbr/invoke. 
4433353358Sdim Value *Callee = Call.getCalledValue();
4434353358Sdim if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
4435276479Sdim return nullptr;
4436202375Srdivacky
4437309124Sdim if (Function *CalleeF = dyn_cast<Function>(Callee)) {
4438309124Sdim // Remove the convergent attr on calls when the callee is not convergent.
4439353358Sdim if (Call.isConvergent() && !CalleeF->isConvergent() &&
4440309124Sdim !CalleeF->isIntrinsic()) {
4441353358Sdim LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4442353358Sdim << "\n");
4443353358Sdim Call.setNotConvergent();
4444353358Sdim return &Call;
4445309124Sdim }
4446309124Sdim
4447203954Srdivacky // If the call and callee calling conventions don't match, this call must
4448203954Srdivacky // be unreachable, as the call is undefined.
4449353358Sdim if (CalleeF->getCallingConv() != Call.getCallingConv() &&
4450203954Srdivacky // Only do this for calls to a function with a body. A prototype may
4451203954Srdivacky // not actually end up matching the implementation's calling conv for a
4452203954Srdivacky // variety of reasons (e.g. it may be written in assembly).
4453203954Srdivacky !CalleeF->isDeclaration()) {
4454353358Sdim Instruction *OldCall = &Call;
4455353358Sdim CreateNonTerminatorUnreachable(OldCall);
4456249423Sdim // If OldCall does not return void then replaceAllUsesWith undef.
4457202375Srdivacky // This allows ValueHandlers and custom metadata to adjust itself.
4458202375Srdivacky if (!OldCall->getType()->isVoidTy())
4459309124Sdim replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
4460203954Srdivacky if (isa<CallInst>(OldCall))
4461309124Sdim return eraseInstFromFunction(*OldCall);
4462234353Sdim
4463353358Sdim // We cannot remove an invoke or a callbr, because it would change the
4464353358Sdim // CFG, just change the callee to a null pointer.
4465353358Sdim cast<CallBase>(OldCall)->setCalledFunction(
4466353358Sdim CalleeF->getFunctionType(),
4467353358Sdim Constant::getNullValue(CalleeF->getType()));
4468276479Sdim return nullptr;
4469202375Srdivacky }
4470309124Sdim }
4471202375Srdivacky
4472341825Sdim if ((isa<ConstantPointerNull>(Callee) &&
4473353358Sdim !NullPointerIsDefined(Call.getFunction())) ||
4474341825Sdim isa<UndefValue>(Callee)) {
4475353358Sdim // If Call does not return void then replaceAllUsesWith undef.
4476202375Srdivacky // This allows ValueHandlers and custom metadata to adjust itself.
4477353358Sdim if (!Call.getType()->isVoidTy())
4478353358Sdim replaceInstUsesWith(Call, UndefValue::get(Call.getType()));
4479202375Srdivacky
4480353358Sdim if (Call.isTerminator()) {
4481353358Sdim // Can't remove an invoke or callbr because we cannot change the CFG.
4482276479Sdim return nullptr;
4483202375Srdivacky }
4484239462Sdim
4485353358Sdim // This instruction is not reachable, just remove it.
4486353358Sdim CreateNonTerminatorUnreachable(&Call);
4487353358Sdim return eraseInstFromFunction(Call);
4488202375Srdivacky }
4489202375Srdivacky
4490309124Sdim if (IntrinsicInst *II = findInitTrampoline(Callee))
4491353358Sdim return transformCallThroughTrampoline(Call, *II);
4492202375Srdivacky
4493226633Sdim PointerType *PTy = cast<PointerType>(Callee->getType());
4494226633Sdim FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4495202375Srdivacky if (FTy->isVarArg()) {
4496234353Sdim int ix = FTy->getNumParams();
4497202375Srdivacky // See if we can optimize any arguments passed through the varargs area of
4498202375Srdivacky // the call.
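  // E.g. (a sketch): a lossless pointer cast passed in the vararg area,
  //   %q = bitcast i32* %p to i8*
  //   call void (i32, ...) @f(i32 1, i8* %q)
  // can pass %p directly, since the bytes placed in the va_arg area are
  // unchanged.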
4499353358Sdim for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end(); 4500353358Sdim I != E; ++I, ++ix) { 4501202375Srdivacky CastInst *CI = dyn_cast<CastInst>(*I); 4502353358Sdim if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) { 4503202375Srdivacky *I = CI->getOperand(0); 4504353358Sdim 4505353358Sdim // Update the byval type to match the argument type. 4506353358Sdim if (Call.isByValArgument(ix)) { 4507353358Sdim Call.removeParamAttr(ix, Attribute::ByVal); 4508353358Sdim Call.addParamAttr( 4509353358Sdim ix, Attribute::getWithByValType( 4510353358Sdim Call.getContext(), 4511353358Sdim CI->getOperand(0)->getType()->getPointerElementType())); 4512353358Sdim } 4513202375Srdivacky Changed = true; 4514202375Srdivacky } 4515202375Srdivacky } 4516202375Srdivacky } 4517202375Srdivacky 4518353358Sdim if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) { 4519202375Srdivacky // Inline asm calls cannot throw - mark them 'nounwind'. 4520353358Sdim Call.setDoesNotThrow(); 4521202375Srdivacky Changed = true; 4522202375Srdivacky } 4523202375Srdivacky 4524243830Sdim // Try to optimize the call if possible, we require DataLayout for most of 4525204961Srdivacky // this. None of these calls are seen as possibly dead so go ahead and 4526204961Srdivacky // delete the instruction now. 4527353358Sdim if (CallInst *CI = dyn_cast<CallInst>(&Call)) { 4528288943Sdim Instruction *I = tryOptimizeCall(CI); 4529204961Srdivacky // If we changed something return the result, etc. Otherwise let 4530204961Srdivacky // the fallthrough check. 4531309124Sdim if (I) return eraseInstFromFunction(*I); 4532204961Srdivacky } 4533204961Srdivacky 4534360784Sdim if (isAllocLikeFn(&Call, &TLI)) 4535360784Sdim return visitAllocSite(Call); 4536360784Sdim 4537353358Sdim return Changed ? &Call : nullptr; 4538202375Srdivacky} 4539202375Srdivacky 4540309124Sdim/// If the callee is a constexpr cast of a function, attempt to move the cast to 4541353358Sdim/// the arguments of the call/callbr/invoke. 4542353358Sdimbool InstCombiner::transformConstExprCastCall(CallBase &Call) { 4543353358Sdim auto *Callee = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts()); 4544276479Sdim if (!Callee) 4545202375Srdivacky return false; 4546314564Sdim 4547341825Sdim // If this is a call to a thunk function, don't remove the cast. Thunks are 4548341825Sdim // used to transparently forward all incoming parameters and outgoing return 4549341825Sdim // values, so it's important to leave the cast in place. 4550280031Sdim if (Callee->hasFnAttribute("thunk")) 4551280031Sdim return false; 4552314564Sdim 4553341825Sdim // If this is a musttail call, the callee's prototype must match the caller's 4554341825Sdim // prototype with the exception of pointee types. The code below doesn't 4555341825Sdim // implement that, so we can't do this transform. 4556341825Sdim // TODO: Do the transform if it only requires adding pointer casts. 4557353358Sdim if (Call.isMustTailCall()) 4558341825Sdim return false; 4559341825Sdim 4560353358Sdim Instruction *Caller = &Call; 4561353358Sdim const AttributeList &CallerPAL = Call.getAttributes(); 4562202375Srdivacky 4563202375Srdivacky // Okay, this is a cast from a function to a different type. Unless doing so 4564202375Srdivacky // would cause a type conversion of one of our arguments, change this call to 4565202375Srdivacky // be a direct call with arguments casted to the appropriate types. 
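  // E.g. (a sketch): a call through a function-pointer bitcast,
  //   call void bitcast (void (i8*)* @f to void (i32*)*)(i32* %p)
  // becomes a direct call with the argument cast instead:
  //   %0 = bitcast i32* %p to i8*
  //   call void @f(i8* %0)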
4566226633Sdim FunctionType *FT = Callee->getFunctionType(); 4567226633Sdim Type *OldRetTy = Caller->getType(); 4568226633Sdim Type *NewRetTy = FT->getReturnType(); 4569202375Srdivacky 4570202375Srdivacky // Check to see if we are changing the return type... 4571202375Srdivacky if (OldRetTy != NewRetTy) { 4572276479Sdim 4573276479Sdim if (NewRetTy->isStructTy()) 4574276479Sdim return false; // TODO: Handle multiple return values. 4575276479Sdim 4576280031Sdim if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) { 4577261991Sdim if (Callee->isDeclaration()) 4578261991Sdim return false; // Cannot transform this return value. 4579202375Srdivacky 4580261991Sdim if (!Caller->use_empty() && 4581261991Sdim // void -> non-void is handled specially 4582261991Sdim !NewRetTy->isVoidTy()) 4583280031Sdim return false; // Cannot transform this return value. 4584261991Sdim } 4585202375Srdivacky 4586202375Srdivacky if (!CallerPAL.isEmpty() && !Caller->use_empty()) { 4587321369Sdim AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex); 4588288943Sdim if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy))) 4589202375Srdivacky return false; // Attribute not compatible with transformed value. 4590202375Srdivacky } 4591202375Srdivacky 4592353358Sdim // If the callbase is an invoke/callbr instruction, and the return value is 4593353358Sdim // used by a PHI node in a successor, we cannot change the return type of 4594353358Sdim // the call because there is no place to put the cast instruction (without 4595353358Sdim // breaking the critical edge). Bail out in this case. 4596353358Sdim if (!Caller->use_empty()) { 4597202375Srdivacky if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) 4598276479Sdim for (User *U : II->users()) 4599276479Sdim if (PHINode *PN = dyn_cast<PHINode>(U)) 4600202375Srdivacky if (PN->getParent() == II->getNormalDest() || 4601202375Srdivacky PN->getParent() == II->getUnwindDest()) 4602202375Srdivacky return false; 4603353358Sdim // FIXME: Be conservative for callbr to avoid a quadratic search. 4604353358Sdim if (isa<CallBrInst>(Caller)) 4605353358Sdim return false; 4606353358Sdim } 4607202375Srdivacky } 4608202375Srdivacky 4609353358Sdim unsigned NumActualArgs = Call.arg_size(); 4610202375Srdivacky unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); 4611202375Srdivacky 4612280031Sdim // Prevent us turning: 4613280031Sdim // declare void @takes_i32_inalloca(i32* inalloca) 4614280031Sdim // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) 4615280031Sdim // 4616280031Sdim // into: 4617280031Sdim // call void @takes_i32_inalloca(i32* null) 4618288943Sdim // 4619288943Sdim // Similarly, avoid folding away bitcasts of byval calls. 4620288943Sdim if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) || 4621288943Sdim Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal)) 4622280031Sdim return false; 4623280031Sdim 4624353358Sdim auto AI = Call.arg_begin(); 4625202375Srdivacky for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { 4626226633Sdim Type *ParamTy = FT->getParamType(i); 4627226633Sdim Type *ActTy = (*AI)->getType(); 4628202375Srdivacky 4629280031Sdim if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL)) 4630202375Srdivacky return false; // Cannot transform this parameter value. 
4631202375Srdivacky 4632321369Sdim if (AttrBuilder(CallerPAL.getParamAttributes(i)) 4633321369Sdim .overlaps(AttributeFuncs::typeIncompatible(ParamTy))) 4634202375Srdivacky return false; // Attribute not compatible with transformed value. 4635234353Sdim 4636353358Sdim if (Call.isInAllocaArgument(i)) 4637276479Sdim return false; // Cannot transform to and from inalloca. 4638276479Sdim 4639218893Sdim // If the parameter is passed as a byval argument, then we have to have a 4640218893Sdim // sized type and the sized type has to have the same size as the old type. 4641321369Sdim if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) { 4642226633Sdim PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); 4643288943Sdim if (!ParamPTy || !ParamPTy->getElementType()->isSized()) 4644218893Sdim return false; 4645234353Sdim 4646353358Sdim Type *CurElTy = Call.getParamByValType(i); 4647288943Sdim if (DL.getTypeAllocSize(CurElTy) != 4648288943Sdim DL.getTypeAllocSize(ParamPTy->getElementType())) 4649218893Sdim return false; 4650218893Sdim } 4651202375Srdivacky } 4652202375Srdivacky 4653219077Sdim if (Callee->isDeclaration()) { 4654219077Sdim // Do not delete arguments unless we have a function body. 4655219077Sdim if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) 4656219077Sdim return false; 4657202375Srdivacky 4658219077Sdim // If the callee is just a declaration, don't change the varargsness of the 4659219077Sdim // call. We don't want to introduce a varargs call where one doesn't 4660219077Sdim // already exist. 4661353358Sdim PointerType *APTy = cast<PointerType>(Call.getCalledValue()->getType()); 4662219077Sdim if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) 4663219077Sdim return false; 4664234353Sdim 4665234353Sdim // If both the callee and the cast type are varargs, we still have to make 4666234353Sdim // sure the number of fixed parameters are the same or we have the same 4667234353Sdim // ABI issues as if we introduce a varargs call. 4668234353Sdim if (FT->isVarArg() && 4669234353Sdim cast<FunctionType>(APTy->getElementType())->isVarArg() && 4670234353Sdim FT->getNumParams() != 4671234353Sdim cast<FunctionType>(APTy->getElementType())->getNumParams()) 4672234353Sdim return false; 4673219077Sdim } 4674234353Sdim 4675202375Srdivacky if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && 4676321369Sdim !CallerPAL.isEmpty()) { 4677202375Srdivacky // In this case we have more arguments than the new function type, but we 4678202375Srdivacky // won't be dropping them. Check that these extra arguments have attributes 4679202375Srdivacky // that are compatible with being a vararg call argument. 4680321369Sdim unsigned SRetIdx; 4681321369Sdim if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) && 4682321369Sdim SRetIdx > FT->getNumParams()) 4683321369Sdim return false; 4684321369Sdim } 4685249423Sdim 4686202375Srdivacky // Okay, we decided that this is a safe thing to do: go ahead and start 4687219077Sdim // inserting cast instructions as necessary. 4688321369Sdim SmallVector<Value *, 8> Args; 4689321369Sdim SmallVector<AttributeSet, 8> ArgAttrs; 4690202375Srdivacky Args.reserve(NumActualArgs); 4691321369Sdim ArgAttrs.reserve(NumActualArgs); 4692202375Srdivacky 4693202375Srdivacky // Get any return attributes. 
4694321369Sdim AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex); 4695202375Srdivacky 4696202375Srdivacky // If the return value is not being used, the type may not be compatible 4697202375Srdivacky // with the existing attributes. Wipe out any problematic attributes. 4698288943Sdim RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy)); 4699202375Srdivacky 4700353358Sdim LLVMContext &Ctx = Call.getContext(); 4701353358Sdim AI = Call.arg_begin(); 4702202375Srdivacky for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { 4703226633Sdim Type *ParamTy = FT->getParamType(i); 4704261991Sdim 4705321369Sdim Value *NewArg = *AI; 4706321369Sdim if ((*AI)->getType() != ParamTy) 4707321369Sdim NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy); 4708321369Sdim Args.push_back(NewArg); 4709202375Srdivacky 4710202375Srdivacky // Add any parameter attributes. 4711353358Sdim if (CallerPAL.hasParamAttribute(i, Attribute::ByVal)) { 4712353358Sdim AttrBuilder AB(CallerPAL.getParamAttributes(i)); 4713353358Sdim AB.addByValAttr(NewArg->getType()->getPointerElementType()); 4714353358Sdim ArgAttrs.push_back(AttributeSet::get(Ctx, AB)); 4715353358Sdim } else 4716353358Sdim ArgAttrs.push_back(CallerPAL.getParamAttributes(i)); 4717202375Srdivacky } 4718202375Srdivacky 4719202375Srdivacky // If the function takes more arguments than the call was taking, add them 4720202375Srdivacky // now. 4721321369Sdim for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) { 4722202375Srdivacky Args.push_back(Constant::getNullValue(FT->getParamType(i))); 4723321369Sdim ArgAttrs.push_back(AttributeSet()); 4724321369Sdim } 4725202375Srdivacky 4726202375Srdivacky // If we are removing arguments to the function, emit an obnoxious warning. 4727202375Srdivacky if (FT->getNumParams() < NumActualArgs) { 4728249423Sdim // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722 4729249423Sdim if (FT->isVarArg()) { 4730202375Srdivacky // Add all of the arguments in their promoted form to the arg list. 4731202375Srdivacky for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { 4732226633Sdim Type *PTy = getPromotedType((*AI)->getType()); 4733321369Sdim Value *NewArg = *AI; 4734202375Srdivacky if (PTy != (*AI)->getType()) { 4735202375Srdivacky // Must promote to pass through va_arg area! 4736202375Srdivacky Instruction::CastOps opcode = 4737202375Srdivacky CastInst::getCastOpcode(*AI, false, PTy, false); 4738321369Sdim NewArg = Builder.CreateCast(opcode, *AI, PTy); 4739202375Srdivacky } 4740321369Sdim Args.push_back(NewArg); 4741202375Srdivacky 4742202375Srdivacky // Add any parameter attributes. 4743321369Sdim ArgAttrs.push_back(CallerPAL.getParamAttributes(i)); 4744202375Srdivacky } 4745202375Srdivacky } 4746202375Srdivacky } 4747202375Srdivacky 4748249423Sdim AttributeSet FnAttrs = CallerPAL.getFnAttributes(); 4749202375Srdivacky 4750202375Srdivacky if (NewRetTy->isVoidTy()) 4751202375Srdivacky Caller->setName(""); // Void type should not have a name. 
  AttributeSet FnAttrs = CallerPAL.getFnAttributes();

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
         "missing argument attributes");
  AttributeList NewCallerPAL = AttributeList::get(
      Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);

  SmallVector<OperandBundleDef, 1> OpBundles;
  Call.getOperandBundlesAsDefs(OpBundles);

  CallBase *NewCall;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
                                   II->getUnwindDest(), Args, OpBundles);
  } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
    NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
                                   CBI->getIndirectDests(), Args, OpBundles);
  } else {
    NewCall = Builder.CreateCall(Callee, Args, OpBundles);
    cast<CallInst>(NewCall)->setTailCallKind(
        cast<CallInst>(Caller)->getTailCallKind());
  }
  NewCall->takeName(Caller);
  NewCall->setCallingConv(Call.getCallingConv());
  NewCall->setAttributes(NewCallerPAL);

  // Preserve the weight metadata for the new call instruction. The metadata
  // is used by SamplePGO to check the callsite's hotness.
  uint64_t W;
  if (Caller->extractProfTotalWeight(W))
    NewCall->setProfWeight(W);

  // Insert a cast of the return type as necessary.
  Instruction *NC = NewCall;
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke/callbr instruction, we should insert it after the
      // first non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
        BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call, just insert cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }
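  // Editor's note on the placement above: an invoke or callbr is a block
  // terminator, so a compensating cast of its result can never follow it in
  // the same block; it has to go at the first insertion point of the normal
  // (resp. default) destination instead.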
  if (!Caller->use_empty())
    replaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle()) {
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Caller, NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(Caller);
  }

  eraseInstFromFunction(*Caller);
  return true;
}

/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
Instruction *
InstCombiner::transformCallThroughTrampoline(CallBase &Call,
                                             IntrinsicInst &Tramp) {
  Value *Callee = Call.getCalledValue();
  Type *CalleeTy = Callee->getType();
  FunctionType *FTy = Call.getFunctionType();
  AttributeList Attrs = Call.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  Function *NestF =
      cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
  FunctionType *NestFTy = NestF->getFunctionType();

  AttributeList NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestArgNo = 0;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
                                      E = NestFTy->param_end();
         I != E; ++NestArgNo, ++I) {
      AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
      if (AS.hasAttribute(Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = AS;
        break;
      }
    }

    if (NestTy) {
      std::vector<Value *> NewArgs;
      std::vector<AttributeSet> NewArgAttrs;
      NewArgs.reserve(Call.arg_size() + 1);
      NewArgAttrs.reserve(Call.arg_size());

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.
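      // Illustrative sketch (editor's example): if @f declares its chain
      // first, e.g.
      //   declare i32 @f(i8* nest, i32)
      // then a call through the trampoline,  %r = call i32 %tramp(i32 %x),
      // is rewritten below as  %r = call i32 @f(i8* nest %chain, i32 %x).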
      {
        unsigned ArgNo = 0;
        auto I = Call.arg_begin(), E = Call.arg_end();
        do {
          if (ArgNo == NestArgNo) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp.getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewArgAttrs.push_back(NestAttr);
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));

          ++ArgNo;
          ++I;
        } while (true);
      }

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.
      std::vector<Type *> NewTypes;
      NewTypes.reserve(FTy->getNumParams() + 1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned ArgNo = 0;
        FunctionType::param_iterator I = FTy->param_begin(),
                                     E = FTy->param_end();

        do {
          if (ArgNo == NestArgNo)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++ArgNo;
          ++I;
        } while (true);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
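      // E.g. (editor's illustration) inserting a chain parameter at position
      // 0 turns the synthesized type  i32 (i32)  into  i32 (i8*, i32).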
      Constant *NewCallee =
          NestF->getType() == PointerType::getUnqual(NewFTy)
              ? NestF
              : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      AttributeList NewPAL =
          AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
                             Attrs.getRetAttributes(), NewArgAttrs);

      SmallVector<OperandBundleDef, 1> OpBundles;
      Call.getOperandBundlesAsDefs(OpBundles);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
        NewCaller = InvokeInst::Create(NewFTy, NewCallee,
                                       II->getNormalDest(),
                                       II->getUnwindDest(),
                                       NewArgs, OpBundles);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
        NewCaller =
            CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
                               CBI->getIndirectDests(), NewArgs, OpBundles);
        cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
        cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
        cast<CallInst>(NewCaller)->setTailCallKind(
            cast<CallInst>(Call).getTailCallKind());
        cast<CallInst>(NewCaller)->setCallingConv(
            cast<CallInst>(Call).getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }
      NewCaller->setDebugLoc(Call.getDebugLoc());

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
  Call.setCalledFunction(FTy, NewCallee);
  return &Call;
}