1327952Sdim//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===// 2277323Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6277323Sdim// 7277323Sdim//===----------------------------------------------------------------------===// 8277323Sdim// 9277323Sdim// This file contains a pass (at IR level) to replace atomic instructions with 10309124Sdim// __atomic_* library calls, or target specific instruction which implement the 11309124Sdim// same semantics in a way which better fits the target backend. This can 12309124Sdim// include the use of (intrinsic-based) load-linked/store-conditional loops, 13309124Sdim// AtomicCmpXchg, or type coercions. 14277323Sdim// 15277323Sdim//===----------------------------------------------------------------------===// 16277323Sdim 17327952Sdim#include "llvm/ADT/ArrayRef.h" 18327952Sdim#include "llvm/ADT/STLExtras.h" 19327952Sdim#include "llvm/ADT/SmallVector.h" 20296417Sdim#include "llvm/CodeGen/AtomicExpandUtils.h" 21327952Sdim#include "llvm/CodeGen/RuntimeLibcalls.h" 22327952Sdim#include "llvm/CodeGen/TargetLowering.h" 23321369Sdim#include "llvm/CodeGen/TargetPassConfig.h" 24327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 25327952Sdim#include "llvm/CodeGen/ValueTypes.h" 26327952Sdim#include "llvm/IR/Attributes.h" 27327952Sdim#include "llvm/IR/BasicBlock.h" 28327952Sdim#include "llvm/IR/Constant.h" 29327952Sdim#include "llvm/IR/Constants.h" 30327952Sdim#include "llvm/IR/DataLayout.h" 31327952Sdim#include "llvm/IR/DerivedTypes.h" 32277323Sdim#include "llvm/IR/Function.h" 33277323Sdim#include "llvm/IR/IRBuilder.h" 34277323Sdim#include "llvm/IR/InstIterator.h" 35327952Sdim#include "llvm/IR/Instruction.h" 36277323Sdim#include "llvm/IR/Instructions.h" 37277323Sdim#include "llvm/IR/Module.h" 38327952Sdim#include 
"llvm/IR/Type.h" 39327952Sdim#include "llvm/IR/User.h" 40327952Sdim#include "llvm/IR/Value.h" 41360784Sdim#include "llvm/InitializePasses.h" 42327952Sdim#include "llvm/Pass.h" 43327952Sdim#include "llvm/Support/AtomicOrdering.h" 44327952Sdim#include "llvm/Support/Casting.h" 45277323Sdim#include "llvm/Support/Debug.h" 46327952Sdim#include "llvm/Support/ErrorHandling.h" 47296417Sdim#include "llvm/Support/raw_ostream.h" 48277323Sdim#include "llvm/Target/TargetMachine.h" 49327952Sdim#include <cassert> 50327952Sdim#include <cstdint> 51327952Sdim#include <iterator> 52277323Sdim 53277323Sdimusing namespace llvm; 54277323Sdim 55277323Sdim#define DEBUG_TYPE "atomic-expand" 56277323Sdim 57277323Sdimnamespace { 58327952Sdim 59277323Sdim class AtomicExpand: public FunctionPass { 60327952Sdim const TargetLowering *TLI = nullptr; 61327952Sdim 62277323Sdim public: 63277323Sdim static char ID; // Pass identification, replacement for typeid 64327952Sdim 65327952Sdim AtomicExpand() : FunctionPass(ID) { 66277323Sdim initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); 67277323Sdim } 68277323Sdim 69277323Sdim bool runOnFunction(Function &F) override; 70277323Sdim 71277323Sdim private: 72321369Sdim bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); 73296417Sdim IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); 74296417Sdim LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); 75296417Sdim bool tryExpandAtomicLoad(LoadInst *LI); 76277323Sdim bool expandAtomicLoadToLL(LoadInst *LI); 77277323Sdim bool expandAtomicLoadToCmpXchg(LoadInst *LI); 78296417Sdim StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); 79277323Sdim bool expandAtomicStore(StoreInst *SI); 80288943Sdim bool tryExpandAtomicRMW(AtomicRMWInst *AI); 81309124Sdim Value * 82309124Sdim insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, 83309124Sdim AtomicOrdering MemOpOrder, 84309124Sdim function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); 
85309124Sdim void expandAtomicOpToLLSC( 86309124Sdim Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, 87309124Sdim function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); 88309124Sdim void expandPartwordAtomicRMW( 89309124Sdim AtomicRMWInst *I, 90309124Sdim TargetLoweringBase::AtomicExpansionKind ExpansionKind); 91344779Sdim AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); 92309124Sdim void expandPartwordCmpXchg(AtomicCmpXchgInst *I); 93344779Sdim void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); 94344779Sdim void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); 95309124Sdim 96309124Sdim AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); 97309124Sdim static Value *insertRMWCmpXchgLoop( 98309124Sdim IRBuilder<> &Builder, Type *ResultType, Value *Addr, 99309124Sdim AtomicOrdering MemOpOrder, 100309124Sdim function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, 101309124Sdim CreateCmpXchgInstFun CreateCmpXchg); 102344779Sdim bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); 103309124Sdim 104277323Sdim bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); 105341825Sdim bool isIdempotentRMW(AtomicRMWInst *RMWI); 106341825Sdim bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); 107309124Sdim 108309124Sdim bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align, 109309124Sdim Value *PointerOperand, Value *ValueOperand, 110309124Sdim Value *CASExpected, AtomicOrdering Ordering, 111309124Sdim AtomicOrdering Ordering2, 112309124Sdim ArrayRef<RTLIB::Libcall> Libcalls); 113309124Sdim void expandAtomicLoadToLibcall(LoadInst *LI); 114309124Sdim void expandAtomicStoreToLibcall(StoreInst *LI); 115309124Sdim void expandAtomicRMWToLibcall(AtomicRMWInst *I); 116309124Sdim void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); 117309124Sdim 118309124Sdim friend bool 119309124Sdim llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, 120309124Sdim CreateCmpXchgInstFun CreateCmpXchg); 121277323Sdim }; 
122277323Sdim 123327952Sdim} // end anonymous namespace 124327952Sdim 125277323Sdimchar AtomicExpand::ID = 0; 126327952Sdim 127277323Sdimchar &llvm::AtomicExpandID = AtomicExpand::ID; 128327952Sdim 129321369SdimINITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", 130321369Sdim false, false) 131277323Sdim 132321369SdimFunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } 133277323Sdim 134309124Sdim// Helper functions to retrieve the size of atomic instructions. 135327952Sdimstatic unsigned getAtomicOpSize(LoadInst *LI) { 136309124Sdim const DataLayout &DL = LI->getModule()->getDataLayout(); 137309124Sdim return DL.getTypeStoreSize(LI->getType()); 138309124Sdim} 139309124Sdim 140327952Sdimstatic unsigned getAtomicOpSize(StoreInst *SI) { 141309124Sdim const DataLayout &DL = SI->getModule()->getDataLayout(); 142309124Sdim return DL.getTypeStoreSize(SI->getValueOperand()->getType()); 143309124Sdim} 144309124Sdim 145327952Sdimstatic unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { 146309124Sdim const DataLayout &DL = RMWI->getModule()->getDataLayout(); 147309124Sdim return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); 148309124Sdim} 149309124Sdim 150327952Sdimstatic unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { 151309124Sdim const DataLayout &DL = CASI->getModule()->getDataLayout(); 152309124Sdim return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); 153309124Sdim} 154309124Sdim 155309124Sdim// Helper functions to retrieve the alignment of atomic instructions. 156327952Sdimstatic unsigned getAtomicOpAlign(LoadInst *LI) { 157309124Sdim unsigned Align = LI->getAlignment(); 158309124Sdim // In the future, if this IR restriction is relaxed, we should 159309124Sdim // return DataLayout::getABITypeAlignment when there's no align 160309124Sdim // value. 
161309124Sdim assert(Align != 0 && "An atomic LoadInst always has an explicit alignment"); 162309124Sdim return Align; 163309124Sdim} 164309124Sdim 165327952Sdimstatic unsigned getAtomicOpAlign(StoreInst *SI) { 166309124Sdim unsigned Align = SI->getAlignment(); 167309124Sdim // In the future, if this IR restriction is relaxed, we should 168309124Sdim // return DataLayout::getABITypeAlignment when there's no align 169309124Sdim // value. 170309124Sdim assert(Align != 0 && "An atomic StoreInst always has an explicit alignment"); 171309124Sdim return Align; 172309124Sdim} 173309124Sdim 174327952Sdimstatic unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) { 175309124Sdim // TODO(PR27168): This instruction has no alignment attribute, but unlike the 176309124Sdim // default alignment for load/store, the default here is to assume 177309124Sdim // it has NATURAL alignment, not DataLayout-specified alignment. 178309124Sdim const DataLayout &DL = RMWI->getModule()->getDataLayout(); 179309124Sdim return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); 180309124Sdim} 181309124Sdim 182327952Sdimstatic unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) { 183309124Sdim // TODO(PR27168): same comment as above. 184309124Sdim const DataLayout &DL = CASI->getModule()->getDataLayout(); 185309124Sdim return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); 186309124Sdim} 187309124Sdim 188309124Sdim// Determine if a particular atomic operation has a supported size, 189309124Sdim// and is of appropriate alignment, to be passed through for target 190309124Sdim// lowering. 
(Versus turning into a __atomic libcall) 191309124Sdimtemplate <typename Inst> 192327952Sdimstatic bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { 193309124Sdim unsigned Size = getAtomicOpSize(I); 194309124Sdim unsigned Align = getAtomicOpAlign(I); 195309124Sdim return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; 196309124Sdim} 197309124Sdim 198277323Sdimbool AtomicExpand::runOnFunction(Function &F) { 199321369Sdim auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 200321369Sdim if (!TPC) 201277323Sdim return false; 202277323Sdim 203321369Sdim auto &TM = TPC->getTM<TargetMachine>(); 204321369Sdim if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) 205321369Sdim return false; 206321369Sdim TLI = TM.getSubtargetImpl(F)->getTargetLowering(); 207321369Sdim 208277323Sdim SmallVector<Instruction *, 1> AtomicInsts; 209277323Sdim 210277323Sdim // Changing control-flow while iterating through it is a bad idea, so gather a 211277323Sdim // list of all atomic instructions before we start. 212309124Sdim for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { 213309124Sdim Instruction *I = &*II; 214309124Sdim if (I->isAtomic() && !isa<FenceInst>(I)) 215309124Sdim AtomicInsts.push_back(I); 216277323Sdim } 217277323Sdim 218277323Sdim bool MadeChange = false; 219277323Sdim for (auto I : AtomicInsts) { 220277323Sdim auto LI = dyn_cast<LoadInst>(I); 221277323Sdim auto SI = dyn_cast<StoreInst>(I); 222277323Sdim auto RMWI = dyn_cast<AtomicRMWInst>(I); 223277323Sdim auto CASI = dyn_cast<AtomicCmpXchgInst>(I); 224309124Sdim assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction"); 225277323Sdim 226309124Sdim // If the Size/Alignment is not supported, replace with a libcall. 
227309124Sdim if (LI) { 228309124Sdim if (!atomicSizeSupported(TLI, LI)) { 229309124Sdim expandAtomicLoadToLibcall(LI); 230309124Sdim MadeChange = true; 231309124Sdim continue; 232309124Sdim } 233309124Sdim } else if (SI) { 234309124Sdim if (!atomicSizeSupported(TLI, SI)) { 235309124Sdim expandAtomicStoreToLibcall(SI); 236309124Sdim MadeChange = true; 237309124Sdim continue; 238309124Sdim } 239309124Sdim } else if (RMWI) { 240309124Sdim if (!atomicSizeSupported(TLI, RMWI)) { 241309124Sdim expandAtomicRMWToLibcall(RMWI); 242309124Sdim MadeChange = true; 243309124Sdim continue; 244309124Sdim } 245309124Sdim } else if (CASI) { 246309124Sdim if (!atomicSizeSupported(TLI, CASI)) { 247309124Sdim expandAtomicCASToLibcall(CASI); 248309124Sdim MadeChange = true; 249309124Sdim continue; 250309124Sdim } 251309124Sdim } 252309124Sdim 253309124Sdim if (TLI->shouldInsertFencesForAtomic(I)) { 254309124Sdim auto FenceOrdering = AtomicOrdering::Monotonic; 255309124Sdim if (LI && isAcquireOrStronger(LI->getOrdering())) { 256277323Sdim FenceOrdering = LI->getOrdering(); 257309124Sdim LI->setOrdering(AtomicOrdering::Monotonic); 258309124Sdim } else if (SI && isReleaseOrStronger(SI->getOrdering())) { 259277323Sdim FenceOrdering = SI->getOrdering(); 260309124Sdim SI->setOrdering(AtomicOrdering::Monotonic); 261309124Sdim } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || 262309124Sdim isAcquireOrStronger(RMWI->getOrdering()))) { 263277323Sdim FenceOrdering = RMWI->getOrdering(); 264309124Sdim RMWI->setOrdering(AtomicOrdering::Monotonic); 265344779Sdim } else if (CASI && 266344779Sdim TLI->shouldExpandAtomicCmpXchgInIR(CASI) == 267344779Sdim TargetLoweringBase::AtomicExpansionKind::None && 268309124Sdim (isReleaseOrStronger(CASI->getSuccessOrdering()) || 269309124Sdim isAcquireOrStronger(CASI->getSuccessOrdering()))) { 270277323Sdim // If a compare and swap is lowered to LL/SC, we can do smarter fence 271277323Sdim // insertion, with a stronger one on the success path than on 
the 272277323Sdim // failure path. As a result, fence insertion is directly done by 273277323Sdim // expandAtomicCmpXchg in that case. 274277323Sdim FenceOrdering = CASI->getSuccessOrdering(); 275309124Sdim CASI->setSuccessOrdering(AtomicOrdering::Monotonic); 276309124Sdim CASI->setFailureOrdering(AtomicOrdering::Monotonic); 277277323Sdim } 278277323Sdim 279309124Sdim if (FenceOrdering != AtomicOrdering::Monotonic) { 280321369Sdim MadeChange |= bracketInstWithFences(I, FenceOrdering); 281277323Sdim } 282277323Sdim } 283277323Sdim 284296417Sdim if (LI) { 285296417Sdim if (LI->getType()->isFloatingPointTy()) { 286296417Sdim // TODO: add a TLI hook to control this so that each target can 287296417Sdim // convert to lowering the original type one at a time. 288296417Sdim LI = convertAtomicLoadToIntegerType(LI); 289296417Sdim assert(LI->getType()->isIntegerTy() && "invariant broken"); 290296417Sdim MadeChange = true; 291296417Sdim } 292309124Sdim 293296417Sdim MadeChange |= tryExpandAtomicLoad(LI); 294296417Sdim } else if (SI) { 295296417Sdim if (SI->getValueOperand()->getType()->isFloatingPointTy()) { 296296417Sdim // TODO: add a TLI hook to control this so that each target can 297296417Sdim // convert to lowering the original type one at a time. 298296417Sdim SI = convertAtomicStoreToIntegerType(SI); 299296417Sdim assert(SI->getValueOperand()->getType()->isIntegerTy() && 300296417Sdim "invariant broken"); 301296417Sdim MadeChange = true; 302296417Sdim } 303296417Sdim 304296417Sdim if (TLI->shouldExpandAtomicStoreInIR(SI)) 305296417Sdim MadeChange |= expandAtomicStore(SI); 306277323Sdim } else if (RMWI) { 307277323Sdim // There are two different ways of expanding RMW instructions: 308277323Sdim // - into a load if it is idempotent 309277323Sdim // - into a Cmpxchg/LL-SC loop otherwise 310277323Sdim // we try them in that order. 
311288943Sdim 312288943Sdim if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { 313288943Sdim MadeChange = true; 314288943Sdim } else { 315344779Sdim unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 316344779Sdim unsigned ValueSize = getAtomicOpSize(RMWI); 317344779Sdim AtomicRMWInst::BinOp Op = RMWI->getOperation(); 318344779Sdim if (ValueSize < MinCASSize && 319344779Sdim (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 320344779Sdim Op == AtomicRMWInst::And)) { 321344779Sdim RMWI = widenPartwordAtomicRMW(RMWI); 322344779Sdim MadeChange = true; 323344779Sdim } 324344779Sdim 325288943Sdim MadeChange |= tryExpandAtomicRMW(RMWI); 326288943Sdim } 327309124Sdim } else if (CASI) { 328309124Sdim // TODO: when we're ready to make the change at the IR level, we can 329309124Sdim // extend convertCmpXchgToInteger for floating point too. 330309124Sdim assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() && 331309124Sdim "unimplemented - floating point not legal at IR level"); 332309124Sdim if (CASI->getCompareOperand()->getType()->isPointerTy() ) { 333309124Sdim // TODO: add a TLI hook to control this so that each target can 334309124Sdim // convert to lowering the original type one at a time. 
335309124Sdim CASI = convertCmpXchgToIntegerType(CASI); 336309124Sdim assert(CASI->getCompareOperand()->getType()->isIntegerTy() && 337309124Sdim "invariant broken"); 338309124Sdim MadeChange = true; 339309124Sdim } 340309124Sdim 341344779Sdim MadeChange |= tryExpandAtomicCmpXchg(CASI); 342277323Sdim } 343277323Sdim } 344277323Sdim return MadeChange; 345277323Sdim} 346277323Sdim 347321369Sdimbool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { 348277323Sdim IRBuilder<> Builder(I); 349277323Sdim 350321369Sdim auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); 351277323Sdim 352321369Sdim auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); 353277323Sdim // We have a guard here because not every atomic operation generates a 354277323Sdim // trailing fence. 355327952Sdim if (TrailingFence) 356327952Sdim TrailingFence->moveAfter(I); 357277323Sdim 358277323Sdim return (LeadingFence || TrailingFence); 359277323Sdim} 360277323Sdim 361296417Sdim/// Get the iX type with the same bitwidth as T. 362296417SdimIntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, 363296417Sdim const DataLayout &DL) { 364353358Sdim EVT VT = TLI->getMemValueType(DL, T); 365296417Sdim unsigned BitWidth = VT.getStoreSizeInBits(); 366296417Sdim assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); 367296417Sdim return IntegerType::get(T->getContext(), BitWidth); 368296417Sdim} 369296417Sdim 370296417Sdim/// Convert an atomic load of a non-integral type to an integer load of the 371309124Sdim/// equivalent bitwidth. See the function comment on 372341825Sdim/// convertAtomicStoreToIntegerType for background. 
373296417SdimLoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { 374296417Sdim auto *M = LI->getModule(); 375296417Sdim Type *NewTy = getCorrespondingIntegerType(LI->getType(), 376296417Sdim M->getDataLayout()); 377296417Sdim 378296417Sdim IRBuilder<> Builder(LI); 379341825Sdim 380296417Sdim Value *Addr = LI->getPointerOperand(); 381296417Sdim Type *PT = PointerType::get(NewTy, 382296417Sdim Addr->getType()->getPointerAddressSpace()); 383296417Sdim Value *NewAddr = Builder.CreateBitCast(Addr, PT); 384341825Sdim 385353358Sdim auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); 386360784Sdim NewLI->setAlignment(MaybeAlign(LI->getAlignment())); 387296417Sdim NewLI->setVolatile(LI->isVolatile()); 388321369Sdim NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); 389341825Sdim LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); 390341825Sdim 391296417Sdim Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); 392296417Sdim LI->replaceAllUsesWith(NewVal); 393296417Sdim LI->eraseFromParent(); 394296417Sdim return NewLI; 395296417Sdim} 396296417Sdim 397296417Sdimbool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { 398296417Sdim switch (TLI->shouldExpandAtomicLoadInIR(LI)) { 399296417Sdim case TargetLoweringBase::AtomicExpansionKind::None: 400296417Sdim return false; 401296417Sdim case TargetLoweringBase::AtomicExpansionKind::LLSC: 402309124Sdim expandAtomicOpToLLSC( 403309124Sdim LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(), 404296417Sdim [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); 405309124Sdim return true; 406296417Sdim case TargetLoweringBase::AtomicExpansionKind::LLOnly: 407277323Sdim return expandAtomicLoadToLL(LI); 408296417Sdim case TargetLoweringBase::AtomicExpansionKind::CmpXChg: 409277323Sdim return expandAtomicLoadToCmpXchg(LI); 410344779Sdim default: 411344779Sdim llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); 412296417Sdim } 413277323Sdim} 414277323Sdim 
415277323Sdimbool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { 416277323Sdim IRBuilder<> Builder(LI); 417277323Sdim 418277323Sdim // On some architectures, load-linked instructions are atomic for larger 419277323Sdim // sizes than normal loads. For example, the only 64-bit load guaranteed 420277323Sdim // to be single-copy atomic by ARM is an ldrexd (A3.5.3). 421277323Sdim Value *Val = 422277323Sdim TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); 423296417Sdim TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); 424277323Sdim 425277323Sdim LI->replaceAllUsesWith(Val); 426277323Sdim LI->eraseFromParent(); 427277323Sdim 428277323Sdim return true; 429277323Sdim} 430277323Sdim 431277323Sdimbool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { 432277323Sdim IRBuilder<> Builder(LI); 433277323Sdim AtomicOrdering Order = LI->getOrdering(); 434353358Sdim if (Order == AtomicOrdering::Unordered) 435353358Sdim Order = AtomicOrdering::Monotonic; 436353358Sdim 437277323Sdim Value *Addr = LI->getPointerOperand(); 438277323Sdim Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); 439277323Sdim Constant *DummyVal = Constant::getNullValue(Ty); 440277323Sdim 441277323Sdim Value *Pair = Builder.CreateAtomicCmpXchg( 442277323Sdim Addr, DummyVal, DummyVal, Order, 443277323Sdim AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); 444277323Sdim Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); 445277323Sdim 446277323Sdim LI->replaceAllUsesWith(Loaded); 447277323Sdim LI->eraseFromParent(); 448277323Sdim 449277323Sdim return true; 450277323Sdim} 451277323Sdim 452296417Sdim/// Convert an atomic store of a non-integral type to an integer store of the 453309124Sdim/// equivalent bitwidth. We used to not support floating point or vector 454296417Sdim/// atomics in the IR at all. 
The backends learned to deal with the bitcast 455296417Sdim/// idiom because that was the only way of expressing the notion of a atomic 456296417Sdim/// float or vector store. The long term plan is to teach each backend to 457296417Sdim/// instruction select from the original atomic store, but as a migration 458296417Sdim/// mechanism, we convert back to the old format which the backends understand. 459296417Sdim/// Each backend will need individual work to recognize the new format. 460296417SdimStoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { 461296417Sdim IRBuilder<> Builder(SI); 462296417Sdim auto *M = SI->getModule(); 463296417Sdim Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), 464296417Sdim M->getDataLayout()); 465296417Sdim Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); 466341825Sdim 467296417Sdim Value *Addr = SI->getPointerOperand(); 468296417Sdim Type *PT = PointerType::get(NewTy, 469296417Sdim Addr->getType()->getPointerAddressSpace()); 470296417Sdim Value *NewAddr = Builder.CreateBitCast(Addr, PT); 471296417Sdim 472296417Sdim StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); 473360784Sdim NewSI->setAlignment(MaybeAlign(SI->getAlignment())); 474296417Sdim NewSI->setVolatile(SI->isVolatile()); 475321369Sdim NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); 476341825Sdim LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); 477296417Sdim SI->eraseFromParent(); 478296417Sdim return NewSI; 479296417Sdim} 480296417Sdim 481277323Sdimbool AtomicExpand::expandAtomicStore(StoreInst *SI) { 482277323Sdim // This function is only called on atomic stores that are too large to be 483277323Sdim // atomic if implemented as a native store. So we replace them by an 484277323Sdim // atomic swap, that can be implemented for example as a ldrex/strex on ARM 485277323Sdim // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. 
486288943Sdim // It is the responsibility of the target to only signal expansion via 487277323Sdim // shouldExpandAtomicRMW in cases where this is required and possible. 488277323Sdim IRBuilder<> Builder(SI); 489277323Sdim AtomicRMWInst *AI = 490277323Sdim Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), 491277323Sdim SI->getValueOperand(), SI->getOrdering()); 492277323Sdim SI->eraseFromParent(); 493277323Sdim 494277323Sdim // Now we have an appropriate swap instruction, lower it as usual. 495288943Sdim return tryExpandAtomicRMW(AI); 496277323Sdim} 497277323Sdim 498296417Sdimstatic void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, 499296417Sdim Value *Loaded, Value *NewVal, 500296417Sdim AtomicOrdering MemOpOrder, 501296417Sdim Value *&Success, Value *&NewLoaded) { 502353358Sdim Type *OrigTy = NewVal->getType(); 503353358Sdim 504353358Sdim // This code can go away when cmpxchg supports FP types. 505353358Sdim bool NeedBitcast = OrigTy->isFloatingPointTy(); 506353358Sdim if (NeedBitcast) { 507353358Sdim IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); 508353358Sdim unsigned AS = Addr->getType()->getPointerAddressSpace(); 509353358Sdim Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS)); 510353358Sdim NewVal = Builder.CreateBitCast(NewVal, IntTy); 511353358Sdim Loaded = Builder.CreateBitCast(Loaded, IntTy); 512353358Sdim } 513353358Sdim 514296417Sdim Value* Pair = Builder.CreateAtomicCmpXchg( 515296417Sdim Addr, Loaded, NewVal, MemOpOrder, 516296417Sdim AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); 517296417Sdim Success = Builder.CreateExtractValue(Pair, 1, "success"); 518296417Sdim NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); 519353358Sdim 520353358Sdim if (NeedBitcast) 521353358Sdim NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); 522277323Sdim} 523277323Sdim 524277323Sdim/// Emit IR to implement the given atomicrmw operation on values in registers, 
525277323Sdim/// returning the new value. 526277323Sdimstatic Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, 527277323Sdim Value *Loaded, Value *Inc) { 528277323Sdim Value *NewVal; 529277323Sdim switch (Op) { 530277323Sdim case AtomicRMWInst::Xchg: 531277323Sdim return Inc; 532277323Sdim case AtomicRMWInst::Add: 533277323Sdim return Builder.CreateAdd(Loaded, Inc, "new"); 534277323Sdim case AtomicRMWInst::Sub: 535277323Sdim return Builder.CreateSub(Loaded, Inc, "new"); 536277323Sdim case AtomicRMWInst::And: 537277323Sdim return Builder.CreateAnd(Loaded, Inc, "new"); 538277323Sdim case AtomicRMWInst::Nand: 539277323Sdim return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); 540277323Sdim case AtomicRMWInst::Or: 541277323Sdim return Builder.CreateOr(Loaded, Inc, "new"); 542277323Sdim case AtomicRMWInst::Xor: 543277323Sdim return Builder.CreateXor(Loaded, Inc, "new"); 544277323Sdim case AtomicRMWInst::Max: 545277323Sdim NewVal = Builder.CreateICmpSGT(Loaded, Inc); 546277323Sdim return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); 547277323Sdim case AtomicRMWInst::Min: 548277323Sdim NewVal = Builder.CreateICmpSLE(Loaded, Inc); 549277323Sdim return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); 550277323Sdim case AtomicRMWInst::UMax: 551277323Sdim NewVal = Builder.CreateICmpUGT(Loaded, Inc); 552277323Sdim return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); 553277323Sdim case AtomicRMWInst::UMin: 554277323Sdim NewVal = Builder.CreateICmpULE(Loaded, Inc); 555277323Sdim return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); 556353358Sdim case AtomicRMWInst::FAdd: 557353358Sdim return Builder.CreateFAdd(Loaded, Inc, "new"); 558353358Sdim case AtomicRMWInst::FSub: 559353358Sdim return Builder.CreateFSub(Loaded, Inc, "new"); 560277323Sdim default: 561277323Sdim llvm_unreachable("Unknown atomic op"); 562277323Sdim } 563277323Sdim} 564277323Sdim 565296417Sdimbool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { 566296417Sdim 
switch (TLI->shouldExpandAtomicRMWInIR(AI)) { 567296417Sdim case TargetLoweringBase::AtomicExpansionKind::None: 568296417Sdim return false; 569309124Sdim case TargetLoweringBase::AtomicExpansionKind::LLSC: { 570309124Sdim unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 571309124Sdim unsigned ValueSize = getAtomicOpSize(AI); 572309124Sdim if (ValueSize < MinCASSize) { 573309124Sdim llvm_unreachable( 574309124Sdim "MinCmpXchgSizeInBits not yet supported for LL/SC architectures."); 575309124Sdim } else { 576309124Sdim auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { 577309124Sdim return performAtomicOp(AI->getOperation(), Builder, Loaded, 578309124Sdim AI->getValOperand()); 579309124Sdim }; 580309124Sdim expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), 581309124Sdim AI->getOrdering(), PerformOp); 582309124Sdim } 583309124Sdim return true; 584309124Sdim } 585309124Sdim case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { 586309124Sdim unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 587309124Sdim unsigned ValueSize = getAtomicOpSize(AI); 588309124Sdim if (ValueSize < MinCASSize) { 589353358Sdim // TODO: Handle atomicrmw fadd/fsub 590353358Sdim if (AI->getType()->isFloatingPointTy()) 591353358Sdim return false; 592353358Sdim 593309124Sdim expandPartwordAtomicRMW(AI, 594309124Sdim TargetLoweringBase::AtomicExpansionKind::CmpXChg); 595309124Sdim } else { 596309124Sdim expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); 597309124Sdim } 598309124Sdim return true; 599309124Sdim } 600344779Sdim case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { 601344779Sdim expandAtomicRMWToMaskedIntrinsic(AI); 602344779Sdim return true; 603344779Sdim } 604296417Sdim default: 605296417Sdim llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); 606296417Sdim } 607296417Sdim} 608296417Sdim 609309124Sdimnamespace { 610309124Sdim 611309124Sdim/// Result values from createMaskInstrs helper. 
612309124Sdimstruct PartwordMaskValues { 613309124Sdim Type *WordType; 614309124Sdim Type *ValueType; 615309124Sdim Value *AlignedAddr; 616309124Sdim Value *ShiftAmt; 617309124Sdim Value *Mask; 618309124Sdim Value *Inv_Mask; 619309124Sdim}; 620327952Sdim 621309124Sdim} // end anonymous namespace 622309124Sdim 623309124Sdim/// This is a helper function which builds instructions to provide 624309124Sdim/// values necessary for partword atomic operations. It takes an 625309124Sdim/// incoming address, Addr, and ValueType, and constructs the address, 626309124Sdim/// shift-amounts and masks needed to work with a larger value of size 627309124Sdim/// WordSize. 628309124Sdim/// 629309124Sdim/// AlignedAddr: Addr rounded down to a multiple of WordSize 630309124Sdim/// 631309124Sdim/// ShiftAmt: Number of bits to right-shift a WordSize value loaded 632309124Sdim/// from AlignAddr for it to have the same value as if 633309124Sdim/// ValueType was loaded from Addr. 634309124Sdim/// 635309124Sdim/// Mask: Value to mask with the value loaded from AlignAddr to 636309124Sdim/// include only the part that would've been loaded from Addr. 637309124Sdim/// 638309124Sdim/// Inv_Mask: The inverse of Mask. 
639309124Sdimstatic PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, 640309124Sdim Type *ValueType, Value *Addr, 641309124Sdim unsigned WordSize) { 642309124Sdim PartwordMaskValues Ret; 643309124Sdim 644296417Sdim BasicBlock *BB = I->getParent(); 645277323Sdim Function *F = BB->getParent(); 646309124Sdim Module *M = I->getModule(); 647309124Sdim 648277323Sdim LLVMContext &Ctx = F->getContext(); 649309124Sdim const DataLayout &DL = M->getDataLayout(); 650277323Sdim 651309124Sdim unsigned ValueSize = DL.getTypeStoreSize(ValueType); 652309124Sdim 653309124Sdim assert(ValueSize < WordSize); 654309124Sdim 655309124Sdim Ret.ValueType = ValueType; 656309124Sdim Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8); 657309124Sdim 658309124Sdim Type *WordPtrType = 659309124Sdim Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); 660309124Sdim 661309124Sdim Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); 662309124Sdim Ret.AlignedAddr = Builder.CreateIntToPtr( 663309124Sdim Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType, 664309124Sdim "AlignedAddr"); 665309124Sdim 666309124Sdim Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB"); 667309124Sdim if (DL.isLittleEndian()) { 668309124Sdim // turn bytes into bits 669309124Sdim Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3); 670309124Sdim } else { 671309124Sdim // turn bytes into bits, and count from the other side. 
672309124Sdim Ret.ShiftAmt = 673309124Sdim Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3); 674309124Sdim } 675309124Sdim 676309124Sdim Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt"); 677309124Sdim Ret.Mask = Builder.CreateShl( 678360784Sdim ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt, 679309124Sdim "Mask"); 680309124Sdim Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask"); 681309124Sdim 682309124Sdim return Ret; 683309124Sdim} 684309124Sdim 685309124Sdim/// Emit IR to implement a masked version of a given atomicrmw 686309124Sdim/// operation. (That is, only the bits under the Mask should be 687309124Sdim/// affected by the operation) 688309124Sdimstatic Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, 689309124Sdim IRBuilder<> &Builder, Value *Loaded, 690309124Sdim Value *Shifted_Inc, Value *Inc, 691309124Sdim const PartwordMaskValues &PMV) { 692344779Sdim // TODO: update to use 693344779Sdim // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order 694344779Sdim // to merge bits from two values without requiring PMV.Inv_Mask. 695309124Sdim switch (Op) { 696309124Sdim case AtomicRMWInst::Xchg: { 697309124Sdim Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); 698309124Sdim Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc); 699309124Sdim return FinalVal; 700309124Sdim } 701309124Sdim case AtomicRMWInst::Or: 702309124Sdim case AtomicRMWInst::Xor: 703344779Sdim case AtomicRMWInst::And: 704344779Sdim llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW"); 705309124Sdim case AtomicRMWInst::Add: 706309124Sdim case AtomicRMWInst::Sub: 707309124Sdim case AtomicRMWInst::Nand: { 708309124Sdim // The other arithmetic ops need to be masked into place. 
709309124Sdim Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc); 710309124Sdim Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask); 711309124Sdim Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); 712309124Sdim Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked); 713309124Sdim return FinalVal; 714309124Sdim } 715309124Sdim case AtomicRMWInst::Max: 716309124Sdim case AtomicRMWInst::Min: 717309124Sdim case AtomicRMWInst::UMax: 718309124Sdim case AtomicRMWInst::UMin: { 719309124Sdim // Finally, comparison ops will operate on the full value, so 720309124Sdim // truncate down to the original size, and expand out again after 721309124Sdim // doing the operation. 722309124Sdim Value *Loaded_Shiftdown = Builder.CreateTrunc( 723309124Sdim Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType); 724309124Sdim Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc); 725309124Sdim Value *NewVal_Shiftup = Builder.CreateShl( 726309124Sdim Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); 727309124Sdim Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); 728309124Sdim Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup); 729309124Sdim return FinalVal; 730309124Sdim } 731309124Sdim default: 732309124Sdim llvm_unreachable("Unknown atomic op"); 733309124Sdim } 734309124Sdim} 735309124Sdim 736309124Sdim/// Expand a sub-word atomicrmw operation into an appropriate 737309124Sdim/// word-sized operation. 738309124Sdim/// 739309124Sdim/// It will create an LL/SC or cmpxchg loop, as appropriate, the same 740309124Sdim/// way as a typical atomicrmw expansion. The only difference here is 741309124Sdim/// that the operation inside of the loop must operate only upon a 742309124Sdim/// part of the value. 
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Only the cmpxchg-loop strategy is implemented so far; see the TODO below
  // for the LL/SC variant.
  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);

  AtomicOrdering MemOpOrder = AI->getOrdering();

  IRBuilder<> Builder(AI);

  // Build the aligned address, shift amount and masks for the containing
  // word (width = target's minimum cmpxchg width).
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  // Widen the operand and move it into position within the word.
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  // Loop body: apply the RMW operation to only the masked part of the
  // currently-loaded word.
  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  // TODO: When we're ready to support LLSC conversions too, use
  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
  Value *OldResult =
      insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
                           PerformPartwordOp, createCmpXchgInstFun);
  // Extract the old narrow value from the old word and replace the original
  // instruction with it.
  Value *FinalOldResult = Builder.CreateTrunc(
      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  IRBuilder<> Builder(AI);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  // Or/Xor/And are the only operations where operating on extra bytes is
  // harmless, so they can be widened without a loop.
  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  // For And, the bits outside the mask must be 1 so they are preserved;
  // for Or/Xor, the zero-extended operand already leaves them unchanged.
  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
                                                 NewOperand, AI->getOrdering());

  // Recover the old narrow value from the old word result.
  Value *FinalOldResult = Builder.CreateTrunc(
      Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}

void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  IRBuilder<> Builder(CI);
  LLVMContext &Ctx = Builder.getContext();

  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg is allowed to fail spuriously, so no retry loop is needed.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Extract the narrow old value and rebuild the { iN, i1 } result pair.
  Value *FinalOldVal = Builder.CreateTrunc(
      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  IRBuilder<> Builder(I);
  Value *Loaded =
      insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);

  // The loaded value is the "old value" result of the atomic op.
  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  IRBuilder<> Builder(AI);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  // The target-specific intrinsic performs the masked RMW on the whole word.
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = Builder.CreateTrunc(
      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
  IRBuilder<> Builder(CI);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      TLI->getMinCmpXchgSizeInBits() / 8);

  // Move the compare and new values into position within the word.
  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getSuccessOrdering());
  Value *FinalOldVal = Builder.CreateTrunc(
      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);

  // Rebuild the { iN, i1 } result; success = masked old value equals the
  // shifted compare value.
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  // Store-conditional returns nonzero on failure; loop until it succeeds.
  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what use to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  // Integer type with the same store size as the (pointer) operand type.
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(CI);

  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  // Cast the pointer operands to the integer type for the new cmpxchg.
  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);


  // Carry over all atomic attributes of the original instruction.
  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
                                            CI->getSuccessOrdering(),
                                            CI->getFailureOrdering(),
                                            CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  // Cast the old value back to the original pointer type before rebuilding
  // the { ptr, i1 } result pair for existing users.
  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

/// Expand a cmpxchg instruction into an explicit load-linked /
/// store-conditional control-flow graph, inserting target fences as needed.
/// Always returns true (the instruction is replaced and erased).
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder =
      ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%addr)
  //     %should_store = icmp eq %unreleasedload, %desired
  //     br i1 %should_store, label %cmpxchg.fencedstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%addr)
  //     %should_store = icmp eq %releasedload, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
  //                   [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  // This grabs the DebugLoc from CI
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoad, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess = TLI->emitStoreConditional(
      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  // Store-conditional returns 0 on success.
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
                                       "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Setup the builder so we can create any PHIs we need.
  Value *Loaded;
  if (!HasReleasedLoadBB)
    Loaded = UnreleasedLoad;
  else {
    // With two load-linked blocks, the "loaded" value must be merged via
    // PHIs at each join point.
    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    // Insert after the Success PHI created just above (hence ++begin()).
    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);

    Loaded = ExitLoaded;
  }

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

// Returns true when the atomicrmw with this constant operand would leave the
// memory value unchanged (e.g. add 0, and -1), so it can be treated as a load.
bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if(!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch(Op) {
    case AtomicRMWInst::Add:
    case AtomicRMWInst::Sub:
    case AtomicRMWInst::Or:
    case AtomicRMWInst::Xor:
      return C->isZero();
    case AtomicRMWInst::And:
      return C->isMinusOne();
    // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1332277323Sdim default: 1333277323Sdim return false; 1334277323Sdim } 1335277323Sdim} 1336277323Sdim 1337277323Sdimbool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { 1338277323Sdim if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { 1339296417Sdim tryExpandAtomicLoad(ResultingLoad); 1340277323Sdim return true; 1341277323Sdim } 1342277323Sdim return false; 1343277323Sdim} 1344296417Sdim 1345309124SdimValue *AtomicExpand::insertRMWCmpXchgLoop( 1346309124Sdim IRBuilder<> &Builder, Type *ResultTy, Value *Addr, 1347309124Sdim AtomicOrdering MemOpOrder, 1348309124Sdim function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, 1349309124Sdim CreateCmpXchgInstFun CreateCmpXchg) { 1350309124Sdim LLVMContext &Ctx = Builder.getContext(); 1351309124Sdim BasicBlock *BB = Builder.GetInsertBlock(); 1352296417Sdim Function *F = BB->getParent(); 1353296417Sdim 1354296417Sdim // Given: atomicrmw some_op iN* %addr, iN %incr ordering 1355296417Sdim // 1356296417Sdim // The standard expansion we produce is: 1357296417Sdim // [...] 1358296417Sdim // %init_loaded = load atomic iN* %addr 1359296417Sdim // br label %loop 1360296417Sdim // loop: 1361296417Sdim // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] 1362296417Sdim // %new = some_op iN %loaded, %incr 1363296417Sdim // %pair = cmpxchg iN* %addr, iN %loaded, iN %new 1364296417Sdim // %new_loaded = extractvalue { iN, i1 } %pair, 0 1365296417Sdim // %success = extractvalue { iN, i1 } %pair, 1 1366296417Sdim // br i1 %success, label %atomicrmw.end, label %loop 1367296417Sdim // atomicrmw.end: 1368296417Sdim // [...] 1369309124Sdim BasicBlock *ExitBB = 1370309124Sdim BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); 1371296417Sdim BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); 1372296417Sdim 1373296417Sdim // The split call above "helpfully" added a branch at the end of BB (to the 1374296417Sdim // wrong place), but we want a load. 
/// Emit a load + cmpxchg retry loop implementing an atomic read-modify-write
/// at \p Addr, and return the value that was finally stored's predecessor
/// (i.e. the loaded value from the successful iteration).
/// \p PerformOp builds the IR computing the new value from the loaded one;
/// \p CreateCmpXchg builds the compare-exchange itself (instruction or
/// libcall, chosen by the caller).
Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
  // Atomics require at least natural alignment.
  InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8));
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  // Unordered is not a legal ordering for cmpxchg; strengthen it to
  // Monotonic, otherwise use the RMW's own ordering.
  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                Success, NewLoaded);
  assert(Success && NewLoaded);

  // Close the loop: a failed cmpxchg feeds its observed value back in.
  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

/// Dispatch a cmpxchg to the expansion strategy the target requests.
/// Returns true only when the instruction itself was replaced; note the
/// None case returns false even if a partword (sub-MinCAS-width) rewrite
/// was performed.
bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    // Target handles it natively, but widths below the minimum CAS size
    // still need to be rewritten as a wider, masked cmpxchg.
    if (ValueSize < MinCASSize)
      expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    assert(ValueSize >= MinCASSize &&
           "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
    return expandAtomicCmpXchg(CI);
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
/// Expand an atomicrmw into a cmpxchg retry loop (built by
/// insertRMWCmpXchgLoop) and replace/erase the original instruction.
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  IRBuilder<> Builder(AI);
  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
      [&](IRBuilder<> &Builder, Value *Loaded) {
        return performAtomicOp(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}
1438309124Sdim AI->getValOperand()); 1439309124Sdim }, 1440309124Sdim CreateCmpXchg); 1441309124Sdim 1442309124Sdim AI->replaceAllUsesWith(Loaded); 1443296417Sdim AI->eraseFromParent(); 1444309124Sdim return true; 1445309124Sdim} 1446296417Sdim 1447309124Sdim// In order to use one of the sized library calls such as 1448309124Sdim// __atomic_fetch_add_4, the alignment must be sufficient, the size 1449309124Sdim// must be one of the potentially-specialized sizes, and the value 1450309124Sdim// type must actually exist in C on the target (otherwise, the 1451309124Sdim// function wouldn't actually be defined.) 1452309124Sdimstatic bool canUseSizedAtomicCall(unsigned Size, unsigned Align, 1453309124Sdim const DataLayout &DL) { 1454309124Sdim // TODO: "LargestSize" is an approximation for "largest type that 1455309124Sdim // you can express in C". It seems to be the case that int128 is 1456309124Sdim // supported on all 64-bit platforms, otherwise only up to 64-bit 1457309124Sdim // integers are supported. If we get this wrong, then we'll try to 1458309124Sdim // call a sized libcall that doesn't actually exist. There should 1459309124Sdim // really be some more reliable way in LLVM of determining integer 1460309124Sdim // sizes which are valid in the target's C ABI... 1461309124Sdim unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 
16 : 8; 1462309124Sdim return Align >= Size && 1463309124Sdim (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) && 1464309124Sdim Size <= LargestSize; 1465309124Sdim} 1466309124Sdim 1467309124Sdimvoid AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { 1468309124Sdim static const RTLIB::Libcall Libcalls[6] = { 1469309124Sdim RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, 1470309124Sdim RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; 1471309124Sdim unsigned Size = getAtomicOpSize(I); 1472309124Sdim unsigned Align = getAtomicOpAlign(I); 1473309124Sdim 1474309124Sdim bool expanded = expandAtomicOpToLibcall( 1475309124Sdim I, Size, Align, I->getPointerOperand(), nullptr, nullptr, 1476309124Sdim I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); 1477309124Sdim (void)expanded; 1478309124Sdim assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Load"); 1479309124Sdim} 1480309124Sdim 1481309124Sdimvoid AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { 1482309124Sdim static const RTLIB::Libcall Libcalls[6] = { 1483309124Sdim RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, 1484309124Sdim RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; 1485309124Sdim unsigned Size = getAtomicOpSize(I); 1486309124Sdim unsigned Align = getAtomicOpAlign(I); 1487309124Sdim 1488309124Sdim bool expanded = expandAtomicOpToLibcall( 1489309124Sdim I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr, 1490309124Sdim I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); 1491309124Sdim (void)expanded; 1492309124Sdim assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Store"); 1493309124Sdim} 1494309124Sdim 1495309124Sdimvoid AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { 1496309124Sdim static const RTLIB::Libcall Libcalls[6] = { 1497309124Sdim RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1, 1498309124Sdim 
RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, 1499309124Sdim RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16}; 1500309124Sdim unsigned Size = getAtomicOpSize(I); 1501309124Sdim unsigned Align = getAtomicOpAlign(I); 1502309124Sdim 1503309124Sdim bool expanded = expandAtomicOpToLibcall( 1504309124Sdim I, Size, Align, I->getPointerOperand(), I->getNewValOperand(), 1505309124Sdim I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), 1506309124Sdim Libcalls); 1507309124Sdim (void)expanded; 1508309124Sdim assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor CAS"); 1509309124Sdim} 1510309124Sdim 1511309124Sdimstatic ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { 1512309124Sdim static const RTLIB::Libcall LibcallsXchg[6] = { 1513309124Sdim RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1, 1514309124Sdim RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4, 1515309124Sdim RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16}; 1516309124Sdim static const RTLIB::Libcall LibcallsAdd[6] = { 1517309124Sdim RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1, 1518309124Sdim RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4, 1519309124Sdim RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16}; 1520309124Sdim static const RTLIB::Libcall LibcallsSub[6] = { 1521309124Sdim RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1, 1522309124Sdim RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4, 1523309124Sdim RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16}; 1524309124Sdim static const RTLIB::Libcall LibcallsAnd[6] = { 1525309124Sdim RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1, 1526309124Sdim RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4, 1527309124Sdim RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16}; 1528309124Sdim static const RTLIB::Libcall LibcallsOr[6] = { 1529309124Sdim RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1, 1530309124Sdim RTLIB::ATOMIC_FETCH_OR_2, 
RTLIB::ATOMIC_FETCH_OR_4, 1531309124Sdim RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16}; 1532309124Sdim static const RTLIB::Libcall LibcallsXor[6] = { 1533309124Sdim RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1, 1534309124Sdim RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4, 1535309124Sdim RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16}; 1536309124Sdim static const RTLIB::Libcall LibcallsNand[6] = { 1537309124Sdim RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1, 1538309124Sdim RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4, 1539309124Sdim RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16}; 1540309124Sdim 1541309124Sdim switch (Op) { 1542309124Sdim case AtomicRMWInst::BAD_BINOP: 1543309124Sdim llvm_unreachable("Should not have BAD_BINOP."); 1544309124Sdim case AtomicRMWInst::Xchg: 1545309124Sdim return makeArrayRef(LibcallsXchg); 1546309124Sdim case AtomicRMWInst::Add: 1547309124Sdim return makeArrayRef(LibcallsAdd); 1548309124Sdim case AtomicRMWInst::Sub: 1549309124Sdim return makeArrayRef(LibcallsSub); 1550309124Sdim case AtomicRMWInst::And: 1551309124Sdim return makeArrayRef(LibcallsAnd); 1552309124Sdim case AtomicRMWInst::Or: 1553309124Sdim return makeArrayRef(LibcallsOr); 1554309124Sdim case AtomicRMWInst::Xor: 1555309124Sdim return makeArrayRef(LibcallsXor); 1556309124Sdim case AtomicRMWInst::Nand: 1557309124Sdim return makeArrayRef(LibcallsNand); 1558309124Sdim case AtomicRMWInst::Max: 1559309124Sdim case AtomicRMWInst::Min: 1560309124Sdim case AtomicRMWInst::UMax: 1561309124Sdim case AtomicRMWInst::UMin: 1562353358Sdim case AtomicRMWInst::FAdd: 1563353358Sdim case AtomicRMWInst::FSub: 1564309124Sdim // No atomic libcalls are available for max/min/umax/umin. 
1565309124Sdim return {}; 1566309124Sdim } 1567309124Sdim llvm_unreachable("Unexpected AtomicRMW operation."); 1568309124Sdim} 1569309124Sdim 1570309124Sdimvoid AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { 1571309124Sdim ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation()); 1572309124Sdim 1573309124Sdim unsigned Size = getAtomicOpSize(I); 1574309124Sdim unsigned Align = getAtomicOpAlign(I); 1575309124Sdim 1576309124Sdim bool Success = false; 1577309124Sdim if (!Libcalls.empty()) 1578309124Sdim Success = expandAtomicOpToLibcall( 1579309124Sdim I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr, 1580309124Sdim I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); 1581309124Sdim 1582309124Sdim // The expansion failed: either there were no libcalls at all for 1583309124Sdim // the operation (min/max), or there were only size-specialized 1584309124Sdim // libcalls (add/sub/etc) and we needed a generic. So, expand to a 1585309124Sdim // CAS libcall, via a CAS loop, instead. 1586309124Sdim if (!Success) { 1587309124Sdim expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr, 1588309124Sdim Value *Loaded, Value *NewVal, 1589309124Sdim AtomicOrdering MemOpOrder, 1590309124Sdim Value *&Success, Value *&NewLoaded) { 1591309124Sdim // Create the CAS instruction normally... 1592309124Sdim AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( 1593309124Sdim Addr, Loaded, NewVal, MemOpOrder, 1594309124Sdim AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); 1595309124Sdim Success = Builder.CreateExtractValue(Pair, 1, "success"); 1596309124Sdim NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); 1597309124Sdim 1598309124Sdim // ...and then expand the CAS into a libcall. 1599309124Sdim expandAtomicCASToLibcall(Pair); 1600309124Sdim }); 1601309124Sdim } 1602309124Sdim} 1603309124Sdim 1604309124Sdim// A helper routine for the above expandAtomic*ToLibcall functions. 
// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
//
// Returns false only when no suitable libcall exists (sized call not
// usable and no generic variant); otherwise replaces and erases 'I'
// and returns true.
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  // Allocas must go in the entry block so they are static per frame.
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  // Pick the sized variant when usable, else the generic (slot 0).
  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    // NOTE(review): no default case — relies on canUseSizedAtomicCall
    // having restricted Size to {1,2,4,8,16}; confirm if that ever changes.
    switch (Size) {
    case 1: RTLibType = Libcalls[1]; break;
    case 2: RTLibType = Libcalls[2]; break;
    case 4: RTLibType = Libcalls[3]; break;
    case 8: RTLibType = Libcalls[4]; break;
    case 16: RTLibType = Libcalls[5]; break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  // Build up the function call. There's two kinds. First, the sized
  // variants.  These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN __atomic_load_N(iN *ptr, int ordering)
  //  void __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                   int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants.  They look like the following:
  //  void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                         int ordering)
  //  bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                 void *desired, int success_order,
  //                                 int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  Value *AllocaCASExpected_i8 = nullptr;
  AllocaInst *AllocaValue = nullptr;
  Value *AllocaValue_i8 = nullptr;
  AllocaInst *AllocaResult = nullptr;
  Value *AllocaResult_i8 = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertable.  For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
  Value *PtrVal = Builder.CreateBitCast(PointerOperand,
                                        Type::getInt8PtrTy(Ctx, PtrTypeAS));
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present. Passed indirectly via a temporary
  // alloca; the callee writes the observed value back into it on failure.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment));
    unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();

    AllocaCASExpected_i8 =
        Builder.CreateBitCast(AllocaCASExpected,
                              Type::getInt8PtrTy(Ctx, AllocaAS));
    Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected_i8);
  }

  // 'val' argument ('desired' for cas), if present. Sized calls take it by
  // value (bitcast to iN); generic calls take it indirectly via an alloca.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(MaybeAlign(AllocaAlignment));
      AllocaValue_i8 =
          Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
      Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue_i8);
    }
  }

  // 'ret' argument: generic non-CAS calls return their result indirectly.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(MaybeAlign(AllocaAlignment));
    unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
    AllocaResult_i8 =
        Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
    Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
    Args.push_back(AllocaResult_i8);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    // The bool return is zero-extended per the C ABI.
    Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = UndefValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}