//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend.  This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

  class AtomicExpand: public FunctionPass {
    const TargetLowering *TLI = nullptr;

  public:
    static char ID; // Pass identification, replacement for typeid

    AtomicExpand() : FunctionPass(ID) {
      initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

  private:
    bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
    bool tryExpandAtomicLoad(LoadInst *LI);
    bool expandAtomicLoadToLL(LoadInst *LI);
    bool expandAtomicLoadToCmpXchg(LoadInst *LI);
    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
    bool expandAtomicStore(StoreInst *SI);
    bool tryExpandAtomicRMW(AtomicRMWInst *AI);
    Value *
    insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
                      AtomicOrdering MemOpOrder,
                      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
    void expandAtomicOpToLLSC(
        Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
        function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
    void expandPartwordAtomicRMW(
        AtomicRMWInst *I,
        TargetLoweringBase::AtomicExpansionKind ExpansionKind);
    AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
    void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
    void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
    void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

    AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
    static Value *insertRMWCmpXchgLoop(
        IRBuilder<> &Builder, Type *ResultType, Value *Addr,
        AtomicOrdering MemOpOrder,
        function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
        CreateCmpXchgInstFun CreateCmpXchg);
    bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
    bool isIdempotentRMW(AtomicRMWInst *RMWI);
    bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

    bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
                                 Value *PointerOperand, Value *ValueOperand,
                                 Value *CASExpected, AtomicOrdering Ordering,
                                 AtomicOrdering Ordering2,
                                 ArrayRef<RTLIB::Libcall> Libcalls);
    void expandAtomicLoadToLibcall(LoadInst *LI);
    void expandAtomicStoreToLibcall(StoreInst *LI);
    void expandAtomicRMWToLibcall(AtomicRMWInst *I);
    void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

    friend bool
    llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                   CreateCmpXchgInstFun CreateCmpXchg);
  };

} // end anonymous namespace

char AtomicExpand::ID = 0;

char &llvm::AtomicExpandID = AtomicExpand::ID;

INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
                false, false)

FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Helper functions to retrieve the alignment of atomic instructions.
static unsigned getAtomicOpAlign(LoadInst *LI) {
  unsigned Align = LI->getAlignment();
  // In the future, if this IR restriction is relaxed, we should
  // return DataLayout::getABITypeAlignment when there's no align
  // value.
  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
  return Align;
}

static unsigned getAtomicOpAlign(StoreInst *SI) {
  unsigned Align = SI->getAlignment();
  // In the future, if this IR restriction is relaxed, we should
  // return DataLayout::getABITypeAlignment when there's no align
  // value.
  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
  return Align;
}

static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
  // default alignment for load/store, the default here is to assume
  // it has NATURAL alignment, not DataLayout-specified alignment.
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
  // TODO(PR27168): same comment as above.
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
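// For example, on a target reporting 64 as getMaxAtomicSizeInBitsSupported(),
// a naturally aligned 8-byte operation is passed through, while a 16-byte or
// under-aligned one gets the libcall treatment.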
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);
  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}

bool AtomicExpand::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<TargetMachine>();
  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
    return false;
  TLI = TM.getSubtargetImpl(F)->getTargetLowering();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
    Instruction *I = &*II;
    if (I->isAtomic() && !isa<FenceInst>(I))
      AtomicInsts.push_back(I);
  }

  bool MadeChange = false;
  for (auto I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getSuccessOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    }

    if (LI) {
      if (LI->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        LI = convertAtomicLoadToIntegerType(LI);
        assert(LI->getType()->isIntegerTy() && "invariant broken");
        MadeChange = true;
      }

      MadeChange |= tryExpandAtomicLoad(LI);
    } else if (SI) {
      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        SI = convertAtomicStoreToIntegerType(SI);
        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      if (TLI->shouldExpandAtomicStoreInIR(SI))
        MadeChange |= expandAtomicStore(SI);
    } else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      // - into a load if it is idempotent
      // - into a Cmpxchg/LL-SC loop otherwise
      // we try them in that order.

      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
        unsigned ValueSize = getAtomicOpSize(RMWI);
        AtomicRMWInst::BinOp Op = RMWI->getOperation();
        if (ValueSize < MinCASSize &&
            (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
             Op == AtomicRMWInst::And)) {
          RMWI = widenPartwordAtomicRMW(RMWI);
          MadeChange = true;
        }

        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
             "unimplemented - floating point not legal at IR level");
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        CASI = convertCmpXchgToIntegerType(CASI);
        assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      MadeChange |= tryExpandAtomicCmpXchg(CASI);
    }
  }
  return MadeChange;
}

bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
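  // For example (illustrative), on a target that requests fences, a seq_cst
  // store arrives here already demoted to monotonic by runOnFunction, with
  // Order == seq_cst, and leaves bracketed by a leading fence and, if the
  // target emits one, a trailing fence.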
  IRBuilder<> Builder(I);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
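/// For example, a float yields i32 and a double yields i64; the assertion
/// below guards against types whose store size (e.g. due to padding) differs
/// from their declared bit width.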
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                       const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth.  See the function comment on
/// convertAtomicStoreToIntegerType for background.
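///
/// For example (illustrative):
///   %v = load atomic float, float* %p unordered, align 4
/// becomes:
///   %1 = bitcast float* %p to i32*
///   %2 = load atomic i32, i32* %1 unordered, align 4
///   %v = bitcast i32 %2 to float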
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(LI);

  Value *Addr = LI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
  NewLI->setAlignment(MaybeAlign(LI->getAlignment()));
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}

bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  IRBuilder<> Builder(LI);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val =
      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  IRBuilder<> Builder(LI);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
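  // For an acquire i32 load this produces (illustrative):
  //   %pair = cmpxchg i32* %p, i32 0, i32 0 acquire acquire
  // Memory is unchanged either way: on a match, the dummy zero is stored over
  // an existing zero; on a mismatch, nothing is stored at all.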
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}

/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth.  We used to not support floating point or vector
/// atomics in the IR at all.  The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store.  The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
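///
/// For example (illustrative):
///   store atomic float %v, float* %p unordered, align 4
/// becomes:
///   %1 = bitcast float %v to i32
///   %2 = bitcast float* %p to i32*
///   store atomic i32 %1, i32* %2 unordered, align 4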
StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
  IRBuilder<> Builder(SI);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
  NewSI->setAlignment(MaybeAlign(SI->getAlignment()));
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}

bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them with an
  // atomic swap, which can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
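  // For example (illustrative):
  //   store atomic i64 %v, i64* %p seq_cst, align 8
  // becomes:
  //   %old = atomicrmw xchg i64* %p, i64 %v seq_cst
  // with the swapped-out value simply discarded.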
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  return tryExpandAtomicRMW(AI);
}

static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal,
                                 AtomicOrdering MemOpOrder,
                                 Value *&Success, Value *&NewLoaded) {
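  // Callback for expandAtomicRMWToCmpXchg: emit a strong cmpxchg and hand back
  // its success flag and loaded value. FP values are bitcast through the
  // same-width integer type, since cmpxchg does not accept FP operands.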
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP types.
  bool NeedBitcast = OrigTy->isFloatingPointTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    unsigned AS = Addr->getType()->getPointerAddressSpace();
    Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}

/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
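/// For example, there is no single IR instruction for 'max' at this point, so
/// it is emitted as an icmp sgt followed by a select of the larger operand.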
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  Value *NewVal;
  switch (Op) {
  case AtomicRMWInst::Xchg:
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::FAdd:
    return Builder.CreateFAdd(Loaded, Inc, "new");
  case AtomicRMWInst::FSub:
    return Builder.CreateFSub(Loaded, Inc, "new");
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      llvm_unreachable(
          "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
    } else {
      auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
        return performAtomicOp(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      // TODO: Handle atomicrmw fadd/fsub
      if (AI->getType()->isFloatingPointTy())
        return false;

      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

namespace {

/// Result values from createMaskInstrs helper.
struct PartwordMaskValues {
  Type *WordType;
  Type *ValueType;
  Value *AlignedAddr;
  Value *ShiftAmt;
  Value *Mask;
  Value *Inv_Mask;
};

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignedAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignedAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
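///
/// For example (illustrative), for an i8 at %Addr on a little-endian target
/// with a 32-bit word size, where %Addr % 4 == 2:
///   AlignedAddr = %Addr & ~3
///   ShiftAmt    = 16
///   Mask        = 0x00FF0000
///   Inv_Mask    = 0xFF00FFFF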
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
                                           Type *ValueType, Value *Addr,
                                           unsigned WordSize) {
  PartwordMaskValues Ret;

  BasicBlock *BB = I->getParent();
  Function *F = BB->getParent();
  Module *M = I->getModule();

  LLVMContext &Ctx = F->getContext();
  const DataLayout &DL = M->getDataLayout();

  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  assert(ValueSize < WordSize);

  Ret.ValueType = ValueType;
  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);

  Type *WordPtrType =
      Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());

  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
  Ret.AlignedAddr = Builder.CreateIntToPtr(
      Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
      "AlignedAddr");

  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
  if (DL.isLittleEndian()) {
    // turn bytes into bits
    Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    Ret.ShiftAmt =
        Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
  }

  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
  // Use a 64-bit one so the shift below cannot overflow for word sizes > 4.
  Ret.Mask = Builder.CreateShl(
      ConstantInt::get(Ret.WordType, (uint64_t(1) << (ValueSize * 8)) - 1),
      Ret.ShiftAmt, "Mask");
  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");

  return Ret;
}

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
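/// For example (illustrative), a masked i8 'add' inside an i32 word computes
/// a full-width add of the loaded word and the shifted increment, masks the
/// result back into the byte's lane, and leaves the other three bytes of the
/// loaded word untouched.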
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilder<> &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin: {
    // Finally, comparison ops will operate on the full value, so
    // truncate down to the original size, and expand out again after
    // doing the operation.
    Value *Loaded_Shiftdown = Builder.CreateTrunc(
        Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
    Value *NewVal_Shiftup = Builder.CreateShl(
        Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop must operate only upon a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);

  AtomicOrdering MemOpOrder = AI->getOrdering();

  IRBuilder<> Builder(AI);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  // TODO: When we're ready to support LLSC conversions too, use
  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
  Value *OldResult =
      insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
                           PerformPartwordOp, createCmpXchgInstFun);
  Value *FinalOldResult = Builder.CreateTrunc(
      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
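// For example (illustrative), an i8 'and' becomes an i32 'and' whose operand
// has all-ones outside the addressed byte's lane, so the other bytes are
// preserved while the addressed byte is and-ed as requested.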
AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  IRBuilder<> Builder(AI);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
                                                 NewOperand, AI->getOrdering());

  Value *FinalOldResult = Builder.CreateTrunc(
      Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}

void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //         label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //    %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //    %FinalOldVal = trunc i32 %tmp1 to i8
  //    %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //    %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  IRBuilder<> Builder(CI);
  LLVMContext &Ctx = Builder.getContext();

  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values.
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value was
  // modified. If it wasn't, the mismatch must have been in the masked-in part,
  // so the failure is genuine and we end; otherwise, retry the cmpxchg.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above.
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = Builder.CreateTrunc(
      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  IRBuilder<> Builder(I);
  Value *Loaded =
      insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
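  // Lower the atomicrmw via the target's masked-atomicrmw intrinsic (used
  // e.g. by the RISC-V backend): shift the value operand into its lane and
  // let the target emit the word-sized loop itself.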
  IRBuilder<> Builder(AI);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = Builder.CreateTrunc(
      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
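  // Analogous to expandAtomicRMWToMaskedIntrinsic above, but for cmpxchg; the
  // success flag is recomputed here by comparing the masked old value against
  // the shifted expected value.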
  IRBuilder<> Builder(CI);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getSuccessOrdering());
  Value *FinalOldVal = Builder.CreateTrunc(
      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth.  We used to not support pointer cmpxchg in the
/// IR.  As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
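///
/// For example (illustrative, on a target with 64-bit pointers):
///   cmpxchg i8** %p, i8* %old, i8* %new seq_cst seq_cst
/// becomes a cmpxchg on i64 with ptrtoint'd operands, followed by an inttoptr
/// on the extracted old value.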
AtomicCmpXchgInst *
AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(CI);

  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
                                            CI->getSuccessOrdering(),
                                            CI->getFailureOrdering(),
                                            CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder =
      ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%addr)
  //     %should_store = icmp eq %unreleasedload, %desired
  //     br i1 %should_store, label %cmpxchg.fencedstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.fencedstore:
  //     fence?
  //     br label %cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%addr)
  //     %should_store = icmp eq %releasedload, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
  //                   [%loaded.trystore, %cmpxchg.success]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  // This grabs the DebugLoc from CI.
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoad, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess = TLI->emitStoreConditional(
      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
                                       "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Set up the builder so we can create any PHIs we need.
  Value *Loaded;
  if (!HasReleasedLoadBB)
    Loaded = UnreleasedLoad;
  else {
    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);

    Loaded = ExitLoaded;
  }

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now that we're no longer iterating through
  // them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

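// For example, "atomicrmw or i32* %p, i32 0 seq_cst" can never change the
// value in memory, so a target may lower it to an appropriately-fenced atomic
// load instead (see simplifyIdempotentRMW below).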
bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
    case AtomicRMWInst::Add:
    case AtomicRMWInst::Sub:
    case AtomicRMWInst::Or:
    case AtomicRMWInst::Xor:
      return C->isZero();
    case AtomicRMWInst::And:
      return C->isMinusOne();
    // FIXME: we could also treat Min/Max/UMin/UMax as idempotent when the
    // operand is the corresponding INT_MIN/INT_MAX/UINT_MIN/UINT_MAX identity.
    default:
      return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove the branch
  // entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
  // Atomics require at least natural alignment.
  InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8));
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

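// For example (illustrative, target-dependent): on a target reporting
// getMinCmpXchgSizeInBits() == 32, an i8 cmpxchg has ValueSize 1 < MinCASSize
// 4, so it is first widened to a 32-bit operation by expandPartwordCmpXchg.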
bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    assert(ValueSize >= MinCASSize &&
           "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
    return expandAtomicCmpXchg(CI);
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  IRBuilder<> Builder(AI);
  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
      [&](IRBuilder<> &Builder, Value *Loaded) {
        return performAtomicOp(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

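// A minimal sketch of a CreateCmpXchgInstFun a caller might pass in (purely
// illustrative; compare the lambda in expandAtomicRMWToLibcall below, which
// additionally expands the resulting cmpxchg into a libcall):
//
//   expandAtomicRMWToCmpXchg(
//       AI, [](IRBuilder<> &B, Value *Addr, Value *Loaded, Value *NewVal,
//              AtomicOrdering Order, Value *&Success, Value *&NewLoaded) {
//         auto *Pair = B.CreateAtomicCmpXchg(
//             Addr, Loaded, NewVal, Order,
//             AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
//         Success = B.CreateExtractValue(Pair, 1, "success");
//         NewLoaded = B.CreateExtractValue(Pair, 0, "newloaded");
//       });
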
// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined).
static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that __int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Align >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
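
// For example, on a typical 64-bit target (LargestSize == 16 above), a
// naturally aligned 8-byte operation can use __atomic_fetch_add_8 and
// friends, while a 3-byte or under-aligned operation must fall back to the
// generic __atomic_* form.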

void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
}

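// Returns the operation's libcall family, indexed as [generic, 1, 2, 4, 8,
// 16] bytes; e.g. an "atomicrmw add i32" ends up selecting
// RTLIB::ATOMIC_FETCH_ADD_4, i.e. __atomic_fetch_add_4 (see the size switch
// in expandAtomicOpToLibcall below).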
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return makeArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return makeArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return makeArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return makeArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return makeArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return makeArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return makeArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
    // No atomic libcalls are available for max/min/umax/umin/fadd/fsub.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}

void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
        I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
                                       Value *Loaded, Value *NewVal,
                                       AtomicOrdering MemOpOrder,
                                       Value *&Success, Value *&NewLoaded) {
      // Create the CAS instruction normally...
      AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
          Addr, Loaded, NewVal, MemOpOrder,
          AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
      Success = Builder.CreateExtractValue(Pair, 1, "success");
      NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

      // ...and then expand the CAS into a libcall.
      expandAtomicCASToLibcall(Pair);
    });
  }
}

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
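  // For reference, toCABI maps to the C ABI memory-order values; e.g.
  // seq_cst becomes 5, matching __ATOMIC_SEQ_CST.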
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1: RTLibType = Libcalls[1]; break;
    case 2: RTLibType = Libcalls[2]; break;
    case 4: RTLibType = Libcalls[3]; break;
    case 8: RTLibType = Libcalls[4]; break;
    case 16: RTLibType = Libcalls[5]; break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants.  These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                          int ordering)
  //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success_order,
  //                                  int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.
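  //
  // For instance (a rough sketch, assuming a 16-byte strong cmpxchg for
  // which no sized libcall exists), the code below emits approximately:
  //   %cas.expected = alloca i128
  //   store i128 %expected, i128* %cas.expected
  //   %ok = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* %ptr.i8,
  //             i8* %cas.expected.i8, i8* %desired.i8, i32 5, i32 5)
  //   %old = load i128, i128* %cas.expected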

  AllocaInst *AllocaCASExpected = nullptr;
  Value *AllocaCASExpected_i8 = nullptr;
  AllocaInst *AllocaValue = nullptr;
  Value *AllocaValue_i8 = nullptr;
  AllocaInst *AllocaResult = nullptr;
  Value *AllocaResult_i8 = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // Note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertible.  For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
  Value *PtrVal = Builder.CreateBitCast(PointerOperand,
                                        Type::getInt8PtrTy(Ctx, PtrTypeAS));
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment));
    unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();

    AllocaCASExpected_i8 =
        Builder.CreateBitCast(AllocaCASExpected,
                              Type::getInt8PtrTy(Ctx, AllocaAS));
    Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected_i8);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(MaybeAlign(AllocaAlignment));
      AllocaValue_i8 =
          Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
      Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue_i8);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(MaybeAlign(AllocaAlignment));
    unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
    AllocaResult_i8 =
        Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
    Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
    Args.push_back(AllocaResult_i8);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}.
    Type *FinalResultTy = I->getType();
    Value *V = UndefValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}