1284677Sdim//===-- NVPTXLowerAlloca.cpp - Make alloca to use local memory =====--===//
2284677Sdim//
3284677Sdim//                     The LLVM Compiler Infrastructure
4284677Sdim//
5284677Sdim// This file is distributed under the University of Illinois Open Source
6284677Sdim// License. See LICENSE.TXT for details.
7284677Sdim//
8284677Sdim//===----------------------------------------------------------------------===//
9284677Sdim//
// For every alloca instruction, add a pair of address space casts: one to the
// local address space and one back to the generic address space. For example,
//
//   %A = alloca i32
//   store i32 0, i32* %A ; emits st.u32
//
// will be transformed to
//
//   %A = alloca i32
//   %Local = addrspacecast i32* %A to i32 addrspace(5)*
//   %Generic = addrspacecast i32 addrspace(5)* %Local to i32*
//   store i32 0, i32* %Generic ; emits st.local.u32
//
// And we will rely on NVPTXFavorNonGenericAddrSpace to combine the last
// two instructions.
25284677Sdim//
26284677Sdim//===----------------------------------------------------------------------===//
27284677Sdim
28284677Sdim#include "NVPTX.h"
29284677Sdim#include "NVPTXUtilities.h"
30284677Sdim#include "llvm/IR/Function.h"
31284677Sdim#include "llvm/IR/Instructions.h"
32284677Sdim#include "llvm/IR/IntrinsicInst.h"
33284677Sdim#include "llvm/IR/Module.h"
34284677Sdim#include "llvm/IR/Type.h"
35284677Sdim#include "llvm/Pass.h"
36284677Sdim
37284677Sdimusing namespace llvm;
38284677Sdim
39284677Sdimnamespace llvm {
40284677Sdimvoid initializeNVPTXLowerAllocaPass(PassRegistry &);
41284677Sdim}
42284677Sdim
43284677Sdimnamespace {
44284677Sdimclass NVPTXLowerAlloca : public BasicBlockPass {
45284677Sdim  bool runOnBasicBlock(BasicBlock &BB) override;
46284677Sdim
47284677Sdimpublic:
48284677Sdim  static char ID; // Pass identification, replacement for typeid
49284677Sdim  NVPTXLowerAlloca() : BasicBlockPass(ID) {}
50284677Sdim  const char *getPassName() const override {
51284677Sdim    return "convert address space of alloca'ed memory to local";
52284677Sdim  }
53284677Sdim};
54284677Sdim} // namespace
55284677Sdim
56284677Sdimchar NVPTXLowerAlloca::ID = 1;
57284677Sdim
58284677SdimINITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
59284677Sdim                "Lower Alloca", false, false)
60284677Sdim
61284677Sdim// =============================================================================
62284677Sdim// Main function for this pass.
63284677Sdim// =============================================================================
64284677Sdimbool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
65284677Sdim  bool Changed = false;
66284677Sdim  for (auto &I : BB) {
67284677Sdim    if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
68284677Sdim      Changed = true;
69284677Sdim      auto PTy = dyn_cast<PointerType>(allocaInst->getType());
70284677Sdim      auto ETy = PTy->getElementType();
71284677Sdim      auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
72284677Sdim      auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
73284677Sdim      auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
74284677Sdim      auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal,
75284677Sdim                                                    GenericAddrTy, "");
76284677Sdim      NewASCToLocal->insertAfter(allocaInst);
77284677Sdim      NewASCToGeneric->insertAfter(NewASCToLocal);
78284677Sdim      for (Value::use_iterator UI = allocaInst->use_begin(),
79284677Sdim                                UE = allocaInst->use_end();
80284677Sdim            UI != UE; ) {
81284677Sdim        // Check Load, Store, GEP, and BitCast Uses on alloca and make them
82284677Sdim        // use the converted generic address, in order to expose non-generic
83284677Sdim        // addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types
84296417Sdim        // of instructions this is unnecessary and may introduce redundant
85284677Sdim        // address cast.
86284677Sdim        const auto &AllocaUse = *UI++;
87284677Sdim        auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
88284677Sdim        if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) {
89284677Sdim          LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
90284677Sdim          continue;
91284677Sdim        }
92284677Sdim        auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
93284677Sdim        if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) {
94284677Sdim          SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
95284677Sdim          continue;
96284677Sdim        }
97284677Sdim        auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
98284677Sdim        if (GI && GI->getPointerOperand() == allocaInst) {
99284677Sdim          GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
100284677Sdim          continue;
101284677Sdim        }
102284677Sdim        auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
103284677Sdim        if (BI && BI->getOperand(0) == allocaInst) {
104284677Sdim          BI->setOperand(0, NewASCToGeneric);
105284677Sdim          continue;
106284677Sdim        }
107284677Sdim      }
108284677Sdim    }
109284677Sdim  }
110284677Sdim  return Changed;
111284677Sdim}
112284677Sdim
113284677SdimBasicBlockPass *llvm::createNVPTXLowerAllocaPass() {
114284677Sdim  return new NVPTXLowerAlloca();
115284677Sdim}
116