1193323Sed//===- ExtractFunction.cpp - Extract a function from Program --------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements several methods that are used to extract functions,
11193323Sed// loops, or portions of a module from the rest of the module.
12193323Sed//
13193323Sed//===----------------------------------------------------------------------===//
14193323Sed
15193323Sed#include "BugDriver.h"
16193323Sed#include "llvm/Analysis/Verifier.h"
17198090Srdivacky#include "llvm/Assembly/Writer.h"
18249423Sdim#include "llvm/IR/Constants.h"
19249423Sdim#include "llvm/IR/DataLayout.h"
20249423Sdim#include "llvm/IR/DerivedTypes.h"
21249423Sdim#include "llvm/IR/LLVMContext.h"
22249423Sdim#include "llvm/IR/Module.h"
23249423Sdim#include "llvm/Pass.h"
24249423Sdim#include "llvm/PassManager.h"
25193323Sed#include "llvm/Support/CommandLine.h"
26193323Sed#include "llvm/Support/Debug.h"
27193323Sed#include "llvm/Support/FileUtilities.h"
28218885Sdim#include "llvm/Support/Path.h"
29218885Sdim#include "llvm/Support/Signals.h"
30249423Sdim#include "llvm/Support/ToolOutputFile.h"
31249423Sdim#include "llvm/Transforms/IPO.h"
32249423Sdim#include "llvm/Transforms/Scalar.h"
33249423Sdim#include "llvm/Transforms/Utils/Cloning.h"
34249423Sdim#include "llvm/Transforms/Utils/CodeExtractor.h"
35193323Sed#include <set>
36193323Sedusing namespace llvm;
37193323Sed
38193323Sednamespace llvm {
39193323Sed  bool DisableSimplifyCFG = false;
40198090Srdivacky  extern cl::opt<std::string> OutputPrefix;
41193323Sed} // End llvm namespace
42193323Sed
43193323Sednamespace {
44193323Sed  cl::opt<bool>
45193323Sed  NoDCE ("disable-dce",
46193323Sed         cl::desc("Do not use the -dce pass to reduce testcases"));
47193323Sed  cl::opt<bool, true>
48193323Sed  NoSCFG("disable-simplifycfg", cl::location(DisableSimplifyCFG),
49193323Sed         cl::desc("Do not use the -simplifycfg pass to reduce testcases"));
50193323Sed
51234353Sdim  Function* globalInitUsesExternalBA(GlobalVariable* GV) {
52234353Sdim    if (!GV->hasInitializer())
53234353Sdim      return 0;
54234353Sdim
55234353Sdim    Constant *I = GV->getInitializer();
56234353Sdim
57234353Sdim    // walk the values used by the initializer
58234353Sdim    // (and recurse into things like ConstantExpr)
59234353Sdim    std::vector<Constant*> Todo;
60234353Sdim    std::set<Constant*> Done;
61234353Sdim    Todo.push_back(I);
62234353Sdim
63234353Sdim    while (!Todo.empty()) {
64234353Sdim      Constant* V = Todo.back();
65234353Sdim      Todo.pop_back();
66234353Sdim      Done.insert(V);
67234353Sdim
68234353Sdim      if (BlockAddress *BA = dyn_cast<BlockAddress>(V)) {
69234353Sdim        Function *F = BA->getFunction();
70234353Sdim        if (F->isDeclaration())
71234353Sdim          return F;
72234353Sdim      }
73234353Sdim
74234353Sdim      for (User::op_iterator i = V->op_begin(), e = V->op_end(); i != e; ++i) {
75234353Sdim        Constant *C = dyn_cast<Constant>(*i);
76234353Sdim        if (C && !isa<GlobalValue>(C) && !Done.count(C))
77234353Sdim          Todo.push_back(C);
78234353Sdim      }
79234353Sdim    }
80234353Sdim    return 0;
81234353Sdim  }
82234353Sdim}  // end anonymous namespace
83234353Sdim
84193323Sed/// deleteInstructionFromProgram - This method clones the current Program and
85193323Sed/// deletes the specified instruction from the cloned module.  It then runs a
86193323Sed/// series of cleanup passes (ADCE and SimplifyCFG) to eliminate any code which
87193323Sed/// depends on the value.  The modified module is then returned.
88193323Sed///
89193323SedModule *BugDriver::deleteInstructionFromProgram(const Instruction *I,
90212793Sdim                                                unsigned Simplification) {
91212793Sdim  // FIXME, use vmap?
92212793Sdim  Module *Clone = CloneModule(Program);
93193323Sed
94193323Sed  const BasicBlock *PBB = I->getParent();
95193323Sed  const Function *PF = PBB->getParent();
96193323Sed
97212793Sdim  Module::iterator RFI = Clone->begin(); // Get iterator to corresponding fn
98193323Sed  std::advance(RFI, std::distance(PF->getParent()->begin(),
99193323Sed                                  Module::const_iterator(PF)));
100193323Sed
101193323Sed  Function::iterator RBI = RFI->begin();  // Get iterator to corresponding BB
102193323Sed  std::advance(RBI, std::distance(PF->begin(), Function::const_iterator(PBB)));
103193323Sed
104193323Sed  BasicBlock::iterator RI = RBI->begin(); // Get iterator to corresponding inst
105193323Sed  std::advance(RI, std::distance(PBB->begin(), BasicBlock::const_iterator(I)));
106193323Sed  Instruction *TheInst = RI;              // Got the corresponding instruction!
107193323Sed
108193323Sed  // If this instruction produces a value, replace any users with null values
109210006Srdivacky  if (!TheInst->getType()->isVoidTy())
110193323Sed    TheInst->replaceAllUsesWith(Constant::getNullValue(TheInst->getType()));
111193323Sed
112193323Sed  // Remove the instruction from the program.
113193323Sed  TheInst->getParent()->getInstList().erase(TheInst);
114193323Sed
115193323Sed  // Spiff up the output a little bit.
116212793Sdim  std::vector<std::string> Passes;
117193323Sed
118212793Sdim  /// Can we get rid of the -disable-* options?
119193323Sed  if (Simplification > 1 && !NoDCE)
120212793Sdim    Passes.push_back("dce");
121193323Sed  if (Simplification && !DisableSimplifyCFG)
122212793Sdim    Passes.push_back("simplifycfg");      // Delete dead control flow
123193323Sed
124212793Sdim  Passes.push_back("verify");
125212793Sdim  Module *New = runPassesOn(Clone, Passes);
126212793Sdim  delete Clone;
127212793Sdim  if (!New) {
128212793Sdim    errs() << "Instruction removal failed.  Sorry. :(  Please report a bug!\n";
129212793Sdim    exit(1);
130212793Sdim  }
131212793Sdim  return New;
132193323Sed}
133193323Sed
134193323Sed/// performFinalCleanups - This method clones the current Program and performs
135193323Sed/// a series of cleanups intended to get rid of extra cruft on the module
136193323Sed/// before handing it to the user.
137193323Sed///
138193323SedModule *BugDriver::performFinalCleanups(Module *M, bool MayModifySemantics) {
139193323Sed  // Make all functions external, so GlobalDCE doesn't delete them...
140193323Sed  for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
141193323Sed    I->setLinkage(GlobalValue::ExternalLinkage);
142193323Sed
143212793Sdim  std::vector<std::string> CleanupPasses;
144212793Sdim  CleanupPasses.push_back("globaldce");
145193323Sed
146193323Sed  if (MayModifySemantics)
147212793Sdim    CleanupPasses.push_back("deadarghaX0r");
148193323Sed  else
149212793Sdim    CleanupPasses.push_back("deadargelim");
150193323Sed
151193323Sed  Module *New = runPassesOn(M, CleanupPasses);
152193323Sed  if (New == 0) {
153198090Srdivacky    errs() << "Final cleanups failed.  Sorry. :(  Please report a bug!\n";
154193323Sed    return M;
155193323Sed  }
156193323Sed  delete M;
157193323Sed  return New;
158193323Sed}
159193323Sed
160193323Sed
161193323Sed/// ExtractLoop - Given a module, extract up to one loop from it into a new
162193323Sed/// function.  This returns null if there are no extractable loops in the
163193323Sed/// program or if the loop extractor crashes.
164193323SedModule *BugDriver::ExtractLoop(Module *M) {
165212793Sdim  std::vector<std::string> LoopExtractPasses;
166212793Sdim  LoopExtractPasses.push_back("loop-extract-single");
167193323Sed
168193323Sed  Module *NewM = runPassesOn(M, LoopExtractPasses);
169193323Sed  if (NewM == 0) {
170198090Srdivacky    outs() << "*** Loop extraction failed: ";
171212793Sdim    EmitProgressBitcode(M, "loopextraction", true);
172198090Srdivacky    outs() << "*** Sorry. :(  Please report a bug!\n";
173193323Sed    return 0;
174193323Sed  }
175193323Sed
176193323Sed  // Check to see if we created any new functions.  If not, no loops were
177193323Sed  // extracted and we should return null.  Limit the number of loops we extract
178193323Sed  // to avoid taking forever.
179193323Sed  static unsigned NumExtracted = 32;
180193323Sed  if (M->size() == NewM->size() || --NumExtracted == 0) {
181193323Sed    delete NewM;
182193323Sed    return 0;
183193323Sed  } else {
184193323Sed    assert(M->size() < NewM->size() && "Loop extract removed functions?");
185193323Sed    Module::iterator MI = NewM->begin();
186193323Sed    for (unsigned i = 0, e = M->size(); i != e; ++i)
187193323Sed      ++MI;
188193323Sed  }
189193323Sed
190193323Sed  return NewM;
191193323Sed}
192193323Sed
193193323Sed
194193323Sed// DeleteFunctionBody - "Remove" the function by deleting all of its basic
195193323Sed// blocks, making it external.
196193323Sed//
197193323Sedvoid llvm::DeleteFunctionBody(Function *F) {
198193323Sed  // delete the body of the function...
199193323Sed  F->deleteBody();
200193323Sed  assert(F->isDeclaration() && "This didn't make the function external!");
201193323Sed}
202193323Sed
203193323Sed/// GetTorInit - Given a list of entries for static ctors/dtors, return them
204193323Sed/// as a constant array.
205193323Sedstatic Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
206193323Sed  assert(!TorList.empty() && "Don't create empty tor list!");
207193323Sed  std::vector<Constant*> ArrayElts;
208224133Sdim  Type *Int32Ty = Type::getInt32Ty(TorList[0].first->getContext());
209224133Sdim
210226584Sdim  StructType *STy =
211224133Sdim    StructType::get(Int32Ty, TorList[0].first->getType(), NULL);
212193323Sed  for (unsigned i = 0, e = TorList.size(); i != e; ++i) {
213224133Sdim    Constant *Elts[] = {
214224133Sdim      ConstantInt::get(Int32Ty, TorList[i].second),
215224133Sdim      TorList[i].first
216224133Sdim    };
217224133Sdim    ArrayElts.push_back(ConstantStruct::get(STy, Elts));
218193323Sed  }
219193323Sed  return ConstantArray::get(ArrayType::get(ArrayElts[0]->getType(),
220193323Sed                                           ArrayElts.size()),
221193323Sed                            ArrayElts);
222193323Sed}
223193323Sed
224193323Sed/// SplitStaticCtorDtor - A module was recently split into two parts, M1/M2, and
225193323Sed/// M1 has all of the global variables.  If M2 contains any functions that are
226193323Sed/// static ctors/dtors, we need to add an llvm.global_[cd]tors global to M2, and
227193323Sed/// prune appropriate entries out of M1s list.
228193323Sedstatic void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
229218885Sdim                                ValueToValueMapTy &VMap) {
230193323Sed  GlobalVariable *GV = M1->getNamedGlobal(GlobalName);
231193323Sed  if (!GV || GV->isDeclaration() || GV->hasLocalLinkage() ||
232193323Sed      !GV->use_empty()) return;
233193323Sed
234193323Sed  std::vector<std::pair<Function*, int> > M1Tors, M2Tors;
235193323Sed  ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
236193323Sed  if (!InitList) return;
237193323Sed
238193323Sed  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
239193323Sed    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
240193323Sed      if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
241193323Sed
242193323Sed      if (CS->getOperand(1)->isNullValue())
243193323Sed        break;  // Found a null terminator, stop here.
244193323Sed
245193323Sed      ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
246193323Sed      int Priority = CI ? CI->getSExtValue() : 0;
247193323Sed
248193323Sed      Constant *FP = CS->getOperand(1);
249193323Sed      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
250193323Sed        if (CE->isCast())
251193323Sed          FP = CE->getOperand(0);
252193323Sed      if (Function *F = dyn_cast<Function>(FP)) {
253193323Sed        if (!F->isDeclaration())
254193323Sed          M1Tors.push_back(std::make_pair(F, Priority));
255193323Sed        else {
256193323Sed          // Map to M2's version of the function.
257210006Srdivacky          F = cast<Function>(VMap[F]);
258193323Sed          M2Tors.push_back(std::make_pair(F, Priority));
259193323Sed        }
260193323Sed      }
261193323Sed    }
262193323Sed  }
263193323Sed
264193323Sed  GV->eraseFromParent();
265193323Sed  if (!M1Tors.empty()) {
266193323Sed    Constant *M1Init = GetTorInit(M1Tors);
267198090Srdivacky    new GlobalVariable(*M1, M1Init->getType(), false,
268198090Srdivacky                       GlobalValue::AppendingLinkage,
269198090Srdivacky                       M1Init, GlobalName);
270193323Sed  }
271193323Sed
272193323Sed  GV = M2->getNamedGlobal(GlobalName);
273193323Sed  assert(GV && "Not a clone of M1?");
274193323Sed  assert(GV->use_empty() && "llvm.ctors shouldn't have uses!");
275193323Sed
276193323Sed  GV->eraseFromParent();
277193323Sed  if (!M2Tors.empty()) {
278193323Sed    Constant *M2Init = GetTorInit(M2Tors);
279198090Srdivacky    new GlobalVariable(*M2, M2Init->getType(), false,
280198090Srdivacky                       GlobalValue::AppendingLinkage,
281198090Srdivacky                       M2Init, GlobalName);
282193323Sed  }
283193323Sed}
284193323Sed
285193323Sed
286193323Sed/// SplitFunctionsOutOfModule - Given a module and a list of functions in the
287193323Sed/// module, split the functions OUT of the specified module, and place them in
288193323Sed/// the new module.
289193323SedModule *
290193323Sedllvm::SplitFunctionsOutOfModule(Module *M,
291193323Sed                                const std::vector<Function*> &F,
292218885Sdim                                ValueToValueMapTy &VMap) {
293193323Sed  // Make sure functions & globals are all external so that linkage
294193323Sed  // between the two modules will work.
295193323Sed  for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
296193323Sed    I->setLinkage(GlobalValue::ExternalLinkage);
297193323Sed  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
298193323Sed       I != E; ++I) {
299198090Srdivacky    if (I->hasName() && I->getName()[0] == '\01')
300198090Srdivacky      I->setName(I->getName().substr(1));
301193323Sed    I->setLinkage(GlobalValue::ExternalLinkage);
302193323Sed  }
303193323Sed
304218885Sdim  ValueToValueMapTy NewVMap;
305210006Srdivacky  Module *New = CloneModule(M, NewVMap);
306193323Sed
307193323Sed  // Remove the Test functions from the Safe module
308193323Sed  std::set<Function *> TestFunctions;
309193323Sed  for (unsigned i = 0, e = F.size(); i != e; ++i) {
310210006Srdivacky    Function *TNOF = cast<Function>(VMap[F[i]]);
311198090Srdivacky    DEBUG(errs() << "Removing function ");
312198090Srdivacky    DEBUG(WriteAsOperand(errs(), TNOF, false));
313198090Srdivacky    DEBUG(errs() << "\n");
314210006Srdivacky    TestFunctions.insert(cast<Function>(NewVMap[TNOF]));
315193323Sed    DeleteFunctionBody(TNOF);       // Function is now external in this module!
316193323Sed  }
317193323Sed
318193323Sed
319193323Sed  // Remove the Safe functions from the Test module
320193323Sed  for (Module::iterator I = New->begin(), E = New->end(); I != E; ++I)
321193323Sed    if (!TestFunctions.count(I))
322193323Sed      DeleteFunctionBody(I);
323193323Sed
324193323Sed
325234353Sdim  // Try to split the global initializers evenly
326234353Sdim  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
327234353Sdim       I != E; ++I) {
328234353Sdim    GlobalVariable *GV = cast<GlobalVariable>(NewVMap[I]);
329234353Sdim    if (Function *TestFn = globalInitUsesExternalBA(I)) {
330234353Sdim      if (Function *SafeFn = globalInitUsesExternalBA(GV)) {
331234353Sdim        errs() << "*** Error: when reducing functions, encountered "
332234353Sdim                  "the global '";
333234353Sdim        WriteAsOperand(errs(), GV, false);
334234353Sdim        errs() << "' with an initializer that references blockaddresses "
335234353Sdim                  "from safe function '" << SafeFn->getName()
336234353Sdim               << "' and from test function '" << TestFn->getName() << "'.\n";
337234353Sdim        exit(1);
338234353Sdim      }
339234353Sdim      I->setInitializer(0);  // Delete the initializer to make it external
340234353Sdim    } else {
341234353Sdim      // If we keep it in the safe module, then delete it in the test module
342234353Sdim      GV->setInitializer(0);
343234353Sdim    }
344234353Sdim  }
345234353Sdim
346193323Sed  // Make sure that there is a global ctor/dtor array in both halves of the
347193323Sed  // module if they both have static ctor/dtor functions.
348210006Srdivacky  SplitStaticCtorDtor("llvm.global_ctors", M, New, NewVMap);
349210006Srdivacky  SplitStaticCtorDtor("llvm.global_dtors", M, New, NewVMap);
350193323Sed
351193323Sed  return New;
352193323Sed}
353193323Sed
354193323Sed//===----------------------------------------------------------------------===//
355193323Sed// Basic Block Extraction Code
356193323Sed//===----------------------------------------------------------------------===//
357193323Sed
358193323Sed/// ExtractMappedBlocksFromModule - Extract all but the specified basic blocks
359193323Sed/// into their own functions.  The only detail is that M is actually a module
360193323Sed/// cloned from the one the BBs are in, so some mapping needs to be performed.
361193323Sed/// If this operation fails for some reason (ie the implementation is buggy),
362193323Sed/// this function should return null, otherwise it returns a new Module.
363193323SedModule *BugDriver::ExtractMappedBlocksFromModule(const
364193323Sed                                                 std::vector<BasicBlock*> &BBs,
365193323Sed                                                 Module *M) {
366263508Sdim  SmallString<128> Filename;
367263508Sdim  int FD;
368263508Sdim  error_code EC = sys::fs::createUniqueFile(
369263508Sdim      OutputPrefix + "-extractblocks%%%%%%%", FD, Filename);
370263508Sdim  if (EC) {
371198090Srdivacky    outs() << "*** Basic Block extraction failed!\n";
372263508Sdim    errs() << "Error creating temporary file: " << EC.message() << "\n";
373212793Sdim    EmitProgressBitcode(M, "basicblockextractfail", true);
374193323Sed    return 0;
375193323Sed  }
376263508Sdim  sys::RemoveFileOnSignal(Filename);
377193323Sed
378263508Sdim  tool_output_file BlocksToNotExtractFile(Filename.c_str(), FD);
379193323Sed  for (std::vector<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
380193323Sed       I != E; ++I) {
381193323Sed    BasicBlock *BB = *I;
382193323Sed    // If the BB doesn't have a name, give it one so we have something to key
383193323Sed    // off of.
384193323Sed    if (!BB->hasName()) BB->setName("tmpbb");
385234353Sdim    BlocksToNotExtractFile.os() << BB->getParent()->getName() << " "
386212793Sdim                                << BB->getName() << "\n";
387193323Sed  }
388212793Sdim  BlocksToNotExtractFile.os().close();
389212793Sdim  if (BlocksToNotExtractFile.os().has_error()) {
390263508Sdim    errs() << "Error writing list of blocks to not extract\n";
391212793Sdim    EmitProgressBitcode(M, "basicblockextractfail", true);
392212793Sdim    BlocksToNotExtractFile.os().clear_error();
393212793Sdim    return 0;
394212793Sdim  }
395212793Sdim  BlocksToNotExtractFile.keep();
396193323Sed
397263508Sdim  std::string uniqueFN = "--extract-blocks-file=";
398263508Sdim  uniqueFN += Filename.str();
399203954Srdivacky  const char *ExtraArg = uniqueFN.c_str();
400193323Sed
401212793Sdim  std::vector<std::string> PI;
402212793Sdim  PI.push_back("extract-blocks");
403193323Sed  Module *Ret = runPassesOn(M, PI, false, 1, &ExtraArg);
404193323Sed
405263508Sdim  sys::fs::remove(Filename.c_str());
406193323Sed
407193323Sed  if (Ret == 0) {
408198090Srdivacky    outs() << "*** Basic Block extraction failed, please report a bug!\n";
409212793Sdim    EmitProgressBitcode(M, "basicblockextractfail", true);
410193323Sed  }
411193323Sed  return Ret;
412193323Sed}
413