ConstantMerge.cpp revision 360784
1//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface to a pass that merges duplicate global
10// constants together into a single constant that is shared.  This is useful
11// because some passes (ie TraceValues) insert a lot of string constants into
12// the program, regardless of whether or not an existing string is available.
13//
14// Algorithm: ConstantMerge is designed to build up a map of available constants
15// and eliminate duplicates when it is initialized.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/Transforms/IPO/ConstantMerge.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DataLayout.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/GlobalValue.h"
28#include "llvm/IR/GlobalVariable.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/Module.h"
31#include "llvm/InitializePasses.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Transforms/IPO.h"
35#include <algorithm>
36#include <cassert>
37#include <utility>
38
39using namespace llvm;
40
41#define DEBUG_TYPE "constmerge"
42
43STATISTIC(NumIdenticalMerged, "Number of identical global constants merged");
44
45/// Find values that are marked as llvm.used.
46static void FindUsedValues(GlobalVariable *LLVMUsed,
47                           SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
48  if (!LLVMUsed) return;
49  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
50
51  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
52    Value *Operand = Inits->getOperand(i)->stripPointerCasts();
53    GlobalValue *GV = cast<GlobalValue>(Operand);
54    UsedValues.insert(GV);
55  }
56}
57
58// True if A is better than B.
59static bool IsBetterCanonical(const GlobalVariable &A,
60                              const GlobalVariable &B) {
61  if (!A.hasLocalLinkage() && B.hasLocalLinkage())
62    return true;
63
64  if (A.hasLocalLinkage() && !B.hasLocalLinkage())
65    return false;
66
67  return A.hasGlobalUnnamedAddr();
68}
69
70static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) {
71  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
72  GV->getAllMetadata(MDs);
73  for (const auto &V : MDs)
74    if (V.first != LLVMContext::MD_dbg)
75      return true;
76  return false;
77}
78
79static void copyDebugLocMetadata(const GlobalVariable *From,
80                                 GlobalVariable *To) {
81  SmallVector<DIGlobalVariableExpression *, 1> MDs;
82  From->getDebugInfo(MDs);
83  for (auto MD : MDs)
84    To->addDebugInfo(MD);
85}
86
87static unsigned getAlignment(GlobalVariable *GV) {
88  unsigned Align = GV->getAlignment();
89  if (Align)
90    return Align;
91  return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
92}
93
94static bool
95isUnmergeableGlobal(GlobalVariable *GV,
96                    const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) {
97  // Only process constants with initializers in the default address space.
98  return !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
99         GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
100         // Don't touch values marked with attribute(used).
101         UsedGlobals.count(GV);
102}
103
104enum class CanMerge { No, Yes };
105static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) {
106  if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
107    return CanMerge::No;
108  if (hasMetadataOtherThanDebugLoc(Old))
109    return CanMerge::No;
110  assert(!hasMetadataOtherThanDebugLoc(New));
111  if (!Old->hasGlobalUnnamedAddr())
112    New->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
113  return CanMerge::Yes;
114}
115
116static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
117  Constant *NewConstant = New;
118
119  LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @"
120                    << New->getName() << "\n");
121
122  // Bump the alignment if necessary.
123  if (Old->getAlignment() || New->getAlignment())
124    New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New))));
125
126  copyDebugLocMetadata(Old, New);
127  Old->replaceAllUsesWith(NewConstant);
128
129  // Delete the global value from the module.
130  assert(Old->hasLocalLinkage() &&
131         "Refusing to delete an externally visible global variable.");
132  Old->eraseFromParent();
133}
134
135static bool mergeConstants(Module &M) {
136  // Find all the globals that are marked "used".  These cannot be merged.
137  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
138  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
139  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
140
141  // Map unique constants to globals.
142  DenseMap<Constant *, GlobalVariable *> CMap;
143
144  SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32>
145      SameContentReplacements;
146
147  size_t ChangesMade = 0;
148  size_t OldChangesMade = 0;
149
150  // Iterate constant merging while we are still making progress.  Merging two
151  // constants together may allow us to merge other constants together if the
152  // second level constants have initializers which point to the globals that
153  // were just merged.
154  while (true) {
155    // Find the canonical constants others will be merged with.
156    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
157         GVI != E; ) {
158      GlobalVariable *GV = &*GVI++;
159
160      // If this GV is dead, remove it.
161      GV->removeDeadConstantUsers();
162      if (GV->use_empty() && GV->hasLocalLinkage()) {
163        GV->eraseFromParent();
164        ++ChangesMade;
165        continue;
166      }
167
168      if (isUnmergeableGlobal(GV, UsedGlobals))
169        continue;
170
171      // This transformation is legal for weak ODR globals in the sense it
172      // doesn't change semantics, but we really don't want to perform it
173      // anyway; it's likely to pessimize code generation, and some tools
174      // (like the Darwin linker in cases involving CFString) don't expect it.
175      if (GV->isWeakForLinker())
176        continue;
177
178      // Don't touch globals with metadata other then !dbg.
179      if (hasMetadataOtherThanDebugLoc(GV))
180        continue;
181
182      Constant *Init = GV->getInitializer();
183
184      // Check to see if the initializer is already known.
185      GlobalVariable *&Slot = CMap[Init];
186
187      // If this is the first constant we find or if the old one is local,
188      // replace with the current one. If the current is externally visible
189      // it cannot be replace, but can be the canonical constant we merge with.
190      bool FirstConstantFound = !Slot;
191      if (FirstConstantFound || IsBetterCanonical(*GV, *Slot)) {
192        Slot = GV;
193        LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName()
194                          << (FirstConstantFound ? "\n" : " (updated)\n"));
195      }
196    }
197
198    // Identify all globals that can be merged together, filling in the
199    // SameContentReplacements vector. We cannot do the replacement in this pass
200    // because doing so may cause initializers of other globals to be rewritten,
201    // invalidating the Constant* pointers in CMap.
202    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
203         GVI != E; ) {
204      GlobalVariable *GV = &*GVI++;
205
206      if (isUnmergeableGlobal(GV, UsedGlobals))
207        continue;
208
209      // We can only replace constant with local linkage.
210      if (!GV->hasLocalLinkage())
211        continue;
212
213      Constant *Init = GV->getInitializer();
214
215      // Check to see if the initializer is already known.
216      auto Found = CMap.find(Init);
217      if (Found == CMap.end())
218        continue;
219
220      GlobalVariable *Slot = Found->second;
221      if (Slot == GV)
222        continue;
223
224      if (makeMergeable(GV, Slot) == CanMerge::No)
225        continue;
226
227      // Make all uses of the duplicate constant use the canonical version.
228      LLVM_DEBUG(dbgs() << "Will replace: @" << GV->getName() << " -> @"
229                        << Slot->getName() << "\n");
230      SameContentReplacements.push_back(std::make_pair(GV, Slot));
231    }
232
233    // Now that we have figured out which replacements must be made, do them all
234    // now.  This avoid invalidating the pointers in CMap, which are unneeded
235    // now.
236    for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) {
237      GlobalVariable *Old = SameContentReplacements[i].first;
238      GlobalVariable *New = SameContentReplacements[i].second;
239      replace(M, Old, New);
240      ++ChangesMade;
241      ++NumIdenticalMerged;
242    }
243
244    if (ChangesMade == OldChangesMade)
245      break;
246    OldChangesMade = ChangesMade;
247
248    SameContentReplacements.clear();
249    CMap.clear();
250  }
251
252  return ChangesMade;
253}
254
255PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) {
256  if (!mergeConstants(M))
257    return PreservedAnalyses::all();
258  return PreservedAnalyses::none();
259}
260
261namespace {
262
263struct ConstantMergeLegacyPass : public ModulePass {
264  static char ID; // Pass identification, replacement for typeid
265
266  ConstantMergeLegacyPass() : ModulePass(ID) {
267    initializeConstantMergeLegacyPassPass(*PassRegistry::getPassRegistry());
268  }
269
270  // For this pass, process all of the globals in the module, eliminating
271  // duplicate constants.
272  bool runOnModule(Module &M) override {
273    if (skipModule(M))
274      return false;
275    return mergeConstants(M);
276  }
277};
278
279} // end anonymous namespace
280
281char ConstantMergeLegacyPass::ID = 0;
282
283INITIALIZE_PASS(ConstantMergeLegacyPass, "constmerge",
284                "Merge Duplicate Global Constants", false, false)
285
286ModulePass *llvm::createConstantMergePass() {
287  return new ConstantMergeLegacyPass();
288}
289