//===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ValueEnumerator class.
//
//===----------------------------------------------------------------------===//

#include "ValueEnumerator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

namespace {

struct OrderMap {
  DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
  unsigned LastGlobalConstantID = 0;
  unsigned LastGlobalValueID = 0;

  OrderMap() = default;

  bool isGlobalConstant(unsigned ID) const {
    return ID <= LastGlobalConstantID;
  }

  bool isGlobalValue(unsigned ID) const {
    return ID <= LastGlobalValueID && !isGlobalConstant(ID);
  }

  unsigned size() const { return IDs.size(); }
  std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }

  std::pair<unsigned, bool> lookup(const Value *V) const {
    return IDs.lookup(V);
  }

  void index(const Value *V) {
    // Explicitly sequence get-size and insert-value operations to avoid UB.
    unsigned ID = IDs.size() + 1;
    IDs[V].first = ID;
  }
};

} // end anonymous namespace

static void orderValue(const Value *V, OrderMap &OM) {
  if (OM.lookup(V).first)
    return;

  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (C->getNumOperands() && !isa<GlobalValue>(C)) {
      for (const Value *Op : C->operands())
        if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
          orderValue(Op, OM);
      if (auto *CE = dyn_cast<ConstantExpr>(C))
        if (CE->getOpcode() == Instruction::ShuffleVector)
          orderValue(CE->getShuffleMaskForBitcode(), OM);
    }
  }

  // Note: we cannot cache this lookup above, since inserting into the map
  // changes the map's size, and thus affects the other IDs.
  OM.index(V);
}
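
// Illustrative example (a sketch, not exercised by this file): for an
// initializer such as
//
//   @g = global [2 x i32] [i32 1, i32 2]
//
// orderValue() above assigns IDs to the operands `i32 1` and `i32 2` before
// the array constant itself, matching the order in which the reader
// materializes them.  GlobalValue operands are skipped here; orderModule()
// numbers those separately.
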
106
107static OrderMap orderModule(const Module &M) {
108  // This needs to match the order used by ValueEnumerator::ValueEnumerator()
109  // and ValueEnumerator::incorporateFunction().
110  OrderMap OM;
111
112  // In the reader, initializers of GlobalValues are set *after* all the
113  // globals have been read.  Rather than awkwardly modeling this behaviour
114  // directly in predictValueUseListOrderImpl(), just assign IDs to
115  // initializers of GlobalValues before GlobalValues themselves to model this
116  // implicitly.
117  for (const GlobalVariable &G : M.globals())
118    if (G.hasInitializer())
119      if (!isa<GlobalValue>(G.getInitializer()))
120        orderValue(G.getInitializer(), OM);
121  for (const GlobalAlias &A : M.aliases())
122    if (!isa<GlobalValue>(A.getAliasee()))
123      orderValue(A.getAliasee(), OM);
124  for (const GlobalIFunc &I : M.ifuncs())
125    if (!isa<GlobalValue>(I.getResolver()))
126      orderValue(I.getResolver(), OM);
127  for (const Function &F : M) {
128    for (const Use &U : F.operands())
129      if (!isa<GlobalValue>(U.get()))
130        orderValue(U.get(), OM);
131  }
132  OM.LastGlobalConstantID = OM.size();
133
134  // Initializers of GlobalValues are processed in
135  // BitcodeReader::ResolveGlobalAndAliasInits().  Match the order there rather
136  // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
137  // by giving IDs in reverse order.
138  //
139  // Since GlobalValues never reference each other directly (just through
140  // initializers), their relative IDs only matter for determining order of
141  // uses in their initializers.
142  for (const Function &F : M)
143    orderValue(&F, OM);
144  for (const GlobalAlias &A : M.aliases())
145    orderValue(&A, OM);
146  for (const GlobalIFunc &I : M.ifuncs())
147    orderValue(&I, OM);
148  for (const GlobalVariable &G : M.globals())
149    orderValue(&G, OM);
150  OM.LastGlobalValueID = OM.size();
151
152  for (const Function &F : M) {
153    if (F.isDeclaration())
154      continue;
155    // Here we need to match the union of ValueEnumerator::incorporateFunction()
156    // and WriteFunction().  Basic blocks are implicitly declared before
157    // anything else (by declaring their size).
158    for (const BasicBlock &BB : F)
159      orderValue(&BB, OM);
160    for (const Argument &A : F.args())
161      orderValue(&A, OM);
162    for (const BasicBlock &BB : F)
163      for (const Instruction &I : BB) {
164        for (const Value *Op : I.operands())
165          if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
166              isa<InlineAsm>(*Op))
167            orderValue(Op, OM);
168        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
169          orderValue(SVI->getShuffleMaskForBitcode(), OM);
170      }
171    for (const BasicBlock &BB : F)
172      for (const Instruction &I : BB)
173        orderValue(&I, OM);
174  }
175  return OM;
176}
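
// To make the resulting layout concrete, a hypothetical module
//
//   @g = global i32 42
//   define void @f() { ret void }
//
// is numbered by orderModule() above as: `i32 42` -> 1 (so
// LastGlobalConstantID == 1), then @f -> 2 and @g -> 3 (so
// LastGlobalValueID == 3), and finally @f's entry block and its `ret`
// instruction -> 4 and 5.  isGlobalConstant() and isGlobalValue() classify
// an ID purely by these two cutoffs.
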
177
178static void predictValueUseListOrderImpl(const Value *V, const Function *F,
179                                         unsigned ID, const OrderMap &OM,
180                                         UseListOrderStack &Stack) {
181  // Predict use-list order for this one.
182  using Entry = std::pair<const Use *, unsigned>;
183  SmallVector<Entry, 64> List;
184  for (const Use &U : V->uses())
185    // Check if this user will be serialized.
186    if (OM.lookup(U.getUser()).first)
187      List.push_back(std::make_pair(&U, List.size()));
188
189  if (List.size() < 2)
190    // We may have lost some users.
191    return;
192
193  bool IsGlobalValue = OM.isGlobalValue(ID);
194  llvm::sort(List, [&](const Entry &L, const Entry &R) {
195    const Use *LU = L.first;
196    const Use *RU = R.first;
197    if (LU == RU)
198      return false;
199
200    auto LID = OM.lookup(LU->getUser()).first;
201    auto RID = OM.lookup(RU->getUser()).first;
202
203    // Global values are processed in reverse order.
204    //
205    // Moreover, initializers of GlobalValues are set *after* all the globals
206    // have been read (despite having earlier IDs).  Rather than awkwardly
207    // modeling this behaviour here, orderModule() has assigned IDs to
208    // initializers of GlobalValues before GlobalValues themselves.
209    if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID))
210      return LID < RID;
211
212    // If ID is 4, then expect: 7 6 5 1 2 3.
213    if (LID < RID) {
214      if (RID <= ID)
215        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
216          return true;
217      return false;
218    }
219    if (RID < LID) {
220      if (LID <= ID)
221        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
222          return false;
223      return true;
224    }
225
226    // LID and RID are equal, so we have different operands of the same user.
227    // Assume operands are added in order for all instructions.
228    if (LID <= ID)
229      if (!IsGlobalValue) // GlobalValue uses don't get reversed.
230        return LU->getOperandNo() < RU->getOperandNo();
231    return LU->getOperandNo() > RU->getOperandNo();
232  });
233
234  if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) {
235        return L.second < R.second;
236      }))
237    // Order is already correct.
238    return;
239
240  // Store the shuffle.
241  Stack.emplace_back(V, F, List.size());
242  assert(List.size() == Stack.back().Shuffle.size() && "Wrong size");
243  for (size_t I = 0, E = List.size(); I != E; ++I)
244    Stack.back().Shuffle[I] = List[I].second;
245}
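
// A sketch of why the comparator above predicts "7 6 5 1 2 3" for a value
// with ID 4: users 5-7 are created by the reader after the value exists and,
// since new uses are pushed onto the front of a use-list, end up first and in
// reverse creation order, while users 1-3 referenced the value through
// forward references, which resolve into ascending order.  This is a model
// of the reader's behaviour, not something verified here.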

static void predictValueUseListOrder(const Value *V, const Function *F,
                                     OrderMap &OM, UseListOrderStack &Stack) {
  auto &IDPair = OM[V];
  assert(IDPair.first && "Unmapped value");
  if (IDPair.second)
    // Already predicted.
    return;

  // Do the actual prediction.
  IDPair.second = true;
  if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
    predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);

  // Recursive descent into constants.
  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (C->getNumOperands()) {
      for (const Value *Op : C->operands())
        if (isa<Constant>(Op)) // Visit GlobalValues.
          predictValueUseListOrder(Op, F, OM, Stack);
      if (auto *CE = dyn_cast<ConstantExpr>(C))
        if (CE->getOpcode() == Instruction::ShuffleVector)
          predictValueUseListOrder(CE->getShuffleMaskForBitcode(), F, OM,
                                   Stack);
    }
  }
}

static UseListOrderStack predictUseListOrder(const Module &M) {
  OrderMap OM = orderModule(M);

  // Use-list orders need to be serialized after all the users have been added
  // to a value, or else the shuffles will be incomplete.  Store them per
  // function in a stack.
  //
  // Aside from function order, the order of values doesn't matter much here.
  UseListOrderStack Stack;

  // We want to visit the functions backward now so we can list function-local
  // constants in the last Function they're used in.  Module-level constants
  // have already been visited above.
  for (auto I = M.rbegin(), E = M.rend(); I != E; ++I) {
    const Function &F = *I;
    if (F.isDeclaration())
      continue;
    for (const BasicBlock &BB : F)
      predictValueUseListOrder(&BB, &F, OM, Stack);
    for (const Argument &A : F.args())
      predictValueUseListOrder(&A, &F, OM, Stack);
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB) {
        for (const Value *Op : I.operands())
          if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
            predictValueUseListOrder(Op, &F, OM, Stack);
        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
          predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM,
                                   Stack);
      }
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB)
        predictValueUseListOrder(&I, &F, OM, Stack);
  }

  // Visit globals last, since the module-level use-list block will be seen
  // before the function bodies are processed.
  for (const GlobalVariable &G : M.globals())
    predictValueUseListOrder(&G, nullptr, OM, Stack);
  for (const Function &F : M)
    predictValueUseListOrder(&F, nullptr, OM, Stack);
  for (const GlobalAlias &A : M.aliases())
    predictValueUseListOrder(&A, nullptr, OM, Stack);
  for (const GlobalIFunc &I : M.ifuncs())
    predictValueUseListOrder(&I, nullptr, OM, Stack);
  for (const GlobalVariable &G : M.globals())
    if (G.hasInitializer())
      predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
  for (const GlobalAlias &A : M.aliases())
    predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
  for (const GlobalIFunc &I : M.ifuncs())
    predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack);
  for (const Function &F : M) {
    for (const Use &U : F.operands())
      predictValueUseListOrder(U.get(), nullptr, OM, Stack);
  }

  return Stack;
}

static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
  return V.first->getType()->isIntOrIntVectorTy();
}

ValueEnumerator::ValueEnumerator(const Module &M,
                                 bool ShouldPreserveUseListOrder)
    : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
  if (ShouldPreserveUseListOrder)
    UseListOrders = predictUseListOrder(M);

  // Enumerate the global variables.
  for (const GlobalVariable &GV : M.globals())
    EnumerateValue(&GV);

  // Enumerate the functions.
  for (const Function &F : M) {
    EnumerateValue(&F);
    EnumerateAttributes(F.getAttributes());
  }

  // Enumerate the aliases.
  for (const GlobalAlias &GA : M.aliases())
    EnumerateValue(&GA);

  // Enumerate the ifuncs.
  for (const GlobalIFunc &GIF : M.ifuncs())
    EnumerateValue(&GIF);

  // Remember the cutoff between GlobalValues and other constants.
  unsigned FirstConstant = Values.size();

  // Enumerate the global variable initializers and attributes.
  for (const GlobalVariable &GV : M.globals()) {
    if (GV.hasInitializer())
      EnumerateValue(GV.getInitializer());
    if (GV.hasAttributes())
      EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex));
  }

  // Enumerate the aliasees.
  for (const GlobalAlias &GA : M.aliases())
    EnumerateValue(GA.getAliasee());

  // Enumerate the ifunc resolvers.
  for (const GlobalIFunc &GIF : M.ifuncs())
    EnumerateValue(GIF.getResolver());

  // Enumerate any optional Function data.
  for (const Function &F : M)
    for (const Use &U : F.operands())
      EnumerateValue(U.get());

  // Enumerate the metadata type.
  //
  // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
  // only encodes the metadata type when it's used as a value.
  EnumerateType(Type::getMetadataTy(M.getContext()));

  // Insert constants and metadata that are named at module level into the slot
  // pool so that the module symbol table can refer to them...
  EnumerateValueSymbolTable(M.getValueSymbolTable());
  EnumerateNamedMetadata(M);

  SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
  for (const GlobalVariable &GV : M.globals()) {
    MDs.clear();
    GV.getAllMetadata(MDs);
    for (const auto &I : MDs)
      // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer
      // to write metadata to the global variable's own metadata block
      // (PR28134).
      EnumerateMetadata(nullptr, I.second);
  }

  // Enumerate types used by function bodies and argument lists.
  for (const Function &F : M) {
    for (const Argument &A : F.args())
      EnumerateType(A.getType());

    // Enumerate metadata attached to this function.
    MDs.clear();
    F.getAllMetadata(MDs);
    for (const auto &I : MDs)
      EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second);

    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB) {
        for (const Use &Op : I.operands()) {
          auto *MD = dyn_cast<MetadataAsValue>(&Op);
          if (!MD) {
            EnumerateOperandType(Op);
            continue;
          }

          // Local metadata is enumerated during function-incorporation.
          if (isa<LocalAsMetadata>(MD->getMetadata()))
            continue;

          EnumerateMetadata(&F, MD->getMetadata());
        }
        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
          EnumerateType(SVI->getShuffleMaskForBitcode()->getType());
        EnumerateType(I.getType());
        if (const auto *Call = dyn_cast<CallBase>(&I))
          EnumerateAttributes(Call->getAttributes());

        // Enumerate metadata attached to this instruction.
        MDs.clear();
        I.getAllMetadataOtherThanDebugLoc(MDs);
        for (unsigned i = 0, e = MDs.size(); i != e; ++i)
          EnumerateMetadata(&F, MDs[i].second);

        // Don't enumerate the location directly -- it has a special record
        // type -- but enumerate its operands.
        if (DILocation *L = I.getDebugLoc())
          for (const Metadata *Op : L->operands())
            EnumerateMetadata(&F, Op);
      }
  }

  // Optimize constant ordering.
  OptimizeConstants(FirstConstant, Values.size());

  // Organize metadata ordering.
  organizeMetadata();
}

unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
  InstructionMapType::const_iterator I = InstructionMap.find(Inst);
  assert(I != InstructionMap.end() && "Instruction is not mapped!");
  return I->second;
}

unsigned ValueEnumerator::getComdatID(const Comdat *C) const {
  unsigned ComdatID = Comdats.idFor(C);
  assert(ComdatID && "Comdat not found!");
  return ComdatID;
}

void ValueEnumerator::setInstructionID(const Instruction *I) {
  InstructionMap[I] = InstructionCount++;
}

unsigned ValueEnumerator::getValueID(const Value *V) const {
  if (auto *MD = dyn_cast<MetadataAsValue>(V))
    return getMetadataID(MD->getMetadata());

  ValueMapType::const_iterator I = ValueMap.find(V);
  assert(I != ValueMap.end() && "Value not in slotcalculator!");
  return I->second-1;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ValueEnumerator::dump() const {
  print(dbgs(), ValueMap, "Default");
  dbgs() << '\n';
  print(dbgs(), MetadataMap, "MetaData");
  dbgs() << '\n';
}
#endif

void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
                            const char *Name) const {
  OS << "Map Name: " << Name << "\n";
  OS << "Size: " << Map.size() << "\n";
  for (ValueMapType::const_iterator I = Map.begin(),
         E = Map.end(); I != E; ++I) {
    const Value *V = I->first;
    if (V->hasName())
      OS << "Value: " << V->getName();
    else
      OS << "Value: [null]";
    V->print(errs());
    errs() << '\n';

    OS << " Uses(" << V->getNumUses() << "):";
    for (const Use &U : V->uses()) {
      if (&U != &*V->use_begin())
        OS << ",";
      if (U->hasName())
        OS << " " << U->getName();
      else
        OS << " [null]";
    }
    OS << "\n\n";
  }
}

void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
                            const char *Name) const {
  OS << "Map Name: " << Name << "\n";
  OS << "Size: " << Map.size() << "\n";
  for (auto I = Map.begin(), E = Map.end(); I != E; ++I) {
    const Metadata *MD = I->first;
    OS << "Metadata: slot = " << I->second.ID << "\n";
    OS << "Metadata: function = " << I->second.F << "\n";
    MD->print(OS);
    OS << "\n";
  }
}

/// OptimizeConstants - Reorder constant pool for denser encoding.
void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
  if (CstStart == CstEnd || CstStart+1 == CstEnd) return;

  if (ShouldPreserveUseListOrder)
    // Optimizing constants makes the use-list order difficult to predict.
    // Disable it for now when trying to preserve the order.
    return;

  std::stable_sort(Values.begin() + CstStart, Values.begin() + CstEnd,
                   [this](const std::pair<const Value *, unsigned> &LHS,
                          const std::pair<const Value *, unsigned> &RHS) {
                     // Sort by plane.
                     if (LHS.first->getType() != RHS.first->getType())
                       return getTypeID(LHS.first->getType()) <
                              getTypeID(RHS.first->getType());
                     // Then by frequency.
                     return LHS.second > RHS.second;
                   });

  // Ensure that integer and vector of integer constants are at the start of
  // the constant pool.  This is important so that GEP structure indices come
  // before GEP constant exprs.
  std::stable_partition(Values.begin() + CstStart, Values.begin() + CstEnd,
                        isIntOrIntVectorValue);

  // Rebuild the modified portion of ValueMap.
  for (; CstStart != CstEnd; ++CstStart)
    ValueMap[Values[CstStart].first] = CstStart+1;
}
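
// Illustrative sketch of the effect of OptimizeConstants() above: given
// function-local constants (i32 7, 1 use), (float 1.0, 5 uses), and
// (i32 0, 9 uses), the sort groups by type plane (assuming i32 was
// enumerated before float) and by decreasing use count within a plane, and
// the partition then guarantees the integer plane leads, yielding the order
// i32 0, i32 7, float 1.0.  Hot integer constants therefore receive the
// smallest relative IDs.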

/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
/// table into the values table.
void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
       VI != VE; ++VI)
    EnumerateValue(VI->getValue());
}

/// Insert all of the values referenced by named metadata in the specified
/// module.
void ValueEnumerator::EnumerateNamedMetadata(const Module &M) {
  for (const auto &I : M.named_metadata())
    EnumerateNamedMDNode(&I);
}

void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
    EnumerateMetadata(nullptr, MD->getOperand(i));
}

unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const {
  return F ? getValueID(F) + 1 : 0;
}

void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) {
  EnumerateMetadata(getMetadataFunctionID(F), MD);
}

void ValueEnumerator::EnumerateFunctionLocalMetadata(
    const Function &F, const LocalAsMetadata *Local) {
  EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local);
}

void ValueEnumerator::dropFunctionFromMetadata(
    MetadataMapType::value_type &FirstMD) {
  SmallVector<const MDNode *, 64> Worklist;
  auto push = [&Worklist](MetadataMapType::value_type &MD) {
    auto &Entry = MD.second;

    // Nothing to do if this metadata isn't tagged.
    if (!Entry.F)
      return;

    // Drop the function tag.
    Entry.F = 0;

    // If this has an ID and is an MDNode, then its operands have entries as
    // well.  We need to drop the function from them too.
    if (Entry.ID)
      if (auto *N = dyn_cast<MDNode>(MD.first))
        Worklist.push_back(N);
  };
  push(FirstMD);
  while (!Worklist.empty())
    for (const Metadata *Op : Worklist.pop_back_val()->operands()) {
      if (!Op)
        continue;
      auto MD = MetadataMap.find(Op);
      if (MD != MetadataMap.end())
        push(*MD);
    }
}

void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
  // It's vital for reader efficiency that uniqued subgraphs are done in
  // post-order; it's expensive when their operands have forward references.
  // If a distinct node is referenced from a uniqued node, it'll be delayed
  // until the uniqued subgraph has been completely traversed.
  SmallVector<const MDNode *, 32> DelayedDistinctNodes;

  // Start by enumerating MD, and then work through its transitive operands in
  // post-order.  This requires a depth-first search.
  SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist;
  if (const MDNode *N = enumerateMetadataImpl(F, MD))
    Worklist.push_back(std::make_pair(N, N->op_begin()));

  while (!Worklist.empty()) {
    const MDNode *N = Worklist.back().first;

    // Enumerate operands until we hit a new node.  We need to traverse these
    // nodes' operands before visiting the rest of N's operands.
    MDNode::op_iterator I = std::find_if(
        Worklist.back().second, N->op_end(),
        [&](const Metadata *MD) { return enumerateMetadataImpl(F, MD); });
    if (I != N->op_end()) {
      auto *Op = cast<MDNode>(*I);
      Worklist.back().second = ++I;

      // Delay traversing Op if it's a distinct node and N is uniqued.
      if (Op->isDistinct() && !N->isDistinct())
        DelayedDistinctNodes.push_back(Op);
      else
        Worklist.push_back(std::make_pair(Op, Op->op_begin()));
      continue;
    }

    // All the operands have been visited.  Now assign an ID.
    Worklist.pop_back();
    MDs.push_back(N);
    MetadataMap[N].ID = MDs.size();

    // Flush out any delayed distinct nodes; these are all the distinct nodes
    // that are leaves in the last uniqued subgraph.
    if (Worklist.empty() || Worklist.back().first->isDistinct()) {
      for (const MDNode *N : DelayedDistinctNodes)
        Worklist.push_back(std::make_pair(N, N->op_begin()));
      DelayedDistinctNodes.clear();
    }
  }
}
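
// Illustrative walk-through of the DFS above on a hypothetical graph: let
// uniqued !U have operands !A (uniqued leaf) and !D (distinct).  The search
// enumerates and numbers !A first, delays !D (a distinct operand of a
// uniqued node), numbers !U once all its operands are done, and only then
// flushes and traverses !D.  The uniqued subgraph {!A, !U} is thus emitted
// in post-order with no forward references, the cheap case for the reader.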

const MDNode *ValueEnumerator::enumerateMetadataImpl(unsigned F,
                                                     const Metadata *MD) {
  if (!MD)
    return nullptr;

  assert(
      (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
      "Invalid metadata kind");

  auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F)));
  MDIndex &Entry = Insertion.first->second;
  if (!Insertion.second) {
    // Already mapped.  If F doesn't match the function tag, drop it.
    if (Entry.hasDifferentFunction(F))
      dropFunctionFromMetadata(*Insertion.first);
    return nullptr;
  }

  // Don't assign IDs to MDNodes here; the caller numbers them in post-order,
  // after their operands have been visited.
  if (auto *N = dyn_cast<MDNode>(MD))
    return N;

  // Save the metadata.
  MDs.push_back(MD);
  Entry.ID = MDs.size();

  // Enumerate the constant, if any.
  if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
    EnumerateValue(C->getValue());

  return nullptr;
}

/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
/// information reachable from the metadata.
void ValueEnumerator::EnumerateFunctionLocalMetadata(
    unsigned F, const LocalAsMetadata *Local) {
  assert(F && "Expected a function");

  // Check to see if it's already in!
  MDIndex &Index = MetadataMap[Local];
  if (Index.ID) {
    assert(Index.F == F && "Expected the same function");
    return;
  }

  MDs.push_back(Local);
  Index.F = F;
  Index.ID = MDs.size();

  EnumerateValue(Local->getValue());
}

static unsigned getMetadataTypeOrder(const Metadata *MD) {
  // Strings are emitted in bulk and must come first.
  if (isa<MDString>(MD))
    return 0;

  // ConstantAsMetadata doesn't reference anything.  We may as well shuffle it
  // to the front since we can detect it.
  auto *N = dyn_cast<MDNode>(MD);
  if (!N)
    return 1;

  // The reader resolves forward references to distinct node operands cheaply,
  // but is slow when uniqued operands are unresolved.
  return N->isDistinct() ? 2 : 3;
}
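
// For example, given one MDString, one ConstantAsMetadata, one distinct
// node, and one uniqued node, the buckets 0-3 above order them exactly like
// that: strings land in the bulk-emitted prefix, and uniqued nodes sort
// last so their operands are as resolved as possible when the reader sees
// them.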

void ValueEnumerator::organizeMetadata() {
  assert(MetadataMap.size() == MDs.size() &&
         "Metadata map and vector out of sync");

  if (MDs.empty())
    return;

  // Copy out the index information from MetadataMap in order to choose a new
  // order.
  SmallVector<MDIndex, 64> Order;
  Order.reserve(MetadataMap.size());
  for (const Metadata *MD : MDs)
    Order.push_back(MetadataMap.lookup(MD));

  // Partition:
  //   - by function, then
  //   - by getMetadataTypeOrder() (strings first),
  // and then sort by the original/current ID.  Since the IDs are guaranteed to
  // be unique, the result of std::sort will be deterministic.  There's no need
  // for std::stable_sort.
  llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
    return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
           std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID);
  });

  // Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
  // and fix up MetadataMap.
  std::vector<const Metadata *> OldMDs;
  MDs.swap(OldMDs);
  MDs.reserve(OldMDs.size());
  for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) {
    auto *MD = Order[I].get(OldMDs);
    MDs.push_back(MD);
    MetadataMap[MD].ID = I + 1;
    if (isa<MDString>(MD))
      ++NumMDStrings;
  }

  // Return early if there's nothing for the functions.
  if (MDs.size() == Order.size())
    return;

  // Build the function metadata ranges.
  MDRange R;
  FunctionMDs.reserve(OldMDs.size());
  unsigned PrevF = 0;
  for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E;
       ++I) {
    unsigned F = Order[I].F;
    if (!PrevF) {
      PrevF = F;
    } else if (PrevF != F) {
      R.Last = FunctionMDs.size();
      std::swap(R, FunctionMDInfo[PrevF]);
      R.First = FunctionMDs.size();

      ID = MDs.size();
      PrevF = F;
    }

    auto *MD = Order[I].get(OldMDs);
    FunctionMDs.push_back(MD);
    MetadataMap[MD].ID = ++ID;
    if (isa<MDString>(MD))
      ++R.NumStrings;
  }
  R.Last = FunctionMDs.size();
  FunctionMDInfo[PrevF] = R;
}
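
// Shape of the result (a sketch): after the sort, Order is laid out as
//   [module MDs (F == 0)] [MDs for function A] [MDs for function B] ...
// The first loop above copies the F == 0 prefix back into MDs; the second
// records each function's [First, Last) slice of FunctionMDs, plus its
// string count, in FunctionMDInfo so that incorporateFunctionMetadata() can
// splice the slice in when that function is written.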

void ValueEnumerator::incorporateFunctionMetadata(const Function &F) {
  NumModuleMDs = MDs.size();

  auto R = FunctionMDInfo.lookup(getValueID(&F) + 1);
  NumMDStrings = R.NumStrings;
  MDs.insert(MDs.end(), FunctionMDs.begin() + R.First,
             FunctionMDs.begin() + R.Last);
}

void ValueEnumerator::EnumerateValue(const Value *V) {
  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
  assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");

  // Check to see if it's already in!
  unsigned &ValueID = ValueMap[V];
  if (ValueID) {
    // Increment use count.
    Values[ValueID-1].second++;
    return;
  }

  if (auto *GO = dyn_cast<GlobalObject>(V))
    if (const Comdat *C = GO->getComdat())
      Comdats.insert(C);

  // Enumerate the type of this value.
  EnumerateType(V->getType());

  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (isa<GlobalValue>(C)) {
      // Initializers for globals are handled explicitly elsewhere.
    } else if (C->getNumOperands()) {
      // If a constant has operands, enumerate them.  This makes sure that if a
      // constant has uses (for example an array of const ints), they are
      // inserted as well.

      // We prefer to enumerate the operands before we enumerate the user
      // itself.  This makes it more likely that we can avoid forward references
      // in the reader.  We know that there can be no cycles in the constants
      // graph that don't go through a global variable.
      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
           I != E; ++I)
        if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
          EnumerateValue(*I);
      if (auto *CE = dyn_cast<ConstantExpr>(C))
        if (CE->getOpcode() == Instruction::ShuffleVector)
          EnumerateValue(CE->getShuffleMaskForBitcode());

      // Finally, add the value.  The recursive calls above may have grown
      // ValueMap and left the ValueID reference dangling, so don't reuse it.
      Values.push_back(std::make_pair(V, 1U));
      ValueMap[V] = Values.size();
      return;
    }
  }

  // Add the value.
  Values.push_back(std::make_pair(V, 1U));
  ValueID = Values.size();
}

void ValueEnumerator::EnumerateType(Type *Ty) {
  unsigned *TypeID = &TypeMap[Ty];

  // We've already seen this type.
  if (*TypeID)
    return;

  // If it is a non-anonymous struct, mark the type as being visited so that we
  // don't recursively visit it.  This is safe because we allow forward
  // references of these in the bitcode reader.
  if (StructType *STy = dyn_cast<StructType>(Ty))
    if (!STy->isLiteral())
      *TypeID = ~0U;

  // Enumerate all of the subtypes before we enumerate this type.  This ensures
  // that the type will be enumerated in an order that can be directly built.
  for (Type *SubTy : Ty->subtypes())
    EnumerateType(SubTy);

  // Refresh the TypeID pointer in case the table rehashed.
  TypeID = &TypeMap[Ty];

  // Check to see if we got the pointer another way.  This can happen when
  // enumerating recursive types that hit the base case deeper than they start.
  //
  // If this is actually a struct that we are treating as forward ref'able,
  // then emit the definition now that all of its contents are available.
  if (*TypeID && *TypeID != ~0U)
    return;

  // Add this type now that its contents are all happily enumerated.
  Types.push_back(Ty);

  *TypeID = Types.size();
}
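
// Illustrative example of the recursion guard above: for a self-referential
// type
//
//   %node = type { i32, %node* }
//
// EnumerateType(%node) first stores the ~0U sentinel for %node, recurses
// into i32 and %node* (whose pointee hits the sentinel and stops), and only
// then appends %node itself, so recursive types terminate and are still
// defined after their subtypes.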

// Enumerate the types for the specified value.  If the value is a constant,
// walk through it, enumerating the types of the constant.
void ValueEnumerator::EnumerateOperandType(const Value *V) {
  EnumerateType(V->getType());

  assert(!isa<MetadataAsValue>(V) && "Unexpected metadata operand");

  const Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return;

  // If this constant is already enumerated, ignore it; we know its type must
  // already be enumerated.
  if (ValueMap.count(C))
    return;

  // This constant may have operands; make sure to enumerate the types in
  // them.
  for (const Value *Op : C->operands()) {
    // Don't enumerate basic blocks here; this happens as operands to
    // blockaddress.
    if (isa<BasicBlock>(Op))
      continue;

    EnumerateOperandType(Op);
  }
  if (auto *CE = dyn_cast<ConstantExpr>(C))
    if (CE->getOpcode() == Instruction::ShuffleVector)
      EnumerateOperandType(CE->getShuffleMaskForBitcode());
}

void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
  if (PAL.isEmpty()) return;  // null is always 0.

  // Do a lookup.
  unsigned &Entry = AttributeListMap[PAL];
  if (Entry == 0) {
    // Never saw this before, add it.
    AttributeLists.push_back(PAL);
    Entry = AttributeLists.size();
  }

  // Do lookups for all attribute groups.
  for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) {
    AttributeSet AS = PAL.getAttributes(i);
    if (!AS.hasAttributes())
      continue;
    IndexAndAttrSet Pair = {i, AS};
    unsigned &Entry = AttributeGroupMap[Pair];
    if (Entry == 0) {
      AttributeGroups.push_back(Pair);
      Entry = AttributeGroups.size();
    }
  }
}

void ValueEnumerator::incorporateFunction(const Function &F) {
  InstructionCount = 0;
  NumModuleValues = Values.size();

  // Add global metadata to the function block.  This doesn't include
  // LocalAsMetadata.
  incorporateFunctionMetadata(F);

  // Add function arguments to the value table.
  for (const auto &I : F.args()) {
    EnumerateValue(&I);
    if (I.hasAttribute(Attribute::ByVal))
      EnumerateType(I.getParamByValType());
  }
  FirstFuncConstantID = Values.size();

  // Add all function-level constants to the value table.
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      for (const Use &OI : I.operands()) {
        if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI))
          EnumerateValue(OI);
      }
      if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
        EnumerateValue(SVI->getShuffleMaskForBitcode());
    }
    BasicBlocks.push_back(&BB);
    ValueMap[&BB] = BasicBlocks.size();
  }

  // Optimize the constant layout.
  OptimizeConstants(FirstFuncConstantID, Values.size());

  // Add the function's parameter attributes so they are available for use in
  // the function's instructions.
  EnumerateAttributes(F.getAttributes());

  FirstInstID = Values.size();

  SmallVector<LocalAsMetadata *, 8> FnLocalMDVector;
  // Add all of the instructions.
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      for (const Use &OI : I.operands()) {
        if (auto *MD = dyn_cast<MetadataAsValue>(&OI))
          if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata()))
            // Enumerate metadata after the instructions they might refer to.
            FnLocalMDVector.push_back(Local);
      }

      if (!I.getType()->isVoidTy())
        EnumerateValue(&I);
    }
  }

  // Add all of the function-local metadata.
  for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) {
    // At this point, all local values have been incorporated; we shouldn't
    // have a metadata operand that references a value that hasn't been seen.
    assert(ValueMap.count(FnLocalMDVector[i]->getValue()) &&
           "Missing value for metadata operand");
    EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]);
  }
}
void ValueEnumerator::purgeFunction() {
  // Remove purged values from the ValueMap.
  for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
    ValueMap.erase(Values[i].first);
  for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
    MetadataMap.erase(MDs[i]);
  for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
    ValueMap.erase(BasicBlocks[i]);

  Values.resize(NumModuleValues);
  MDs.resize(NumModuleMDs);
  BasicBlocks.clear();
  NumMDStrings = 0;
}

static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
                                 DenseMap<const BasicBlock*, unsigned> &IDMap) {
  unsigned Counter = 0;
  for (const BasicBlock &BB : *F)
    IDMap[&BB] = ++Counter;
}

/// getGlobalBasicBlockID - This returns the function-specific ID for the
/// specified basic block.  This is relatively expensive information, so it
/// should only be used by rare constructs such as address-of-label.
unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
  unsigned &Idx = GlobalBasicBlockIDs[BB];
  if (Idx != 0)
    return Idx-1;

  IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
  return getGlobalBasicBlockID(BB);
}

uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const {
  return Log2_32_Ceil(getTypes().size() + 1);
}
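
// For example, a module with 5 enumerated types yields Log2_32_Ceil(6) == 3
// above, i.e. 3 bits per type index; the +1 presumably leaves headroom so a
// value one past the largest index is also representable.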
1071