1//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the LLVM module linker.
10//
11//===----------------------------------------------------------------------===//
12
13#include "LinkDiagnosticInfo.h"
14#include "llvm-c/Linker.h"
15#include "llvm/ADT/SetVector.h"
16#include "llvm/IR/Comdat.h"
17#include "llvm/IR/DiagnosticPrinter.h"
18#include "llvm/IR/GlobalValue.h"
19#include "llvm/IR/LLVMContext.h"
20#include "llvm/IR/Module.h"
21#include "llvm/Linker/Linker.h"
22#include "llvm/Support/Error.h"
23using namespace llvm;
24
25namespace {
26
27/// This is an implementation class for the LinkModules function, which is the
28/// entrypoint for this file.
29class ModuleLinker {
30  IRMover &Mover;
31  std::unique_ptr<Module> SrcM;
32
33  SetVector<GlobalValue *> ValuesToLink;
34
35  /// For symbol clashes, prefer those from Src.
36  unsigned Flags;
37
38  /// List of global value names that should be internalized.
39  StringSet<> Internalize;
40
41  /// Function that will perform the actual internalization. The reason for a
42  /// callback is that the linker cannot call internalizeModule without
43  /// creating a circular dependency between IPO and the linker.
44  std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
45
46  /// Used as the callback for lazy linking.
47  /// The mover has just hit GV and we have to decide if it, and other members
48  /// of the same comdat, should be linked. Every member to be linked is passed
49  /// to Add.
50  void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
51
52  bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
53  bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
54
55  bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
56                            const GlobalValue &Src);
57
58  /// Should we have mover and linker error diag info?
59  bool emitError(const Twine &Message) {
60    SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
61    return true;
62  }
63
64  bool getComdatLeader(Module &M, StringRef ComdatName,
65                       const GlobalVariable *&GVar);
66  bool computeResultingSelectionKind(StringRef ComdatName,
67                                     Comdat::SelectionKind Src,
68                                     Comdat::SelectionKind Dst,
69                                     Comdat::SelectionKind &Result,
70                                     bool &LinkFromSrc);
71  std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
72      ComdatsChosen;
73  bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
74                       bool &LinkFromSrc);
75  // Keep track of the lazy linked global members of each comdat in source.
76  DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
77
78  /// Given a global in the source module, return the global in the
79  /// destination module that is being linked to, if any.
80  GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
81    Module &DstM = Mover.getModule();
82    // If the source has no name it can't link.  If it has local linkage,
83    // there is no name match-up going on.
84    if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
85      return nullptr;
86
87    // Otherwise see if we have a match in the destination module's symtab.
88    GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
89    if (!DGV)
90      return nullptr;
91
92    // If we found a global with the same name in the dest module, but it has
93    // internal linkage, we are really not doing any linkage here.
94    if (DGV->hasLocalLinkage())
95      return nullptr;
96
97    // Otherwise, we do in fact link to the destination global.
98    return DGV;
99  }
100
101  /// Drop GV if it is a member of a comdat that we are dropping.
102  /// This can happen with COFF's largest selection kind.
103  void dropReplacedComdat(GlobalValue &GV,
104                          const DenseSet<const Comdat *> &ReplacedDstComdats);
105
106  bool linkIfNeeded(GlobalValue &GV);
107
108public:
109  ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
110               std::function<void(Module &, const StringSet<> &)>
111                   InternalizeCallback = {})
112      : Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
113        InternalizeCallback(std::move(InternalizeCallback)) {}
114
115  bool run();
116};
117}
118
119static GlobalValue::VisibilityTypes
120getMinVisibility(GlobalValue::VisibilityTypes A,
121                 GlobalValue::VisibilityTypes B) {
122  if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
123    return GlobalValue::HiddenVisibility;
124  if (A == GlobalValue::ProtectedVisibility ||
125      B == GlobalValue::ProtectedVisibility)
126    return GlobalValue::ProtectedVisibility;
127  return GlobalValue::DefaultVisibility;
128}
129
130bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
131                                   const GlobalVariable *&GVar) {
132  const GlobalValue *GVal = M.getNamedValue(ComdatName);
133  if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
134    GVal = GA->getBaseObject();
135    if (!GVal)
136      // We cannot resolve the size of the aliasee yet.
137      return emitError("Linking COMDATs named '" + ComdatName +
138                       "': COMDAT key involves incomputable alias size.");
139  }
140
141  GVar = dyn_cast_or_null<GlobalVariable>(GVal);
142  if (!GVar)
143    return emitError(
144        "Linking COMDATs named '" + ComdatName +
145        "': GlobalVariable required for data dependent selection!");
146
147  return false;
148}
149
150bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
151                                                 Comdat::SelectionKind Src,
152                                                 Comdat::SelectionKind Dst,
153                                                 Comdat::SelectionKind &Result,
154                                                 bool &LinkFromSrc) {
155  Module &DstM = Mover.getModule();
156  // The ability to mix Comdat::SelectionKind::Any with
157  // Comdat::SelectionKind::Largest is a behavior that comes from COFF.
158  bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
159                         Dst == Comdat::SelectionKind::Largest;
160  bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
161                         Src == Comdat::SelectionKind::Largest;
162  if (DstAnyOrLargest && SrcAnyOrLargest) {
163    if (Dst == Comdat::SelectionKind::Largest ||
164        Src == Comdat::SelectionKind::Largest)
165      Result = Comdat::SelectionKind::Largest;
166    else
167      Result = Comdat::SelectionKind::Any;
168  } else if (Src == Dst) {
169    Result = Dst;
170  } else {
171    return emitError("Linking COMDATs named '" + ComdatName +
172                     "': invalid selection kinds!");
173  }
174
175  switch (Result) {
176  case Comdat::SelectionKind::Any:
177    // Go with Dst.
178    LinkFromSrc = false;
179    break;
180  case Comdat::SelectionKind::NoDuplicates:
181    return emitError("Linking COMDATs named '" + ComdatName +
182                     "': noduplicates has been violated!");
183  case Comdat::SelectionKind::ExactMatch:
184  case Comdat::SelectionKind::Largest:
185  case Comdat::SelectionKind::SameSize: {
186    const GlobalVariable *DstGV;
187    const GlobalVariable *SrcGV;
188    if (getComdatLeader(DstM, ComdatName, DstGV) ||
189        getComdatLeader(*SrcM, ComdatName, SrcGV))
190      return true;
191
192    const DataLayout &DstDL = DstM.getDataLayout();
193    const DataLayout &SrcDL = SrcM->getDataLayout();
194    uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType());
195    uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType());
196    if (Result == Comdat::SelectionKind::ExactMatch) {
197      if (SrcGV->getInitializer() != DstGV->getInitializer())
198        return emitError("Linking COMDATs named '" + ComdatName +
199                         "': ExactMatch violated!");
200      LinkFromSrc = false;
201    } else if (Result == Comdat::SelectionKind::Largest) {
202      LinkFromSrc = SrcSize > DstSize;
203    } else if (Result == Comdat::SelectionKind::SameSize) {
204      if (SrcSize != DstSize)
205        return emitError("Linking COMDATs named '" + ComdatName +
206                         "': SameSize violated!");
207      LinkFromSrc = false;
208    } else {
209      llvm_unreachable("unknown selection kind");
210    }
211    break;
212  }
213  }
214
215  return false;
216}
217
218bool ModuleLinker::getComdatResult(const Comdat *SrcC,
219                                   Comdat::SelectionKind &Result,
220                                   bool &LinkFromSrc) {
221  Module &DstM = Mover.getModule();
222  Comdat::SelectionKind SSK = SrcC->getSelectionKind();
223  StringRef ComdatName = SrcC->getName();
224  Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
225  Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
226
227  if (DstCI == ComdatSymTab.end()) {
228    // Use the comdat if it is only available in one of the modules.
229    LinkFromSrc = true;
230    Result = SSK;
231    return false;
232  }
233
234  const Comdat *DstC = &DstCI->second;
235  Comdat::SelectionKind DSK = DstC->getSelectionKind();
236  return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
237                                       LinkFromSrc);
238}
239
240bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
241                                        const GlobalValue &Dest,
242                                        const GlobalValue &Src) {
243
244  // Should we unconditionally use the Src?
245  if (shouldOverrideFromSrc()) {
246    LinkFromSrc = true;
247    return false;
248  }
249
250  // We always have to add Src if it has appending linkage.
251  if (Src.hasAppendingLinkage()) {
252    LinkFromSrc = true;
253    return false;
254  }
255
256  bool SrcIsDeclaration = Src.isDeclarationForLinker();
257  bool DestIsDeclaration = Dest.isDeclarationForLinker();
258
259  if (SrcIsDeclaration) {
260    // If Src is external or if both Src & Dest are external..  Just link the
261    // external globals, we aren't adding anything.
262    if (Src.hasDLLImportStorageClass()) {
263      // If one of GVs is marked as DLLImport, result should be dllimport'ed.
264      LinkFromSrc = DestIsDeclaration;
265      return false;
266    }
267    // If the Dest is weak, use the source linkage.
268    if (Dest.hasExternalWeakLinkage()) {
269      LinkFromSrc = true;
270      return false;
271    }
272    // Link an available_externally over a declaration.
273    LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
274    return false;
275  }
276
277  if (DestIsDeclaration) {
278    // If Dest is external but Src is not:
279    LinkFromSrc = true;
280    return false;
281  }
282
283  if (Src.hasCommonLinkage()) {
284    if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
285      LinkFromSrc = true;
286      return false;
287    }
288
289    if (!Dest.hasCommonLinkage()) {
290      LinkFromSrc = false;
291      return false;
292    }
293
294    const DataLayout &DL = Dest.getParent()->getDataLayout();
295    uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType());
296    uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType());
297    LinkFromSrc = SrcSize > DestSize;
298    return false;
299  }
300
301  if (Src.isWeakForLinker()) {
302    assert(!Dest.hasExternalWeakLinkage());
303    assert(!Dest.hasAvailableExternallyLinkage());
304
305    if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
306      LinkFromSrc = true;
307      return false;
308    }
309
310    LinkFromSrc = false;
311    return false;
312  }
313
314  if (Dest.isWeakForLinker()) {
315    assert(Src.hasExternalLinkage());
316    LinkFromSrc = true;
317    return false;
318  }
319
320  assert(!Src.hasExternalWeakLinkage());
321  assert(!Dest.hasExternalWeakLinkage());
322  assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
323         "Unexpected linkage type!");
324  return emitError("Linking globals named '" + Src.getName() +
325                   "': symbol multiply defined!");
326}
327
328bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
329  GlobalValue *DGV = getLinkedToGlobal(&GV);
330
331  if (shouldLinkOnlyNeeded()) {
332    // Always import variables with appending linkage.
333    if (!GV.hasAppendingLinkage()) {
334      // Don't import globals unless they are referenced by the destination
335      // module.
336      if (!DGV)
337        return false;
338      // Don't import globals that are already defined in the destination module
339      if (!DGV->isDeclaration())
340        return false;
341    }
342  }
343
344  if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
345    auto *DGVar = dyn_cast<GlobalVariable>(DGV);
346    auto *SGVar = dyn_cast<GlobalVariable>(&GV);
347    if (DGVar && SGVar) {
348      if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
349          (!DGVar->isConstant() || !SGVar->isConstant())) {
350        DGVar->setConstant(false);
351        SGVar->setConstant(false);
352      }
353      if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
354        MaybeAlign Align(
355            std::max(DGVar->getAlignment(), SGVar->getAlignment()));
356        SGVar->setAlignment(Align);
357        DGVar->setAlignment(Align);
358      }
359    }
360
361    GlobalValue::VisibilityTypes Visibility =
362        getMinVisibility(DGV->getVisibility(), GV.getVisibility());
363    DGV->setVisibility(Visibility);
364    GV.setVisibility(Visibility);
365
366    GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr(
367        DGV->getUnnamedAddr(), GV.getUnnamedAddr());
368    DGV->setUnnamedAddr(UnnamedAddr);
369    GV.setUnnamedAddr(UnnamedAddr);
370  }
371
372  if (!DGV && !shouldOverrideFromSrc() &&
373      (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
374       GV.hasAvailableExternallyLinkage()))
375    return false;
376
377  if (GV.isDeclaration())
378    return false;
379
380  if (const Comdat *SC = GV.getComdat()) {
381    bool LinkFromSrc;
382    Comdat::SelectionKind SK;
383    std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
384    if (!LinkFromSrc)
385      return false;
386  }
387
388  bool LinkFromSrc = true;
389  if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
390    return true;
391  if (LinkFromSrc)
392    ValuesToLink.insert(&GV);
393  return false;
394}
395
396void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
397  // Add these to the internalize list
398  if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
399      !shouldLinkOnlyNeeded())
400    return;
401
402  if (InternalizeCallback)
403    Internalize.insert(GV.getName());
404  Add(GV);
405
406  const Comdat *SC = GV.getComdat();
407  if (!SC)
408    return;
409  for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
410    GlobalValue *DGV = getLinkedToGlobal(GV2);
411    bool LinkFromSrc = true;
412    if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
413      return;
414    if (!LinkFromSrc)
415      continue;
416    if (InternalizeCallback)
417      Internalize.insert(GV2->getName());
418    Add(*GV2);
419  }
420}
421
422void ModuleLinker::dropReplacedComdat(
423    GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) {
424  Comdat *C = GV.getComdat();
425  if (!C)
426    return;
427  if (!ReplacedDstComdats.count(C))
428    return;
429  if (GV.use_empty()) {
430    GV.eraseFromParent();
431    return;
432  }
433
434  if (auto *F = dyn_cast<Function>(&GV)) {
435    F->deleteBody();
436  } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
437    Var->setInitializer(nullptr);
438  } else {
439    auto &Alias = cast<GlobalAlias>(GV);
440    Module &M = *Alias.getParent();
441    PointerType &Ty = *cast<PointerType>(Alias.getType());
442    GlobalValue *Declaration;
443    if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
444      Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
445    } else {
446      Declaration =
447          new GlobalVariable(M, Ty.getElementType(), /*isConstant*/ false,
448                             GlobalValue::ExternalLinkage,
449                             /*Initializer*/ nullptr);
450    }
451    Declaration->takeName(&Alias);
452    Alias.replaceAllUsesWith(Declaration);
453    Alias.eraseFromParent();
454  }
455}
456
457bool ModuleLinker::run() {
458  Module &DstM = Mover.getModule();
459  DenseSet<const Comdat *> ReplacedDstComdats;
460
461  for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
462    const Comdat &C = SMEC.getValue();
463    if (ComdatsChosen.count(&C))
464      continue;
465    Comdat::SelectionKind SK;
466    bool LinkFromSrc;
467    if (getComdatResult(&C, SK, LinkFromSrc))
468      return true;
469    ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
470
471    if (!LinkFromSrc)
472      continue;
473
474    Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
475    Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName());
476    if (DstCI == ComdatSymTab.end())
477      continue;
478
479    // The source comdat is replacing the dest one.
480    const Comdat *DstC = &DstCI->second;
481    ReplacedDstComdats.insert(DstC);
482  }
483
484  // Alias have to go first, since we are not able to find their comdats
485  // otherwise.
486  for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
487    GlobalAlias &GV = *I++;
488    dropReplacedComdat(GV, ReplacedDstComdats);
489  }
490
491  for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
492    GlobalVariable &GV = *I++;
493    dropReplacedComdat(GV, ReplacedDstComdats);
494  }
495
496  for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
497    Function &GV = *I++;
498    dropReplacedComdat(GV, ReplacedDstComdats);
499  }
500
501  for (GlobalVariable &GV : SrcM->globals())
502    if (GV.hasLinkOnceLinkage())
503      if (const Comdat *SC = GV.getComdat())
504        LazyComdatMembers[SC].push_back(&GV);
505
506  for (Function &SF : *SrcM)
507    if (SF.hasLinkOnceLinkage())
508      if (const Comdat *SC = SF.getComdat())
509        LazyComdatMembers[SC].push_back(&SF);
510
511  for (GlobalAlias &GA : SrcM->aliases())
512    if (GA.hasLinkOnceLinkage())
513      if (const Comdat *SC = GA.getComdat())
514        LazyComdatMembers[SC].push_back(&GA);
515
516  // Insert all of the globals in src into the DstM module... without linking
517  // initializers (which could refer to functions not yet mapped over).
518  for (GlobalVariable &GV : SrcM->globals())
519    if (linkIfNeeded(GV))
520      return true;
521
522  for (Function &SF : *SrcM)
523    if (linkIfNeeded(SF))
524      return true;
525
526  for (GlobalAlias &GA : SrcM->aliases())
527    if (linkIfNeeded(GA))
528      return true;
529
530  for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
531    GlobalValue *GV = ValuesToLink[I];
532    const Comdat *SC = GV->getComdat();
533    if (!SC)
534      continue;
535    for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
536      GlobalValue *DGV = getLinkedToGlobal(GV2);
537      bool LinkFromSrc = true;
538      if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
539        return true;
540      if (LinkFromSrc)
541        ValuesToLink.insert(GV2);
542    }
543  }
544
545  if (InternalizeCallback) {
546    for (GlobalValue *GV : ValuesToLink)
547      Internalize.insert(GV->getName());
548  }
549
550  // FIXME: Propagate Errors through to the caller instead of emitting
551  // diagnostics.
552  bool HasErrors = false;
553  if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
554                           [this](GlobalValue &GV, IRMover::ValueAdder Add) {
555                             addLazyFor(GV, Add);
556                           },
557                           /* IsPerformingImport */ false)) {
558    handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
559      DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
560      HasErrors = true;
561    });
562  }
563  if (HasErrors)
564    return true;
565
566  if (InternalizeCallback)
567    InternalizeCallback(DstM, Internalize);
568
569  return false;
570}
571
572Linker::Linker(Module &M) : Mover(M) {}
573
574bool Linker::linkInModule(
575    std::unique_ptr<Module> Src, unsigned Flags,
576    std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
577  ModuleLinker ModLinker(Mover, std::move(Src), Flags,
578                         std::move(InternalizeCallback));
579  return ModLinker.run();
580}
581
582//===----------------------------------------------------------------------===//
583// LinkModules entrypoint.
584//===----------------------------------------------------------------------===//
585
586/// This function links two modules together, with the resulting Dest module
587/// modified to be the composite of the two input modules. If an error occurs,
588/// true is returned and ErrorMsg (if not null) is set to indicate the problem.
589/// Upon failure, the Dest module could be in a modified state, and shouldn't be
590/// relied on to be consistent.
591bool Linker::linkModules(
592    Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
593    std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
594  Linker L(Dest);
595  return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
596}
597
598//===----------------------------------------------------------------------===//
599// C API.
600//===----------------------------------------------------------------------===//
601
602LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
603  Module *D = unwrap(Dest);
604  std::unique_ptr<Module> M(unwrap(Src));
605  return Linker::linkModules(*D, std::move(M));
606}
607