//===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers type metadata and calls to the llvm.type.test intrinsic.
// It also ensures that globals are properly laid out for the
// llvm.icall.branch.funnel intrinsic.
// See http://llvm.org/docs/TypeMetadata.html for more information.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

using namespace llvm;
using namespace lowertypetests;

#define DEBUG_TYPE "lowertypetests"

STATISTIC(ByteArraySizeBits, "Byte array size in bits");
STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");

static cl::opt<bool> AvoidReuse(
    "lowertypetests-avoid-reuse",
    cl::desc("Try to avoid reuse of byte array addresses using aliases"),
    cl::Hidden, cl::init(true));

static cl::opt<PassSummaryAction> ClSummaryAction(
    "lowertypetests-summary-action",
    cl::desc("What to do with the summary when running this pass"),
    cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
clEnumValN(PassSummaryAction::Import, "import", "Import typeid resolutions from summary and globals"), clEnumValN(PassSummaryAction::Export, "export", "Export typeid resolutions to summary and globals")), cl::Hidden); static cl::opt ClReadSummary( "lowertypetests-read-summary", cl::desc("Read summary from given YAML file before running pass"), cl::Hidden); static cl::opt ClWriteSummary( "lowertypetests-write-summary", cl::desc("Write summary to given YAML file after running pass"), cl::Hidden); bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (Offset < ByteOffset) return false; if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0) return false; uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2; if (BitOffset >= BitSize) return false; return Bits.count(BitOffset); } void BitSetInfo::print(raw_ostream &OS) const { OS << "offset " << ByteOffset << " size " << BitSize << " align " << (1 << AlignLog2); if (isAllOnes()) { OS << " all-ones\n"; return; } OS << " { "; for (uint64_t B : Bits) OS << B << ' '; OS << "}\n"; } BitSetInfo BitSetBuilder::build() { if (Min > Max) Min = 0; // Normalize each offset against the minimum observed offset, and compute // the bitwise OR of each of the offsets. The number of trailing zeros // in the mask gives us the log2 of the alignment of all offsets, which // allows us to compress the bitset by only storing one bit per aligned // address. uint64_t Mask = 0; for (uint64_t &Offset : Offsets) { Offset -= Min; Mask |= Offset; } BitSetInfo BSI; BSI.ByteOffset = Min; BSI.AlignLog2 = 0; if (Mask != 0) BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined); // Build the compressed bitset while normalizing the offsets against the // computed alignment. BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1; for (uint64_t Offset : Offsets) { Offset >>= BSI.AlignLog2; BSI.Bits.insert(Offset); } return BSI; } void GlobalLayoutBuilder::addFragment(const std::set &F) { // Create a new fragment to hold the layout for F. Fragments.emplace_back(); std::vector &Fragment = Fragments.back(); uint64_t FragmentIndex = Fragments.size() - 1; for (auto ObjIndex : F) { uint64_t OldFragmentIndex = FragmentMap[ObjIndex]; if (OldFragmentIndex == 0) { // We haven't seen this object index before, so just add it to the current // fragment. Fragment.push_back(ObjIndex); } else { // This index belongs to an existing fragment. Copy the elements of the // old fragment into this one and clear the old fragment. We don't update // the fragment map just yet, this ensures that any further references to // indices from the old fragment in this fragment do not insert any more // indices. std::vector &OldFragment = Fragments[OldFragmentIndex]; Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end()); OldFragment.clear(); } } // Update the fragment map to point our object indices to this fragment. for (uint64_t ObjIndex : Fragment) FragmentMap[ObjIndex] = FragmentIndex; } void ByteArrayBuilder::allocate(const std::set &Bits, uint64_t BitSize, uint64_t &AllocByteOffset, uint8_t &AllocMask) { // Find the smallest current allocation. unsigned Bit = 0; for (unsigned I = 1; I != BitsPerByte; ++I) if (BitAllocs[I] < BitAllocs[Bit]) Bit = I; AllocByteOffset = BitAllocs[Bit]; // Add our size to it. unsigned ReqSize = AllocByteOffset + BitSize; BitAllocs[Bit] = ReqSize; if (Bytes.size() < ReqSize) Bytes.resize(ReqSize); // Set our bits. 
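  // (Illustrative example, not part of the upstream comment: if BitAllocs[3]
  // == 5 is currently the smallest high-water mark and BitSize == 20, this
  // bit set is placed at byte offset 5 with AllocMask = 1 << 3, and
  // BitAllocs[3] grows to 25.)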
AllocMask = 1 << Bit; for (uint64_t B : Bits) Bytes[AllocByteOffset + B] |= AllocMask; } bool lowertypetests::isJumpTableCanonical(Function *F) { if (F->isDeclarationForLinker()) return false; auto *CI = mdconst::extract_or_null( F->getParent()->getModuleFlag("CFI Canonical Jump Tables")); if (!CI || CI->getZExtValue() != 0) return true; return F->hasFnAttribute("cfi-canonical-jump-table"); } namespace { struct ByteArrayInfo { std::set Bits; uint64_t BitSize; GlobalVariable *ByteArray; GlobalVariable *MaskGlobal; uint8_t *MaskPtr = nullptr; }; /// A POD-like structure that we use to store a global reference together with /// its metadata types. In this pass we frequently need to query the set of /// metadata types referenced by a global, which at the IR level is an expensive /// operation involving a map lookup; this data structure helps to reduce the /// number of times we need to do this lookup. class GlobalTypeMember final : TrailingObjects { friend TrailingObjects; GlobalObject *GO; size_t NTypes; // For functions: true if the jump table is canonical. This essentially means // whether the canonical address (i.e. the symbol table entry) of the function // is provided by the local jump table. This is normally the same as whether // the function is defined locally, but if canonical jump tables are disabled // by the user then the jump table never provides a canonical definition. bool IsJumpTableCanonical; // For functions: true if this function is either defined or used in a thinlto // module and its jumptable entry needs to be exported to thinlto backends. bool IsExported; size_t numTrailingObjects(OverloadToken) const { return NTypes; } public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, bool IsJumpTableCanonical, bool IsExported, ArrayRef Types) { auto *GTM = static_cast(Alloc.Allocate( totalSizeToAlloc(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); GTM->IsJumpTableCanonical = IsJumpTableCanonical; GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects()); return GTM; } GlobalObject *getGlobal() const { return GO; } bool isJumpTableCanonical() const { return IsJumpTableCanonical; } bool isExported() const { return IsExported; } ArrayRef types() const { return makeArrayRef(getTrailingObjects(), NTypes); } }; struct ICallBranchFunnel final : TrailingObjects { static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI, ArrayRef Targets, unsigned UniqueId) { auto *Call = static_cast( Alloc.Allocate(totalSizeToAlloc(Targets.size()), alignof(ICallBranchFunnel))); Call->CI = CI; Call->UniqueId = UniqueId; Call->NTargets = Targets.size(); std::uninitialized_copy(Targets.begin(), Targets.end(), Call->getTrailingObjects()); return Call; } CallInst *CI; ArrayRef targets() const { return makeArrayRef(getTrailingObjects(), NTargets); } unsigned UniqueId; private: size_t NTargets; }; struct ScopedSaveAliaseesAndUsed { Module &M; SmallPtrSet Used, CompilerUsed; std::vector> FunctionAliases; ScopedSaveAliaseesAndUsed(Module &M) : M(M) { // The users of this class want to replace all function references except // for aliases and llvm.used/llvm.compiler.used with references to a jump // table. 
We avoid replacing aliases in order to avoid introducing a double // indirection (or an alias pointing to a declaration in ThinLTO mode), and // we avoid replacing llvm.used/llvm.compiler.used because these global // variables describe properties of the global, not the jump table (besides, // offseted references to the jump table in llvm.used are invalid). // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly // indirect) users", so what we do is save the list of globals referenced by // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW // replace the aliasees and then set them back to their original values at // the end. if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false)) GV->eraseFromParent(); if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true)) GV->eraseFromParent(); for (auto &GIS : concat(M.aliases(), M.ifuncs())) { // FIXME: This should look past all aliases not just interposable ones, // see discussion on D65118. if (auto *F = dyn_cast(GIS.getIndirectSymbol()->stripPointerCasts())) FunctionAliases.push_back({&GIS, F}); } } ~ScopedSaveAliaseesAndUsed() { appendToUsed(M, std::vector(Used.begin(), Used.end())); appendToCompilerUsed(M, std::vector(CompilerUsed.begin(), CompilerUsed.end())); for (auto P : FunctionAliases) P.first->setIndirectSymbol( ConstantExpr::getBitCast(P.second, P.first->getType())); } }; class LowerTypeTestsModule { Module &M; ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; Triple::ArchType Arch; Triple::OSType OS; Triple::ObjectFormatType ObjectFormat; IntegerType *Int1Ty = Type::getInt1Ty(M.getContext()); IntegerType *Int8Ty = Type::getInt8Ty(M.getContext()); PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0); IntegerType *Int32Ty = Type::getInt32Ty(M.getContext()); PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty); IntegerType *Int64Ty = Type::getInt64Ty(M.getContext()); IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0); // Indirect function call index assignment counter for WebAssembly uint64_t IndirectIndex = 1; // Mapping from type identifiers to the call sites that test them, as well as // whether the type identifier needs to be exported to ThinLTO backends as // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId). struct TypeIdUserInfo { std::vector CallSites; bool IsExported = false; }; DenseMap TypeIdUsers; /// This structure describes how to lower type tests for a particular type /// identifier. It is either built directly from the global analysis (during /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type /// identifier summaries and external symbol references (in ThinLTO backends). struct TypeIdLowering { TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat; /// All except Unsat: the start address within the combined global. Constant *OffsetedGlobal; /// ByteArray, Inline, AllOnes: log2 of the required global alignment /// relative to the start address. Constant *AlignLog2; /// ByteArray, Inline, AllOnes: one less than the size of the memory region /// covering members of this type identifier as a multiple of 2^AlignLog2. Constant *SizeM1; /// ByteArray: the byte array to test the address against. Constant *TheByteArray; /// ByteArray: the bit mask to apply to bytes loaded from the byte array. Constant *BitMask; /// Inline: the bit mask to test the address against. 
Constant *InlineBits; }; std::vector ByteArrayInfos; Function *WeakInitializerFn = nullptr; bool shouldExportConstantsAsAbsoluteSymbols(); uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); void importFunction(Function *F, bool isJumpTableCanonical, std::vector &AliasesToErase); BitSetInfo buildBitSet(Metadata *TypeId, const DenseMap &GlobalLayout); ByteArrayInfo *createByteArray(BitSetInfo &BSI); void allocateByteArrays(); Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL, Value *BitOffset); void lowerTypeTestCalls( ArrayRef TypeIds, Constant *CombinedGlobalAddr, const DenseMap &GlobalLayout); Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL); void buildBitSetsFromGlobalVariables(ArrayRef TypeIds, ArrayRef Globals); unsigned getJumpTableEntrySize(); Type *getJumpTableEntryType(); void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS, Triple::ArchType JumpTableArch, SmallVectorImpl &AsmArgs, Function *Dest); void verifyTypeMDNode(GlobalObject *GO, MDNode *Type); void buildBitSetsFromFunctions(ArrayRef TypeIds, ArrayRef Functions); void buildBitSetsFromFunctionsNative(ArrayRef TypeIds, ArrayRef Functions); void buildBitSetsFromFunctionsWASM(ArrayRef TypeIds, ArrayRef Functions); void buildBitSetsFromDisjointSet(ArrayRef TypeIds, ArrayRef Globals, ArrayRef ICallBranchFunnels); void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, bool IsJumpTableCanonical); void moveInitializerToModuleConstructor(GlobalVariable *GV); void findGlobalVariableUsersOf(Constant *C, SmallSetVector &Out); void createJumpTable(Function *F, ArrayRef Functions); /// replaceCfiUses - Go through the uses list for this definition /// and make each use point to "V" instead of "this" when the use is outside /// the block. 'This's use list is expected to have at least one element. /// Unlike replaceAllUsesWith this function skips blockaddr and direct call /// uses. void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical); /// replaceDirectCalls - Go through the uses list for this definition and /// replace each use, which is a direct function call. void replaceDirectCalls(Value *Old, Value *New); public: LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary); bool lower(); // Lower the module using the action and summary passed as command line // arguments. For testing purposes only. 
static bool runForTesting(Module &M); }; struct LowerTypeTests : public ModulePass { static char ID; bool UseCommandLine = false; ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } LowerTypeTests(ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) : ModulePass(ID), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { if (UseCommandLine) return LowerTypeTestsModule::runForTesting(M); return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower(); } }; } // end anonymous namespace char LowerTypeTests::ID = 0; INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, false) ModulePass * llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { return new LowerTypeTests(ExportSummary, ImportSummary); } /// Build a bit set for TypeId using the object layouts in /// GlobalLayout. BitSetInfo LowerTypeTestsModule::buildBitSet( Metadata *TypeId, const DenseMap &GlobalLayout) { BitSetBuilder BSB; // Compute the byte offset of each address associated with this type // identifier. for (auto &GlobalAndOffset : GlobalLayout) { for (MDNode *Type : GlobalAndOffset.first->types()) { if (Type->getOperand(1) != TypeId) continue; uint64_t Offset = cast( cast(Type->getOperand(0))->getValue()) ->getZExtValue(); BSB.addOffset(GlobalAndOffset.second + Offset); } } return BSB.build(); } /// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in /// Bits. This pattern matches to the bt instruction on x86. static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, Value *BitOffset) { auto BitsType = cast(Bits->getType()); unsigned BitWidth = BitsType->getBitWidth(); BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType); Value *BitIndex = B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1)); Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex); Value *MaskedBits = B.CreateAnd(Bits, BitMask); return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); } ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) { // Create globals to stand in for byte arrays and masks. These never actually // get initialized, we RAUW and erase them later in allocateByteArrays() once // we know the offset and mask to use. 
auto ByteArrayGlobal = new GlobalVariable( M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); ByteArrayInfos.emplace_back(); ByteArrayInfo *BAI = &ByteArrayInfos.back(); BAI->Bits = BSI.Bits; BAI->BitSize = BSI.BitSize; BAI->ByteArray = ByteArrayGlobal; BAI->MaskGlobal = MaskGlobal; return BAI; } void LowerTypeTestsModule::allocateByteArrays() { llvm::stable_sort(ByteArrayInfos, [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { return BAI1.BitSize > BAI2.BitSize; }); std::vector ByteArrayOffsets(ByteArrayInfos.size()); ByteArrayBuilder BAB; for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { ByteArrayInfo *BAI = &ByteArrayInfos[I]; uint8_t Mask; BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); BAI->MaskGlobal->replaceAllUsesWith( ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy)); BAI->MaskGlobal->eraseFromParent(); if (BAI->MaskPtr) *BAI->MaskPtr = Mask; } Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes); auto ByteArray = new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, ByteArrayConst); for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { ByteArrayInfo *BAI = &ByteArrayInfos[I]; Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( ByteArrayConst->getType(), ByteArray, Idxs); // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures // that the pc-relative displacement is folded into the lea instead of the // test instruction getting another displacement. GlobalAlias *Alias = GlobalAlias::create( Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); BAI->ByteArray->replaceAllUsesWith(Alias); BAI->ByteArray->eraseFromParent(); } ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + BAB.BitAllocs[6] + BAB.BitAllocs[7]; ByteArraySizeBytes = BAB.Bytes.size(); } /// Build a test that bit BitOffset is set in the type identifier that was /// lowered to TIL, which must be either an Inline or a ByteArray. Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL, Value *BitOffset) { if (TIL.TheKind == TypeTestResolution::Inline) { // If the bit set is sufficiently small, we can avoid a load by bit testing // a constant. return createMaskedBitTest(B, TIL.InlineBits, BitOffset); } else { Constant *ByteArray = TIL.TheByteArray; if (AvoidReuse && !ImportSummary) { // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. // This won't work when importing because TheByteArray is external. 
ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage, "bits_use", ByteArray, &M); } Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset); Value *Byte = B.CreateLoad(Int8Ty, ByteAddr); Value *ByteAndMask = B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty)); return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); } } static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL, Value *V, uint64_t COffset) { if (auto GV = dyn_cast(V)) { SmallVector Types; GV->getMetadata(LLVMContext::MD_type, Types); for (MDNode *Type : Types) { if (Type->getOperand(1) != TypeId) continue; uint64_t Offset = cast( cast(Type->getOperand(0))->getValue()) ->getZExtValue(); if (COffset == Offset) return true; } return false; } if (auto GEP = dyn_cast(V)) { APInt APOffset(DL.getPointerSizeInBits(0), 0); bool Result = GEP->accumulateConstantOffset(DL, APOffset); if (!Result) return false; COffset += APOffset.getZExtValue(); return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset); } if (auto Op = dyn_cast(V)) { if (Op->getOpcode() == Instruction::BitCast) return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset); if (Op->getOpcode() == Instruction::Select) return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) && isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset); } return false; } /// Lower a llvm.type.test call to its implementation. Returns the value to /// replace the call with. Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL) { if (TIL.TheKind == TypeTestResolution::Unsat) return ConstantInt::getFalse(M.getContext()); Value *Ptr = CI->getArgOperand(0); const DataLayout &DL = M.getDataLayout(); if (isKnownTypeIdMember(TypeId, DL, Ptr, 0)) return ConstantInt::getTrue(M.getContext()); BasicBlock *InitialBB = CI->getParent(); IRBuilder<> B(CI); Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy); if (TIL.TheKind == TypeTestResolution::Single) return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt); // We need to check that the offset both falls within our range and is // suitably aligned. We can check both properties at the same time by // performing a right rotate by log2(alignment) followed by an integer // comparison against the bitset size. The rotate will move the lower // order bits that need to be zero into the higher order bits of the // result, causing the comparison to fail if they are nonzero. The rotate // also conveniently gives us a bit offset to use during the load from // the bitset. Value *OffsetSHR = B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy)); Value *OffsetSHL = B.CreateShl( PtrOffset, ConstantExpr::getZExt( ConstantExpr::getSub( ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)), TIL.AlignLog2), IntPtrTy)); Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1); // If the bit set is all ones, testing against it is unnecessary. if (TIL.TheKind == TypeTestResolution::AllOnes) return OffsetInRange; // See if the intrinsic is used in the following common pattern: // br(llvm.type.test(...), thenbb, elsebb) // where nothing happens between the type test and the br. // If so, create slightly simpler IR. 
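  // For illustration (value names are made up, not taken from the source):
  // given
  //   %ok = call i1 @llvm.type.test(i8* %p, metadata !"typeid")
  //   br i1 %ok, label %cont, label %trap
  // we branch on the range/alignment check directly to %trap, and emit the
  // byte-array or inline bit test only on the in-range path, avoiding the
  // extra block and phi that the general lowering below needs.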
if (CI->hasOneUse()) if (auto *Br = dyn_cast(*CI->user_begin())) if (CI->getNextNode() == Br) { BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator()); BasicBlock *Else = Br->getSuccessor(1); BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange); NewBr->setMetadata(LLVMContext::MD_prof, Br->getMetadata(LLVMContext::MD_prof)); ReplaceInstWithInst(InitialBB->getTerminator(), NewBr); // Update phis in Else resulting from InitialBB being split for (auto &Phi : Else->phis()) Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB); IRBuilder<> ThenB(CI); return createBitSetTest(ThenB, TIL, BitOffset); } IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false)); // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. Value *Bit = createBitSetTest(ThenB, TIL, BitOffset); // The value we want is 0 if we came directly from the initial block // (having failed the range or alignment checks), or the loaded bit if // we came from the block in which we loaded it. B.SetInsertPoint(CI); PHINode *P = B.CreatePHI(Int1Ty, 2); P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB); P->addIncoming(Bit, ThenB.GetInsertBlock()); return P; } /// Given a disjoint set of type identifiers and globals, lay out the globals, /// build the bit sets and lower the llvm.type.test calls. void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( ArrayRef TypeIds, ArrayRef Globals) { // Build a new global with the combined contents of the referenced globals. // This global is a struct whose even-indexed elements contain the original // contents of the referenced globals and whose odd-indexed elements contain // any padding required to align the next element to the next power of 2 plus // any additional padding required to meet its alignment requirements. std::vector GlobalInits; const DataLayout &DL = M.getDataLayout(); DenseMap GlobalLayout; Align MaxAlign; uint64_t CurOffset = 0; uint64_t DesiredPadding = 0; for (GlobalTypeMember *G : Globals) { auto *GV = cast(G->getGlobal()); MaybeAlign Alignment(GV->getAlignment()); if (!Alignment) Alignment = Align(DL.getABITypeAlignment(GV->getValueType())); MaxAlign = std::max(MaxAlign, *Alignment); uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment); GlobalLayout[G] = GVOffset; if (GVOffset != 0) { uint64_t Padding = GVOffset - CurOffset; GlobalInits.push_back( ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); } GlobalInits.push_back(GV->getInitializer()); uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); CurOffset = GVOffset + InitSize; // Compute the amount of padding that we'd like for the next element. DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize; // Experiments of different caps with Chromium on both x64 and ARM64 // have shown that the 32-byte cap generates the smallest binary on // both platforms while different caps yield similar performance. 
// (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html) if (DesiredPadding > 32) DesiredPadding = alignTo(InitSize, 32) - InitSize; } Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); auto *CombinedGlobal = new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); CombinedGlobal->setAlignment(MaxAlign); StructType *NewTy = cast(NewInit->getType()); lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each // global from which we built the combined global, and replace references // to the original globals with references to the aliases. for (unsigned I = 0; I != Globals.size(); ++I) { GlobalVariable *GV = cast(Globals[I]->getGlobal()); // Multiply by 2 to account for padding elements. Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); assert(GV->getType()->getAddressSpace() == 0); GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(), "", CombinedGlobalElemPtr, &M); GAlias->setVisibility(GV->getVisibility()); GAlias->takeName(GV); GV->replaceAllUsesWith(GAlias); GV->eraseFromParent(); } } bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() { return (Arch == Triple::x86 || Arch == Triple::x86_64) && ObjectFormat == Triple::ELF; } /// Export the given type identifier so that ThinLTO backends may import it. /// Type identifiers are exported by adding coarse-grained information about how /// to test the type identifier to the summary, and creating symbols in the /// object file (aliases and absolute symbols) containing fine-grained /// information about the type identifier. /// /// Returns a pointer to the location in which to store the bitmask, if /// applicable. uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId, const TypeIdLowering &TIL) { TypeTestResolution &TTRes = ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes; TTRes.TheKind = TIL.TheKind; auto ExportGlobal = [&](StringRef Name, Constant *C) { GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage, "__typeid_" + TypeId + "_" + Name, C, &M); GA->setVisibility(GlobalValue::HiddenVisibility); }; auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) { if (shouldExportConstantsAsAbsoluteSymbols()) ExportGlobal(Name, ConstantExpr::getIntToPtr(C, Int8PtrTy)); else Storage = cast(C)->getZExtValue(); }; if (TIL.TheKind != TypeTestResolution::Unsat) ExportGlobal("global_addr", TIL.OffsetedGlobal); if (TIL.TheKind == TypeTestResolution::ByteArray || TIL.TheKind == TypeTestResolution::Inline || TIL.TheKind == TypeTestResolution::AllOnes) { ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2); ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1); uint64_t BitSize = cast(TIL.SizeM1)->getZExtValue() + 1; if (TIL.TheKind == TypeTestResolution::Inline) TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6; else TTRes.SizeM1BitWidth = (BitSize <= 128) ? 
7 : 32; } if (TIL.TheKind == TypeTestResolution::ByteArray) { ExportGlobal("byte_array", TIL.TheByteArray); if (shouldExportConstantsAsAbsoluteSymbols()) ExportGlobal("bit_mask", TIL.BitMask); else return &TTRes.BitMask; } if (TIL.TheKind == TypeTestResolution::Inline) ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits); return nullptr; } LowerTypeTestsModule::TypeIdLowering LowerTypeTestsModule::importTypeId(StringRef TypeId) { const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId); if (!TidSummary) return {}; // Unsat: no globals match this type id. const TypeTestResolution &TTRes = TidSummary->TTRes; TypeIdLowering TIL; TIL.TheKind = TTRes.TheKind; auto ImportGlobal = [&](StringRef Name) { // Give the global a type of length 0 so that it is not assumed not to alias // with any other global. Constant *C = M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), Int8Arr0Ty); if (auto *GV = dyn_cast(C)) GV->setVisibility(GlobalValue::HiddenVisibility); C = ConstantExpr::getBitCast(C, Int8PtrTy); return C; }; auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth, Type *Ty) { if (!shouldExportConstantsAsAbsoluteSymbols()) { Constant *C = ConstantInt::get(isa(Ty) ? Ty : Int64Ty, Const); if (!isa(Ty)) C = ConstantExpr::getIntToPtr(C, Ty); return C; } Constant *C = ImportGlobal(Name); auto *GV = cast(C->stripPointerCasts()); if (isa(Ty)) C = ConstantExpr::getPtrToInt(C, Ty); if (GV->getMetadata(LLVMContext::MD_absolute_symbol)) return C; auto SetAbsRange = [&](uint64_t Min, uint64_t Max) { auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min)); auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max)); GV->setMetadata(LLVMContext::MD_absolute_symbol, MDNode::get(M.getContext(), {MinC, MaxC})); }; if (AbsWidth == IntPtrTy->getBitWidth()) SetAbsRange(~0ull, ~0ull); // Full set. else SetAbsRange(0, 1ull << AbsWidth); return C; }; if (TIL.TheKind != TypeTestResolution::Unsat) TIL.OffsetedGlobal = ImportGlobal("global_addr"); if (TIL.TheKind == TypeTestResolution::ByteArray || TIL.TheKind == TypeTestResolution::Inline || TIL.TheKind == TypeTestResolution::AllOnes) { TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, Int8Ty); TIL.SizeM1 = ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy); } if (TIL.TheKind == TypeTestResolution::ByteArray) { TIL.TheByteArray = ImportGlobal("byte_array"); TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, Int8PtrTy); } if (TIL.TheKind == TypeTestResolution::Inline) TIL.InlineBits = ImportConstant( "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth, TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty); return TIL; } void LowerTypeTestsModule::importTypeTest(CallInst *CI) { auto TypeIdMDVal = dyn_cast(CI->getArgOperand(1)); if (!TypeIdMDVal) report_fatal_error("Second argument of llvm.type.test must be metadata"); auto TypeIdStr = dyn_cast(TypeIdMDVal->getMetadata()); if (!TypeIdStr) report_fatal_error( "Second argument of llvm.type.test must be a metadata string"); TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); CI->replaceAllUsesWith(Lowered); CI->eraseFromParent(); } // ThinLTO backend: the function F has a jump table entry; update this module // accordingly. isJumpTableCanonical describes the type of the jump table entry. 
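// In rough terms (a summary of the cases handled below, not exhaustive):
// - a dso_local declaration whose jump table is canonical gets a ".cfi"
//   stand-in so that direct calls bypass the jump table;
// - a function with a non-canonical jump table has its uses redirected to an
//   external ".cfi_jt" jump-table symbol;
// - a canonical definition is renamed to ".cfi" and a declaration with the
//   original name and visibility takes its place.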
void LowerTypeTestsModule::importFunction(
    Function *F, bool isJumpTableCanonical,
    std::vector<GlobalAlias *> &AliasesToErase) {
  assert(F->getType()->getAddressSpace() == 0);

  GlobalValue::VisibilityTypes Visibility = F->getVisibility();
  std::string Name = F->getName();

  if (F->isDeclarationForLinker() && isJumpTableCanonical) {
    // Non-dso_local functions may be overridden at run time,
    // don't short-circuit them
    if (F->isDSOLocal()) {
      Function *RealF = Function::Create(F->getFunctionType(),
                                         GlobalValue::ExternalLinkage,
                                         F->getAddressSpace(),
                                         Name + ".cfi", &M);
      RealF->setVisibility(GlobalVariable::HiddenVisibility);
      replaceDirectCalls(F, RealF);
    }
    return;
  }

  Function *FDecl;
  if (!isJumpTableCanonical) {
    // Either a declaration of an external function or a reference to a locally
    // defined jump table.
    FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                             F->getAddressSpace(), Name + ".cfi_jt", &M);
    FDecl->setVisibility(GlobalValue::HiddenVisibility);
  } else {
    F->setName(Name + ".cfi");
    F->setLinkage(GlobalValue::ExternalLinkage);
    FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                             F->getAddressSpace(), Name, &M);
    FDecl->setVisibility(Visibility);
    Visibility = GlobalValue::HiddenVisibility;

    // Delete aliases pointing to this function, they'll be re-created in the
    // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed
    // will want to reset the aliasees first.
    for (auto &U : F->uses()) {
      if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
        Function *AliasDecl = Function::Create(
            F->getFunctionType(), GlobalValue::ExternalLinkage,
            F->getAddressSpace(), "", &M);
        AliasDecl->takeName(A);
        A->replaceAllUsesWith(AliasDecl);
        AliasesToErase.push_back(A);
      }
    }
  }

  if (F->hasExternalWeakLinkage())
    replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical);
  else
    replaceCfiUses(F, FDecl, isJumpTableCanonical);

  // Set visibility late because it's used in replaceCfiUses() to determine
  // whether uses need to be replaced.
  F->setVisibility(Visibility);
}

void LowerTypeTestsModule::lowerTypeTestCalls(
    ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
    const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
  CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);

  // For each type identifier in this disjoint set...
  for (Metadata *TypeId : TypeIds) {
    // Build the bitset.
    BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout);
    LLVM_DEBUG({
      if (auto MDS = dyn_cast<MDString>(TypeId))
        dbgs() << MDS->getString() << ": ";
      else
        dbgs() << "<unnamed>: ";
      BSI.print(dbgs());
    });

    ByteArrayInfo *BAI = nullptr;
    TypeIdLowering TIL;
    TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
        Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
    TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
    TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
    if (BSI.isAllOnes()) {
      TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
                                       : TypeTestResolution::AllOnes;
    } else if (BSI.BitSize <= 64) {
      TIL.TheKind = TypeTestResolution::Inline;
      uint64_t InlineBits = 0;
      for (auto Bit : BSI.Bits)
        InlineBits |= uint64_t(1) << Bit;
      if (InlineBits == 0)
        TIL.TheKind = TypeTestResolution::Unsat;
      else
        TIL.InlineBits = ConstantInt::get(
            (BSI.BitSize <= 32) ?
Int32Ty : Int64Ty, InlineBits); } else { TIL.TheKind = TypeTestResolution::ByteArray; ++NumByteArraysCreated; BAI = createByteArray(BSI); TIL.TheByteArray = BAI->ByteArray; TIL.BitMask = BAI->MaskGlobal; } TypeIdUserInfo &TIUI = TypeIdUsers[TypeId]; if (TIUI.IsExported) { uint8_t *MaskPtr = exportTypeId(cast(TypeId)->getString(), TIL); if (BAI) BAI->MaskPtr = MaskPtr; } // Lower each call to llvm.type.test for this type identifier. for (CallInst *CI : TIUI.CallSites) { ++NumTypeTestCallsLowered; Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); CI->replaceAllUsesWith(Lowered); CI->eraseFromParent(); } } } void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { if (Type->getNumOperands() != 2) report_fatal_error("All operands of type metadata must have 2 elements"); if (GO->isThreadLocal()) report_fatal_error("Bit set element may not be thread-local"); if (isa(GO) && GO->hasSection()) report_fatal_error( "A member of a type identifier may not have an explicit section"); // FIXME: We previously checked that global var member of a type identifier // must be a definition, but the IR linker may leave type metadata on // declarations. We should restore this check after fixing PR31759. auto OffsetConstMD = dyn_cast(Type->getOperand(0)); if (!OffsetConstMD) report_fatal_error("Type offset must be a constant"); auto OffsetInt = dyn_cast(OffsetConstMD->getValue()); if (!OffsetInt) report_fatal_error("Type offset must be an integer constant"); } static const unsigned kX86JumpTableEntrySize = 8; static const unsigned kARMJumpTableEntrySize = 4; unsigned LowerTypeTestsModule::getJumpTableEntrySize() { switch (Arch) { case Triple::x86: case Triple::x86_64: return kX86JumpTableEntrySize; case Triple::arm: case Triple::thumb: case Triple::aarch64: return kARMJumpTableEntrySize; default: report_fatal_error("Unsupported architecture for jump tables"); } } // Create a jump table entry for the target. This consists of an instruction // sequence containing a relative branch to Dest. Appends inline asm text, // constraints and arguments to AsmOS, ConstraintOS and AsmArgs. void LowerTypeTestsModule::createJumpTableEntry( raw_ostream &AsmOS, raw_ostream &ConstraintOS, Triple::ArchType JumpTableArch, SmallVectorImpl &AsmArgs, Function *Dest) { unsigned ArgIndex = AsmArgs.size(); if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) { AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n"; AsmOS << "int3\nint3\nint3\n"; } else if (JumpTableArch == Triple::arm || JumpTableArch == Triple::aarch64) { AsmOS << "b $" << ArgIndex << "\n"; } else if (JumpTableArch == Triple::thumb) { AsmOS << "b.w $" << ArgIndex << "\n"; } else { report_fatal_error("Unsupported architecture for jump tables"); } ConstraintOS << (ArgIndex > 0 ? ",s" : "s"); AsmArgs.push_back(Dest); } Type *LowerTypeTestsModule::getJumpTableEntryType() { return ArrayType::get(Int8Ty, getJumpTableEntrySize()); } /// Given a disjoint set of type identifiers and functions, build the bit sets /// and lower the llvm.type.test calls, architecture dependently. 
void LowerTypeTestsModule::buildBitSetsFromFunctions( ArrayRef TypeIds, ArrayRef Functions) { if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || Arch == Triple::thumb || Arch == Triple::aarch64) buildBitSetsFromFunctionsNative(TypeIds, Functions); else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) buildBitSetsFromFunctionsWASM(TypeIds, Functions); else report_fatal_error("Unsupported architecture for jump tables"); } void LowerTypeTestsModule::moveInitializerToModuleConstructor( GlobalVariable *GV) { if (WeakInitializerFn == nullptr) { WeakInitializerFn = Function::Create( FunctionType::get(Type::getVoidTy(M.getContext()), /* IsVarArg */ false), GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), "__cfi_global_var_init", &M); BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn); ReturnInst::Create(M.getContext(), BB); WeakInitializerFn->setSection( ObjectFormat == Triple::MachO ? "__TEXT,__StaticInit,regular,pure_instructions" : ".text.startup"); // This code is equivalent to relocation application, and should run at the // earliest possible time (i.e. with the highest priority). appendToGlobalCtors(M, WeakInitializerFn, /* Priority */ 0); } IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator()); GV->setConstant(false); IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlignment()); GV->setInitializer(Constant::getNullValue(GV->getValueType())); } void LowerTypeTestsModule::findGlobalVariableUsersOf( Constant *C, SmallSetVector &Out) { for (auto *U : C->users()){ if (auto *GV = dyn_cast(U)) Out.insert(GV); else if (auto *C2 = dyn_cast(U)) findGlobalVariableUsersOf(C2, Out); } } // Replace all uses of F with (F ? JT : 0). void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( Function *F, Constant *JT, bool IsJumpTableCanonical) { // The target expression can not appear in a constant initializer on most // (all?) targets. Switch to a runtime initializer. SmallSetVector GlobalVarUsers; findGlobalVariableUsersOf(F, GlobalVarUsers); for (auto GV : GlobalVarUsers) moveInitializerToModuleConstructor(GV); // Can not RAUW F with an expression that uses F. Replace with a temporary // placeholder first. Function *PlaceholderFn = Function::Create(cast(F->getValueType()), GlobalValue::ExternalWeakLinkage, F->getAddressSpace(), "", &M); replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical); Constant *Target = ConstantExpr::getSelect( ConstantExpr::getICmp(CmpInst::ICMP_NE, F, Constant::getNullValue(F->getType())), JT, Constant::getNullValue(F->getType())); PlaceholderFn->replaceAllUsesWith(Target); PlaceholderFn->eraseFromParent(); } static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) { Attribute TFAttr = F->getFnAttribute("target-features"); if (!TFAttr.hasAttribute(Attribute::None)) { SmallVector Features; TFAttr.getValueAsString().split(Features, ','); for (StringRef Feature : Features) { if (Feature == "-thumb-mode") return false; else if (Feature == "+thumb-mode") return true; } } return ModuleArch == Triple::thumb; } // Each jump table must be either ARM or Thumb as a whole for the bit-test math // to work. Pick one that matches the majority of members to minimize interop // veneers inserted by the linker. 
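// For example (hypothetical numbers): a disjoint set with four Thumb
// definitions and two non-canonical (PLT) entries counts Thumb 4 vs. ARM 2,
// so the whole table is emitted as Thumb; ties resolve to Thumb.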
static Triple::ArchType selectJumpTableArmEncoding(ArrayRef Functions, Triple::ArchType ModuleArch) { if (ModuleArch != Triple::arm && ModuleArch != Triple::thumb) return ModuleArch; unsigned ArmCount = 0, ThumbCount = 0; for (const auto GTM : Functions) { if (!GTM->isJumpTableCanonical()) { // PLT stubs are always ARM. // FIXME: This is the wrong heuristic for non-canonical jump tables. ++ArmCount; continue; } Function *F = cast(GTM->getGlobal()); ++(isThumbFunction(F, ModuleArch) ? ThumbCount : ArmCount); } return ArmCount > ThumbCount ? Triple::arm : Triple::thumb; } void LowerTypeTestsModule::createJumpTable( Function *F, ArrayRef Functions) { std::string AsmStr, ConstraintStr; raw_string_ostream AsmOS(AsmStr), ConstraintOS(ConstraintStr); SmallVector AsmArgs; AsmArgs.reserve(Functions.size() * 2); Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions, Arch); for (unsigned I = 0; I != Functions.size(); ++I) createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs, cast(Functions[I]->getGlobal())); // Align the whole table by entry size. F->setAlignment(Align(getJumpTableEntrySize())); // Skip prologue. // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3. // Luckily, this function does not get any prologue even without the // attribute. if (OS != Triple::Win32) F->addFnAttr(Attribute::Naked); if (JumpTableArch == Triple::arm) F->addFnAttr("target-features", "-thumb-mode"); if (JumpTableArch == Triple::thumb) { F->addFnAttr("target-features", "+thumb-mode"); // Thumb jump table assembly needs Thumb2. The following attribute is added // by Clang for -march=armv7. F->addFnAttr("target-cpu", "cortex-a8"); } // Make sure we don't emit .eh_frame for this function. F->addFnAttr(Attribute::NoUnwind); BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F); IRBuilder<> IRB(BB); SmallVector ArgTypes; ArgTypes.reserve(AsmArgs.size()); for (const auto &Arg : AsmArgs) ArgTypes.push_back(Arg->getType()); InlineAsm *JumpTableAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), ArgTypes, false), AsmOS.str(), ConstraintOS.str(), /*hasSideEffects=*/true); IRB.CreateCall(JumpTableAsm, AsmArgs); IRB.CreateUnreachable(); } /// Given a disjoint set of type identifiers and functions, build a jump table /// for the functions, build the bit sets and lower the llvm.type.test calls. void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ArrayRef TypeIds, ArrayRef Functions) { // Unlike the global bitset builder, the function bitset builder cannot // re-arrange functions in a particular order and base its calculations on the // layout of the functions' entry points, as we have no idea how large a // particular function will end up being (the size could even depend on what // this pass does!) Instead, we build a jump table, which is a block of code // consisting of one branch instruction for each of the functions in the bit // set that branches to the target function, and redirect any taken function // addresses to the corresponding jump table entry. In the object file's // symbol table, the symbols for the target functions also refer to the jump // table entries, so that addresses taken outside the module will pass any // verification done inside the module. 
// // In more concrete terms, suppose we have three functions f, g, h which are // of the same type, and a function foo that returns their addresses: // // f: // mov 0, %eax // ret // // g: // mov 1, %eax // ret // // h: // mov 2, %eax // ret // // foo: // mov f, %eax // mov g, %edx // mov h, %ecx // ret // // We output the jump table as module-level inline asm string. The end result // will (conceptually) look like this: // // f = .cfi.jumptable // g = .cfi.jumptable + 4 // h = .cfi.jumptable + 8 // .cfi.jumptable: // jmp f.cfi ; 5 bytes // int3 ; 1 byte // int3 ; 1 byte // int3 ; 1 byte // jmp g.cfi ; 5 bytes // int3 ; 1 byte // int3 ; 1 byte // int3 ; 1 byte // jmp h.cfi ; 5 bytes // int3 ; 1 byte // int3 ; 1 byte // int3 ; 1 byte // // f.cfi: // mov 0, %eax // ret // // g.cfi: // mov 1, %eax // ret // // h.cfi: // mov 2, %eax // ret // // foo: // mov f, %eax // mov g, %edx // mov h, %ecx // ret // // Because the addresses of f, g, h are evenly spaced at a power of 2, in the // normal case the check can be carried out using the same kind of simple // arithmetic that we normally use for globals. // FIXME: find a better way to represent the jumptable in the IR. assert(!Functions.empty()); // Build a simple layout based on the regular layout of jump tables. DenseMap GlobalLayout; unsigned EntrySize = getJumpTableEntrySize(); for (unsigned I = 0; I != Functions.size(); ++I) GlobalLayout[Functions[I]] = I * EntrySize; Function *JumpTableFn = Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()), /* IsVarArg */ false), GlobalValue::PrivateLinkage, M.getDataLayout().getProgramAddressSpace(), ".cfi.jumptable", &M); ArrayType *JumpTableType = ArrayType::get(getJumpTableEntryType(), Functions.size()); auto JumpTable = ConstantExpr::getPointerCast(JumpTableFn, JumpTableType->getPointerTo(0)); lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); { ScopedSaveAliaseesAndUsed S(M); // Build aliases pointing to offsets into the jump table, and replace // references to the original functions with references to the aliases. 
for (unsigned I = 0; I != Functions.size(); ++I) { Function *F = cast(Functions[I]->getGlobal()); bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical(); Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( ConstantExpr::getInBoundsGetElementPtr( JumpTableType, JumpTable, ArrayRef{ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, I)}), F->getType()); if (Functions[I]->isExported()) { if (IsJumpTableCanonical) { ExportSummary->cfiFunctionDefs().insert(F->getName()); } else { GlobalAlias *JtAlias = GlobalAlias::create( F->getValueType(), 0, GlobalValue::ExternalLinkage, F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); JtAlias->setVisibility(GlobalValue::HiddenVisibility); ExportSummary->cfiFunctionDecls().insert(F->getName()); } } if (!IsJumpTableCanonical) { if (F->hasExternalWeakLinkage()) replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, IsJumpTableCanonical); else replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical); } else { assert(F->getType()->getAddressSpace() == 0); GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); FAlias->setVisibility(F->getVisibility()); FAlias->takeName(F); if (FAlias->hasName()) F->setName(FAlias->getName() + ".cfi"); replaceCfiUses(F, FAlias, IsJumpTableCanonical); if (!F->hasLocalLinkage()) F->setVisibility(GlobalVariable::HiddenVisibility); } } } createJumpTable(JumpTableFn, Functions); } /// Assign a dummy layout using an incrementing counter, tag each function /// with its index represented as metadata, and lower each type test to an /// integer range comparison. During generation of the indirect function call /// table in the backend, it will assign the given indexes. /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet /// been finalized. void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM( ArrayRef TypeIds, ArrayRef Functions) { assert(!Functions.empty()); // Build consecutive monotonic integer ranges for each call target set DenseMap GlobalLayout; for (GlobalTypeMember *GTM : Functions) { Function *F = cast(GTM->getGlobal()); // Skip functions that are not address taken, to avoid bloating the table if (!F->hasAddressTaken()) continue; // Store metadata with the index for each function MDNode *MD = MDNode::get(F->getContext(), ArrayRef(ConstantAsMetadata::get( ConstantInt::get(Int64Ty, IndirectIndex)))); F->setMetadata("wasm.index", MD); // Assign the counter value GlobalLayout[GTM] = IndirectIndex++; } // The indirect function table index space starts at zero, so pass a NULL // pointer as the subtracted "jump table" offset. lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(Int32PtrTy), GlobalLayout); } void LowerTypeTestsModule::buildBitSetsFromDisjointSet( ArrayRef TypeIds, ArrayRef Globals, ArrayRef ICallBranchFunnels) { DenseMap TypeIdIndices; for (unsigned I = 0; I != TypeIds.size(); ++I) TypeIdIndices[TypeIds[I]] = I; // For each type identifier, build a set of indices that refer to members of // the type identifier. 
std::vector> TypeMembers(TypeIds.size()); unsigned GlobalIndex = 0; DenseMap GlobalIndices; for (GlobalTypeMember *GTM : Globals) { for (MDNode *Type : GTM->types()) { // Type = { offset, type identifier } auto I = TypeIdIndices.find(Type->getOperand(1)); if (I != TypeIdIndices.end()) TypeMembers[I->second].insert(GlobalIndex); } GlobalIndices[GTM] = GlobalIndex; GlobalIndex++; } for (ICallBranchFunnel *JT : ICallBranchFunnels) { TypeMembers.emplace_back(); std::set &TMSet = TypeMembers.back(); for (GlobalTypeMember *T : JT->targets()) TMSet.insert(GlobalIndices[T]); } // Order the sets of indices by size. The GlobalLayoutBuilder works best // when given small index sets first. llvm::stable_sort(TypeMembers, [](const std::set &O1, const std::set &O2) { return O1.size() < O2.size(); }); // Create a GlobalLayoutBuilder and provide it with index sets as layout // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as // close together as possible. GlobalLayoutBuilder GLB(Globals.size()); for (auto &&MemSet : TypeMembers) GLB.addFragment(MemSet); // Build a vector of globals with the computed layout. bool IsGlobalSet = Globals.empty() || isa(Globals[0]->getGlobal()); std::vector OrderedGTMs(Globals.size()); auto OGTMI = OrderedGTMs.begin(); for (auto &&F : GLB.Fragments) { for (auto &&Offset : F) { if (IsGlobalSet != isa(Globals[Offset]->getGlobal())) report_fatal_error("Type identifier may not contain both global " "variables and functions"); *OGTMI++ = Globals[Offset]; } } // Build the bitsets from this disjoint set. if (IsGlobalSet) buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs); else buildBitSetsFromFunctions(TypeIds, OrderedGTMs); } /// Lower all type tests in this module. LowerTypeTestsModule::LowerTypeTestsModule( Module &M, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { assert(!(ExportSummary && ImportSummary)); Triple TargetTriple(M.getTargetTriple()); Arch = TargetTriple.getArch(); OS = TargetTriple.getOS(); ObjectFormat = TargetTriple.getObjectFormat(); } bool LowerTypeTestsModule::runForTesting(Module &M) { ModuleSummaryIndex Summary(/*HaveGVs=*/false); // Handle the command-line summary arguments. This code is for testing // purposes only, so we handle errors directly. if (!ClReadSummary.empty()) { ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary + ": "); auto ReadSummaryFile = ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary))); yaml::Input In(ReadSummaryFile->getBuffer()); In >> Summary; ExitOnErr(errorCodeToError(In.error())); } bool Changed = LowerTypeTestsModule( M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, ClSummaryAction == PassSummaryAction::Import ? 
&Summary : nullptr) .lower(); if (!ClWriteSummary.empty()) { ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + ": "); std::error_code EC; raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); ExitOnErr(errorCodeToError(EC)); yaml::Output Out(OS); Out << Summary; } return Changed; } static bool isDirectCall(Use& U) { auto *Usr = dyn_cast(U.getUser()); if (Usr) { CallSite CS(Usr); if (CS.isCallee(&U)) return true; } return false; } void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical) { SmallSetVector Constants; auto UI = Old->use_begin(), E = Old->use_end(); for (; UI != E;) { Use &U = *UI; ++UI; // Skip block addresses if (isa(U.getUser())) continue; // Skip direct calls to externally defined or non-dso_local functions if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) continue; // Must handle Constants specially, we cannot call replaceUsesOfWith on a // constant because they are uniqued. if (auto *C = dyn_cast(U.getUser())) { if (!isa(C)) { // Save unique users to avoid processing operand replacement // more than once. Constants.insert(C); continue; } } U.set(New); } // Process operand replacement of saved constants. for (auto *C : Constants) C->handleOperandChange(Old, New); } void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); }); } bool LowerTypeTestsModule::lower() { // If only some of the modules were split, we cannot correctly perform // this transformation. We already checked for the presense of type tests // with partially split modules during the thin link, and would have emitted // an error if any were found, so here we can simply return. if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) || (ImportSummary && ImportSummary->partiallySplitLTOUnits())) return false; Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); Function *ICallBranchFunnelFunc = M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel)); if ((!TypeTestFunc || TypeTestFunc->use_empty()) && (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) && !ExportSummary && !ImportSummary) return false; if (ImportSummary) { if (TypeTestFunc) { for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); UI != UE;) { auto *CI = cast((*UI++).getUser()); importTypeTest(CI); } } if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty()) report_fatal_error( "unexpected call to llvm.icall.branch.funnel during import phase"); SmallVector Defs; SmallVector Decls; for (auto &F : M) { // CFI functions are either external, or promoted. A local function may // have the same name, but it's not the one we are looking for. if (F.hasLocalLinkage()) continue; if (ImportSummary->cfiFunctionDefs().count(F.getName())) Defs.push_back(&F); else if (ImportSummary->cfiFunctionDecls().count(F.getName())) Decls.push_back(&F); } std::vector AliasesToErase; { ScopedSaveAliaseesAndUsed S(M); for (auto F : Defs) importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase); for (auto F : Decls) importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase); } for (GlobalAlias *GA : AliasesToErase) GA->eraseFromParent(); return true; } // Equivalence class set containing type identifiers and the globals that // reference them. This is used to partition the set of type identifiers in // the module into disjoint sets. 
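  // For example (hypothetical globals): if @A and @B both carry type metadata
  // !"t1" and @B also carries !"t2", then @A, @B, !"t1" and !"t2" all end up
  // in one equivalence class and are laid out and lowered together as a
  // single disjoint set.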
  // Equivalence class set containing type identifiers and the globals that
  // reference them. This is used to partition the set of type identifiers in
  // the module into disjoint sets.
  using GlobalClassesTy = EquivalenceClasses<
      PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
  GlobalClassesTy GlobalClasses;

  // Verify the type metadata and build a few data structures to let us
  // efficiently enumerate the type identifiers associated with a global:
  // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
  // of associated type metadata) and a mapping from type identifiers to their
  // list of GlobalTypeMembers and last observed index in the list of globals.
  // The indices will be used later to deterministically order the list of type
  // identifiers.
  BumpPtrAllocator Alloc;
  struct TIInfo {
    unsigned UniqueId;
    std::vector<GlobalTypeMember *> RefGlobals;
  };
  DenseMap<Metadata *, TIInfo> TypeIdInfo;
  unsigned CurUniqueId = 0;
  SmallVector<MDNode *, 2> Types;

  // Cross-DSO CFI emits jumptable entries for exported functions as well as
  // address taken functions in case they are address taken in other modules.
  const bool CrossDsoCfi = M.getModuleFlag("Cross-DSO CFI") != nullptr;

  struct ExportedFunctionInfo {
    CfiFunctionLinkage Linkage;
    MDNode *FuncMD; // {name, linkage, type[, type...]}
  };
  DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
  if (ExportSummary) {
    // A set of all functions that are address taken by a live global object.
    DenseSet<GlobalValue::GUID> AddressTaken;
    for (auto &I : *ExportSummary)
      for (auto &GVS : I.second.SummaryList)
        if (GVS->isLive())
          for (auto &Ref : GVS->refs())
            AddressTaken.insert(Ref.getGUID());

    NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
    if (CfiFunctionsMD) {
      for (auto FuncMD : CfiFunctionsMD->operands()) {
        assert(FuncMD->getNumOperands() >= 2);
        StringRef FunctionName =
            cast<MDString>(FuncMD->getOperand(0))->getString();
        CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
            cast<ConstantAsMetadata>(FuncMD->getOperand(1))
                ->getValue()
                ->getUniqueInteger()
                .getZExtValue());
        const GlobalValue::GUID GUID = GlobalValue::getGUID(
            GlobalValue::dropLLVMManglingEscape(FunctionName));
        // Do not emit jumptable entries for functions that are not-live and
        // have no live references (and are not exported with cross-DSO CFI.)
        if (!ExportSummary->isGUIDLive(GUID))
          continue;
        if (!AddressTaken.count(GUID)) {
          if (!CrossDsoCfi || Linkage != CFL_Definition)
            continue;

          bool Exported = false;
          if (auto VI = ExportSummary->getValueInfo(GUID))
            for (auto &GVS : VI.getSummaryList())
              if (GVS->isLive() && !GlobalValue::isLocalLinkage(GVS->linkage()))
                Exported = true;

          if (!Exported)
            continue;
        }
        auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
        if (!P.second && P.first->second.Linkage != CFL_Definition)
          P.first->second = {Linkage, FuncMD};
      }
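      // Shape of one !cfi.functions entry consumed above and turned into a
      // declaration below (illustrative; operand meanings follow the
      // {name, linkage, type[, type...]} comment on ExportedFunctionInfo, and
      // the i8 linkage operand is assumed to encode a CfiFunctionLinkage
      // value such as CFL_Definition):
      //
      //   !cfi.functions = !{!2}
      //   !2 = !{!"external_func", i8 0, !3}
      //   !3 = !{i64 0, !"_ZTSFvvE"}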
      for (const auto &P : ExportedFunctions) {
        StringRef FunctionName = P.first;
        CfiFunctionLinkage Linkage = P.second.Linkage;
        MDNode *FuncMD = P.second.FuncMD;
        Function *F = M.getFunction(FunctionName);
        if (F && F->hasLocalLinkage()) {
          // Locally defined function that happens to have the same name as a
          // function defined in a ThinLTO module. Rename it to move it out of
          // the way of the external reference that we're about to create.
          // Note that setName will find a unique name for the function, so
          // even if there is an existing function with the suffix there won't
          // be a name collision.
          F->setName(F->getName() + ".1");
          F = nullptr;
        }

        if (!F)
          F = Function::Create(
              FunctionType::get(Type::getVoidTy(M.getContext()), false),
              GlobalVariable::ExternalLinkage,
              M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);

        // If the function is available_externally, remove its definition so
        // that it is handled the same way as a declaration. Later we will try
        // to create an alias using this function's linkage, which will fail if
        // the linkage is available_externally. This will also result in us
        // following the code path below to replace the type metadata.
        if (F->hasAvailableExternallyLinkage()) {
          F->setLinkage(GlobalValue::ExternalLinkage);
          F->deleteBody();
          F->setComdat(nullptr);
          F->clearMetadata();
        }

        // Update the linkage for extern_weak declarations when a definition
        // exists.
        if (Linkage == CFL_Definition && F->hasExternalWeakLinkage())
          F->setLinkage(GlobalValue::ExternalLinkage);

        // If the function in the full LTO module is a declaration, replace its
        // type metadata with the type metadata we found in cfi.functions. That
        // metadata is presumed to be more accurate than the metadata attached
        // to the declaration.
        if (F->isDeclaration()) {
          if (Linkage == CFL_WeakDeclaration)
            F->setLinkage(GlobalValue::ExternalWeakLinkage);

          F->eraseMetadata(LLVMContext::MD_type);
          for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
            F->addMetadata(LLVMContext::MD_type,
                           *cast<MDNode>(FuncMD->getOperand(I).get()));
        }
      }
    }
  }

  DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers;
  for (GlobalObject &GO : M.global_objects()) {
    if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
      continue;

    Types.clear();
    GO.getMetadata(LLVMContext::MD_type, Types);

    bool IsJumpTableCanonical = false;
    bool IsExported = false;
    if (Function *F = dyn_cast<Function>(&GO)) {
      IsJumpTableCanonical = isJumpTableCanonical(F);
      if (ExportedFunctions.count(F->getName())) {
        IsJumpTableCanonical |=
            ExportedFunctions[F->getName()].Linkage == CFL_Definition;
        IsExported = true;
      // TODO: The logic here checks only that the function is address taken,
      // not that the address takers are live. This can be updated to check
      // their liveness and emit fewer jumptable entries once monolithic LTO
      // builds also emit summaries.
      } else if (!F->hasAddressTaken()) {
        if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
          continue;
      }
    }

    auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
                                         IsExported, Types);
    GlobalTypeMembers[&GO] = GTM;
    for (MDNode *Type : Types) {
      verifyTypeMDNode(&GO, Type);
      auto &Info = TypeIdInfo[Type->getOperand(1)];
      Info.UniqueId = ++CurUniqueId;
      Info.RefGlobals.push_back(GTM);
    }
  }
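  // The code below gathers the intrinsic call sites that drive the lowering.
  // A type test in the input IR looks like this (form as documented in the
  // LangRef; the names are illustrative):
  //
  //   %p = bitcast i32* @g1 to i8*
  //   %ok = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
  //   br i1 %ok, label %cont, label %trap
  //
  // Each such call is recorded against its type identifier and later replaced
  // with an inline check against the layout computed for its disjoint set.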
  auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
    // Add the call site to the list of call sites for this type identifier. We
    // also use TypeIdUsers to keep track of whether we have seen this type
    // identifier before. If we have, we don't need to re-add the referenced
    // globals to the equivalence class.
    auto Ins = TypeIdUsers.insert({TypeId, {}});
    if (Ins.second) {
      // Add the type identifier to the equivalence class.
      GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
      GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);

      // Add the referenced globals to the type identifier's equivalence class.
      for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
        CurSet = GlobalClasses.unionSets(
            CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
    }

    return Ins.first->second;
  };

  if (TypeTestFunc) {
    for (const Use &U : TypeTestFunc->uses()) {
      auto CI = cast<CallInst>(U.getUser());

      auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
      if (!TypeIdMDVal)
        report_fatal_error(
            "Second argument of llvm.type.test must be metadata");
      auto TypeId = TypeIdMDVal->getMetadata();
      AddTypeIdUse(TypeId).CallSites.push_back(CI);
    }
  }

  if (ICallBranchFunnelFunc) {
    for (const Use &U : ICallBranchFunnelFunc->uses()) {
      if (Arch != Triple::x86_64)
        report_fatal_error(
            "llvm.icall.branch.funnel not supported on this target");

      auto CI = cast<CallInst>(U.getUser());

      std::vector<GlobalTypeMember *> Targets;
      if (CI->getNumArgOperands() % 2 != 1)
        report_fatal_error("number of arguments should be odd");

      GlobalClassesTy::member_iterator CurSet;
      for (unsigned I = 1; I != CI->getNumArgOperands(); I += 2) {
        int64_t Offset;
        auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
            CI->getOperand(I), Offset, M.getDataLayout()));
        if (!Base)
          report_fatal_error(
              "Expected branch funnel operand to be global value");

        GlobalTypeMember *GTM = GlobalTypeMembers[Base];
        Targets.push_back(GTM);
        GlobalClassesTy::member_iterator NewSet =
            GlobalClasses.findLeader(GlobalClasses.insert(GTM));
        if (I == 1)
          CurSet = NewSet;
        else
          CurSet = GlobalClasses.unionSets(CurSet, NewSet);
      }

      GlobalClasses.unionSets(
          CurSet, GlobalClasses.findLeader(
                      GlobalClasses.insert(ICallBranchFunnel::create(
                          Alloc, CI, Targets, ++CurUniqueId))));
    }
  }

  if (ExportSummary) {
    DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
    for (auto &P : TypeIdInfo) {
      if (auto *TypeId = dyn_cast<MDString>(P.first))
        MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
            TypeId);
    }

    for (auto &P : *ExportSummary) {
      for (auto &S : P.second.SummaryList) {
        if (!ExportSummary->isGlobalValueLive(S.get()))
          continue;
        if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
          for (GlobalValue::GUID G : FS->type_tests())
            for (Metadata *MD : MetadataByGUID[G])
              AddTypeIdUse(MD).IsExported = true;
      }
    }
  }

  if (GlobalClasses.empty())
    return false;

  // Build a list of disjoint sets ordered by their maximum global index for
  // determinism.
  std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
                                 E = GlobalClasses.end();
       I != E; ++I) {
    if (!I->isLeader())
      continue;
    ++NumTypeIdDisjointSets;

    unsigned MaxUniqueId = 0;
    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
         MI != GlobalClasses.member_end(); ++MI) {
      if (auto *MD = MI->dyn_cast<Metadata *>())
        MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId);
      else if (auto *BF = MI->dyn_cast<ICallBranchFunnel *>())
        MaxUniqueId = std::max(MaxUniqueId, BF->UniqueId);
    }
    Sets.emplace_back(I, MaxUniqueId);
  }
  llvm::sort(Sets,
             [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
                const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
               return S1.second < S2.second;
             });

  // For each disjoint set we found...
  for (const auto &S : Sets) {
    // Build the list of type identifiers in this disjoint set.
    std::vector<Metadata *> TypeIds;
    std::vector<GlobalTypeMember *> Globals;
    std::vector<ICallBranchFunnel *> ICallBranchFunnels;
    for (GlobalClassesTy::member_iterator MI =
             GlobalClasses.member_begin(S.first);
         MI != GlobalClasses.member_end(); ++MI) {
      if (MI->is<Metadata *>())
        TypeIds.push_back(MI->get<Metadata *>());
      else if (MI->is<GlobalTypeMember *>())
        Globals.push_back(MI->get<GlobalTypeMember *>());
      else
        ICallBranchFunnels.push_back(MI->get<ICallBranchFunnel *>());
    }

    // Order type identifiers by unique ID for determinism. This ordering is
    // stable as there is a one-to-one mapping between metadata and unique IDs.
    llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
      return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
    });

    // Same for the branch funnels.
    llvm::sort(ICallBranchFunnels,
               [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
                 return F1->UniqueId < F2->UniqueId;
               });

    // Build bitsets for this disjoint set.
    buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
  }

  allocateByteArrays();

  // Parse alias data to replace stand-in function declarations for aliases
  // with an alias to the intended target.
  if (ExportSummary) {
    if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) {
      for (auto AliasMD : AliasesMD->operands()) {
        assert(AliasMD->getNumOperands() >= 4);
        StringRef AliasName =
            cast<MDString>(AliasMD->getOperand(0))->getString();
        StringRef Aliasee = cast<MDString>(AliasMD->getOperand(1))->getString();

        if (!ExportedFunctions.count(Aliasee) ||
            ExportedFunctions[Aliasee].Linkage != CFL_Definition ||
            !M.getNamedAlias(Aliasee))
          continue;

        GlobalValue::VisibilityTypes Visibility =
            static_cast<GlobalValue::VisibilityTypes>(
                cast<ConstantAsMetadata>(AliasMD->getOperand(2))
                    ->getValue()
                    ->getUniqueInteger()
                    .getZExtValue());
        bool Weak =
            static_cast<bool>(cast<ConstantAsMetadata>(AliasMD->getOperand(3))
                                  ->getValue()
                                  ->getUniqueInteger()
                                  .getZExtValue());

        auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee));
        Alias->setVisibility(Visibility);
        if (Weak)
          Alias->setLinkage(GlobalValue::WeakAnyLinkage);

        if (auto *F = M.getFunction(AliasName)) {
          Alias->takeName(F);
          F->replaceAllUsesWith(Alias);
          F->eraseFromParent();
        } else {
          Alias->setName(AliasName);
        }
      }
    }
  }

  // Emit .symver directives for exported functions, if they exist.
  if (ExportSummary) {
    if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) {
      for (auto Symver : SymversMD->operands()) {
        assert(Symver->getNumOperands() >= 2);
        StringRef SymbolName =
            cast<MDString>(Symver->getOperand(0))->getString();
        StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString();

        if (!ExportedFunctions.count(SymbolName))
          continue;

        M.appendModuleInlineAsm(
            (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str());
      }
    }
  }

  return true;
}

PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
  bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
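// Testing sketch (assumptions: the pass name accepted by -passes and the exact
// opt invocation may vary between LLVM versions; the summary-related flags are
// the cl::opts defined at the top of this file):
//
//   opt -passes=lowertypetests \
//       -lowertypetests-summary-action=export \
//       -lowertypetests-write-summary=summary.yaml \
//       -S in.ll -o out.ll
//
// runForTesting() round-trips the ModuleSummaryIndex through YAML so that the
// export and import halves of the transform can be exercised in isolation,
// mirroring the thin-link/backend split used by ThinLTO with CFI.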