1327952Sdim//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===// 2303231Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6303231Sdim// 7303231Sdim//===----------------------------------------------------------------------===// 8303231Sdim// 9303231Sdim// This file implements a CFL-base, summary-based alias analysis algorithm. It 10303231Sdim// does not depend on types. The algorithm is a mixture of the one described in 11303231Sdim// "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast 12303231Sdim// algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by 13303231Sdim// Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a 14303231Sdim// graph of the uses of a variable, where each node is a memory location, and 15303231Sdim// each edge is an action that happened on that memory location. The "actions" 16303231Sdim// can be one of Dereference, Reference, or Assign. The precision of this 17303231Sdim// analysis is roughly the same as that of an one level context-sensitive 18303231Sdim// Steensgaard's algorithm. 19303231Sdim// 20303231Sdim// Two variables are considered as aliasing iff you can reach one value's node 21303231Sdim// from the other value's node and the language formed by concatenating all of 22303231Sdim// the edge labels (actions) conforms to a context-free grammar. 23303231Sdim// 24303231Sdim// Because this algorithm requires a graph search on each query, we execute the 25303231Sdim// algorithm outlined in "Fast algorithms..." (mentioned above) 26303231Sdim// in order to transform the graph into sets of variables that may alias in 27303231Sdim// ~nlogn time (n = number of variables), which makes queries take constant 28303231Sdim// time. 29303231Sdim//===----------------------------------------------------------------------===// 30303231Sdim 31303231Sdim// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and 32303231Sdim// CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because 33303231Sdim// FunctionPasses are only allowed to inspect the Function that they're being 34303231Sdim// run on. Realistically, this likely isn't a problem until we allow 35303231Sdim// FunctionPasses to run concurrently. 36303231Sdim 37303231Sdim#include "llvm/Analysis/CFLSteensAliasAnalysis.h" 38327952Sdim#include "AliasAnalysisSummary.h" 39303231Sdim#include "CFLGraph.h" 40303231Sdim#include "StratifiedSets.h" 41303231Sdim#include "llvm/ADT/DenseMap.h" 42303231Sdim#include "llvm/ADT/Optional.h" 43327952Sdim#include "llvm/ADT/SmallVector.h" 44303231Sdim#include "llvm/Analysis/TargetLibraryInfo.h" 45303231Sdim#include "llvm/IR/Constants.h" 46303231Sdim#include "llvm/IR/Function.h" 47327952Sdim#include "llvm/IR/Type.h" 48327952Sdim#include "llvm/IR/Value.h" 49360784Sdim#include "llvm/InitializePasses.h" 50303231Sdim#include "llvm/Pass.h" 51303231Sdim#include "llvm/Support/Debug.h" 52303231Sdim#include "llvm/Support/raw_ostream.h" 53303231Sdim#include <algorithm> 54303231Sdim#include <cassert> 55327952Sdim#include <limits> 56303231Sdim#include <memory> 57327952Sdim#include <utility> 58303231Sdim 59303231Sdimusing namespace llvm; 60303231Sdimusing namespace llvm::cflaa; 61303231Sdim 62303231Sdim#define DEBUG_TYPE "cfl-steens-aa" 63303231Sdim 64360784SdimCFLSteensAAResult::CFLSteensAAResult( 65360784Sdim std::function<const TargetLibraryInfo &(Function &F)> GetTLI) 66360784Sdim : AAResultBase(), GetTLI(std::move(GetTLI)) {} 67303231SdimCFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) 68360784Sdim : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} 69327952SdimCFLSteensAAResult::~CFLSteensAAResult() = default; 70303231Sdim 71303231Sdim/// Information we have about a function and would like to keep around. 72303231Sdimclass CFLSteensAAResult::FunctionInfo { 73303231Sdim StratifiedSets<InstantiatedValue> Sets; 74303231Sdim AliasSummary Summary; 75303231Sdim 76303231Sdimpublic: 77303231Sdim FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals, 78303231Sdim StratifiedSets<InstantiatedValue> S); 79303231Sdim 80303231Sdim const StratifiedSets<InstantiatedValue> &getStratifiedSets() const { 81303231Sdim return Sets; 82303231Sdim } 83327952Sdim 84303231Sdim const AliasSummary &getAliasSummary() const { return Summary; } 85303231Sdim}; 86303231Sdim 87303231Sdimconst StratifiedIndex StratifiedLink::SetSentinel = 88303231Sdim std::numeric_limits<StratifiedIndex>::max(); 89303231Sdim 90303231Sdim//===----------------------------------------------------------------------===// 91303231Sdim// Function declarations that require types defined in the namespace above 92303231Sdim//===----------------------------------------------------------------------===// 93303231Sdim 94303231Sdim/// Determines whether it would be pointless to add the given Value to our sets. 95303231Sdimstatic bool canSkipAddingToSets(Value *Val) { 96303231Sdim // Constants can share instances, which may falsely unify multiple 97303231Sdim // sets, e.g. in 98303231Sdim // store i32* null, i32** %ptr1 99303231Sdim // store i32* null, i32** %ptr2 100303231Sdim // clearly ptr1 and ptr2 should not be unified into the same set, so 101303231Sdim // we should filter out the (potentially shared) instance to 102303231Sdim // i32* null. 103303231Sdim if (isa<Constant>(Val)) { 104303231Sdim // TODO: Because all of these things are constant, we can determine whether 105303231Sdim // the data is *actually* mutable at graph building time. This will probably 106303231Sdim // come for free/cheap with offset awareness. 107303231Sdim bool CanStoreMutableData = isa<GlobalValue>(Val) || 108303231Sdim isa<ConstantExpr>(Val) || 109303231Sdim isa<ConstantAggregate>(Val); 110303231Sdim return !CanStoreMutableData; 111303231Sdim } 112303231Sdim 113303231Sdim return false; 114303231Sdim} 115303231Sdim 116303231SdimCFLSteensAAResult::FunctionInfo::FunctionInfo( 117303231Sdim Function &Fn, const SmallVectorImpl<Value *> &RetVals, 118303231Sdim StratifiedSets<InstantiatedValue> S) 119303231Sdim : Sets(std::move(S)) { 120303231Sdim // Historically, an arbitrary upper-bound of 50 args was selected. We may want 121303231Sdim // to remove this if it doesn't really matter in practice. 122303231Sdim if (Fn.arg_size() > MaxSupportedArgsInSummary) 123303231Sdim return; 124303231Sdim 125303231Sdim DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap; 126303231Sdim 127303231Sdim // Our intention here is to record all InterfaceValues that share the same 128303231Sdim // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we 129303231Sdim // have its StratifiedIndex scanned here and check if the index is presented 130303231Sdim // in InterfaceMap: if it is not, we add the correspondence to the map; 131303231Sdim // otherwise, an aliasing relation is found and we add it to 132303231Sdim // RetParamRelations. 133303231Sdim 134303231Sdim auto AddToRetParamRelations = [&](unsigned InterfaceIndex, 135303231Sdim StratifiedIndex SetIndex) { 136303231Sdim unsigned Level = 0; 137303231Sdim while (true) { 138303231Sdim InterfaceValue CurrValue{InterfaceIndex, Level}; 139303231Sdim 140303231Sdim auto Itr = InterfaceMap.find(SetIndex); 141303231Sdim if (Itr != InterfaceMap.end()) { 142303231Sdim if (CurrValue != Itr->second) 143303231Sdim Summary.RetParamRelations.push_back( 144314564Sdim ExternalRelation{CurrValue, Itr->second, UnknownOffset}); 145303231Sdim break; 146303231Sdim } 147303231Sdim 148303231Sdim auto &Link = Sets.getLink(SetIndex); 149303231Sdim InterfaceMap.insert(std::make_pair(SetIndex, CurrValue)); 150303231Sdim auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs); 151303231Sdim if (ExternalAttrs.any()) 152303231Sdim Summary.RetParamAttributes.push_back( 153303231Sdim ExternalAttribute{CurrValue, ExternalAttrs}); 154303231Sdim 155303231Sdim if (!Link.hasBelow()) 156303231Sdim break; 157303231Sdim 158303231Sdim ++Level; 159303231Sdim SetIndex = Link.Below; 160303231Sdim } 161303231Sdim }; 162303231Sdim 163303231Sdim // Populate RetParamRelations for return values 164303231Sdim for (auto *RetVal : RetVals) { 165303231Sdim assert(RetVal != nullptr); 166303231Sdim assert(RetVal->getType()->isPointerTy()); 167303231Sdim auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0}); 168303231Sdim if (RetInfo.hasValue()) 169303231Sdim AddToRetParamRelations(0, RetInfo->Index); 170303231Sdim } 171303231Sdim 172303231Sdim // Populate RetParamRelations for parameters 173303231Sdim unsigned I = 0; 174303231Sdim for (auto &Param : Fn.args()) { 175303231Sdim if (Param.getType()->isPointerTy()) { 176303231Sdim auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0}); 177303231Sdim if (ParamInfo.hasValue()) 178303231Sdim AddToRetParamRelations(I + 1, ParamInfo->Index); 179303231Sdim } 180303231Sdim ++I; 181303231Sdim } 182303231Sdim} 183303231Sdim 184303231Sdim// Builds the graph + StratifiedSets for a function. 185303231SdimCFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { 186360784Sdim CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn); 187303231Sdim StratifiedSetsBuilder<InstantiatedValue> SetBuilder; 188303231Sdim 189303231Sdim // Add all CFLGraph nodes and all Dereference edges to StratifiedSets 190303231Sdim auto &Graph = GraphBuilder.getCFLGraph(); 191303231Sdim for (const auto &Mapping : Graph.value_mappings()) { 192303231Sdim auto Val = Mapping.first; 193303231Sdim if (canSkipAddingToSets(Val)) 194303231Sdim continue; 195303231Sdim auto &ValueInfo = Mapping.second; 196303231Sdim 197303231Sdim assert(ValueInfo.getNumLevels() > 0); 198303231Sdim SetBuilder.add(InstantiatedValue{Val, 0}); 199303231Sdim SetBuilder.noteAttributes(InstantiatedValue{Val, 0}, 200303231Sdim ValueInfo.getNodeInfoAtLevel(0).Attr); 201303231Sdim for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) { 202303231Sdim SetBuilder.add(InstantiatedValue{Val, I + 1}); 203303231Sdim SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1}, 204303231Sdim ValueInfo.getNodeInfoAtLevel(I + 1).Attr); 205303231Sdim SetBuilder.addBelow(InstantiatedValue{Val, I}, 206303231Sdim InstantiatedValue{Val, I + 1}); 207303231Sdim } 208303231Sdim } 209303231Sdim 210303231Sdim // Add all assign edges to StratifiedSets 211303231Sdim for (const auto &Mapping : Graph.value_mappings()) { 212303231Sdim auto Val = Mapping.first; 213303231Sdim if (canSkipAddingToSets(Val)) 214303231Sdim continue; 215303231Sdim auto &ValueInfo = Mapping.second; 216303231Sdim 217303231Sdim for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { 218303231Sdim auto Src = InstantiatedValue{Val, I}; 219303231Sdim for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) 220303231Sdim SetBuilder.addWith(Src, Edge.Other); 221303231Sdim } 222303231Sdim } 223303231Sdim 224303231Sdim return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build()); 225303231Sdim} 226303231Sdim 227303231Sdimvoid CFLSteensAAResult::scan(Function *Fn) { 228303231Sdim auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>())); 229303231Sdim (void)InsertPair; 230303231Sdim assert(InsertPair.second && 231303231Sdim "Trying to scan a function that has already been cached"); 232303231Sdim 233303231Sdim // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call 234303231Sdim // may get evaluated after operator[], potentially triggering a DenseMap 235303231Sdim // resize and invalidating the reference returned by operator[] 236303231Sdim auto FunInfo = buildSetsFrom(Fn); 237303231Sdim Cache[Fn] = std::move(FunInfo); 238303231Sdim 239321369Sdim Handles.emplace_front(Fn, this); 240303231Sdim} 241303231Sdim 242303231Sdimvoid CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); } 243303231Sdim 244303231Sdim/// Ensures that the given function is available in the cache, and returns the 245303231Sdim/// entry. 246303231Sdimconst Optional<CFLSteensAAResult::FunctionInfo> & 247303231SdimCFLSteensAAResult::ensureCached(Function *Fn) { 248303231Sdim auto Iter = Cache.find(Fn); 249303231Sdim if (Iter == Cache.end()) { 250303231Sdim scan(Fn); 251303231Sdim Iter = Cache.find(Fn); 252303231Sdim assert(Iter != Cache.end()); 253303231Sdim assert(Iter->second.hasValue()); 254303231Sdim } 255303231Sdim return Iter->second; 256303231Sdim} 257303231Sdim 258303231Sdimconst AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) { 259303231Sdim auto &FunInfo = ensureCached(&Fn); 260303231Sdim if (FunInfo.hasValue()) 261303231Sdim return &FunInfo->getAliasSummary(); 262303231Sdim else 263303231Sdim return nullptr; 264303231Sdim} 265303231Sdim 266303231SdimAliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, 267303231Sdim const MemoryLocation &LocB) { 268303231Sdim auto *ValA = const_cast<Value *>(LocA.Ptr); 269303231Sdim auto *ValB = const_cast<Value *>(LocB.Ptr); 270303231Sdim 271303231Sdim if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) 272303231Sdim return NoAlias; 273303231Sdim 274303231Sdim Function *Fn = nullptr; 275321369Sdim Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA)); 276321369Sdim Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB)); 277321369Sdim if (!MaybeFnA && !MaybeFnB) { 278303231Sdim // The only times this is known to happen are when globals + InlineAsm are 279303231Sdim // involved 280341825Sdim LLVM_DEBUG( 281341825Sdim dbgs() 282341825Sdim << "CFLSteensAA: could not extract parent function information.\n"); 283303231Sdim return MayAlias; 284303231Sdim } 285303231Sdim 286321369Sdim if (MaybeFnA) { 287321369Sdim Fn = MaybeFnA; 288321369Sdim assert((!MaybeFnB || MaybeFnB == MaybeFnA) && 289303231Sdim "Interprocedural queries not supported"); 290303231Sdim } else { 291321369Sdim Fn = MaybeFnB; 292303231Sdim } 293303231Sdim 294303231Sdim assert(Fn != nullptr); 295303231Sdim auto &MaybeInfo = ensureCached(Fn); 296303231Sdim assert(MaybeInfo.hasValue()); 297303231Sdim 298303231Sdim auto &Sets = MaybeInfo->getStratifiedSets(); 299303231Sdim auto MaybeA = Sets.find(InstantiatedValue{ValA, 0}); 300303231Sdim if (!MaybeA.hasValue()) 301303231Sdim return MayAlias; 302303231Sdim 303303231Sdim auto MaybeB = Sets.find(InstantiatedValue{ValB, 0}); 304303231Sdim if (!MaybeB.hasValue()) 305303231Sdim return MayAlias; 306303231Sdim 307303231Sdim auto SetA = *MaybeA; 308303231Sdim auto SetB = *MaybeB; 309303231Sdim auto AttrsA = Sets.getLink(SetA.Index).Attrs; 310303231Sdim auto AttrsB = Sets.getLink(SetB.Index).Attrs; 311303231Sdim 312303231Sdim // If both values are local (meaning the corresponding set has attribute 313303231Sdim // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them: 314303231Sdim // they may-alias each other if and only if they are in the same set. 315303231Sdim // If at least one value is non-local (meaning it either is global/argument or 316303231Sdim // it comes from unknown sources like integer cast), the situation becomes a 317303231Sdim // bit more interesting. We follow three general rules described below: 318303231Sdim // - Non-local values may alias each other 319303231Sdim // - AttrNone values do not alias any non-local values 320303231Sdim // - AttrEscaped do not alias globals/arguments, but they may alias 321303231Sdim // AttrUnknown values 322303231Sdim if (SetA.Index == SetB.Index) 323303231Sdim return MayAlias; 324303231Sdim if (AttrsA.none() || AttrsB.none()) 325303231Sdim return NoAlias; 326303231Sdim if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB)) 327303231Sdim return MayAlias; 328303231Sdim if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) 329303231Sdim return MayAlias; 330303231Sdim return NoAlias; 331303231Sdim} 332303231Sdim 333314564SdimAnalysisKey CFLSteensAA::Key; 334303231Sdim 335314564SdimCFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { 336360784Sdim auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { 337360784Sdim return AM.getResult<TargetLibraryAnalysis>(F); 338360784Sdim }; 339360784Sdim return CFLSteensAAResult(GetTLI); 340303231Sdim} 341303231Sdim 342303231Sdimchar CFLSteensAAWrapperPass::ID = 0; 343303231SdimINITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa", 344303231Sdim "Unification-Based CFL Alias Analysis", false, true) 345303231Sdim 346303231SdimImmutablePass *llvm::createCFLSteensAAWrapperPass() { 347303231Sdim return new CFLSteensAAWrapperPass(); 348303231Sdim} 349303231Sdim 350303231SdimCFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { 351303231Sdim initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry()); 352303231Sdim} 353303231Sdim 354303231Sdimvoid CFLSteensAAWrapperPass::initializePass() { 355360784Sdim auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { 356360784Sdim return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 357360784Sdim }; 358360784Sdim Result.reset(new CFLSteensAAResult(GetTLI)); 359303231Sdim} 360303231Sdim 361303231Sdimvoid CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 362303231Sdim AU.setPreservesAll(); 363303231Sdim AU.addRequired<TargetLibraryInfoWrapperPass>(); 364303231Sdim} 365