1//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file implements a CFL-base, summary-based alias analysis algorithm. It 10// does not depend on types. The algorithm is a mixture of the one described in 11// "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast 12// algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by 13// Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a 14// graph of the uses of a variable, where each node is a memory location, and 15// each edge is an action that happened on that memory location. The "actions" 16// can be one of Dereference, Reference, or Assign. The precision of this 17// analysis is roughly the same as that of an one level context-sensitive 18// Steensgaard's algorithm. 19// 20// Two variables are considered as aliasing iff you can reach one value's node 21// from the other value's node and the language formed by concatenating all of 22// the edge labels (actions) conforms to a context-free grammar. 23// 24// Because this algorithm requires a graph search on each query, we execute the 25// algorithm outlined in "Fast algorithms..." (mentioned above) 26// in order to transform the graph into sets of variables that may alias in 27// ~nlogn time (n = number of variables), which makes queries take constant 28// time. 29//===----------------------------------------------------------------------===// 30 31// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and 32// CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because 33// FunctionPasses are only allowed to inspect the Function that they're being 34// run on. Realistically, this likely isn't a problem until we allow 35// FunctionPasses to run concurrently. 36 37#include "llvm/Analysis/CFLSteensAliasAnalysis.h" 38#include "AliasAnalysisSummary.h" 39#include "CFLGraph.h" 40#include "StratifiedSets.h" 41#include "llvm/ADT/DenseMap.h" 42#include "llvm/ADT/Optional.h" 43#include "llvm/ADT/SmallVector.h" 44#include "llvm/Analysis/TargetLibraryInfo.h" 45#include "llvm/IR/Constants.h" 46#include "llvm/IR/Function.h" 47#include "llvm/IR/Type.h" 48#include "llvm/IR/Value.h" 49#include "llvm/InitializePasses.h" 50#include "llvm/Pass.h" 51#include "llvm/Support/Debug.h" 52#include "llvm/Support/raw_ostream.h" 53#include <algorithm> 54#include <cassert> 55#include <limits> 56#include <memory> 57#include <utility> 58 59using namespace llvm; 60using namespace llvm::cflaa; 61 62#define DEBUG_TYPE "cfl-steens-aa" 63 64CFLSteensAAResult::CFLSteensAAResult( 65 std::function<const TargetLibraryInfo &(Function &F)> GetTLI) 66 : AAResultBase(), GetTLI(std::move(GetTLI)) {} 67CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) 68 : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} 69CFLSteensAAResult::~CFLSteensAAResult() = default; 70 71/// Information we have about a function and would like to keep around. 72class CFLSteensAAResult::FunctionInfo { 73 StratifiedSets<InstantiatedValue> Sets; 74 AliasSummary Summary; 75 76public: 77 FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals, 78 StratifiedSets<InstantiatedValue> S); 79 80 const StratifiedSets<InstantiatedValue> &getStratifiedSets() const { 81 return Sets; 82 } 83 84 const AliasSummary &getAliasSummary() const { return Summary; } 85}; 86 87const StratifiedIndex StratifiedLink::SetSentinel = 88 std::numeric_limits<StratifiedIndex>::max(); 89 90//===----------------------------------------------------------------------===// 91// Function declarations that require types defined in the namespace above 92//===----------------------------------------------------------------------===// 93 94/// Determines whether it would be pointless to add the given Value to our sets. 95static bool canSkipAddingToSets(Value *Val) { 96 // Constants can share instances, which may falsely unify multiple 97 // sets, e.g. in 98 // store i32* null, i32** %ptr1 99 // store i32* null, i32** %ptr2 100 // clearly ptr1 and ptr2 should not be unified into the same set, so 101 // we should filter out the (potentially shared) instance to 102 // i32* null. 103 if (isa<Constant>(Val)) { 104 // TODO: Because all of these things are constant, we can determine whether 105 // the data is *actually* mutable at graph building time. This will probably 106 // come for free/cheap with offset awareness. 107 bool CanStoreMutableData = isa<GlobalValue>(Val) || 108 isa<ConstantExpr>(Val) || 109 isa<ConstantAggregate>(Val); 110 return !CanStoreMutableData; 111 } 112 113 return false; 114} 115 116CFLSteensAAResult::FunctionInfo::FunctionInfo( 117 Function &Fn, const SmallVectorImpl<Value *> &RetVals, 118 StratifiedSets<InstantiatedValue> S) 119 : Sets(std::move(S)) { 120 // Historically, an arbitrary upper-bound of 50 args was selected. We may want 121 // to remove this if it doesn't really matter in practice. 122 if (Fn.arg_size() > MaxSupportedArgsInSummary) 123 return; 124 125 DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap; 126 127 // Our intention here is to record all InterfaceValues that share the same 128 // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we 129 // have its StratifiedIndex scanned here and check if the index is presented 130 // in InterfaceMap: if it is not, we add the correspondence to the map; 131 // otherwise, an aliasing relation is found and we add it to 132 // RetParamRelations. 133 134 auto AddToRetParamRelations = [&](unsigned InterfaceIndex, 135 StratifiedIndex SetIndex) { 136 unsigned Level = 0; 137 while (true) { 138 InterfaceValue CurrValue{InterfaceIndex, Level}; 139 140 auto Itr = InterfaceMap.find(SetIndex); 141 if (Itr != InterfaceMap.end()) { 142 if (CurrValue != Itr->second) 143 Summary.RetParamRelations.push_back( 144 ExternalRelation{CurrValue, Itr->second, UnknownOffset}); 145 break; 146 } 147 148 auto &Link = Sets.getLink(SetIndex); 149 InterfaceMap.insert(std::make_pair(SetIndex, CurrValue)); 150 auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs); 151 if (ExternalAttrs.any()) 152 Summary.RetParamAttributes.push_back( 153 ExternalAttribute{CurrValue, ExternalAttrs}); 154 155 if (!Link.hasBelow()) 156 break; 157 158 ++Level; 159 SetIndex = Link.Below; 160 } 161 }; 162 163 // Populate RetParamRelations for return values 164 for (auto *RetVal : RetVals) { 165 assert(RetVal != nullptr); 166 assert(RetVal->getType()->isPointerTy()); 167 auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0}); 168 if (RetInfo.hasValue()) 169 AddToRetParamRelations(0, RetInfo->Index); 170 } 171 172 // Populate RetParamRelations for parameters 173 unsigned I = 0; 174 for (auto &Param : Fn.args()) { 175 if (Param.getType()->isPointerTy()) { 176 auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0}); 177 if (ParamInfo.hasValue()) 178 AddToRetParamRelations(I + 1, ParamInfo->Index); 179 } 180 ++I; 181 } 182} 183 184// Builds the graph + StratifiedSets for a function. 185CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { 186 CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn); 187 StratifiedSetsBuilder<InstantiatedValue> SetBuilder; 188 189 // Add all CFLGraph nodes and all Dereference edges to StratifiedSets 190 auto &Graph = GraphBuilder.getCFLGraph(); 191 for (const auto &Mapping : Graph.value_mappings()) { 192 auto Val = Mapping.first; 193 if (canSkipAddingToSets(Val)) 194 continue; 195 auto &ValueInfo = Mapping.second; 196 197 assert(ValueInfo.getNumLevels() > 0); 198 SetBuilder.add(InstantiatedValue{Val, 0}); 199 SetBuilder.noteAttributes(InstantiatedValue{Val, 0}, 200 ValueInfo.getNodeInfoAtLevel(0).Attr); 201 for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) { 202 SetBuilder.add(InstantiatedValue{Val, I + 1}); 203 SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1}, 204 ValueInfo.getNodeInfoAtLevel(I + 1).Attr); 205 SetBuilder.addBelow(InstantiatedValue{Val, I}, 206 InstantiatedValue{Val, I + 1}); 207 } 208 } 209 210 // Add all assign edges to StratifiedSets 211 for (const auto &Mapping : Graph.value_mappings()) { 212 auto Val = Mapping.first; 213 if (canSkipAddingToSets(Val)) 214 continue; 215 auto &ValueInfo = Mapping.second; 216 217 for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { 218 auto Src = InstantiatedValue{Val, I}; 219 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) 220 SetBuilder.addWith(Src, Edge.Other); 221 } 222 } 223 224 return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build()); 225} 226 227void CFLSteensAAResult::scan(Function *Fn) { 228 auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>())); 229 (void)InsertPair; 230 assert(InsertPair.second && 231 "Trying to scan a function that has already been cached"); 232 233 // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call 234 // may get evaluated after operator[], potentially triggering a DenseMap 235 // resize and invalidating the reference returned by operator[] 236 auto FunInfo = buildSetsFrom(Fn); 237 Cache[Fn] = std::move(FunInfo); 238 239 Handles.emplace_front(Fn, this); 240} 241 242void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); } 243 244/// Ensures that the given function is available in the cache, and returns the 245/// entry. 246const Optional<CFLSteensAAResult::FunctionInfo> & 247CFLSteensAAResult::ensureCached(Function *Fn) { 248 auto Iter = Cache.find(Fn); 249 if (Iter == Cache.end()) { 250 scan(Fn); 251 Iter = Cache.find(Fn); 252 assert(Iter != Cache.end()); 253 assert(Iter->second.hasValue()); 254 } 255 return Iter->second; 256} 257 258const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) { 259 auto &FunInfo = ensureCached(&Fn); 260 if (FunInfo.hasValue()) 261 return &FunInfo->getAliasSummary(); 262 else 263 return nullptr; 264} 265 266AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, 267 const MemoryLocation &LocB) { 268 auto *ValA = const_cast<Value *>(LocA.Ptr); 269 auto *ValB = const_cast<Value *>(LocB.Ptr); 270 271 if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) 272 return NoAlias; 273 274 Function *Fn = nullptr; 275 Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA)); 276 Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB)); 277 if (!MaybeFnA && !MaybeFnB) { 278 // The only times this is known to happen are when globals + InlineAsm are 279 // involved 280 LLVM_DEBUG( 281 dbgs() 282 << "CFLSteensAA: could not extract parent function information.\n"); 283 return MayAlias; 284 } 285 286 if (MaybeFnA) { 287 Fn = MaybeFnA; 288 assert((!MaybeFnB || MaybeFnB == MaybeFnA) && 289 "Interprocedural queries not supported"); 290 } else { 291 Fn = MaybeFnB; 292 } 293 294 assert(Fn != nullptr); 295 auto &MaybeInfo = ensureCached(Fn); 296 assert(MaybeInfo.hasValue()); 297 298 auto &Sets = MaybeInfo->getStratifiedSets(); 299 auto MaybeA = Sets.find(InstantiatedValue{ValA, 0}); 300 if (!MaybeA.hasValue()) 301 return MayAlias; 302 303 auto MaybeB = Sets.find(InstantiatedValue{ValB, 0}); 304 if (!MaybeB.hasValue()) 305 return MayAlias; 306 307 auto SetA = *MaybeA; 308 auto SetB = *MaybeB; 309 auto AttrsA = Sets.getLink(SetA.Index).Attrs; 310 auto AttrsB = Sets.getLink(SetB.Index).Attrs; 311 312 // If both values are local (meaning the corresponding set has attribute 313 // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them: 314 // they may-alias each other if and only if they are in the same set. 315 // If at least one value is non-local (meaning it either is global/argument or 316 // it comes from unknown sources like integer cast), the situation becomes a 317 // bit more interesting. We follow three general rules described below: 318 // - Non-local values may alias each other 319 // - AttrNone values do not alias any non-local values 320 // - AttrEscaped do not alias globals/arguments, but they may alias 321 // AttrUnknown values 322 if (SetA.Index == SetB.Index) 323 return MayAlias; 324 if (AttrsA.none() || AttrsB.none()) 325 return NoAlias; 326 if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB)) 327 return MayAlias; 328 if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) 329 return MayAlias; 330 return NoAlias; 331} 332 333AnalysisKey CFLSteensAA::Key; 334 335CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { 336 auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { 337 return AM.getResult<TargetLibraryAnalysis>(F); 338 }; 339 return CFLSteensAAResult(GetTLI); 340} 341 342char CFLSteensAAWrapperPass::ID = 0; 343INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa", 344 "Unification-Based CFL Alias Analysis", false, true) 345 346ImmutablePass *llvm::createCFLSteensAAWrapperPass() { 347 return new CFLSteensAAWrapperPass(); 348} 349 350CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { 351 initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry()); 352} 353 354void CFLSteensAAWrapperPass::initializePass() { 355 auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { 356 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 357 }; 358 Result.reset(new CFLSteensAAResult(GetTLI)); 359} 360 361void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 362 AU.setPreservesAll(); 363 AU.addRequired<TargetLibraryInfoWrapperPass>(); 364} 365