1327952Sdim//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
2303231Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6303231Sdim//
7303231Sdim//===----------------------------------------------------------------------===//
8303231Sdim//
9303231Sdim// This file implements a CFL-base, summary-based alias analysis algorithm. It
10303231Sdim// does not depend on types. The algorithm is a mixture of the one described in
11303231Sdim// "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
12303231Sdim// algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
13303231Sdim// Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
14303231Sdim// graph of the uses of a variable, where each node is a memory location, and
15303231Sdim// each edge is an action that happened on that memory location.  The "actions"
16303231Sdim// can be one of Dereference, Reference, or Assign. The precision of this
17303231Sdim// analysis is roughly the same as that of an one level context-sensitive
18303231Sdim// Steensgaard's algorithm.
19303231Sdim//
20303231Sdim// Two variables are considered as aliasing iff you can reach one value's node
21303231Sdim// from the other value's node and the language formed by concatenating all of
22303231Sdim// the edge labels (actions) conforms to a context-free grammar.
23303231Sdim//
24303231Sdim// Because this algorithm requires a graph search on each query, we execute the
25303231Sdim// algorithm outlined in "Fast algorithms..." (mentioned above)
26303231Sdim// in order to transform the graph into sets of variables that may alias in
27303231Sdim// ~nlogn time (n = number of variables), which makes queries take constant
28303231Sdim// time.
29303231Sdim//===----------------------------------------------------------------------===//
30303231Sdim
31303231Sdim// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
32303231Sdim// CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
33303231Sdim// FunctionPasses are only allowed to inspect the Function that they're being
34303231Sdim// run on. Realistically, this likely isn't a problem until we allow
35303231Sdim// FunctionPasses to run concurrently.
36303231Sdim
37303231Sdim#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
38327952Sdim#include "AliasAnalysisSummary.h"
39303231Sdim#include "CFLGraph.h"
40303231Sdim#include "StratifiedSets.h"
41303231Sdim#include "llvm/ADT/DenseMap.h"
42303231Sdim#include "llvm/ADT/Optional.h"
43327952Sdim#include "llvm/ADT/SmallVector.h"
44303231Sdim#include "llvm/Analysis/TargetLibraryInfo.h"
45303231Sdim#include "llvm/IR/Constants.h"
46303231Sdim#include "llvm/IR/Function.h"
47327952Sdim#include "llvm/IR/Type.h"
48327952Sdim#include "llvm/IR/Value.h"
49360784Sdim#include "llvm/InitializePasses.h"
50303231Sdim#include "llvm/Pass.h"
51303231Sdim#include "llvm/Support/Debug.h"
52303231Sdim#include "llvm/Support/raw_ostream.h"
53303231Sdim#include <algorithm>
54303231Sdim#include <cassert>
55327952Sdim#include <limits>
56303231Sdim#include <memory>
57327952Sdim#include <utility>
58303231Sdim
59303231Sdimusing namespace llvm;
60303231Sdimusing namespace llvm::cflaa;
61303231Sdim
62303231Sdim#define DEBUG_TYPE "cfl-steens-aa"
63303231Sdim
64360784SdimCFLSteensAAResult::CFLSteensAAResult(
65360784Sdim    std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
66360784Sdim    : AAResultBase(), GetTLI(std::move(GetTLI)) {}
67303231SdimCFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
68360784Sdim    : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
69327952SdimCFLSteensAAResult::~CFLSteensAAResult() = default;
70303231Sdim
71303231Sdim/// Information we have about a function and would like to keep around.
72303231Sdimclass CFLSteensAAResult::FunctionInfo {
73303231Sdim  StratifiedSets<InstantiatedValue> Sets;
74303231Sdim  AliasSummary Summary;
75303231Sdim
76303231Sdimpublic:
77303231Sdim  FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
78303231Sdim               StratifiedSets<InstantiatedValue> S);
79303231Sdim
80303231Sdim  const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
81303231Sdim    return Sets;
82303231Sdim  }
83327952Sdim
84303231Sdim  const AliasSummary &getAliasSummary() const { return Summary; }
85303231Sdim};
86303231Sdim
87303231Sdimconst StratifiedIndex StratifiedLink::SetSentinel =
88303231Sdim    std::numeric_limits<StratifiedIndex>::max();
89303231Sdim
90303231Sdim//===----------------------------------------------------------------------===//
91303231Sdim// Function declarations that require types defined in the namespace above
92303231Sdim//===----------------------------------------------------------------------===//
93303231Sdim
94303231Sdim/// Determines whether it would be pointless to add the given Value to our sets.
95303231Sdimstatic bool canSkipAddingToSets(Value *Val) {
96303231Sdim  // Constants can share instances, which may falsely unify multiple
97303231Sdim  // sets, e.g. in
98303231Sdim  // store i32* null, i32** %ptr1
99303231Sdim  // store i32* null, i32** %ptr2
100303231Sdim  // clearly ptr1 and ptr2 should not be unified into the same set, so
101303231Sdim  // we should filter out the (potentially shared) instance to
102303231Sdim  // i32* null.
103303231Sdim  if (isa<Constant>(Val)) {
104303231Sdim    // TODO: Because all of these things are constant, we can determine whether
105303231Sdim    // the data is *actually* mutable at graph building time. This will probably
106303231Sdim    // come for free/cheap with offset awareness.
107303231Sdim    bool CanStoreMutableData = isa<GlobalValue>(Val) ||
108303231Sdim                               isa<ConstantExpr>(Val) ||
109303231Sdim                               isa<ConstantAggregate>(Val);
110303231Sdim    return !CanStoreMutableData;
111303231Sdim  }
112303231Sdim
113303231Sdim  return false;
114303231Sdim}
115303231Sdim
116303231SdimCFLSteensAAResult::FunctionInfo::FunctionInfo(
117303231Sdim    Function &Fn, const SmallVectorImpl<Value *> &RetVals,
118303231Sdim    StratifiedSets<InstantiatedValue> S)
119303231Sdim    : Sets(std::move(S)) {
120303231Sdim  // Historically, an arbitrary upper-bound of 50 args was selected. We may want
121303231Sdim  // to remove this if it doesn't really matter in practice.
122303231Sdim  if (Fn.arg_size() > MaxSupportedArgsInSummary)
123303231Sdim    return;
124303231Sdim
125303231Sdim  DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
126303231Sdim
127303231Sdim  // Our intention here is to record all InterfaceValues that share the same
128303231Sdim  // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
129303231Sdim  // have its StratifiedIndex scanned here and check if the index is presented
130303231Sdim  // in InterfaceMap: if it is not, we add the correspondence to the map;
131303231Sdim  // otherwise, an aliasing relation is found and we add it to
132303231Sdim  // RetParamRelations.
133303231Sdim
134303231Sdim  auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
135303231Sdim                                    StratifiedIndex SetIndex) {
136303231Sdim    unsigned Level = 0;
137303231Sdim    while (true) {
138303231Sdim      InterfaceValue CurrValue{InterfaceIndex, Level};
139303231Sdim
140303231Sdim      auto Itr = InterfaceMap.find(SetIndex);
141303231Sdim      if (Itr != InterfaceMap.end()) {
142303231Sdim        if (CurrValue != Itr->second)
143303231Sdim          Summary.RetParamRelations.push_back(
144314564Sdim              ExternalRelation{CurrValue, Itr->second, UnknownOffset});
145303231Sdim        break;
146303231Sdim      }
147303231Sdim
148303231Sdim      auto &Link = Sets.getLink(SetIndex);
149303231Sdim      InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
150303231Sdim      auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
151303231Sdim      if (ExternalAttrs.any())
152303231Sdim        Summary.RetParamAttributes.push_back(
153303231Sdim            ExternalAttribute{CurrValue, ExternalAttrs});
154303231Sdim
155303231Sdim      if (!Link.hasBelow())
156303231Sdim        break;
157303231Sdim
158303231Sdim      ++Level;
159303231Sdim      SetIndex = Link.Below;
160303231Sdim    }
161303231Sdim  };
162303231Sdim
163303231Sdim  // Populate RetParamRelations for return values
164303231Sdim  for (auto *RetVal : RetVals) {
165303231Sdim    assert(RetVal != nullptr);
166303231Sdim    assert(RetVal->getType()->isPointerTy());
167303231Sdim    auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
168303231Sdim    if (RetInfo.hasValue())
169303231Sdim      AddToRetParamRelations(0, RetInfo->Index);
170303231Sdim  }
171303231Sdim
172303231Sdim  // Populate RetParamRelations for parameters
173303231Sdim  unsigned I = 0;
174303231Sdim  for (auto &Param : Fn.args()) {
175303231Sdim    if (Param.getType()->isPointerTy()) {
176303231Sdim      auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
177303231Sdim      if (ParamInfo.hasValue())
178303231Sdim        AddToRetParamRelations(I + 1, ParamInfo->Index);
179303231Sdim    }
180303231Sdim    ++I;
181303231Sdim  }
182303231Sdim}
183303231Sdim
184303231Sdim// Builds the graph + StratifiedSets for a function.
185303231SdimCFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
186360784Sdim  CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
187303231Sdim  StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
188303231Sdim
189303231Sdim  // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
190303231Sdim  auto &Graph = GraphBuilder.getCFLGraph();
191303231Sdim  for (const auto &Mapping : Graph.value_mappings()) {
192303231Sdim    auto Val = Mapping.first;
193303231Sdim    if (canSkipAddingToSets(Val))
194303231Sdim      continue;
195303231Sdim    auto &ValueInfo = Mapping.second;
196303231Sdim
197303231Sdim    assert(ValueInfo.getNumLevels() > 0);
198303231Sdim    SetBuilder.add(InstantiatedValue{Val, 0});
199303231Sdim    SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
200303231Sdim                              ValueInfo.getNodeInfoAtLevel(0).Attr);
201303231Sdim    for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
202303231Sdim      SetBuilder.add(InstantiatedValue{Val, I + 1});
203303231Sdim      SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
204303231Sdim                                ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
205303231Sdim      SetBuilder.addBelow(InstantiatedValue{Val, I},
206303231Sdim                          InstantiatedValue{Val, I + 1});
207303231Sdim    }
208303231Sdim  }
209303231Sdim
210303231Sdim  // Add all assign edges to StratifiedSets
211303231Sdim  for (const auto &Mapping : Graph.value_mappings()) {
212303231Sdim    auto Val = Mapping.first;
213303231Sdim    if (canSkipAddingToSets(Val))
214303231Sdim      continue;
215303231Sdim    auto &ValueInfo = Mapping.second;
216303231Sdim
217303231Sdim    for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
218303231Sdim      auto Src = InstantiatedValue{Val, I};
219303231Sdim      for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
220303231Sdim        SetBuilder.addWith(Src, Edge.Other);
221303231Sdim    }
222303231Sdim  }
223303231Sdim
224303231Sdim  return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
225303231Sdim}
226303231Sdim
227303231Sdimvoid CFLSteensAAResult::scan(Function *Fn) {
228303231Sdim  auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
229303231Sdim  (void)InsertPair;
230303231Sdim  assert(InsertPair.second &&
231303231Sdim         "Trying to scan a function that has already been cached");
232303231Sdim
233303231Sdim  // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
234303231Sdim  // may get evaluated after operator[], potentially triggering a DenseMap
235303231Sdim  // resize and invalidating the reference returned by operator[]
236303231Sdim  auto FunInfo = buildSetsFrom(Fn);
237303231Sdim  Cache[Fn] = std::move(FunInfo);
238303231Sdim
239321369Sdim  Handles.emplace_front(Fn, this);
240303231Sdim}
241303231Sdim
242303231Sdimvoid CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
243303231Sdim
244303231Sdim/// Ensures that the given function is available in the cache, and returns the
245303231Sdim/// entry.
246303231Sdimconst Optional<CFLSteensAAResult::FunctionInfo> &
247303231SdimCFLSteensAAResult::ensureCached(Function *Fn) {
248303231Sdim  auto Iter = Cache.find(Fn);
249303231Sdim  if (Iter == Cache.end()) {
250303231Sdim    scan(Fn);
251303231Sdim    Iter = Cache.find(Fn);
252303231Sdim    assert(Iter != Cache.end());
253303231Sdim    assert(Iter->second.hasValue());
254303231Sdim  }
255303231Sdim  return Iter->second;
256303231Sdim}
257303231Sdim
258303231Sdimconst AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
259303231Sdim  auto &FunInfo = ensureCached(&Fn);
260303231Sdim  if (FunInfo.hasValue())
261303231Sdim    return &FunInfo->getAliasSummary();
262303231Sdim  else
263303231Sdim    return nullptr;
264303231Sdim}
265303231Sdim
266303231SdimAliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
267303231Sdim                                     const MemoryLocation &LocB) {
268303231Sdim  auto *ValA = const_cast<Value *>(LocA.Ptr);
269303231Sdim  auto *ValB = const_cast<Value *>(LocB.Ptr);
270303231Sdim
271303231Sdim  if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
272303231Sdim    return NoAlias;
273303231Sdim
274303231Sdim  Function *Fn = nullptr;
275321369Sdim  Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
276321369Sdim  Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
277321369Sdim  if (!MaybeFnA && !MaybeFnB) {
278303231Sdim    // The only times this is known to happen are when globals + InlineAsm are
279303231Sdim    // involved
280341825Sdim    LLVM_DEBUG(
281341825Sdim        dbgs()
282341825Sdim        << "CFLSteensAA: could not extract parent function information.\n");
283303231Sdim    return MayAlias;
284303231Sdim  }
285303231Sdim
286321369Sdim  if (MaybeFnA) {
287321369Sdim    Fn = MaybeFnA;
288321369Sdim    assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
289303231Sdim           "Interprocedural queries not supported");
290303231Sdim  } else {
291321369Sdim    Fn = MaybeFnB;
292303231Sdim  }
293303231Sdim
294303231Sdim  assert(Fn != nullptr);
295303231Sdim  auto &MaybeInfo = ensureCached(Fn);
296303231Sdim  assert(MaybeInfo.hasValue());
297303231Sdim
298303231Sdim  auto &Sets = MaybeInfo->getStratifiedSets();
299303231Sdim  auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
300303231Sdim  if (!MaybeA.hasValue())
301303231Sdim    return MayAlias;
302303231Sdim
303303231Sdim  auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
304303231Sdim  if (!MaybeB.hasValue())
305303231Sdim    return MayAlias;
306303231Sdim
307303231Sdim  auto SetA = *MaybeA;
308303231Sdim  auto SetB = *MaybeB;
309303231Sdim  auto AttrsA = Sets.getLink(SetA.Index).Attrs;
310303231Sdim  auto AttrsB = Sets.getLink(SetB.Index).Attrs;
311303231Sdim
312303231Sdim  // If both values are local (meaning the corresponding set has attribute
313303231Sdim  // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
314303231Sdim  // they may-alias each other if and only if they are in the same set.
315303231Sdim  // If at least one value is non-local (meaning it either is global/argument or
316303231Sdim  // it comes from unknown sources like integer cast), the situation becomes a
317303231Sdim  // bit more interesting. We follow three general rules described below:
318303231Sdim  // - Non-local values may alias each other
319303231Sdim  // - AttrNone values do not alias any non-local values
320303231Sdim  // - AttrEscaped do not alias globals/arguments, but they may alias
321303231Sdim  // AttrUnknown values
322303231Sdim  if (SetA.Index == SetB.Index)
323303231Sdim    return MayAlias;
324303231Sdim  if (AttrsA.none() || AttrsB.none())
325303231Sdim    return NoAlias;
326303231Sdim  if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
327303231Sdim    return MayAlias;
328303231Sdim  if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
329303231Sdim    return MayAlias;
330303231Sdim  return NoAlias;
331303231Sdim}
332303231Sdim
333314564SdimAnalysisKey CFLSteensAA::Key;
334303231Sdim
335314564SdimCFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
336360784Sdim  auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
337360784Sdim    return AM.getResult<TargetLibraryAnalysis>(F);
338360784Sdim  };
339360784Sdim  return CFLSteensAAResult(GetTLI);
340303231Sdim}
341303231Sdim
342303231Sdimchar CFLSteensAAWrapperPass::ID = 0;
343303231SdimINITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
344303231Sdim                "Unification-Based CFL Alias Analysis", false, true)
345303231Sdim
346303231SdimImmutablePass *llvm::createCFLSteensAAWrapperPass() {
347303231Sdim  return new CFLSteensAAWrapperPass();
348303231Sdim}
349303231Sdim
350303231SdimCFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
351303231Sdim  initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
352303231Sdim}
353303231Sdim
354303231Sdimvoid CFLSteensAAWrapperPass::initializePass() {
355360784Sdim  auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
356360784Sdim    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
357360784Sdim  };
358360784Sdim  Result.reset(new CFLSteensAAResult(GetTLI));
359303231Sdim}
360303231Sdim
361303231Sdimvoid CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
362303231Sdim  AU.setPreservesAll();
363303231Sdim  AU.addRequired<TargetLibraryInfoWrapperPass>();
364303231Sdim}
365