1//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_STRATIFIEDSETS_H
10#define LLVM_ADT_STRATIFIEDSETS_H
11
12#include "AliasAnalysisSummary.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/Optional.h"
15#include "llvm/ADT/SmallSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include <bitset>
18#include <cassert>
19#include <cmath>
20#include <type_traits>
21#include <utility>
22#include <vector>
23
24namespace llvm {
25namespace cflaa {
26/// An index into Stratified Sets.
27typedef unsigned StratifiedIndex;
28/// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
29/// ~1M sets exist.
30
31// Container of information related to a value in a StratifiedSet.
32struct StratifiedInfo {
33  StratifiedIndex Index;
34  /// For field sensitivity, etc. we can tack fields on here.
35};
36
37/// A "link" between two StratifiedSets.
38struct StratifiedLink {
39  /// This is a value used to signify "does not exist" where the
40  /// StratifiedIndex type is used.
41  ///
42  /// This is used instead of Optional<StratifiedIndex> because
43  /// Optional<StratifiedIndex> would eat up a considerable amount of extra
44  /// memory, after struct padding/alignment is taken into account.
45  static const StratifiedIndex SetSentinel;
46
47  /// The index for the set "above" current
48  StratifiedIndex Above;
49
50  /// The link for the set "below" current
51  StratifiedIndex Below;
52
53  /// Attributes for these StratifiedSets.
54  AliasAttrs Attrs;
55
56  StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {}
57
58  bool hasBelow() const { return Below != SetSentinel; }
59  bool hasAbove() const { return Above != SetSentinel; }
60
61  void clearBelow() { Below = SetSentinel; }
62  void clearAbove() { Above = SetSentinel; }
63};
64
65/// These are stratified sets, as described in "Fast algorithms for
66/// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
67/// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
68/// of Value*s. If two Value*s are in the same set, or if both sets have
69/// overlapping attributes, then the Value*s are said to alias.
70///
71/// Sets may be related by position, meaning that one set may be considered as
72/// above or below another. In CFL Alias Analysis, this gives us an indication
73/// of how two variables are related; if the set of variable A is below a set
74/// containing variable B, then at some point, a variable that has interacted
75/// with B (or B itself) was either used in order to extract the variable A, or
76/// was used as storage of variable A.
77///
78/// Sets may also have attributes (as noted above). These attributes are
79/// generally used for noting whether a variable in the set has interacted with
80/// a variable whose origins we don't quite know (i.e. globals/arguments), or if
81/// the variable may have had operations performed on it (modified in a function
82/// call). All attributes that exist in a set A must exist in all sets marked as
83/// below set A.
84template <typename T> class StratifiedSets {
85public:
86  StratifiedSets() = default;
87  StratifiedSets(StratifiedSets &&) = default;
88  StratifiedSets &operator=(StratifiedSets &&) = default;
89
90  StratifiedSets(DenseMap<T, StratifiedInfo> Map,
91                 std::vector<StratifiedLink> Links)
92      : Values(std::move(Map)), Links(std::move(Links)) {}
93
94  Optional<StratifiedInfo> find(const T &Elem) const {
95    auto Iter = Values.find(Elem);
96    if (Iter == Values.end())
97      return None;
98    return Iter->second;
99  }
100
101  const StratifiedLink &getLink(StratifiedIndex Index) const {
102    assert(inbounds(Index));
103    return Links[Index];
104  }
105
106private:
107  DenseMap<T, StratifiedInfo> Values;
108  std::vector<StratifiedLink> Links;
109
110  bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); }
111};
112
113/// Generic Builder class that produces StratifiedSets instances.
114///
115/// The goal of this builder is to efficiently produce correct StratifiedSets
116/// instances. To this end, we use a few tricks:
117///   > Set chains (A method for linking sets together)
118///   > Set remaps (A method for marking a set as an alias [irony?] of another)
119///
120/// ==== Set chains ====
121/// This builder has a notion of some value A being above, below, or with some
122/// other value B:
123///   > The `A above B` relationship implies that there is a reference edge
124///   going from A to B. Namely, it notes that A can store anything in B's set.
125///   > The `A below B` relationship is the opposite of `A above B`. It implies
126///   that there's a dereference edge going from A to B.
127///   > The `A with B` relationship states that there's an assignment edge going
128///   from A to B, and that A and B should be treated as equals.
129///
130/// As an example, take the following code snippet:
131///
132/// %a = alloca i32, align 4
133/// %ap = alloca i32*, align 8
134/// %app = alloca i32**, align 8
135/// store %a, %ap
136/// store %ap, %app
137/// %aw = getelementptr %ap, i32 0
138///
139/// Given this, the following relations exist:
140///   - %a below %ap & %ap above %a
141///   - %ap below %app & %app above %ap
142///   - %aw with %ap & %ap with %aw
143///
144/// These relations produce the following sets:
145///   [{%a}, {%ap, %aw}, {%app}]
146///
147/// ...Which state that the only MayAlias relationship in the above program is
148/// between %ap and %aw.
149///
150/// Because LLVM allows arbitrary casts, code like the following needs to be
151/// supported:
152///   %ip = alloca i64, align 8
153///   %ipp = alloca i64*, align 8
154///   %i = bitcast i64** ipp to i64
155///   store i64* %ip, i64** %ipp
156///   store i64 %i, i64* %ip
157///
158/// Which, because %ipp ends up *both* above and below %ip, is fun.
159///
160/// This is solved by merging %i and %ipp into a single set (...which is the
161/// only way to solve this, since their bit patterns are equivalent). Any sets
162/// that ended up in between %i and %ipp at the time of merging (in this case,
163/// the set containing %ip) also get conservatively merged into the set of %i
164/// and %ipp. In short, the resulting StratifiedSet from the above code would be
165/// {%ip, %ipp, %i}.
166///
167/// ==== Set remaps ====
168/// More of an implementation detail than anything -- when merging sets, we need
169/// to update the numbers of all of the elements mapped to those sets. Rather
170/// than doing this at each merge, we note in the BuilderLink structure that a
171/// remap has occurred, and use this information so we can defer renumbering set
172/// elements until build time.
173template <typename T> class StratifiedSetsBuilder {
174  /// Represents a Stratified Set, with information about the Stratified
175  /// Set above it, the set below it, and whether the current set has been
176  /// remapped to another.
177  struct BuilderLink {
178    const StratifiedIndex Number;
179
180    BuilderLink(StratifiedIndex N) : Number(N) {
181      Remap = StratifiedLink::SetSentinel;
182    }
183
184    bool hasAbove() const {
185      assert(!isRemapped());
186      return Link.hasAbove();
187    }
188
189    bool hasBelow() const {
190      assert(!isRemapped());
191      return Link.hasBelow();
192    }
193
194    void setBelow(StratifiedIndex I) {
195      assert(!isRemapped());
196      Link.Below = I;
197    }
198
199    void setAbove(StratifiedIndex I) {
200      assert(!isRemapped());
201      Link.Above = I;
202    }
203
204    void clearBelow() {
205      assert(!isRemapped());
206      Link.clearBelow();
207    }
208
209    void clearAbove() {
210      assert(!isRemapped());
211      Link.clearAbove();
212    }
213
214    StratifiedIndex getBelow() const {
215      assert(!isRemapped());
216      assert(hasBelow());
217      return Link.Below;
218    }
219
220    StratifiedIndex getAbove() const {
221      assert(!isRemapped());
222      assert(hasAbove());
223      return Link.Above;
224    }
225
226    AliasAttrs getAttrs() {
227      assert(!isRemapped());
228      return Link.Attrs;
229    }
230
231    void setAttrs(AliasAttrs Other) {
232      assert(!isRemapped());
233      Link.Attrs |= Other;
234    }
235
236    bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; }
237
238    /// For initial remapping to another set
239    void remapTo(StratifiedIndex Other) {
240      assert(!isRemapped());
241      Remap = Other;
242    }
243
244    StratifiedIndex getRemapIndex() const {
245      assert(isRemapped());
246      return Remap;
247    }
248
249    /// Should only be called when we're already remapped.
250    void updateRemap(StratifiedIndex Other) {
251      assert(isRemapped());
252      Remap = Other;
253    }
254
255    /// Prefer the above functions to calling things directly on what's returned
256    /// from this -- they guard against unexpected calls when the current
257    /// BuilderLink is remapped.
258    const StratifiedLink &getLink() const { return Link; }
259
260  private:
261    StratifiedLink Link;
262    StratifiedIndex Remap;
263  };
264
265  /// This function performs all of the set unioning/value renumbering
266  /// that we've been putting off, and generates a vector<StratifiedLink> that
267  /// may be placed in a StratifiedSets instance.
268  void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
269    DenseMap<StratifiedIndex, StratifiedIndex> Remaps;
270    for (auto &Link : Links) {
271      if (Link.isRemapped())
272        continue;
273
274      StratifiedIndex Number = StratLinks.size();
275      Remaps.insert(std::make_pair(Link.Number, Number));
276      StratLinks.push_back(Link.getLink());
277    }
278
279    for (auto &Link : StratLinks) {
280      if (Link.hasAbove()) {
281        auto &Above = linksAt(Link.Above);
282        auto Iter = Remaps.find(Above.Number);
283        assert(Iter != Remaps.end());
284        Link.Above = Iter->second;
285      }
286
287      if (Link.hasBelow()) {
288        auto &Below = linksAt(Link.Below);
289        auto Iter = Remaps.find(Below.Number);
290        assert(Iter != Remaps.end());
291        Link.Below = Iter->second;
292      }
293    }
294
295    for (auto &Pair : Values) {
296      auto &Info = Pair.second;
297      auto &Link = linksAt(Info.Index);
298      auto Iter = Remaps.find(Link.Number);
299      assert(Iter != Remaps.end());
300      Info.Index = Iter->second;
301    }
302  }
303
304  /// There's a guarantee in StratifiedLink where all bits set in a
305  /// Link.externals will be set in all Link.externals "below" it.
306  static void propagateAttrs(std::vector<StratifiedLink> &Links) {
307    const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
308      const auto *Link = &Links[Idx];
309      while (Link->hasAbove()) {
310        Idx = Link->Above;
311        Link = &Links[Idx];
312      }
313      return Idx;
314    };
315
316    SmallSet<StratifiedIndex, 16> Visited;
317    for (unsigned I = 0, E = Links.size(); I < E; ++I) {
318      auto CurrentIndex = getHighestParentAbove(I);
319      if (!Visited.insert(CurrentIndex).second)
320        continue;
321
322      while (Links[CurrentIndex].hasBelow()) {
323        auto &CurrentBits = Links[CurrentIndex].Attrs;
324        auto NextIndex = Links[CurrentIndex].Below;
325        auto &NextBits = Links[NextIndex].Attrs;
326        NextBits |= CurrentBits;
327        CurrentIndex = NextIndex;
328      }
329    }
330  }
331
332public:
333  /// Builds a StratifiedSet from the information we've been given since either
334  /// construction or the prior build() call.
335  StratifiedSets<T> build() {
336    std::vector<StratifiedLink> StratLinks;
337    finalizeSets(StratLinks);
338    propagateAttrs(StratLinks);
339    Links.clear();
340    return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
341  }
342
343  bool has(const T &Elem) const { return get(Elem).hasValue(); }
344
345  bool add(const T &Main) {
346    if (get(Main).hasValue())
347      return false;
348
349    auto NewIndex = getNewUnlinkedIndex();
350    return addAtMerging(Main, NewIndex);
351  }
352
353  /// Restructures the stratified sets as necessary to make "ToAdd" in a
354  /// set above "Main". There are some cases where this is not possible (see
355  /// above), so we merge them such that ToAdd and Main are in the same set.
356  bool addAbove(const T &Main, const T &ToAdd) {
357    assert(has(Main));
358    auto Index = *indexOf(Main);
359    if (!linksAt(Index).hasAbove())
360      addLinkAbove(Index);
361
362    auto Above = linksAt(Index).getAbove();
363    return addAtMerging(ToAdd, Above);
364  }
365
366  /// Restructures the stratified sets as necessary to make "ToAdd" in a
367  /// set below "Main". There are some cases where this is not possible (see
368  /// above), so we merge them such that ToAdd and Main are in the same set.
369  bool addBelow(const T &Main, const T &ToAdd) {
370    assert(has(Main));
371    auto Index = *indexOf(Main);
372    if (!linksAt(Index).hasBelow())
373      addLinkBelow(Index);
374
375    auto Below = linksAt(Index).getBelow();
376    return addAtMerging(ToAdd, Below);
377  }
378
379  bool addWith(const T &Main, const T &ToAdd) {
380    assert(has(Main));
381    auto MainIndex = *indexOf(Main);
382    return addAtMerging(ToAdd, MainIndex);
383  }
384
385  void noteAttributes(const T &Main, AliasAttrs NewAttrs) {
386    assert(has(Main));
387    auto *Info = *get(Main);
388    auto &Link = linksAt(Info->Index);
389    Link.setAttrs(NewAttrs);
390  }
391
392private:
393  DenseMap<T, StratifiedInfo> Values;
394  std::vector<BuilderLink> Links;
395
396  /// Adds the given element at the given index, merging sets if necessary.
397  bool addAtMerging(const T &ToAdd, StratifiedIndex Index) {
398    StratifiedInfo Info = {Index};
399    auto Pair = Values.insert(std::make_pair(ToAdd, Info));
400    if (Pair.second)
401      return true;
402
403    auto &Iter = Pair.first;
404    auto &IterSet = linksAt(Iter->second.Index);
405    auto &ReqSet = linksAt(Index);
406
407    // Failed to add where we wanted to. Merge the sets.
408    if (&IterSet != &ReqSet)
409      merge(IterSet.Number, ReqSet.Number);
410
411    return false;
412  }
413
414  /// Gets the BuilderLink at the given index, taking set remapping into
415  /// account.
416  BuilderLink &linksAt(StratifiedIndex Index) {
417    auto *Start = &Links[Index];
418    if (!Start->isRemapped())
419      return *Start;
420
421    auto *Current = Start;
422    while (Current->isRemapped())
423      Current = &Links[Current->getRemapIndex()];
424
425    auto NewRemap = Current->Number;
426
427    // Run through everything that has yet to be updated, and update them to
428    // remap to NewRemap
429    Current = Start;
430    while (Current->isRemapped()) {
431      auto *Next = &Links[Current->getRemapIndex()];
432      Current->updateRemap(NewRemap);
433      Current = Next;
434    }
435
436    return *Current;
437  }
438
439  /// Merges two sets into one another. Assumes that these sets are not
440  /// already one in the same.
441  void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
442    assert(inbounds(Idx1) && inbounds(Idx2));
443    assert(&linksAt(Idx1) != &linksAt(Idx2) &&
444           "Merging a set into itself is not allowed");
445
446    // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge
447    // both the
448    // given sets, and all sets between them, into one.
449    if (tryMergeUpwards(Idx1, Idx2))
450      return;
451
452    if (tryMergeUpwards(Idx2, Idx1))
453      return;
454
455    // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`.
456    // We therefore need to merge the two chains together.
457    mergeDirect(Idx1, Idx2);
458  }
459
460  /// Merges two sets assuming that the set at `Idx1` is unreachable from
461  /// traversing above or below the set at `Idx2`.
462  void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
463    assert(inbounds(Idx1) && inbounds(Idx2));
464
465    auto *LinksInto = &linksAt(Idx1);
466    auto *LinksFrom = &linksAt(Idx2);
467    // Merging everything above LinksInto then proceeding to merge everything
468    // below LinksInto becomes problematic, so we go as far "up" as possible!
469    while (LinksInto->hasAbove() && LinksFrom->hasAbove()) {
470      LinksInto = &linksAt(LinksInto->getAbove());
471      LinksFrom = &linksAt(LinksFrom->getAbove());
472    }
473
474    if (LinksFrom->hasAbove()) {
475      LinksInto->setAbove(LinksFrom->getAbove());
476      auto &NewAbove = linksAt(LinksInto->getAbove());
477      NewAbove.setBelow(LinksInto->Number);
478    }
479
480    // Merging strategy:
481    //  > If neither has links below, stop.
482    //  > If only `LinksInto` has links below, stop.
483    //  > If only `LinksFrom` has links below, reset `LinksInto.Below` to
484    //  match `LinksFrom.Below`
485    //  > If both have links above, deal with those next.
486    while (LinksInto->hasBelow() && LinksFrom->hasBelow()) {
487      auto FromAttrs = LinksFrom->getAttrs();
488      LinksInto->setAttrs(FromAttrs);
489
490      // Remap needs to happen after getBelow(), but before
491      // assignment of LinksFrom
492      auto *NewLinksFrom = &linksAt(LinksFrom->getBelow());
493      LinksFrom->remapTo(LinksInto->Number);
494      LinksFrom = NewLinksFrom;
495      LinksInto = &linksAt(LinksInto->getBelow());
496    }
497
498    if (LinksFrom->hasBelow()) {
499      LinksInto->setBelow(LinksFrom->getBelow());
500      auto &NewBelow = linksAt(LinksInto->getBelow());
501      NewBelow.setAbove(LinksInto->Number);
502    }
503
504    LinksInto->setAttrs(LinksFrom->getAttrs());
505    LinksFrom->remapTo(LinksInto->Number);
506  }
507
508  /// Checks to see if lowerIndex is at a level lower than upperIndex. If so, it
509  /// will merge lowerIndex with upperIndex (and all of the sets between) and
510  /// return true. Otherwise, it will return false.
511  bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) {
512    assert(inbounds(LowerIndex) && inbounds(UpperIndex));
513    auto *Lower = &linksAt(LowerIndex);
514    auto *Upper = &linksAt(UpperIndex);
515    if (Lower == Upper)
516      return true;
517
518    SmallVector<BuilderLink *, 8> Found;
519    auto *Current = Lower;
520    auto Attrs = Current->getAttrs();
521    while (Current->hasAbove() && Current != Upper) {
522      Found.push_back(Current);
523      Attrs |= Current->getAttrs();
524      Current = &linksAt(Current->getAbove());
525    }
526
527    if (Current != Upper)
528      return false;
529
530    Upper->setAttrs(Attrs);
531
532    if (Lower->hasBelow()) {
533      auto NewBelowIndex = Lower->getBelow();
534      Upper->setBelow(NewBelowIndex);
535      auto &NewBelow = linksAt(NewBelowIndex);
536      NewBelow.setAbove(UpperIndex);
537    } else {
538      Upper->clearBelow();
539    }
540
541    for (const auto &Ptr : Found)
542      Ptr->remapTo(Upper->Number);
543
544    return true;
545  }
546
547  Optional<const StratifiedInfo *> get(const T &Val) const {
548    auto Result = Values.find(Val);
549    if (Result == Values.end())
550      return None;
551    return &Result->second;
552  }
553
554  Optional<StratifiedInfo *> get(const T &Val) {
555    auto Result = Values.find(Val);
556    if (Result == Values.end())
557      return None;
558    return &Result->second;
559  }
560
561  Optional<StratifiedIndex> indexOf(const T &Val) {
562    auto MaybeVal = get(Val);
563    if (!MaybeVal.hasValue())
564      return None;
565    auto *Info = *MaybeVal;
566    auto &Link = linksAt(Info->Index);
567    return Link.Number;
568  }
569
570  StratifiedIndex addLinkBelow(StratifiedIndex Set) {
571    auto At = addLinks();
572    Links[Set].setBelow(At);
573    Links[At].setAbove(Set);
574    return At;
575  }
576
577  StratifiedIndex addLinkAbove(StratifiedIndex Set) {
578    auto At = addLinks();
579    Links[At].setBelow(Set);
580    Links[Set].setAbove(At);
581    return At;
582  }
583
584  StratifiedIndex getNewUnlinkedIndex() { return addLinks(); }
585
586  StratifiedIndex addLinks() {
587    auto Link = Links.size();
588    Links.push_back(BuilderLink(Link));
589    return Link;
590  }
591
592  bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
593};
594}
595}
596#endif // LLVM_ADT_STRATIFIEDSETS_H
597