1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12///    VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
14///    treated as proper graphs for generic algorithms;
15/// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
16///    within VPBasicBlocks;
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18///    instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanLoopInfo.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DepthFirstIterator.h"
32#include "llvm/ADT/GraphTraits.h"
33#include "llvm/ADT/Optional.h"
34#include "llvm/ADT/SmallBitVector.h"
35#include "llvm/ADT/SmallPtrSet.h"
36#include "llvm/ADT/SmallSet.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/ilist.h"
40#include "llvm/ADT/ilist_node.h"
41#include "llvm/Analysis/VectorUtils.h"
42#include "llvm/IR/IRBuilder.h"
43#include <algorithm>
44#include <cassert>
45#include <cstddef>
46#include <map>
47#include <string>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
53class InnerLoopVectorizer;
54template <class T> class InterleaveGroup;
55class LoopInfo;
56class raw_ostream;
57class Value;
58class VPBasicBlock;
59class VPRegionBlock;
60class VPSlotTracker;
61class VPlan;
62class VPlanSlp;
63
64/// A range of powers-of-2 vectorization factors with fixed start and
65/// adjustable end. The range includes start and excludes end, e.g.,:
66/// [1, 9) = {1, 2, 4, 8}
67struct VFRange {
68  // A power of 2.
69  const unsigned Start;
70
71  // Need not be a power of 2. If End <= Start range is empty.
72  unsigned End;
73};
74
75using VPlanPtr = std::unique_ptr<VPlan>;
76
77/// In what follows, the term "input IR" refers to code that is fed into the
78/// vectorizer whereas the term "output IR" refers to code that is generated by
79/// the vectorizer.
80
81/// VPIteration represents a single point in the iteration space of the output
82/// (vectorized and/or unrolled) IR loop.
83struct VPIteration {
84  /// in [0..UF)
85  unsigned Part;
86
87  /// in [0..VF)
88  unsigned Lane;
89};
90
91/// This is a helper struct for maintaining vectorization state. It's used for
92/// mapping values from the original loop to their corresponding values in
93/// the new loop. Two mappings are maintained: one for vectorized values and
94/// one for scalarized values. Vectorized values are represented with UF
95/// vector values in the new loop, and scalarized values are represented with
96/// UF x VF scalar values in the new loop. UF and VF are the unroll and
97/// vectorization factors, respectively.
98///
99/// Entries can be added to either map with setVectorValue and setScalarValue,
100/// which assert that an entry was not already added before. If an entry is to
101/// replace an existing one, call resetVectorValue and resetScalarValue. This is
102/// currently needed to modify the mapped values during "fix-up" operations that
103/// occur once the first phase of widening is complete. These operations include
104/// type truncation and the second phase of recurrence widening.
105///
106/// Entries from either map can be retrieved using the getVectorValue and
107/// getScalarValue functions, which assert that the desired value exists.
108struct VectorizerValueMap {
109  friend struct VPTransformState;
110
111private:
112  /// The unroll factor. Each entry in the vector map contains UF vector values.
113  unsigned UF;
114
115  /// The vectorization factor. Each entry in the scalar map contains UF x VF
116  /// scalar values.
117  unsigned VF;
118
119  /// The vector and scalar map storage. We use std::map and not DenseMap
120  /// because insertions to DenseMap invalidate its iterators.
121  using VectorParts = SmallVector<Value *, 2>;
122  using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
123  std::map<Value *, VectorParts> VectorMapStorage;
124  std::map<Value *, ScalarParts> ScalarMapStorage;
125
126public:
127  /// Construct an empty map with the given unroll and vectorization factors.
128  VectorizerValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
129
130  /// \return True if the map has any vector entry for \p Key.
131  bool hasAnyVectorValue(Value *Key) const {
132    return VectorMapStorage.count(Key);
133  }
134
135  /// \return True if the map has a vector entry for \p Key and \p Part.
136  bool hasVectorValue(Value *Key, unsigned Part) const {
137    assert(Part < UF && "Queried Vector Part is too large.");
138    if (!hasAnyVectorValue(Key))
139      return false;
140    const VectorParts &Entry = VectorMapStorage.find(Key)->second;
141    assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
142    return Entry[Part] != nullptr;
143  }
144
145  /// \return True if the map has any scalar entry for \p Key.
146  bool hasAnyScalarValue(Value *Key) const {
147    return ScalarMapStorage.count(Key);
148  }
149
150  /// \return True if the map has a scalar entry for \p Key and \p Instance.
151  bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
152    assert(Instance.Part < UF && "Queried Scalar Part is too large.");
153    assert(Instance.Lane < VF && "Queried Scalar Lane is too large.");
154    if (!hasAnyScalarValue(Key))
155      return false;
156    const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
157    assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
158    assert(Entry[Instance.Part].size() == VF &&
159           "ScalarParts has wrong dimensions.");
160    return Entry[Instance.Part][Instance.Lane] != nullptr;
161  }
162
163  /// Retrieve the existing vector value that corresponds to \p Key and
164  /// \p Part.
165  Value *getVectorValue(Value *Key, unsigned Part) {
166    assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
167    return VectorMapStorage[Key][Part];
168  }
169
170  /// Retrieve the existing scalar value that corresponds to \p Key and
171  /// \p Instance.
172  Value *getScalarValue(Value *Key, const VPIteration &Instance) {
173    assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
174    return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
175  }
176
177  /// Set a vector value associated with \p Key and \p Part. Assumes such a
178  /// value is not already set. If it is, use resetVectorValue() instead.
179  void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
180    assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
181    if (!VectorMapStorage.count(Key)) {
182      VectorParts Entry(UF);
183      VectorMapStorage[Key] = Entry;
184    }
185    VectorMapStorage[Key][Part] = Vector;
186  }
187
188  /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
189  /// value is not already set.
190  void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
191    assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
192    if (!ScalarMapStorage.count(Key)) {
193      ScalarParts Entry(UF);
194      // TODO: Consider storing uniform values only per-part, as they occupy
195      //       lane 0 only, keeping the other VF-1 redundant entries null.
196      for (unsigned Part = 0; Part < UF; ++Part)
197        Entry[Part].resize(VF, nullptr);
198      ScalarMapStorage[Key] = Entry;
199    }
200    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
201  }
202
203  /// Reset the vector value associated with \p Key for the given \p Part.
204  /// This function can be used to update values that have already been
205  /// vectorized. This is the case for "fix-up" operations including type
206  /// truncation and the second phase of recurrence vectorization.
207  void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
208    assert(hasVectorValue(Key, Part) && "Vector value not set for part");
209    VectorMapStorage[Key][Part] = Vector;
210  }
211
212  /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
213  /// This function can be used to update values that have already been
214  /// scalarized. This is the case for "fix-up" operations including scalar phi
215  /// nodes for scalarized and predicated instructions.
216  void resetScalarValue(Value *Key, const VPIteration &Instance,
217                        Value *Scalar) {
218    assert(hasScalarValue(Key, Instance) &&
219           "Scalar value not set for part and lane");
220    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
221  }
222};
223
224/// This class is used to enable the VPlan to invoke a method of ILV. This is
225/// needed until the method is refactored out of ILV and becomes reusable.
226struct VPCallback {
227  virtual ~VPCallback() {}
228  virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
229  virtual Value *getOrCreateScalarValue(Value *V,
230                                        const VPIteration &Instance) = 0;
231};
232
233/// VPTransformState holds information passed down when "executing" a VPlan,
234/// needed for generating the output IR.
235struct VPTransformState {
236  VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT,
237                   IRBuilder<> &Builder, VectorizerValueMap &ValueMap,
238                   InnerLoopVectorizer *ILV, VPCallback &Callback)
239      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
240        ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
241
242  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
243  unsigned VF;
244  unsigned UF;
245
246  /// Hold the indices to generate specific scalar instructions. Null indicates
247  /// that all instances are to be generated, using either scalar or vector
248  /// instructions.
249  Optional<VPIteration> Instance;
250
251  struct DataState {
252    /// A type for vectorized values in the new loop. Each value from the
253    /// original loop, when vectorized, is represented by UF vector values in
254    /// the new unrolled loop, where UF is the unroll factor.
255    typedef SmallVector<Value *, 2> PerPartValuesTy;
256
257    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
258  } Data;
259
260  /// Get the generated Value for a given VPValue and a given Part. Note that
261  /// as some Defs are still created by ILV and managed in its ValueMap, this
262  /// method will delegate the call to ILV in such cases in order to provide
263  /// callers a consistent API.
264  /// \see set.
265  Value *get(VPValue *Def, unsigned Part) {
266    // If Values have been set for this Def return the one relevant for \p Part.
267    if (Data.PerPartOutput.count(Def))
268      return Data.PerPartOutput[Def][Part];
269    // Def is managed by ILV: bring the Values from ValueMap.
270    return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
271  }
272
273  /// Get the generated Value for a given VPValue and given Part and Lane.
274  Value *get(VPValue *Def, const VPIteration &Instance) {
275    // If the Def is managed directly by VPTransformState, extract the lane from
276    // the relevant part. Note that currently only VPInstructions and external
277    // defs are managed by VPTransformState. Other Defs are still created by ILV
278    // and managed in its ValueMap. For those this method currently just
279    // delegates the call to ILV below.
280    if (Data.PerPartOutput.count(Def)) {
281      auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
282      // TODO: Cache created scalar values.
283      return Builder.CreateExtractElement(VecPart,
284                                          Builder.getInt32(Instance.Lane));
285    }
286
287    return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
288  }
289
290  /// Set the generated Value for a given VPValue and a given Part.
291  void set(VPValue *Def, Value *V, unsigned Part) {
292    if (!Data.PerPartOutput.count(Def)) {
293      DataState::PerPartValuesTy Entry(UF);
294      Data.PerPartOutput[Def] = Entry;
295    }
296    Data.PerPartOutput[Def][Part] = V;
297  }
298
299  /// Hold state information used when constructing the CFG of the output IR,
300  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
301  struct CFGState {
302    /// The previous VPBasicBlock visited. Initially set to null.
303    VPBasicBlock *PrevVPBB = nullptr;
304
305    /// The previous IR BasicBlock created or used. Initially set to the new
306    /// header BasicBlock.
307    BasicBlock *PrevBB = nullptr;
308
309    /// The last IR BasicBlock in the output IR. Set to the new latch
310    /// BasicBlock, used for placing the newly created BasicBlocks.
311    BasicBlock *LastBB = nullptr;
312
313    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
314    /// of replication, maps the BasicBlock of the last replica created.
315    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
316
317    /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
318    /// up at the end of vector code generation.
319    SmallVector<VPBasicBlock *, 8> VPBBsToFix;
320
321    CFGState() = default;
322  } CFG;
323
324  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
325  LoopInfo *LI;
326
327  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
328  DominatorTree *DT;
329
330  /// Hold a reference to the IRBuilder used to generate output IR code.
331  IRBuilder<> &Builder;
332
333  /// Hold a reference to the Value state information used when generating the
334  /// Values of the output IR.
335  VectorizerValueMap &ValueMap;
336
337  /// Hold a reference to a mapping between VPValues in VPlan and original
338  /// Values they correspond to.
339  VPValue2ValueTy VPValue2Value;
340
341  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
342  Value *CanonicalIV = nullptr;
343
344  /// Hold the trip count of the scalar loop.
345  Value *TripCount = nullptr;
346
347  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
348  InnerLoopVectorizer *ILV;
349
350  VPCallback &Callback;
351};
352
353/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
354/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
355class VPBlockBase {
356  friend class VPBlockUtils;
357
358  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
359
360  /// An optional name for the block.
361  std::string Name;
362
363  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
364  /// it is a topmost VPBlockBase.
365  VPRegionBlock *Parent = nullptr;
366
367  /// List of predecessor blocks.
368  SmallVector<VPBlockBase *, 1> Predecessors;
369
370  /// List of successor blocks.
371  SmallVector<VPBlockBase *, 1> Successors;
372
373  /// Successor selector, null for zero or single successor blocks.
374  VPValue *CondBit = nullptr;
375
376  /// Current block predicate - null if the block does not need a predicate.
377  VPValue *Predicate = nullptr;
378
379  /// VPlan containing the block. Can only be set on the entry block of the
380  /// plan.
381  VPlan *Plan = nullptr;
382
383  /// Add \p Successor as the last successor to this block.
384  void appendSuccessor(VPBlockBase *Successor) {
385    assert(Successor && "Cannot add nullptr successor!");
386    Successors.push_back(Successor);
387  }
388
389  /// Add \p Predecessor as the last predecessor to this block.
390  void appendPredecessor(VPBlockBase *Predecessor) {
391    assert(Predecessor && "Cannot add nullptr predecessor!");
392    Predecessors.push_back(Predecessor);
393  }
394
395  /// Remove \p Predecessor from the predecessors of this block.
396  void removePredecessor(VPBlockBase *Predecessor) {
397    auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor);
398    assert(Pos && "Predecessor does not exist");
399    Predecessors.erase(Pos);
400  }
401
402  /// Remove \p Successor from the successors of this block.
403  void removeSuccessor(VPBlockBase *Successor) {
404    auto Pos = std::find(Successors.begin(), Successors.end(), Successor);
405    assert(Pos && "Successor does not exist");
406    Successors.erase(Pos);
407  }
408
409protected:
410  VPBlockBase(const unsigned char SC, const std::string &N)
411      : SubclassID(SC), Name(N) {}
412
413public:
414  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
415  /// that are actually instantiated. Values of this enumeration are kept in the
416  /// SubclassID field of the VPBlockBase objects. They are used for concrete
417  /// type identification.
418  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
419
420  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
421
422  virtual ~VPBlockBase() = default;
423
424  const std::string &getName() const { return Name; }
425
426  void setName(const Twine &newName) { Name = newName.str(); }
427
428  /// \return an ID for the concrete type of this object.
429  /// This is used to implement the classof checks. This should not be used
430  /// for any other purpose, as the values may change as LLVM evolves.
431  unsigned getVPBlockID() const { return SubclassID; }
432
433  VPRegionBlock *getParent() { return Parent; }
434  const VPRegionBlock *getParent() const { return Parent; }
435
436  /// \return A pointer to the plan containing the current block.
437  VPlan *getPlan();
438  const VPlan *getPlan() const;
439
440  /// Sets the pointer of the plan containing the block. The block must be the
441  /// entry block into the VPlan.
442  void setPlan(VPlan *ParentPlan);
443
444  void setParent(VPRegionBlock *P) { Parent = P; }
445
446  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
447  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
448  /// VPBlockBase is a VPBasicBlock, it is returned.
449  const VPBasicBlock *getEntryBasicBlock() const;
450  VPBasicBlock *getEntryBasicBlock();
451
452  /// \return the VPBasicBlock that is the exit of this VPBlockBase,
453  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
454  /// VPBlockBase is a VPBasicBlock, it is returned.
455  const VPBasicBlock *getExitBasicBlock() const;
456  VPBasicBlock *getExitBasicBlock();
457
458  const VPBlocksTy &getSuccessors() const { return Successors; }
459  VPBlocksTy &getSuccessors() { return Successors; }
460
461  const VPBlocksTy &getPredecessors() const { return Predecessors; }
462  VPBlocksTy &getPredecessors() { return Predecessors; }
463
464  /// \return the successor of this VPBlockBase if it has a single successor.
465  /// Otherwise return a null pointer.
466  VPBlockBase *getSingleSuccessor() const {
467    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
468  }
469
470  /// \return the predecessor of this VPBlockBase if it has a single
471  /// predecessor. Otherwise return a null pointer.
472  VPBlockBase *getSinglePredecessor() const {
473    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
474  }
475
476  size_t getNumSuccessors() const { return Successors.size(); }
477  size_t getNumPredecessors() const { return Predecessors.size(); }
478
479  /// An Enclosing Block of a block B is any block containing B, including B
480  /// itself. \return the closest enclosing block starting from "this", which
481  /// has successors. \return the root enclosing block if all enclosing blocks
482  /// have no successors.
483  VPBlockBase *getEnclosingBlockWithSuccessors();
484
485  /// \return the closest enclosing block starting from "this", which has
486  /// predecessors. \return the root enclosing block if all enclosing blocks
487  /// have no predecessors.
488  VPBlockBase *getEnclosingBlockWithPredecessors();
489
490  /// \return the successors either attached directly to this VPBlockBase or, if
491  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
492  /// successors of its own, search recursively for the first enclosing
493  /// VPRegionBlock that has successors and return them. If no such
494  /// VPRegionBlock exists, return the (empty) successors of the topmost
495  /// VPBlockBase reached.
496  const VPBlocksTy &getHierarchicalSuccessors() {
497    return getEnclosingBlockWithSuccessors()->getSuccessors();
498  }
499
500  /// \return the hierarchical successor of this VPBlockBase if it has a single
501  /// hierarchical successor. Otherwise return a null pointer.
502  VPBlockBase *getSingleHierarchicalSuccessor() {
503    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
504  }
505
506  /// \return the predecessors either attached directly to this VPBlockBase or,
507  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
508  /// predecessors of its own, search recursively for the first enclosing
509  /// VPRegionBlock that has predecessors and return them. If no such
510  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
511  /// VPBlockBase reached.
512  const VPBlocksTy &getHierarchicalPredecessors() {
513    return getEnclosingBlockWithPredecessors()->getPredecessors();
514  }
515
516  /// \return the hierarchical predecessor of this VPBlockBase if it has a
517  /// single hierarchical predecessor. Otherwise return a null pointer.
518  VPBlockBase *getSingleHierarchicalPredecessor() {
519    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
520  }
521
522  /// \return the condition bit selecting the successor.
523  VPValue *getCondBit() { return CondBit; }
524
525  const VPValue *getCondBit() const { return CondBit; }
526
527  void setCondBit(VPValue *CV) { CondBit = CV; }
528
529  VPValue *getPredicate() { return Predicate; }
530
531  const VPValue *getPredicate() const { return Predicate; }
532
533  void setPredicate(VPValue *Pred) { Predicate = Pred; }
534
535  /// Set a given VPBlockBase \p Successor as the single successor of this
536  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
537  /// This VPBlockBase must have no successors.
538  void setOneSuccessor(VPBlockBase *Successor) {
539    assert(Successors.empty() && "Setting one successor when others exist.");
540    appendSuccessor(Successor);
541  }
542
543  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
544  /// successors of this VPBlockBase. \p Condition is set as the successor
545  /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
546  /// IfFalse. This VPBlockBase must have no successors.
547  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
548                        VPValue *Condition) {
549    assert(Successors.empty() && "Setting two successors when others exist.");
550    assert(Condition && "Setting two successors without condition!");
551    CondBit = Condition;
552    appendSuccessor(IfTrue);
553    appendSuccessor(IfFalse);
554  }
555
556  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
557  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
558  /// as successor of any VPBasicBlock in \p NewPreds.
559  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
560    assert(Predecessors.empty() && "Block predecessors already set.");
561    for (auto *Pred : NewPreds)
562      appendPredecessor(Pred);
563  }
564
565  /// Remove all the predecessor of this block.
566  void clearPredecessors() { Predecessors.clear(); }
567
568  /// Remove all the successors of this block and set to null its condition bit
569  void clearSuccessors() {
570    Successors.clear();
571    CondBit = nullptr;
572  }
573
574  /// The method which generates the output IR that correspond to this
575  /// VPBlockBase, thereby "executing" the VPlan.
576  virtual void execute(struct VPTransformState *State) = 0;
577
578  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
579  static void deleteCFG(VPBlockBase *Entry);
580
581  void printAsOperand(raw_ostream &OS, bool PrintType) const {
582    OS << getName();
583  }
584
585  void print(raw_ostream &OS) const {
586    // TODO: Only printing VPBB name for now since we only have dot printing
587    // support for VPInstructions/Recipes.
588    printAsOperand(OS, false);
589  }
590
591  /// Return true if it is legal to hoist instructions into this block.
592  bool isLegalToHoistInto() {
593    // There are currently no constraints that prevent an instruction to be
594    // hoisted into a VPBlockBase.
595    return true;
596  }
597};
598
599/// VPRecipeBase is a base class modeling a sequence of one or more output IR
600/// instructions.
601class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
602  friend VPBasicBlock;
603  friend class VPBlockUtils;
604
605  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
606
607  /// Each VPRecipe belongs to a single VPBasicBlock.
608  VPBasicBlock *Parent = nullptr;
609
610public:
611  /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
612  /// that is actually instantiated. Values of this enumeration are kept in the
613  /// SubclassID field of the VPRecipeBase objects. They are used for concrete
614  /// type identification.
615  using VPRecipeTy = enum {
616    VPBlendSC,
617    VPBranchOnMaskSC,
618    VPInstructionSC,
619    VPInterleaveSC,
620    VPPredInstPHISC,
621    VPReplicateSC,
622    VPWidenCallSC,
623    VPWidenCanonicalIVSC,
624    VPWidenGEPSC,
625    VPWidenIntOrFpInductionSC,
626    VPWidenMemoryInstructionSC,
627    VPWidenPHISC,
628    VPWidenSC,
629    VPWidenSelectSC
630  };
631
632  VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
633  virtual ~VPRecipeBase() = default;
634
635  /// \return an ID for the concrete type of this object.
636  /// This is used to implement the classof checks. This should not be used
637  /// for any other purpose, as the values may change as LLVM evolves.
638  unsigned getVPRecipeID() const { return SubclassID; }
639
640  /// \return the VPBasicBlock which this VPRecipe belongs to.
641  VPBasicBlock *getParent() { return Parent; }
642  const VPBasicBlock *getParent() const { return Parent; }
643
644  /// The method which generates the output IR instructions that correspond to
645  /// this VPRecipe, thereby "executing" the VPlan.
646  virtual void execute(struct VPTransformState &State) = 0;
647
648  /// Each recipe prints itself.
649  virtual void print(raw_ostream &O, const Twine &Indent,
650                     VPSlotTracker &SlotTracker) const = 0;
651
652  /// Insert an unlinked recipe into a basic block immediately before
653  /// the specified recipe.
654  void insertBefore(VPRecipeBase *InsertPos);
655
656  /// Insert an unlinked Recipe into a basic block immediately after
657  /// the specified Recipe.
658  void insertAfter(VPRecipeBase *InsertPos);
659
660  /// Unlink this recipe from its current VPBasicBlock and insert it into
661  /// the VPBasicBlock that MovePos lives in, right after MovePos.
662  void moveAfter(VPRecipeBase *MovePos);
663
664  /// This method unlinks 'this' from the containing basic block, but does not
665  /// delete it.
666  void removeFromParent();
667
668  /// This method unlinks 'this' from the containing basic block and deletes it.
669  ///
670  /// \returns an iterator pointing to the element after the erased one
671  iplist<VPRecipeBase>::iterator eraseFromParent();
672};
673
674/// This is a concrete Recipe that models a single VPlan-level instruction.
675/// While as any Recipe it may generate a sequence of IR instructions when
676/// executed, these instructions would always form a single-def expression as
677/// the VPInstruction is also a single def-use vertex.
678class VPInstruction : public VPUser, public VPRecipeBase {
679  friend class VPlanSlp;
680
681public:
682  /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
683  enum {
684    Not = Instruction::OtherOpsEnd + 1,
685    ICmpULE,
686    SLPLoad,
687    SLPStore,
688    ActiveLaneMask,
689  };
690
691private:
692  typedef unsigned char OpcodeTy;
693  OpcodeTy Opcode;
694
695  /// Utility method serving execute(): generates a single instance of the
696  /// modeled instruction.
697  void generateInstruction(VPTransformState &State, unsigned Part);
698
699protected:
700  Instruction *getUnderlyingInstr() {
701    return cast_or_null<Instruction>(getUnderlyingValue());
702  }
703
704  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
705
706public:
707  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
708      : VPUser(VPValue::VPInstructionSC, Operands),
709        VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}
710
711  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
712      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}
713
714  /// Method to support type inquiry through isa, cast, and dyn_cast.
715  static inline bool classof(const VPValue *V) {
716    return V->getVPValueID() == VPValue::VPInstructionSC;
717  }
718
719  VPInstruction *clone() const {
720    SmallVector<VPValue *, 2> Operands(operands());
721    return new VPInstruction(Opcode, Operands);
722  }
723
724  /// Method to support type inquiry through isa, cast, and dyn_cast.
725  static inline bool classof(const VPRecipeBase *R) {
726    return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
727  }
728
729  unsigned getOpcode() const { return Opcode; }
730
731  /// Generate the instruction.
732  /// TODO: We currently execute only per-part unless a specific instance is
733  /// provided.
734  void execute(VPTransformState &State) override;
735
736  /// Print the Recipe.
737  void print(raw_ostream &O, const Twine &Indent,
738             VPSlotTracker &SlotTracker) const override;
739
740  /// Print the VPInstruction.
741  void print(raw_ostream &O) const;
742  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
743
744  /// Return true if this instruction may modify memory.
745  bool mayWriteToMemory() const {
746    // TODO: we can use attributes of the called function to rule out memory
747    //       modifications.
748    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
749           Opcode == Instruction::Invoke || Opcode == SLPStore;
750  }
751
752  bool hasResult() const {
753    // CallInst may or may not have a result, depending on the called function.
754    // Conservatively return calls have results for now.
755    switch (getOpcode()) {
756    case Instruction::Ret:
757    case Instruction::Br:
758    case Instruction::Store:
759    case Instruction::Switch:
760    case Instruction::IndirectBr:
761    case Instruction::Resume:
762    case Instruction::CatchRet:
763    case Instruction::Unreachable:
764    case Instruction::Fence:
765    case Instruction::AtomicRMW:
766      return false;
767    default:
768      return true;
769    }
770  }
771};
772
773/// VPWidenRecipe is a recipe for producing a copy of vector type its
774/// ingredient. This recipe covers most of the traditional vectorization cases
775/// where each ingredient transforms into a vectorized version of itself.
776class VPWidenRecipe : public VPRecipeBase {
777  /// Hold the instruction to be widened.
778  Instruction &Ingredient;
779
780  /// Hold VPValues for the operands of the ingredient.
781  VPUser User;
782
783public:
784  template <typename IterT>
785  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
786      : VPRecipeBase(VPWidenSC), Ingredient(I), User(Operands) {}
787
788  ~VPWidenRecipe() override = default;
789
790  /// Method to support type inquiry through isa, cast, and dyn_cast.
791  static inline bool classof(const VPRecipeBase *V) {
792    return V->getVPRecipeID() == VPRecipeBase::VPWidenSC;
793  }
794
795  /// Produce widened copies of all Ingredients.
796  void execute(VPTransformState &State) override;
797
798  /// Print the recipe.
799  void print(raw_ostream &O, const Twine &Indent,
800             VPSlotTracker &SlotTracker) const override;
801};
802
803/// A recipe for widening Call instructions.
804class VPWidenCallRecipe : public VPRecipeBase {
805  /// Hold the call to be widened.
806  CallInst &Ingredient;
807
808  /// Hold VPValues for the arguments of the call.
809  VPUser User;
810
811public:
812  template <typename IterT>
813  VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
814      : VPRecipeBase(VPWidenCallSC), Ingredient(I), User(CallArguments) {}
815
816  ~VPWidenCallRecipe() override = default;
817
818  /// Method to support type inquiry through isa, cast, and dyn_cast.
819  static inline bool classof(const VPRecipeBase *V) {
820    return V->getVPRecipeID() == VPRecipeBase::VPWidenCallSC;
821  }
822
823  /// Produce a widened version of the call instruction.
824  void execute(VPTransformState &State) override;
825
826  /// Print the recipe.
827  void print(raw_ostream &O, const Twine &Indent,
828             VPSlotTracker &SlotTracker) const override;
829};
830
831/// A recipe for widening select instructions.
832class VPWidenSelectRecipe : public VPRecipeBase {
833private:
834  /// Hold the select to be widened.
835  SelectInst &Ingredient;
836
837  /// Hold VPValues for the operands of the select.
838  VPUser User;
839
840  /// Is the condition of the select loop invariant?
841  bool InvariantCond;
842
843public:
844  template <typename IterT>
845  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
846                      bool InvariantCond)
847      : VPRecipeBase(VPWidenSelectSC), Ingredient(I), User(Operands),
848        InvariantCond(InvariantCond) {}
849
850  ~VPWidenSelectRecipe() override = default;
851
852  /// Method to support type inquiry through isa, cast, and dyn_cast.
853  static inline bool classof(const VPRecipeBase *V) {
854    return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
855  }
856
857  /// Produce a widened version of the select instruction.
858  void execute(VPTransformState &State) override;
859
860  /// Print the recipe.
861  void print(raw_ostream &O, const Twine &Indent,
862             VPSlotTracker &SlotTracker) const override;
863};
864
865/// A recipe for handling GEP instructions.
866class VPWidenGEPRecipe : public VPRecipeBase {
867  GetElementPtrInst *GEP;
868
869  /// Hold VPValues for the base and indices of the GEP.
870  VPUser User;
871
872  bool IsPtrLoopInvariant;
873  SmallBitVector IsIndexLoopInvariant;
874
875public:
876  template <typename IterT>
877  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
878                   Loop *OrigLoop)
879      : VPRecipeBase(VPWidenGEPSC), GEP(GEP), User(Operands),
880        IsIndexLoopInvariant(GEP->getNumIndices(), false) {
881    IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
882    for (auto Index : enumerate(GEP->indices()))
883      IsIndexLoopInvariant[Index.index()] =
884          OrigLoop->isLoopInvariant(Index.value().get());
885  }
886  ~VPWidenGEPRecipe() override = default;
887
888  /// Method to support type inquiry through isa, cast, and dyn_cast.
889  static inline bool classof(const VPRecipeBase *V) {
890    return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
891  }
892
893  /// Generate the gep nodes.
894  void execute(VPTransformState &State) override;
895
896  /// Print the recipe.
897  void print(raw_ostream &O, const Twine &Indent,
898             VPSlotTracker &SlotTracker) const override;
899};
900
901/// A recipe for handling phi nodes of integer and floating-point inductions,
902/// producing their vector and scalar values.
903class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
904  PHINode *IV;
905  TruncInst *Trunc;
906
907public:
908  VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr)
909      : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {}
910  ~VPWidenIntOrFpInductionRecipe() override = default;
911
912  /// Method to support type inquiry through isa, cast, and dyn_cast.
913  static inline bool classof(const VPRecipeBase *V) {
914    return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
915  }
916
917  /// Generate the vectorized and scalarized versions of the phi node as
918  /// needed by their users.
919  void execute(VPTransformState &State) override;
920
921  /// Print the recipe.
922  void print(raw_ostream &O, const Twine &Indent,
923             VPSlotTracker &SlotTracker) const override;
924};
925
926/// A recipe for handling all phi nodes except for integer and FP inductions.
927class VPWidenPHIRecipe : public VPRecipeBase {
928  PHINode *Phi;
929
930public:
931  VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {}
932  ~VPWidenPHIRecipe() override = default;
933
934  /// Method to support type inquiry through isa, cast, and dyn_cast.
935  static inline bool classof(const VPRecipeBase *V) {
936    return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC;
937  }
938
939  /// Generate the phi/select nodes.
940  void execute(VPTransformState &State) override;
941
942  /// Print the recipe.
943  void print(raw_ostream &O, const Twine &Indent,
944             VPSlotTracker &SlotTracker) const override;
945};
946
947/// A recipe for vectorizing a phi-node as a sequence of mask-based select
948/// instructions.
949class VPBlendRecipe : public VPRecipeBase {
950  PHINode *Phi;
951
952  /// The blend operation is a User of the incoming values and of their
953  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
954  /// might be incoming with a full mask for which there is no VPValue.
955  VPUser User;
956
957public:
958  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
959      : VPRecipeBase(VPBlendSC), Phi(Phi), User(Operands) {
960    assert(Operands.size() > 0 &&
961           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
962           "Expected either a single incoming value or a positive even number "
963           "of operands");
964  }
965
966  /// Method to support type inquiry through isa, cast, and dyn_cast.
967  static inline bool classof(const VPRecipeBase *V) {
968    return V->getVPRecipeID() == VPRecipeBase::VPBlendSC;
969  }
970
971  /// Return the number of incoming values, taking into account that a single
972  /// incoming value has no mask.
973  unsigned getNumIncomingValues() const {
974    return (User.getNumOperands() + 1) / 2;
975  }
976
977  /// Return incoming value number \p Idx.
978  VPValue *getIncomingValue(unsigned Idx) const {
979    return User.getOperand(Idx * 2);
980  }
981
982  /// Return mask number \p Idx.
983  VPValue *getMask(unsigned Idx) const { return User.getOperand(Idx * 2 + 1); }
984
985  /// Generate the phi/select nodes.
986  void execute(VPTransformState &State) override;
987
988  /// Print the recipe.
989  void print(raw_ostream &O, const Twine &Indent,
990             VPSlotTracker &SlotTracker) const override;
991};
992
993/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
994/// or stores into one wide load/store and shuffles.
995class VPInterleaveRecipe : public VPRecipeBase {
996  const InterleaveGroup<Instruction> *IG;
997  VPUser User;
998
999public:
1000  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
1001                     VPValue *Mask)
1002      : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) {
1003    if (Mask)
1004      User.addOperand(Mask);
1005  }
1006  ~VPInterleaveRecipe() override = default;
1007
1008  /// Method to support type inquiry through isa, cast, and dyn_cast.
1009  static inline bool classof(const VPRecipeBase *V) {
1010    return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
1011  }
1012
1013  /// Return the address accessed by this recipe.
1014  VPValue *getAddr() const {
1015    return User.getOperand(0); // Address is the 1st, mandatory operand.
1016  }
1017
1018  /// Return the mask used by this recipe. Note that a full mask is represented
1019  /// by a nullptr.
1020  VPValue *getMask() const {
1021    // Mask is optional and therefore the last, currently 2nd operand.
1022    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
1023  }
1024
1025  /// Generate the wide load or store, and shuffles.
1026  void execute(VPTransformState &State) override;
1027
1028  /// Print the recipe.
1029  void print(raw_ostream &O, const Twine &Indent,
1030             VPSlotTracker &SlotTracker) const override;
1031
1032  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
1033};
1034
1035/// VPReplicateRecipe replicates a given instruction producing multiple scalar
1036/// copies of the original scalar type, one per lane, instead of producing a
1037/// single copy of widened type for all lanes. If the instruction is known to be
1038/// uniform only one copy, per lane zero, will be generated.
1039class VPReplicateRecipe : public VPRecipeBase {
1040  /// The instruction being replicated.
1041  Instruction *Ingredient;
1042
1043  /// Hold VPValues for the operands of the ingredient.
1044  VPUser User;
1045
1046  /// Indicator if only a single replica per lane is needed.
1047  bool IsUniform;
1048
1049  /// Indicator if the replicas are also predicated.
1050  bool IsPredicated;
1051
1052  /// Indicator if the scalar values should also be packed into a vector.
1053  bool AlsoPack;
1054
1055public:
1056  template <typename IterT>
1057  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
1058                    bool IsUniform, bool IsPredicated = false)
1059      : VPRecipeBase(VPReplicateSC), Ingredient(I), User(Operands),
1060        IsUniform(IsUniform), IsPredicated(IsPredicated) {
1061    // Retain the previous behavior of predicateInstructions(), where an
1062    // insert-element of a predicated instruction got hoisted into the
1063    // predicated basic block iff it was its only user. This is achieved by
1064    // having predicated instructions also pack their values into a vector by
1065    // default unless they have a replicated user which uses their scalar value.
1066    AlsoPack = IsPredicated && !I->use_empty();
1067  }
1068
1069  ~VPReplicateRecipe() override = default;
1070
1071  /// Method to support type inquiry through isa, cast, and dyn_cast.
1072  static inline bool classof(const VPRecipeBase *V) {
1073    return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC;
1074  }
1075
1076  /// Generate replicas of the desired Ingredient. Replicas will be generated
1077  /// for all parts and lanes unless a specific part and lane are specified in
1078  /// the \p State.
1079  void execute(VPTransformState &State) override;
1080
1081  void setAlsoPack(bool Pack) { AlsoPack = Pack; }
1082
1083  /// Print the recipe.
1084  void print(raw_ostream &O, const Twine &Indent,
1085             VPSlotTracker &SlotTracker) const override;
1086};
1087
1088/// A recipe for generating conditional branches on the bits of a mask.
1089class VPBranchOnMaskRecipe : public VPRecipeBase {
1090  VPUser User;
1091
1092public:
1093  VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
1094    if (BlockInMask) // nullptr means all-one mask.
1095      User.addOperand(BlockInMask);
1096  }
1097
1098  /// Method to support type inquiry through isa, cast, and dyn_cast.
1099  static inline bool classof(const VPRecipeBase *V) {
1100    return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC;
1101  }
1102
1103  /// Generate the extraction of the appropriate bit from the block mask and the
1104  /// conditional branch.
1105  void execute(VPTransformState &State) override;
1106
1107  /// Print the recipe.
1108  void print(raw_ostream &O, const Twine &Indent,
1109             VPSlotTracker &SlotTracker) const override {
1110    O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
1111    if (VPValue *Mask = getMask())
1112      Mask->print(O, SlotTracker);
1113    else
1114      O << " All-One";
1115    O << "\\l\"";
1116  }
1117
1118  /// Return the mask used by this recipe. Note that a full mask is represented
1119  /// by a nullptr.
1120  VPValue *getMask() const {
1121    assert(User.getNumOperands() <= 1 && "should have either 0 or 1 operands");
1122    // Mask is optional.
1123    return User.getNumOperands() == 1 ? User.getOperand(0) : nullptr;
1124  }
1125};
1126
1127/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
1128/// control converges back from a Branch-on-Mask. The phi nodes are needed in
1129/// order to merge values that are set under such a branch and feed their uses.
1130/// The phi nodes can be scalar or vector depending on the users of the value.
1131/// This recipe works in concert with VPBranchOnMaskRecipe.
1132class VPPredInstPHIRecipe : public VPRecipeBase {
1133  Instruction *PredInst;
1134
1135public:
1136  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
1137  /// nodes after merging back from a Branch-on-Mask.
1138  VPPredInstPHIRecipe(Instruction *PredInst)
1139      : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {}
1140  ~VPPredInstPHIRecipe() override = default;
1141
1142  /// Method to support type inquiry through isa, cast, and dyn_cast.
1143  static inline bool classof(const VPRecipeBase *V) {
1144    return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC;
1145  }
1146
1147  /// Generates phi nodes for live-outs as needed to retain SSA form.
1148  void execute(VPTransformState &State) override;
1149
1150  /// Print the recipe.
1151  void print(raw_ostream &O, const Twine &Indent,
1152             VPSlotTracker &SlotTracker) const override;
1153};
1154
1155/// A Recipe for widening load/store operations.
1156/// The recipe uses the following VPValues:
1157/// - For load: Address, optional mask
1158/// - For store: Address, stored value, optional mask
1159/// TODO: We currently execute only per-part unless a specific instance is
1160/// provided.
1161class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
1162  Instruction &Instr;
1163  VPUser User;
1164
1165  void setMask(VPValue *Mask) {
1166    if (!Mask)
1167      return;
1168    User.addOperand(Mask);
1169  }
1170
1171  bool isMasked() const {
1172    return (isa<LoadInst>(Instr) && User.getNumOperands() == 2) ||
1173           (isa<StoreInst>(Instr) && User.getNumOperands() == 3);
1174  }
1175
1176public:
1177  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
1178      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Load), User({Addr}) {
1179    setMask(Mask);
1180  }
1181
1182  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
1183                                 VPValue *StoredValue, VPValue *Mask)
1184      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Store),
1185        User({Addr, StoredValue}) {
1186    setMask(Mask);
1187  }
1188
1189  /// Method to support type inquiry through isa, cast, and dyn_cast.
1190  static inline bool classof(const VPRecipeBase *V) {
1191    return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
1192  }
1193
1194  /// Return the address accessed by this recipe.
1195  VPValue *getAddr() const {
1196    return User.getOperand(0); // Address is the 1st, mandatory operand.
1197  }
1198
1199  /// Return the mask used by this recipe. Note that a full mask is represented
1200  /// by a nullptr.
1201  VPValue *getMask() const {
1202    // Mask is optional and therefore the last operand.
1203    return isMasked() ? User.getOperand(User.getNumOperands() - 1) : nullptr;
1204  }
1205
1206  /// Return the address accessed by this recipe.
1207  VPValue *getStoredValue() const {
1208    assert(isa<StoreInst>(Instr) &&
1209           "Stored value only available for store instructions");
1210    return User.getOperand(1); // Stored value is the 2nd, mandatory operand.
1211  }
1212
1213  /// Generate the wide load/store.
1214  void execute(VPTransformState &State) override;
1215
1216  /// Print the recipe.
1217  void print(raw_ostream &O, const Twine &Indent,
1218             VPSlotTracker &SlotTracker) const override;
1219};
1220
1221/// A Recipe for widening the canonical induction variable of the vector loop.
1222class VPWidenCanonicalIVRecipe : public VPRecipeBase {
1223  /// A VPValue representing the canonical vector IV.
1224  VPValue Val;
1225
1226public:
1227  VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {}
1228  ~VPWidenCanonicalIVRecipe() override = default;
1229
1230  /// Return the VPValue representing the canonical vector induction variable of
1231  /// the vector loop.
1232  const VPValue *getVPValue() const { return &Val; }
1233  VPValue *getVPValue() { return &Val; }
1234
1235  /// Method to support type inquiry through isa, cast, and dyn_cast.
1236  static inline bool classof(const VPRecipeBase *V) {
1237    return V->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1238  }
1239
1240  /// Generate a canonical vector induction variable of the vector loop, with
1241  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
1242  /// step = <VF*UF, VF*UF, ..., VF*UF>.
1243  void execute(VPTransformState &State) override;
1244
1245  /// Print the recipe.
1246  void print(raw_ostream &O, const Twine &Indent,
1247             VPSlotTracker &SlotTracker) const override;
1248};
1249
1250/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
1251/// holds a sequence of zero or more VPRecipe's each representing a sequence of
1252/// output IR instructions.
1253class VPBasicBlock : public VPBlockBase {
1254public:
1255  using RecipeListTy = iplist<VPRecipeBase>;
1256
1257private:
1258  /// The VPRecipes held in the order of output instructions to generate.
1259  RecipeListTy Recipes;
1260
1261public:
1262  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
1263      : VPBlockBase(VPBasicBlockSC, Name.str()) {
1264    if (Recipe)
1265      appendRecipe(Recipe);
1266  }
1267
1268  ~VPBasicBlock() override { Recipes.clear(); }
1269
1270  /// Instruction iterators...
1271  using iterator = RecipeListTy::iterator;
1272  using const_iterator = RecipeListTy::const_iterator;
1273  using reverse_iterator = RecipeListTy::reverse_iterator;
1274  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
1275
1276  //===--------------------------------------------------------------------===//
1277  /// Recipe iterator methods
1278  ///
1279  inline iterator begin() { return Recipes.begin(); }
1280  inline const_iterator begin() const { return Recipes.begin(); }
1281  inline iterator end() { return Recipes.end(); }
1282  inline const_iterator end() const { return Recipes.end(); }
1283
1284  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
1285  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
1286  inline reverse_iterator rend() { return Recipes.rend(); }
1287  inline const_reverse_iterator rend() const { return Recipes.rend(); }
1288
1289  inline size_t size() const { return Recipes.size(); }
1290  inline bool empty() const { return Recipes.empty(); }
1291  inline const VPRecipeBase &front() const { return Recipes.front(); }
1292  inline VPRecipeBase &front() { return Recipes.front(); }
1293  inline const VPRecipeBase &back() const { return Recipes.back(); }
1294  inline VPRecipeBase &back() { return Recipes.back(); }
1295
1296  /// Returns a reference to the list of recipes.
1297  RecipeListTy &getRecipeList() { return Recipes; }
1298
1299  /// Returns a pointer to a member of the recipe list.
1300  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
1301    return &VPBasicBlock::Recipes;
1302  }
1303
1304  /// Method to support type inquiry through isa, cast, and dyn_cast.
1305  static inline bool classof(const VPBlockBase *V) {
1306    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
1307  }
1308
1309  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
1310    assert(Recipe && "No recipe to append.");
1311    assert(!Recipe->Parent && "Recipe already in VPlan");
1312    Recipe->Parent = this;
1313    Recipes.insert(InsertPt, Recipe);
1314  }
1315
1316  /// Augment the existing recipes of a VPBasicBlock with an additional
1317  /// \p Recipe as the last recipe.
1318  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
1319
1320  /// The method which generates the output IR instructions that correspond to
1321  /// this VPBasicBlock, thereby "executing" the VPlan.
1322  void execute(struct VPTransformState *State) override;
1323
1324private:
1325  /// Create an IR BasicBlock to hold the output instructions generated by this
1326  /// VPBasicBlock, and return it. Update the CFGState accordingly.
1327  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
1328};
1329
1330/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
1331/// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
1332/// A VPRegionBlock may indicate that its contents are to be replicated several
1333/// times. This is designed to support predicated scalarization, in which a
1334/// scalar if-then code structure needs to be generated VF * UF times. Having
1335/// this replication indicator helps to keep a single model for multiple
1336/// candidate VF's. The actual replication takes place only once the desired VF
1337/// and UF have been determined.
1338class VPRegionBlock : public VPBlockBase {
1339  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1340  VPBlockBase *Entry;
1341
1342  /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1343  VPBlockBase *Exit;
1344
1345  /// An indicator whether this region is to generate multiple replicated
1346  /// instances of output IR corresponding to its VPBlockBases.
1347  bool IsReplicator;
1348
1349public:
1350  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
1351                const std::string &Name = "", bool IsReplicator = false)
1352      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
1353        IsReplicator(IsReplicator) {
1354    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
1355    assert(Exit->getSuccessors().empty() && "Exit block has successors.");
1356    Entry->setParent(this);
1357    Exit->setParent(this);
1358  }
1359  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
1360      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
1361        IsReplicator(IsReplicator) {}
1362
1363  ~VPRegionBlock() override {
1364    if (Entry)
1365      deleteCFG(Entry);
1366  }
1367
1368  /// Method to support type inquiry through isa, cast, and dyn_cast.
1369  static inline bool classof(const VPBlockBase *V) {
1370    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
1371  }
1372
1373  const VPBlockBase *getEntry() const { return Entry; }
1374  VPBlockBase *getEntry() { return Entry; }
1375
1376  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1377  /// EntryBlock must have no predecessors.
1378  void setEntry(VPBlockBase *EntryBlock) {
1379    assert(EntryBlock->getPredecessors().empty() &&
1380           "Entry block cannot have predecessors.");
1381    Entry = EntryBlock;
1382    EntryBlock->setParent(this);
1383  }
1384
1385  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
1386  // specific interface of llvm::Function, instead of using
1387  // GraphTraints::getEntryNode. We should add a new template parameter to
1388  // DominatorTreeBase representing the Graph type.
1389  VPBlockBase &front() const { return *Entry; }
1390
1391  const VPBlockBase *getExit() const { return Exit; }
1392  VPBlockBase *getExit() { return Exit; }
1393
1394  /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
1395  /// ExitBlock must have no successors.
1396  void setExit(VPBlockBase *ExitBlock) {
1397    assert(ExitBlock->getSuccessors().empty() &&
1398           "Exit block cannot have successors.");
1399    Exit = ExitBlock;
1400    ExitBlock->setParent(this);
1401  }
1402
1403  /// An indicator whether this region is to generate multiple replicated
1404  /// instances of output IR corresponding to its VPBlockBases.
1405  bool isReplicator() const { return IsReplicator; }
1406
1407  /// The method which generates the output IR instructions that correspond to
1408  /// this VPRegionBlock, thereby "executing" the VPlan.
1409  void execute(struct VPTransformState *State) override;
1410};
1411
1412//===----------------------------------------------------------------------===//
1413// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     //
1414//===----------------------------------------------------------------------===//
1415
1416// The following set of template specializations implement GraphTraits to treat
1417// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
1418// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the
1419// VPBlockBase is a VPRegionBlock, this specialization provides access to its
1420// successors/predecessors but not to the blocks inside the region.
1421
1422template <> struct GraphTraits<VPBlockBase *> {
1423  using NodeRef = VPBlockBase *;
1424  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1425
1426  static NodeRef getEntryNode(NodeRef N) { return N; }
1427
1428  static inline ChildIteratorType child_begin(NodeRef N) {
1429    return N->getSuccessors().begin();
1430  }
1431
1432  static inline ChildIteratorType child_end(NodeRef N) {
1433    return N->getSuccessors().end();
1434  }
1435};
1436
1437template <> struct GraphTraits<const VPBlockBase *> {
1438  using NodeRef = const VPBlockBase *;
1439  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
1440
1441  static NodeRef getEntryNode(NodeRef N) { return N; }
1442
1443  static inline ChildIteratorType child_begin(NodeRef N) {
1444    return N->getSuccessors().begin();
1445  }
1446
1447  static inline ChildIteratorType child_end(NodeRef N) {
1448    return N->getSuccessors().end();
1449  }
1450};
1451
1452// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
1453// of successors for the inverse traversal.
1454template <> struct GraphTraits<Inverse<VPBlockBase *>> {
1455  using NodeRef = VPBlockBase *;
1456  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1457
1458  static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
1459
1460  static inline ChildIteratorType child_begin(NodeRef N) {
1461    return N->getPredecessors().begin();
1462  }
1463
1464  static inline ChildIteratorType child_end(NodeRef N) {
1465    return N->getPredecessors().end();
1466  }
1467};
1468
1469// The following set of template specializations implement GraphTraits to
1470// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
1471// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
1472// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
1473// there won't be automatic recursion into other VPBlockBases that turn to be
1474// VPRegionBlocks.
1475
1476template <>
1477struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
1478  using GraphRef = VPRegionBlock *;
1479  using nodes_iterator = df_iterator<NodeRef>;
1480
1481  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1482
1483  static nodes_iterator nodes_begin(GraphRef N) {
1484    return nodes_iterator::begin(N->getEntry());
1485  }
1486
1487  static nodes_iterator nodes_end(GraphRef N) {
1488    // df_iterator::end() returns an empty iterator so the node used doesn't
1489    // matter.
1490    return nodes_iterator::end(N);
1491  }
1492};
1493
1494template <>
1495struct GraphTraits<const VPRegionBlock *>
1496    : public GraphTraits<const VPBlockBase *> {
1497  using GraphRef = const VPRegionBlock *;
1498  using nodes_iterator = df_iterator<NodeRef>;
1499
1500  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1501
1502  static nodes_iterator nodes_begin(GraphRef N) {
1503    return nodes_iterator::begin(N->getEntry());
1504  }
1505
1506  static nodes_iterator nodes_end(GraphRef N) {
1507    // df_iterator::end() returns an empty iterator so the node used doesn't
1508    // matter.
1509    return nodes_iterator::end(N);
1510  }
1511};
1512
1513template <>
1514struct GraphTraits<Inverse<VPRegionBlock *>>
1515    : public GraphTraits<Inverse<VPBlockBase *>> {
1516  using GraphRef = VPRegionBlock *;
1517  using nodes_iterator = df_iterator<NodeRef>;
1518
1519  static NodeRef getEntryNode(Inverse<GraphRef> N) {
1520    return N.Graph->getExit();
1521  }
1522
1523  static nodes_iterator nodes_begin(GraphRef N) {
1524    return nodes_iterator::begin(N->getExit());
1525  }
1526
1527  static nodes_iterator nodes_end(GraphRef N) {
1528    // df_iterator::end() returns an empty iterator so the node used doesn't
1529    // matter.
1530    return nodes_iterator::end(N);
1531  }
1532};
1533
1534/// VPlan models a candidate for vectorization, encoding various decisions take
1535/// to produce efficient output IR, including which branches, basic-blocks and
1536/// output IR instructions to generate, and their cost. VPlan holds a
1537/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
1538/// VPBlock.
1539class VPlan {
1540  friend class VPlanPrinter;
1541  friend class VPSlotTracker;
1542
1543  /// Hold the single entry to the Hierarchical CFG of the VPlan.
1544  VPBlockBase *Entry;
1545
1546  /// Holds the VFs applicable to this VPlan.
1547  SmallSet<unsigned, 2> VFs;
1548
1549  /// Holds the name of the VPlan, for printing.
1550  std::string Name;
1551
1552  /// Holds all the external definitions created for this VPlan.
1553  // TODO: Introduce a specific representation for external definitions in
1554  // VPlan. External definitions must be immutable and hold a pointer to its
1555  // underlying IR that will be used to implement its structural comparison
1556  // (operators '==' and '<').
1557  SmallPtrSet<VPValue *, 16> VPExternalDefs;
1558
1559  /// Represents the backedge taken count of the original loop, for folding
1560  /// the tail.
1561  VPValue *BackedgeTakenCount = nullptr;
1562
1563  /// Holds a mapping between Values and their corresponding VPValue inside
1564  /// VPlan.
1565  Value2VPValueTy Value2VPValue;
1566
1567  /// Holds the VPLoopInfo analysis for this VPlan.
1568  VPLoopInfo VPLInfo;
1569
1570  /// Holds the condition bit values built during VPInstruction to VPRecipe transformation.
1571  SmallVector<VPValue *, 4> VPCBVs;
1572
1573public:
1574  VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
1575    if (Entry)
1576      Entry->setPlan(this);
1577  }
1578
1579  ~VPlan() {
1580    if (Entry)
1581      VPBlockBase::deleteCFG(Entry);
1582    for (auto &MapEntry : Value2VPValue)
1583      delete MapEntry.second;
1584    if (BackedgeTakenCount)
1585      delete BackedgeTakenCount;
1586    for (VPValue *Def : VPExternalDefs)
1587      delete Def;
1588    for (VPValue *CBV : VPCBVs)
1589      delete CBV;
1590  }
1591
1592  /// Generate the IR code for this VPlan.
1593  void execute(struct VPTransformState *State);
1594
1595  VPBlockBase *getEntry() { return Entry; }
1596  const VPBlockBase *getEntry() const { return Entry; }
1597
1598  VPBlockBase *setEntry(VPBlockBase *Block) {
1599    Entry = Block;
1600    Block->setPlan(this);
1601    return Entry;
1602  }
1603
1604  /// The backedge taken count of the original loop.
1605  VPValue *getOrCreateBackedgeTakenCount() {
1606    if (!BackedgeTakenCount)
1607      BackedgeTakenCount = new VPValue();
1608    return BackedgeTakenCount;
1609  }
1610
1611  void addVF(unsigned VF) { VFs.insert(VF); }
1612
1613  bool hasVF(unsigned VF) { return VFs.count(VF); }
1614
1615  const std::string &getName() const { return Name; }
1616
1617  void setName(const Twine &newName) { Name = newName.str(); }
1618
1619  /// Add \p VPVal to the pool of external definitions if it's not already
1620  /// in the pool.
1621  void addExternalDef(VPValue *VPVal) {
1622    VPExternalDefs.insert(VPVal);
1623  }
1624
1625  /// Add \p CBV to the vector of condition bit values.
1626  void addCBV(VPValue *CBV) {
1627    VPCBVs.push_back(CBV);
1628  }
1629
1630  void addVPValue(Value *V) {
1631    assert(V && "Trying to add a null Value to VPlan");
1632    assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1633    Value2VPValue[V] = new VPValue(V);
1634  }
1635
1636  VPValue *getVPValue(Value *V) {
1637    assert(V && "Trying to get the VPValue of a null Value");
1638    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
1639    return Value2VPValue[V];
1640  }
1641
1642  VPValue *getOrAddVPValue(Value *V) {
1643    assert(V && "Trying to get or add the VPValue of a null Value");
1644    if (!Value2VPValue.count(V))
1645      addVPValue(V);
1646    return getVPValue(V);
1647  }
1648
1649  /// Return the VPLoopInfo analysis for this VPlan.
1650  VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
1651  const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
1652
1653  /// Dump the plan to stderr (for debugging).
1654  void dump() const;
1655
1656  /// Returns a range mapping the values the range \p Operands to their
1657  /// corresponding VPValues.
1658  iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
1659  mapToVPValues(User::op_range Operands) {
1660    std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
1661      return getOrAddVPValue(Op);
1662    };
1663    return map_range(Operands, Fn);
1664  }
1665
1666private:
1667  /// Add to the given dominator tree the header block and every new basic block
1668  /// that was created between it and the latch block, inclusive.
1669  static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
1670                                  BasicBlock *LoopPreHeaderBB,
1671                                  BasicBlock *LoopExitBB);
1672};
1673
1674/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
1675/// indented and follows the dot format.
1676class VPlanPrinter {
1677  friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
1678  friend inline raw_ostream &operator<<(raw_ostream &OS,
1679                                        const struct VPlanIngredient &I);
1680
1681private:
1682  raw_ostream &OS;
1683  const VPlan &Plan;
1684  unsigned Depth = 0;
1685  unsigned TabWidth = 2;
1686  std::string Indent;
1687  unsigned BID = 0;
1688  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
1689
1690  VPSlotTracker SlotTracker;
1691
1692  VPlanPrinter(raw_ostream &O, const VPlan &P)
1693      : OS(O), Plan(P), SlotTracker(&P) {}
1694
1695  /// Handle indentation.
1696  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
1697
1698  /// Print a given \p Block of the Plan.
1699  void dumpBlock(const VPBlockBase *Block);
1700
1701  /// Print the information related to the CFG edges going out of a given
1702  /// \p Block, followed by printing the successor blocks themselves.
1703  void dumpEdges(const VPBlockBase *Block);
1704
1705  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
1706  /// its successor blocks.
1707  void dumpBasicBlock(const VPBasicBlock *BasicBlock);
1708
1709  /// Print a given \p Region of the Plan.
1710  void dumpRegion(const VPRegionBlock *Region);
1711
1712  unsigned getOrCreateBID(const VPBlockBase *Block) {
1713    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
1714  }
1715
1716  const Twine getOrCreateName(const VPBlockBase *Block);
1717
1718  const Twine getUID(const VPBlockBase *Block);
1719
1720  /// Print the information related to a CFG edge between two VPBlockBases.
1721  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
1722                const Twine &Label);
1723
1724  void dump();
1725
1726  static void printAsIngredient(raw_ostream &O, Value *V);
1727};
1728
1729struct VPlanIngredient {
1730  Value *V;
1731
1732  VPlanIngredient(Value *V) : V(V) {}
1733};
1734
1735inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
1736  VPlanPrinter::printAsIngredient(OS, I.V);
1737  return OS;
1738}
1739
1740inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
1741  VPlanPrinter Printer(OS, Plan);
1742  Printer.dump();
1743  return OS;
1744}
1745
1746//===----------------------------------------------------------------------===//
1747// VPlan Utilities
1748//===----------------------------------------------------------------------===//
1749
1750/// Class that provides utilities for VPBlockBases in VPlan.
1751class VPBlockUtils {
1752public:
1753  VPBlockUtils() = delete;
1754
1755  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
1756  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
1757  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
1758  /// has more than one successor, its conditional bit is propagated to \p
1759  /// NewBlock. \p NewBlock must have neither successors nor predecessors.
1760  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
1761    assert(NewBlock->getSuccessors().empty() &&
1762           "Can't insert new block with successors.");
1763    // TODO: move successors from BlockPtr to NewBlock when this functionality
1764    // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
1765    // already has successors.
1766    BlockPtr->setOneSuccessor(NewBlock);
1767    NewBlock->setPredecessors({BlockPtr});
1768    NewBlock->setParent(BlockPtr->getParent());
1769  }
1770
1771  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
1772  /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
1773  /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
1774  /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
1775  /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
1776  /// must have neither successors nor predecessors.
1777  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
1778                                   VPValue *Condition, VPBlockBase *BlockPtr) {
1779    assert(IfTrue->getSuccessors().empty() &&
1780           "Can't insert IfTrue with successors.");
1781    assert(IfFalse->getSuccessors().empty() &&
1782           "Can't insert IfFalse with successors.");
1783    BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
1784    IfTrue->setPredecessors({BlockPtr});
1785    IfFalse->setPredecessors({BlockPtr});
1786    IfTrue->setParent(BlockPtr->getParent());
1787    IfFalse->setParent(BlockPtr->getParent());
1788  }
1789
1790  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
1791  /// the successors of \p From and \p From to the predecessors of \p To. Both
1792  /// VPBlockBases must have the same parent, which can be null. Both
1793  /// VPBlockBases can be already connected to other VPBlockBases.
1794  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
1795    assert((From->getParent() == To->getParent()) &&
1796           "Can't connect two block with different parents");
1797    assert(From->getNumSuccessors() < 2 &&
1798           "Blocks can't have more than two successors.");
1799    From->appendSuccessor(To);
1800    To->appendPredecessor(From);
1801  }
1802
1803  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
1804  /// from the successors of \p From and \p From from the predecessors of \p To.
1805  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
1806    assert(To && "Successor to disconnect is null.");
1807    From->removeSuccessor(To);
1808    To->removePredecessor(From);
1809  }
1810
1811  /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1812  static bool isBackEdge(const VPBlockBase *FromBlock,
1813                         const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1814    assert(FromBlock->getParent() == ToBlock->getParent() &&
1815           FromBlock->getParent() && "Must be in same region");
1816    const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1817    const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1818    if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1819      return false;
1820
1821    // A back-edge is a branch from the loop latch to its header.
1822    return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
1823  }
1824
1825  /// Returns true if \p Block is a loop latch
1826  static bool blockIsLoopLatch(const VPBlockBase *Block,
1827                               const VPLoopInfo *VPLInfo) {
1828    if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1829      return ParentVPL->isLoopLatch(Block);
1830
1831    return false;
1832  }
1833
1834  /// Count and return the number of succesors of \p PredBlock excluding any
1835  /// backedges.
1836  static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
1837                                      VPLoopInfo *VPLI) {
1838    unsigned Count = 0;
1839    for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
1840      if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
1841        Count++;
1842    }
1843    return Count;
1844  }
1845};
1846
1847class VPInterleavedAccessInfo {
1848  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
1849      InterleaveGroupMap;
1850
1851  /// Type for mapping of instruction based interleave groups to VPInstruction
1852  /// interleave groups
1853  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
1854                             InterleaveGroup<VPInstruction> *>;
1855
1856  /// Recursively \p Region and populate VPlan based interleave groups based on
1857  /// \p IAI.
1858  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
1859                   InterleavedAccessInfo &IAI);
1860  /// Recursively traverse \p Block and populate VPlan based interleave groups
1861  /// based on \p IAI.
1862  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1863                  InterleavedAccessInfo &IAI);
1864
1865public:
1866  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
1867
1868  ~VPInterleavedAccessInfo() {
1869    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
1870    // Avoid releasing a pointer twice.
1871    for (auto &I : InterleaveGroupMap)
1872      DelSet.insert(I.second);
1873    for (auto *Ptr : DelSet)
1874      delete Ptr;
1875  }
1876
1877  /// Get the interleave group that \p Instr belongs to.
1878  ///
1879  /// \returns nullptr if doesn't have such group.
1880  InterleaveGroup<VPInstruction> *
1881  getInterleaveGroup(VPInstruction *Instr) const {
1882    if (InterleaveGroupMap.count(Instr))
1883      return InterleaveGroupMap.find(Instr)->second;
1884    return nullptr;
1885  }
1886};
1887
1888/// Class that maps (parts of) an existing VPlan to trees of combined
1889/// VPInstructions.
1890class VPlanSlp {
1891  enum class OpMode { Failed, Load, Opcode };
1892
1893  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
1894  /// DenseMap keys.
1895  struct BundleDenseMapInfo {
1896    static SmallVector<VPValue *, 4> getEmptyKey() {
1897      return {reinterpret_cast<VPValue *>(-1)};
1898    }
1899
1900    static SmallVector<VPValue *, 4> getTombstoneKey() {
1901      return {reinterpret_cast<VPValue *>(-2)};
1902    }
1903
1904    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
1905      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1906    }
1907
1908    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
1909                        const SmallVector<VPValue *, 4> &RHS) {
1910      return LHS == RHS;
1911    }
1912  };
1913
1914  /// Mapping of values in the original VPlan to a combined VPInstruction.
1915  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
1916      BundleToCombined;
1917
1918  VPInterleavedAccessInfo &IAI;
1919
1920  /// Basic block to operate on. For now, only instructions in a single BB are
1921  /// considered.
1922  const VPBasicBlock &BB;
1923
1924  /// Indicates whether we managed to combine all visited instructions or not.
1925  bool CompletelySLP = true;
1926
1927  /// Width of the widest combined bundle in bits.
1928  unsigned WidestBundleBits = 0;
1929
1930  using MultiNodeOpTy =
1931      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
1932
1933  // Input operand bundles for the current multi node. Each multi node operand
1934  // bundle contains values not matching the multi node's opcode. They will
1935  // be reordered in reorderMultiNodeOps, once we completed building a
1936  // multi node.
1937  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
1938
1939  /// Indicates whether we are building a multi node currently.
1940  bool MultiNodeActive = false;
1941
1942  /// Check if we can vectorize Operands together.
1943  bool areVectorizable(ArrayRef<VPValue *> Operands) const;
1944
1945  /// Add combined instruction \p New for the bundle \p Operands.
1946  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
1947
1948  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
1949  VPInstruction *markFailed();
1950
1951  /// Reorder operands in the multi node to maximize sequential memory access
1952  /// and commutative operations.
1953  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
1954
1955  /// Choose the best candidate to use for the lane after \p Last. The set of
1956  /// candidates to choose from are values with an opcode matching \p Last's
1957  /// or loads consecutive to \p Last.
1958  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
1959                                       SmallPtrSetImpl<VPValue *> &Candidates,
1960                                       VPInterleavedAccessInfo &IAI);
1961
1962  /// Print bundle \p Values to dbgs().
1963  void dumpBundle(ArrayRef<VPValue *> Values);
1964
1965public:
1966  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
1967
1968  ~VPlanSlp() {
1969    for (auto &KV : BundleToCombined)
1970      delete KV.second;
1971  }
1972
1973  /// Tries to build an SLP tree rooted at \p Operands and returns a
1974  /// VPInstruction combining \p Operands, if they can be combined.
1975  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
1976
1977  /// Return the width of the widest combined bundle in bits.
1978  unsigned getWidestBundleBits() const { return WidestBundleBits; }
1979
1980  /// Return true if all visited instruction can be combined.
1981  bool isCompletelySLP() const { return CompletelySLP; }
1982};
1983} // end namespace llvm
1984
1985#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
1986