VPlan.h revision 360784
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12///    VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
14///    treated as proper graphs for generic algorithms;
15/// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
16///    within VPBasicBlocks;
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18///    instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanLoopInfo.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DepthFirstIterator.h"
32#include "llvm/ADT/GraphTraits.h"
33#include "llvm/ADT/Optional.h"
34#include "llvm/ADT/SmallBitVector.h"
35#include "llvm/ADT/SmallPtrSet.h"
36#include "llvm/ADT/SmallSet.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/ilist.h"
40#include "llvm/ADT/ilist_node.h"
41#include "llvm/Analysis/VectorUtils.h"
42#include "llvm/IR/IRBuilder.h"
43#include <algorithm>
44#include <cassert>
45#include <cstddef>
46#include <map>
47#include <string>
48
49namespace llvm {
50
51class LoopVectorizationLegality;
52class LoopVectorizationCostModel;
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56template <class T> class InterleaveGroup;
57class LoopInfo;
58class raw_ostream;
59class Value;
60class VPBasicBlock;
61class VPRegionBlock;
62class VPlan;
63class VPlanSlp;
64
65/// A range of powers-of-2 vectorization factors with fixed start and
66/// adjustable end. The range includes start and excludes end, e.g.,:
67/// [1, 9) = {1, 2, 4, 8}
68struct VFRange {
69  // A power of 2.
70  const unsigned Start;
71
72  // Need not be a power of 2. If End <= Start range is empty.
73  unsigned End;
74};
75
76using VPlanPtr = std::unique_ptr<VPlan>;
77
78/// In what follows, the term "input IR" refers to code that is fed into the
79/// vectorizer whereas the term "output IR" refers to code that is generated by
80/// the vectorizer.
81
82/// VPIteration represents a single point in the iteration space of the output
83/// (vectorized and/or unrolled) IR loop.
84struct VPIteration {
85  /// in [0..UF)
86  unsigned Part;
87
88  /// in [0..VF)
89  unsigned Lane;
90};
91
92/// This is a helper struct for maintaining vectorization state. It's used for
93/// mapping values from the original loop to their corresponding values in
94/// the new loop. Two mappings are maintained: one for vectorized values and
95/// one for scalarized values. Vectorized values are represented with UF
96/// vector values in the new loop, and scalarized values are represented with
97/// UF x VF scalar values in the new loop. UF and VF are the unroll and
98/// vectorization factors, respectively.
99///
100/// Entries can be added to either map with setVectorValue and setScalarValue,
101/// which assert that an entry was not already added before. If an entry is to
102/// replace an existing one, call resetVectorValue and resetScalarValue. This is
103/// currently needed to modify the mapped values during "fix-up" operations that
104/// occur once the first phase of widening is complete. These operations include
105/// type truncation and the second phase of recurrence widening.
106///
107/// Entries from either map can be retrieved using the getVectorValue and
108/// getScalarValue functions, which assert that the desired value exists.
109struct VectorizerValueMap {
110  friend struct VPTransformState;
111
112private:
113  /// The unroll factor. Each entry in the vector map contains UF vector values.
114  unsigned UF;
115
116  /// The vectorization factor. Each entry in the scalar map contains UF x VF
117  /// scalar values.
118  unsigned VF;
119
120  /// The vector and scalar map storage. We use std::map and not DenseMap
121  /// because insertions to DenseMap invalidate its iterators.
122  using VectorParts = SmallVector<Value *, 2>;
123  using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
124  std::map<Value *, VectorParts> VectorMapStorage;
125  std::map<Value *, ScalarParts> ScalarMapStorage;
126
127public:
128  /// Construct an empty map with the given unroll and vectorization factors.
129  VectorizerValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
130
131  /// \return True if the map has any vector entry for \p Key.
132  bool hasAnyVectorValue(Value *Key) const {
133    return VectorMapStorage.count(Key);
134  }
135
136  /// \return True if the map has a vector entry for \p Key and \p Part.
137  bool hasVectorValue(Value *Key, unsigned Part) const {
138    assert(Part < UF && "Queried Vector Part is too large.");
139    if (!hasAnyVectorValue(Key))
140      return false;
141    const VectorParts &Entry = VectorMapStorage.find(Key)->second;
142    assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
143    return Entry[Part] != nullptr;
144  }
145
146  /// \return True if the map has any scalar entry for \p Key.
147  bool hasAnyScalarValue(Value *Key) const {
148    return ScalarMapStorage.count(Key);
149  }
150
151  /// \return True if the map has a scalar entry for \p Key and \p Instance.
152  bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
153    assert(Instance.Part < UF && "Queried Scalar Part is too large.");
154    assert(Instance.Lane < VF && "Queried Scalar Lane is too large.");
155    if (!hasAnyScalarValue(Key))
156      return false;
157    const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
158    assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
159    assert(Entry[Instance.Part].size() == VF &&
160           "ScalarParts has wrong dimensions.");
161    return Entry[Instance.Part][Instance.Lane] != nullptr;
162  }
163
164  /// Retrieve the existing vector value that corresponds to \p Key and
165  /// \p Part.
166  Value *getVectorValue(Value *Key, unsigned Part) {
167    assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
168    return VectorMapStorage[Key][Part];
169  }
170
171  /// Retrieve the existing scalar value that corresponds to \p Key and
172  /// \p Instance.
173  Value *getScalarValue(Value *Key, const VPIteration &Instance) {
174    assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
175    return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
176  }
177
178  /// Set a vector value associated with \p Key and \p Part. Assumes such a
179  /// value is not already set. If it is, use resetVectorValue() instead.
180  void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
181    assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
182    if (!VectorMapStorage.count(Key)) {
183      VectorParts Entry(UF);
184      VectorMapStorage[Key] = Entry;
185    }
186    VectorMapStorage[Key][Part] = Vector;
187  }
188
189  /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
190  /// value is not already set.
191  void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
192    assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
193    if (!ScalarMapStorage.count(Key)) {
194      ScalarParts Entry(UF);
195      // TODO: Consider storing uniform values only per-part, as they occupy
196      //       lane 0 only, keeping the other VF-1 redundant entries null.
197      for (unsigned Part = 0; Part < UF; ++Part)
198        Entry[Part].resize(VF, nullptr);
199      ScalarMapStorage[Key] = Entry;
200    }
201    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
202  }
203
204  /// Reset the vector value associated with \p Key for the given \p Part.
205  /// This function can be used to update values that have already been
206  /// vectorized. This is the case for "fix-up" operations including type
207  /// truncation and the second phase of recurrence vectorization.
208  void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
209    assert(hasVectorValue(Key, Part) && "Vector value not set for part");
210    VectorMapStorage[Key][Part] = Vector;
211  }
212
213  /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
214  /// This function can be used to update values that have already been
215  /// scalarized. This is the case for "fix-up" operations including scalar phi
216  /// nodes for scalarized and predicated instructions.
217  void resetScalarValue(Value *Key, const VPIteration &Instance,
218                        Value *Scalar) {
219    assert(hasScalarValue(Key, Instance) &&
220           "Scalar value not set for part and lane");
221    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
222  }
223};
224
225/// This class is used to enable the VPlan to invoke a method of ILV. This is
226/// needed until the method is refactored out of ILV and becomes reusable.
227struct VPCallback {
228  virtual ~VPCallback() {}
229  virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
230  virtual Value *getOrCreateScalarValue(Value *V,
231                                        const VPIteration &Instance) = 0;
232};
233
234/// VPTransformState holds information passed down when "executing" a VPlan,
235/// needed for generating the output IR.
236struct VPTransformState {
237  VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT,
238                   IRBuilder<> &Builder, VectorizerValueMap &ValueMap,
239                   InnerLoopVectorizer *ILV, VPCallback &Callback)
240      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
241        ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
242
243  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
244  unsigned VF;
245  unsigned UF;
246
247  /// Hold the indices to generate specific scalar instructions. Null indicates
248  /// that all instances are to be generated, using either scalar or vector
249  /// instructions.
250  Optional<VPIteration> Instance;
251
252  struct DataState {
253    /// A type for vectorized values in the new loop. Each value from the
254    /// original loop, when vectorized, is represented by UF vector values in
255    /// the new unrolled loop, where UF is the unroll factor.
256    typedef SmallVector<Value *, 2> PerPartValuesTy;
257
258    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
259  } Data;
260
261  /// Get the generated Value for a given VPValue and a given Part. Note that
262  /// as some Defs are still created by ILV and managed in its ValueMap, this
263  /// method will delegate the call to ILV in such cases in order to provide
264  /// callers a consistent API.
265  /// \see set.
266  Value *get(VPValue *Def, unsigned Part) {
267    // If Values have been set for this Def return the one relevant for \p Part.
268    if (Data.PerPartOutput.count(Def))
269      return Data.PerPartOutput[Def][Part];
270    // Def is managed by ILV: bring the Values from ValueMap.
271    return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
272  }
273
274  /// Get the generated Value for a given VPValue and given Part and Lane. Note
275  /// that as per-lane Defs are still created by ILV and managed in its ValueMap
276  /// this method currently just delegates the call to ILV.
277  Value *get(VPValue *Def, const VPIteration &Instance) {
278    return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
279  }
280
281  /// Set the generated Value for a given VPValue and a given Part.
282  void set(VPValue *Def, Value *V, unsigned Part) {
283    if (!Data.PerPartOutput.count(Def)) {
284      DataState::PerPartValuesTy Entry(UF);
285      Data.PerPartOutput[Def] = Entry;
286    }
287    Data.PerPartOutput[Def][Part] = V;
288  }
289
290  /// Hold state information used when constructing the CFG of the output IR,
291  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
292  struct CFGState {
293    /// The previous VPBasicBlock visited. Initially set to null.
294    VPBasicBlock *PrevVPBB = nullptr;
295
296    /// The previous IR BasicBlock created or used. Initially set to the new
297    /// header BasicBlock.
298    BasicBlock *PrevBB = nullptr;
299
300    /// The last IR BasicBlock in the output IR. Set to the new latch
301    /// BasicBlock, used for placing the newly created BasicBlocks.
302    BasicBlock *LastBB = nullptr;
303
304    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
305    /// of replication, maps the BasicBlock of the last replica created.
306    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
307
308    /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
309    /// up at the end of vector code generation.
310    SmallVector<VPBasicBlock *, 8> VPBBsToFix;
311
312    CFGState() = default;
313  } CFG;
314
315  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
316  LoopInfo *LI;
317
318  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
319  DominatorTree *DT;
320
321  /// Hold a reference to the IRBuilder used to generate output IR code.
322  IRBuilder<> &Builder;
323
324  /// Hold a reference to the Value state information used when generating the
325  /// Values of the output IR.
326  VectorizerValueMap &ValueMap;
327
328  /// Hold a reference to a mapping between VPValues in VPlan and original
329  /// Values they correspond to.
330  VPValue2ValueTy VPValue2Value;
331
332  /// Hold the trip count of the scalar loop.
333  Value *TripCount = nullptr;
334
335  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
336  InnerLoopVectorizer *ILV;
337
338  VPCallback &Callback;
339};
340
341/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
342/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
343class VPBlockBase {
344  friend class VPBlockUtils;
345
346private:
347  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
348
349  /// An optional name for the block.
350  std::string Name;
351
352  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
353  /// it is a topmost VPBlockBase.
354  VPRegionBlock *Parent = nullptr;
355
356  /// List of predecessor blocks.
357  SmallVector<VPBlockBase *, 1> Predecessors;
358
359  /// List of successor blocks.
360  SmallVector<VPBlockBase *, 1> Successors;
361
362  /// Successor selector, null for zero or single successor blocks.
363  VPValue *CondBit = nullptr;
364
365  /// Current block predicate - null if the block does not need a predicate.
366  VPValue *Predicate = nullptr;
367
368  /// Add \p Successor as the last successor to this block.
369  void appendSuccessor(VPBlockBase *Successor) {
370    assert(Successor && "Cannot add nullptr successor!");
371    Successors.push_back(Successor);
372  }
373
374  /// Add \p Predecessor as the last predecessor to this block.
375  void appendPredecessor(VPBlockBase *Predecessor) {
376    assert(Predecessor && "Cannot add nullptr predecessor!");
377    Predecessors.push_back(Predecessor);
378  }
379
380  /// Remove \p Predecessor from the predecessors of this block.
381  void removePredecessor(VPBlockBase *Predecessor) {
382    auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor);
383    assert(Pos && "Predecessor does not exist");
384    Predecessors.erase(Pos);
385  }
386
387  /// Remove \p Successor from the successors of this block.
388  void removeSuccessor(VPBlockBase *Successor) {
389    auto Pos = std::find(Successors.begin(), Successors.end(), Successor);
390    assert(Pos && "Successor does not exist");
391    Successors.erase(Pos);
392  }
393
394protected:
395  VPBlockBase(const unsigned char SC, const std::string &N)
396      : SubclassID(SC), Name(N) {}
397
398public:
399  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
400  /// that are actually instantiated. Values of this enumeration are kept in the
401  /// SubclassID field of the VPBlockBase objects. They are used for concrete
402  /// type identification.
403  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
404
405  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
406
407  virtual ~VPBlockBase() = default;
408
409  const std::string &getName() const { return Name; }
410
411  void setName(const Twine &newName) { Name = newName.str(); }
412
413  /// \return an ID for the concrete type of this object.
414  /// This is used to implement the classof checks. This should not be used
415  /// for any other purpose, as the values may change as LLVM evolves.
416  unsigned getVPBlockID() const { return SubclassID; }
417
418  VPRegionBlock *getParent() { return Parent; }
419  const VPRegionBlock *getParent() const { return Parent; }
420
421  void setParent(VPRegionBlock *P) { Parent = P; }
422
423  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
424  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
425  /// VPBlockBase is a VPBasicBlock, it is returned.
426  const VPBasicBlock *getEntryBasicBlock() const;
427  VPBasicBlock *getEntryBasicBlock();
428
429  /// \return the VPBasicBlock that is the exit of this VPBlockBase,
430  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
431  /// VPBlockBase is a VPBasicBlock, it is returned.
432  const VPBasicBlock *getExitBasicBlock() const;
433  VPBasicBlock *getExitBasicBlock();
434
435  const VPBlocksTy &getSuccessors() const { return Successors; }
436  VPBlocksTy &getSuccessors() { return Successors; }
437
438  const VPBlocksTy &getPredecessors() const { return Predecessors; }
439  VPBlocksTy &getPredecessors() { return Predecessors; }
440
441  /// \return the successor of this VPBlockBase if it has a single successor.
442  /// Otherwise return a null pointer.
443  VPBlockBase *getSingleSuccessor() const {
444    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
445  }
446
447  /// \return the predecessor of this VPBlockBase if it has a single
448  /// predecessor. Otherwise return a null pointer.
449  VPBlockBase *getSinglePredecessor() const {
450    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
451  }
452
453  size_t getNumSuccessors() const { return Successors.size(); }
454  size_t getNumPredecessors() const { return Predecessors.size(); }
455
456  /// An Enclosing Block of a block B is any block containing B, including B
457  /// itself. \return the closest enclosing block starting from "this", which
458  /// has successors. \return the root enclosing block if all enclosing blocks
459  /// have no successors.
460  VPBlockBase *getEnclosingBlockWithSuccessors();
461
462  /// \return the closest enclosing block starting from "this", which has
463  /// predecessors. \return the root enclosing block if all enclosing blocks
464  /// have no predecessors.
465  VPBlockBase *getEnclosingBlockWithPredecessors();
466
467  /// \return the successors either attached directly to this VPBlockBase or, if
468  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
469  /// successors of its own, search recursively for the first enclosing
470  /// VPRegionBlock that has successors and return them. If no such
471  /// VPRegionBlock exists, return the (empty) successors of the topmost
472  /// VPBlockBase reached.
473  const VPBlocksTy &getHierarchicalSuccessors() {
474    return getEnclosingBlockWithSuccessors()->getSuccessors();
475  }
476
477  /// \return the hierarchical successor of this VPBlockBase if it has a single
478  /// hierarchical successor. Otherwise return a null pointer.
479  VPBlockBase *getSingleHierarchicalSuccessor() {
480    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
481  }
482
483  /// \return the predecessors either attached directly to this VPBlockBase or,
484  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
485  /// predecessors of its own, search recursively for the first enclosing
486  /// VPRegionBlock that has predecessors and return them. If no such
487  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
488  /// VPBlockBase reached.
489  const VPBlocksTy &getHierarchicalPredecessors() {
490    return getEnclosingBlockWithPredecessors()->getPredecessors();
491  }
492
493  /// \return the hierarchical predecessor of this VPBlockBase if it has a
494  /// single hierarchical predecessor. Otherwise return a null pointer.
495  VPBlockBase *getSingleHierarchicalPredecessor() {
496    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
497  }
498
499  /// \return the condition bit selecting the successor.
500  VPValue *getCondBit() { return CondBit; }
501
502  const VPValue *getCondBit() const { return CondBit; }
503
504  void setCondBit(VPValue *CV) { CondBit = CV; }
505
506  VPValue *getPredicate() { return Predicate; }
507
508  const VPValue *getPredicate() const { return Predicate; }
509
510  void setPredicate(VPValue *Pred) { Predicate = Pred; }
511
512  /// Set a given VPBlockBase \p Successor as the single successor of this
513  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
514  /// This VPBlockBase must have no successors.
515  void setOneSuccessor(VPBlockBase *Successor) {
516    assert(Successors.empty() && "Setting one successor when others exist.");
517    appendSuccessor(Successor);
518  }
519
520  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
521  /// successors of this VPBlockBase. \p Condition is set as the successor
522  /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
523  /// IfFalse. This VPBlockBase must have no successors.
524  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
525                        VPValue *Condition) {
526    assert(Successors.empty() && "Setting two successors when others exist.");
527    assert(Condition && "Setting two successors without condition!");
528    CondBit = Condition;
529    appendSuccessor(IfTrue);
530    appendSuccessor(IfFalse);
531  }
532
533  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
534  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
535  /// as successor of any VPBasicBlock in \p NewPreds.
536  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
537    assert(Predecessors.empty() && "Block predecessors already set.");
538    for (auto *Pred : NewPreds)
539      appendPredecessor(Pred);
540  }
541
542  /// Remove all the predecessor of this block.
543  void clearPredecessors() { Predecessors.clear(); }
544
545  /// Remove all the successors of this block and set to null its condition bit
546  void clearSuccessors() {
547    Successors.clear();
548    CondBit = nullptr;
549  }
550
551  /// The method which generates the output IR that correspond to this
552  /// VPBlockBase, thereby "executing" the VPlan.
553  virtual void execute(struct VPTransformState *State) = 0;
554
555  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
556  static void deleteCFG(VPBlockBase *Entry);
557
558  void printAsOperand(raw_ostream &OS, bool PrintType) const {
559    OS << getName();
560  }
561
562  void print(raw_ostream &OS) const {
563    // TODO: Only printing VPBB name for now since we only have dot printing
564    // support for VPInstructions/Recipes.
565    printAsOperand(OS, false);
566  }
567
568  /// Return true if it is legal to hoist instructions into this block.
569  bool isLegalToHoistInto() {
570    // There are currently no constraints that prevent an instruction to be
571    // hoisted into a VPBlockBase.
572    return true;
573  }
574};
575
576/// VPRecipeBase is a base class modeling a sequence of one or more output IR
577/// instructions.
578class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
579  friend VPBasicBlock;
580  friend class VPBlockUtils;
581
582private:
583  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
584
585  /// Each VPRecipe belongs to a single VPBasicBlock.
586  VPBasicBlock *Parent = nullptr;
587
588public:
589  /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
590  /// that is actually instantiated. Values of this enumeration are kept in the
591  /// SubclassID field of the VPRecipeBase objects. They are used for concrete
592  /// type identification.
593  using VPRecipeTy = enum {
594    VPBlendSC,
595    VPBranchOnMaskSC,
596    VPInstructionSC,
597    VPInterleaveSC,
598    VPPredInstPHISC,
599    VPReplicateSC,
600    VPWidenGEPSC,
601    VPWidenIntOrFpInductionSC,
602    VPWidenMemoryInstructionSC,
603    VPWidenPHISC,
604    VPWidenSC,
605  };
606
607  VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
608  virtual ~VPRecipeBase() = default;
609
610  /// \return an ID for the concrete type of this object.
611  /// This is used to implement the classof checks. This should not be used
612  /// for any other purpose, as the values may change as LLVM evolves.
613  unsigned getVPRecipeID() const { return SubclassID; }
614
615  /// \return the VPBasicBlock which this VPRecipe belongs to.
616  VPBasicBlock *getParent() { return Parent; }
617  const VPBasicBlock *getParent() const { return Parent; }
618
619  /// The method which generates the output IR instructions that correspond to
620  /// this VPRecipe, thereby "executing" the VPlan.
621  virtual void execute(struct VPTransformState &State) = 0;
622
623  /// Each recipe prints itself.
624  virtual void print(raw_ostream &O, const Twine &Indent) const = 0;
625
626  /// Insert an unlinked recipe into a basic block immediately before
627  /// the specified recipe.
628  void insertBefore(VPRecipeBase *InsertPos);
629
630  /// Insert an unlinked Recipe into a basic block immediately after
631  /// the specified Recipe.
632  void insertAfter(VPRecipeBase *InsertPos);
633
634  /// Unlink this recipe from its current VPBasicBlock and insert it into
635  /// the VPBasicBlock that MovePos lives in, right after MovePos.
636  void moveAfter(VPRecipeBase *MovePos);
637
638  /// This method unlinks 'this' from the containing basic block, but does not
639  /// delete it.
640  void removeFromParent();
641
642  /// This method unlinks 'this' from the containing basic block and deletes it.
643  ///
644  /// \returns an iterator pointing to the element after the erased one
645  iplist<VPRecipeBase>::iterator eraseFromParent();
646};
647
648/// This is a concrete Recipe that models a single VPlan-level instruction.
649/// While as any Recipe it may generate a sequence of IR instructions when
650/// executed, these instructions would always form a single-def expression as
651/// the VPInstruction is also a single def-use vertex.
652class VPInstruction : public VPUser, public VPRecipeBase {
653  friend class VPlanSlp;
654
655public:
656  /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
657  enum {
658    Not = Instruction::OtherOpsEnd + 1,
659    ICmpULE,
660    SLPLoad,
661    SLPStore,
662  };
663
664private:
665  typedef unsigned char OpcodeTy;
666  OpcodeTy Opcode;
667
668  /// Utility method serving execute(): generates a single instance of the
669  /// modeled instruction.
670  void generateInstruction(VPTransformState &State, unsigned Part);
671
672protected:
673  Instruction *getUnderlyingInstr() {
674    return cast_or_null<Instruction>(getUnderlyingValue());
675  }
676
677  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
678
679public:
680  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
681      : VPUser(VPValue::VPInstructionSC, Operands),
682        VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}
683
684  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
685      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}
686
687  /// Method to support type inquiry through isa, cast, and dyn_cast.
688  static inline bool classof(const VPValue *V) {
689    return V->getVPValueID() == VPValue::VPInstructionSC;
690  }
691
692  VPInstruction *clone() const {
693    SmallVector<VPValue *, 2> Operands(operands());
694    return new VPInstruction(Opcode, Operands);
695  }
696
697  /// Method to support type inquiry through isa, cast, and dyn_cast.
698  static inline bool classof(const VPRecipeBase *R) {
699    return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
700  }
701
702  unsigned getOpcode() const { return Opcode; }
703
704  /// Generate the instruction.
705  /// TODO: We currently execute only per-part unless a specific instance is
706  /// provided.
707  void execute(VPTransformState &State) override;
708
709  /// Print the Recipe.
710  void print(raw_ostream &O, const Twine &Indent) const override;
711
712  /// Print the VPInstruction.
713  void print(raw_ostream &O) const;
714
715  /// Return true if this instruction may modify memory.
716  bool mayWriteToMemory() const {
717    // TODO: we can use attributes of the called function to rule out memory
718    //       modifications.
719    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
720           Opcode == Instruction::Invoke || Opcode == SLPStore;
721  }
722};
723
724/// VPWidenRecipe is a recipe for producing a copy of vector type for each
725/// Instruction in its ingredients independently, in order. This recipe covers
726/// most of the traditional vectorization cases where each ingredient transforms
727/// into a vectorized version of itself.
728class VPWidenRecipe : public VPRecipeBase {
729private:
730  /// Hold the ingredients by pointing to their original BasicBlock location.
731  BasicBlock::iterator Begin;
732  BasicBlock::iterator End;
733
734public:
735  VPWidenRecipe(Instruction *I) : VPRecipeBase(VPWidenSC) {
736    End = I->getIterator();
737    Begin = End++;
738  }
739
740  ~VPWidenRecipe() override = default;
741
742  /// Method to support type inquiry through isa, cast, and dyn_cast.
743  static inline bool classof(const VPRecipeBase *V) {
744    return V->getVPRecipeID() == VPRecipeBase::VPWidenSC;
745  }
746
747  /// Produce widened copies of all Ingredients.
748  void execute(VPTransformState &State) override;
749
750  /// Augment the recipe to include Instr, if it lies at its End.
751  bool appendInstruction(Instruction *Instr) {
752    if (End != Instr->getIterator())
753      return false;
754    End++;
755    return true;
756  }
757
758  /// Print the recipe.
759  void print(raw_ostream &O, const Twine &Indent) const override;
760};
761
762/// A recipe for handling GEP instructions.
763class VPWidenGEPRecipe : public VPRecipeBase {
764private:
765  GetElementPtrInst *GEP;
766  bool IsPtrLoopInvariant;
767  SmallBitVector IsIndexLoopInvariant;
768
769public:
770  VPWidenGEPRecipe(GetElementPtrInst *GEP, Loop *OrigLoop)
771      : VPRecipeBase(VPWidenGEPSC), GEP(GEP),
772        IsIndexLoopInvariant(GEP->getNumIndices(), false) {
773    IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
774    for (auto Index : enumerate(GEP->indices()))
775      IsIndexLoopInvariant[Index.index()] =
776          OrigLoop->isLoopInvariant(Index.value().get());
777  }
778  ~VPWidenGEPRecipe() override = default;
779
780  /// Method to support type inquiry through isa, cast, and dyn_cast.
781  static inline bool classof(const VPRecipeBase *V) {
782    return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
783  }
784
785  /// Generate the gep nodes.
786  void execute(VPTransformState &State) override;
787
788  /// Print the recipe.
789  void print(raw_ostream &O, const Twine &Indent) const override;
790};
791
792/// A recipe for handling phi nodes of integer and floating-point inductions,
793/// producing their vector and scalar values.
794class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
795private:
796  PHINode *IV;
797  TruncInst *Trunc;
798
799public:
800  VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr)
801      : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {}
802  ~VPWidenIntOrFpInductionRecipe() override = default;
803
804  /// Method to support type inquiry through isa, cast, and dyn_cast.
805  static inline bool classof(const VPRecipeBase *V) {
806    return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
807  }
808
809  /// Generate the vectorized and scalarized versions of the phi node as
810  /// needed by their users.
811  void execute(VPTransformState &State) override;
812
813  /// Print the recipe.
814  void print(raw_ostream &O, const Twine &Indent) const override;
815};
816
817/// A recipe for handling all phi nodes except for integer and FP inductions.
818class VPWidenPHIRecipe : public VPRecipeBase {
819private:
820  PHINode *Phi;
821
822public:
823  VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {}
824  ~VPWidenPHIRecipe() override = default;
825
826  /// Method to support type inquiry through isa, cast, and dyn_cast.
827  static inline bool classof(const VPRecipeBase *V) {
828    return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC;
829  }
830
831  /// Generate the phi/select nodes.
832  void execute(VPTransformState &State) override;
833
834  /// Print the recipe.
835  void print(raw_ostream &O, const Twine &Indent) const override;
836};
837
838/// A recipe for vectorizing a phi-node as a sequence of mask-based select
839/// instructions.
840class VPBlendRecipe : public VPRecipeBase {
841private:
842  PHINode *Phi;
843
844  /// The blend operation is a User of a mask, if not null.
845  std::unique_ptr<VPUser> User;
846
847public:
848  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Masks)
849      : VPRecipeBase(VPBlendSC), Phi(Phi) {
850    assert((Phi->getNumIncomingValues() == 1 ||
851            Phi->getNumIncomingValues() == Masks.size()) &&
852           "Expected the same number of incoming values and masks");
853    if (!Masks.empty())
854      User.reset(new VPUser(Masks));
855  }
856
857  /// Method to support type inquiry through isa, cast, and dyn_cast.
858  static inline bool classof(const VPRecipeBase *V) {
859    return V->getVPRecipeID() == VPRecipeBase::VPBlendSC;
860  }
861
862  /// Generate the phi/select nodes.
863  void execute(VPTransformState &State) override;
864
865  /// Print the recipe.
866  void print(raw_ostream &O, const Twine &Indent) const override;
867};
868
869/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
870/// or stores into one wide load/store and shuffles.
871class VPInterleaveRecipe : public VPRecipeBase {
872private:
873  const InterleaveGroup<Instruction> *IG;
874  VPUser User;
875
876public:
877  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
878                     VPValue *Mask)
879      : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) {
880    if (Mask)
881      User.addOperand(Mask);
882  }
883  ~VPInterleaveRecipe() override = default;
884
885  /// Method to support type inquiry through isa, cast, and dyn_cast.
886  static inline bool classof(const VPRecipeBase *V) {
887    return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
888  }
889
890  /// Return the address accessed by this recipe.
891  VPValue *getAddr() const {
892    return User.getOperand(0); // Address is the 1st, mandatory operand.
893  }
894
895  /// Return the mask used by this recipe. Note that a full mask is represented
896  /// by a nullptr.
897  VPValue *getMask() const {
898    // Mask is optional and therefore the last, currently 2nd operand.
899    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
900  }
901
902  /// Generate the wide load or store, and shuffles.
903  void execute(VPTransformState &State) override;
904
905  /// Print the recipe.
906  void print(raw_ostream &O, const Twine &Indent) const override;
907
908  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
909};
910
911/// VPReplicateRecipe replicates a given instruction producing multiple scalar
912/// copies of the original scalar type, one per lane, instead of producing a
913/// single copy of widened type for all lanes. If the instruction is known to be
914/// uniform only one copy, per lane zero, will be generated.
915class VPReplicateRecipe : public VPRecipeBase {
916private:
917  /// The instruction being replicated.
918  Instruction *Ingredient;
919
920  /// Indicator if only a single replica per lane is needed.
921  bool IsUniform;
922
923  /// Indicator if the replicas are also predicated.
924  bool IsPredicated;
925
926  /// Indicator if the scalar values should also be packed into a vector.
927  bool AlsoPack;
928
929public:
930  VPReplicateRecipe(Instruction *I, bool IsUniform, bool IsPredicated = false)
931      : VPRecipeBase(VPReplicateSC), Ingredient(I), IsUniform(IsUniform),
932        IsPredicated(IsPredicated) {
933    // Retain the previous behavior of predicateInstructions(), where an
934    // insert-element of a predicated instruction got hoisted into the
935    // predicated basic block iff it was its only user. This is achieved by
936    // having predicated instructions also pack their values into a vector by
937    // default unless they have a replicated user which uses their scalar value.
938    AlsoPack = IsPredicated && !I->use_empty();
939  }
940
941  ~VPReplicateRecipe() override = default;
942
943  /// Method to support type inquiry through isa, cast, and dyn_cast.
944  static inline bool classof(const VPRecipeBase *V) {
945    return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC;
946  }
947
948  /// Generate replicas of the desired Ingredient. Replicas will be generated
949  /// for all parts and lanes unless a specific part and lane are specified in
950  /// the \p State.
951  void execute(VPTransformState &State) override;
952
953  void setAlsoPack(bool Pack) { AlsoPack = Pack; }
954
955  /// Print the recipe.
956  void print(raw_ostream &O, const Twine &Indent) const override;
957};
958
959/// A recipe for generating conditional branches on the bits of a mask.
960class VPBranchOnMaskRecipe : public VPRecipeBase {
961private:
962  std::unique_ptr<VPUser> User;
963
964public:
965  VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
966    if (BlockInMask) // nullptr means all-one mask.
967      User.reset(new VPUser({BlockInMask}));
968  }
969
970  /// Method to support type inquiry through isa, cast, and dyn_cast.
971  static inline bool classof(const VPRecipeBase *V) {
972    return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC;
973  }
974
975  /// Generate the extraction of the appropriate bit from the block mask and the
976  /// conditional branch.
977  void execute(VPTransformState &State) override;
978
979  /// Print the recipe.
980  void print(raw_ostream &O, const Twine &Indent) const override {
981    O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
982    if (User)
983      O << *User->getOperand(0);
984    else
985      O << " All-One";
986    O << "\\l\"";
987  }
988};
989
990/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
991/// control converges back from a Branch-on-Mask. The phi nodes are needed in
992/// order to merge values that are set under such a branch and feed their uses.
993/// The phi nodes can be scalar or vector depending on the users of the value.
994/// This recipe works in concert with VPBranchOnMaskRecipe.
995class VPPredInstPHIRecipe : public VPRecipeBase {
996private:
997  Instruction *PredInst;
998
999public:
1000  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
1001  /// nodes after merging back from a Branch-on-Mask.
1002  VPPredInstPHIRecipe(Instruction *PredInst)
1003      : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {}
1004  ~VPPredInstPHIRecipe() override = default;
1005
1006  /// Method to support type inquiry through isa, cast, and dyn_cast.
1007  static inline bool classof(const VPRecipeBase *V) {
1008    return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC;
1009  }
1010
1011  /// Generates phi nodes for live-outs as needed to retain SSA form.
1012  void execute(VPTransformState &State) override;
1013
1014  /// Print the recipe.
1015  void print(raw_ostream &O, const Twine &Indent) const override;
1016};
1017
1018/// A Recipe for widening load/store operations.
1019/// TODO: We currently execute only per-part unless a specific instance is
1020/// provided.
1021class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
1022private:
1023  Instruction &Instr;
1024  VPUser User;
1025
1026public:
1027  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Addr,
1028                                 VPValue *Mask)
1029      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr), User({Addr}) {
1030    if (Mask)
1031      User.addOperand(Mask);
1032  }
1033
1034  /// Method to support type inquiry through isa, cast, and dyn_cast.
1035  static inline bool classof(const VPRecipeBase *V) {
1036    return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
1037  }
1038
1039  /// Return the address accessed by this recipe.
1040  VPValue *getAddr() const {
1041    return User.getOperand(0); // Address is the 1st, mandatory operand.
1042  }
1043
1044  /// Return the mask used by this recipe. Note that a full mask is represented
1045  /// by a nullptr.
1046  VPValue *getMask() const {
1047    // Mask is optional and therefore the last, currently 2nd operand.
1048    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
1049  }
1050
1051  /// Generate the wide load/store.
1052  void execute(VPTransformState &State) override;
1053
1054  /// Print the recipe.
1055  void print(raw_ostream &O, const Twine &Indent) const override;
1056};
1057
1058/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
1059/// holds a sequence of zero or more VPRecipe's each representing a sequence of
1060/// output IR instructions.
1061class VPBasicBlock : public VPBlockBase {
1062public:
1063  using RecipeListTy = iplist<VPRecipeBase>;
1064
1065private:
1066  /// The VPRecipes held in the order of output instructions to generate.
1067  RecipeListTy Recipes;
1068
1069public:
1070  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
1071      : VPBlockBase(VPBasicBlockSC, Name.str()) {
1072    if (Recipe)
1073      appendRecipe(Recipe);
1074  }
1075
1076  ~VPBasicBlock() override { Recipes.clear(); }
1077
1078  /// Instruction iterators...
1079  using iterator = RecipeListTy::iterator;
1080  using const_iterator = RecipeListTy::const_iterator;
1081  using reverse_iterator = RecipeListTy::reverse_iterator;
1082  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
1083
1084  //===--------------------------------------------------------------------===//
1085  /// Recipe iterator methods
1086  ///
1087  inline iterator begin() { return Recipes.begin(); }
1088  inline const_iterator begin() const { return Recipes.begin(); }
1089  inline iterator end() { return Recipes.end(); }
1090  inline const_iterator end() const { return Recipes.end(); }
1091
1092  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
1093  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
1094  inline reverse_iterator rend() { return Recipes.rend(); }
1095  inline const_reverse_iterator rend() const { return Recipes.rend(); }
1096
1097  inline size_t size() const { return Recipes.size(); }
1098  inline bool empty() const { return Recipes.empty(); }
1099  inline const VPRecipeBase &front() const { return Recipes.front(); }
1100  inline VPRecipeBase &front() { return Recipes.front(); }
1101  inline const VPRecipeBase &back() const { return Recipes.back(); }
1102  inline VPRecipeBase &back() { return Recipes.back(); }
1103
1104  /// Returns a reference to the list of recipes.
1105  RecipeListTy &getRecipeList() { return Recipes; }
1106
1107  /// Returns a pointer to a member of the recipe list.
1108  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
1109    return &VPBasicBlock::Recipes;
1110  }
1111
1112  /// Method to support type inquiry through isa, cast, and dyn_cast.
1113  static inline bool classof(const VPBlockBase *V) {
1114    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
1115  }
1116
1117  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
1118    assert(Recipe && "No recipe to append.");
1119    assert(!Recipe->Parent && "Recipe already in VPlan");
1120    Recipe->Parent = this;
1121    Recipes.insert(InsertPt, Recipe);
1122  }
1123
1124  /// Augment the existing recipes of a VPBasicBlock with an additional
1125  /// \p Recipe as the last recipe.
1126  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
1127
1128  /// The method which generates the output IR instructions that correspond to
1129  /// this VPBasicBlock, thereby "executing" the VPlan.
1130  void execute(struct VPTransformState *State) override;
1131
1132private:
1133  /// Create an IR BasicBlock to hold the output instructions generated by this
1134  /// VPBasicBlock, and return it. Update the CFGState accordingly.
1135  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
1136};
1137
1138/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
1139/// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
1140/// A VPRegionBlock may indicate that its contents are to be replicated several
1141/// times. This is designed to support predicated scalarization, in which a
1142/// scalar if-then code structure needs to be generated VF * UF times. Having
1143/// this replication indicator helps to keep a single model for multiple
1144/// candidate VF's. The actual replication takes place only once the desired VF
1145/// and UF have been determined.
1146class VPRegionBlock : public VPBlockBase {
1147private:
1148  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1149  VPBlockBase *Entry;
1150
1151  /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1152  VPBlockBase *Exit;
1153
1154  /// An indicator whether this region is to generate multiple replicated
1155  /// instances of output IR corresponding to its VPBlockBases.
1156  bool IsReplicator;
1157
1158public:
1159  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
1160                const std::string &Name = "", bool IsReplicator = false)
1161      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
1162        IsReplicator(IsReplicator) {
1163    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
1164    assert(Exit->getSuccessors().empty() && "Exit block has successors.");
1165    Entry->setParent(this);
1166    Exit->setParent(this);
1167  }
1168  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
1169      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
1170        IsReplicator(IsReplicator) {}
1171
1172  ~VPRegionBlock() override {
1173    if (Entry)
1174      deleteCFG(Entry);
1175  }
1176
1177  /// Method to support type inquiry through isa, cast, and dyn_cast.
1178  static inline bool classof(const VPBlockBase *V) {
1179    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
1180  }
1181
1182  const VPBlockBase *getEntry() const { return Entry; }
1183  VPBlockBase *getEntry() { return Entry; }
1184
1185  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1186  /// EntryBlock must have no predecessors.
1187  void setEntry(VPBlockBase *EntryBlock) {
1188    assert(EntryBlock->getPredecessors().empty() &&
1189           "Entry block cannot have predecessors.");
1190    Entry = EntryBlock;
1191    EntryBlock->setParent(this);
1192  }
1193
1194  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
1195  // specific interface of llvm::Function, instead of using
1196  // GraphTraints::getEntryNode. We should add a new template parameter to
1197  // DominatorTreeBase representing the Graph type.
1198  VPBlockBase &front() const { return *Entry; }
1199
1200  const VPBlockBase *getExit() const { return Exit; }
1201  VPBlockBase *getExit() { return Exit; }
1202
1203  /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
1204  /// ExitBlock must have no successors.
1205  void setExit(VPBlockBase *ExitBlock) {
1206    assert(ExitBlock->getSuccessors().empty() &&
1207           "Exit block cannot have successors.");
1208    Exit = ExitBlock;
1209    ExitBlock->setParent(this);
1210  }
1211
1212  /// An indicator whether this region is to generate multiple replicated
1213  /// instances of output IR corresponding to its VPBlockBases.
1214  bool isReplicator() const { return IsReplicator; }
1215
1216  /// The method which generates the output IR instructions that correspond to
1217  /// this VPRegionBlock, thereby "executing" the VPlan.
1218  void execute(struct VPTransformState *State) override;
1219};
1220
1221//===----------------------------------------------------------------------===//
1222// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     //
1223//===----------------------------------------------------------------------===//
1224
1225// The following set of template specializations implement GraphTraits to treat
1226// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
1227// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the
1228// VPBlockBase is a VPRegionBlock, this specialization provides access to its
1229// successors/predecessors but not to the blocks inside the region.
1230
1231template <> struct GraphTraits<VPBlockBase *> {
1232  using NodeRef = VPBlockBase *;
1233  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1234
1235  static NodeRef getEntryNode(NodeRef N) { return N; }
1236
1237  static inline ChildIteratorType child_begin(NodeRef N) {
1238    return N->getSuccessors().begin();
1239  }
1240
1241  static inline ChildIteratorType child_end(NodeRef N) {
1242    return N->getSuccessors().end();
1243  }
1244};
1245
1246template <> struct GraphTraits<const VPBlockBase *> {
1247  using NodeRef = const VPBlockBase *;
1248  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
1249
1250  static NodeRef getEntryNode(NodeRef N) { return N; }
1251
1252  static inline ChildIteratorType child_begin(NodeRef N) {
1253    return N->getSuccessors().begin();
1254  }
1255
1256  static inline ChildIteratorType child_end(NodeRef N) {
1257    return N->getSuccessors().end();
1258  }
1259};
1260
1261// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
1262// of successors for the inverse traversal.
1263template <> struct GraphTraits<Inverse<VPBlockBase *>> {
1264  using NodeRef = VPBlockBase *;
1265  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1266
1267  static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
1268
1269  static inline ChildIteratorType child_begin(NodeRef N) {
1270    return N->getPredecessors().begin();
1271  }
1272
1273  static inline ChildIteratorType child_end(NodeRef N) {
1274    return N->getPredecessors().end();
1275  }
1276};
1277
1278// The following set of template specializations implement GraphTraits to
1279// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
1280// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
1281// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
1282// there won't be automatic recursion into other VPBlockBases that turn to be
1283// VPRegionBlocks.
1284
1285template <>
1286struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
1287  using GraphRef = VPRegionBlock *;
1288  using nodes_iterator = df_iterator<NodeRef>;
1289
1290  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1291
1292  static nodes_iterator nodes_begin(GraphRef N) {
1293    return nodes_iterator::begin(N->getEntry());
1294  }
1295
1296  static nodes_iterator nodes_end(GraphRef N) {
1297    // df_iterator::end() returns an empty iterator so the node used doesn't
1298    // matter.
1299    return nodes_iterator::end(N);
1300  }
1301};
1302
1303template <>
1304struct GraphTraits<const VPRegionBlock *>
1305    : public GraphTraits<const VPBlockBase *> {
1306  using GraphRef = const VPRegionBlock *;
1307  using nodes_iterator = df_iterator<NodeRef>;
1308
1309  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1310
1311  static nodes_iterator nodes_begin(GraphRef N) {
1312    return nodes_iterator::begin(N->getEntry());
1313  }
1314
1315  static nodes_iterator nodes_end(GraphRef N) {
1316    // df_iterator::end() returns an empty iterator so the node used doesn't
1317    // matter.
1318    return nodes_iterator::end(N);
1319  }
1320};
1321
1322template <>
1323struct GraphTraits<Inverse<VPRegionBlock *>>
1324    : public GraphTraits<Inverse<VPBlockBase *>> {
1325  using GraphRef = VPRegionBlock *;
1326  using nodes_iterator = df_iterator<NodeRef>;
1327
1328  static NodeRef getEntryNode(Inverse<GraphRef> N) {
1329    return N.Graph->getExit();
1330  }
1331
1332  static nodes_iterator nodes_begin(GraphRef N) {
1333    return nodes_iterator::begin(N->getExit());
1334  }
1335
1336  static nodes_iterator nodes_end(GraphRef N) {
1337    // df_iterator::end() returns an empty iterator so the node used doesn't
1338    // matter.
1339    return nodes_iterator::end(N);
1340  }
1341};
1342
1343/// VPlan models a candidate for vectorization, encoding various decisions take
1344/// to produce efficient output IR, including which branches, basic-blocks and
1345/// output IR instructions to generate, and their cost. VPlan holds a
1346/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
1347/// VPBlock.
1348class VPlan {
1349  friend class VPlanPrinter;
1350
1351private:
1352  /// Hold the single entry to the Hierarchical CFG of the VPlan.
1353  VPBlockBase *Entry;
1354
1355  /// Holds the VFs applicable to this VPlan.
1356  SmallSet<unsigned, 2> VFs;
1357
1358  /// Holds the name of the VPlan, for printing.
1359  std::string Name;
1360
1361  /// Holds all the external definitions created for this VPlan.
1362  // TODO: Introduce a specific representation for external definitions in
1363  // VPlan. External definitions must be immutable and hold a pointer to its
1364  // underlying IR that will be used to implement its structural comparison
1365  // (operators '==' and '<').
1366  SmallPtrSet<VPValue *, 16> VPExternalDefs;
1367
1368  /// Represents the backedge taken count of the original loop, for folding
1369  /// the tail.
1370  VPValue *BackedgeTakenCount = nullptr;
1371
1372  /// Holds a mapping between Values and their corresponding VPValue inside
1373  /// VPlan.
1374  Value2VPValueTy Value2VPValue;
1375
1376  /// Holds the VPLoopInfo analysis for this VPlan.
1377  VPLoopInfo VPLInfo;
1378
1379  /// Holds the condition bit values built during VPInstruction to VPRecipe transformation.
1380  SmallVector<VPValue *, 4> VPCBVs;
1381
1382public:
1383  VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
1384
1385  ~VPlan() {
1386    if (Entry)
1387      VPBlockBase::deleteCFG(Entry);
1388    for (auto &MapEntry : Value2VPValue)
1389      if (MapEntry.second != BackedgeTakenCount)
1390        delete MapEntry.second;
1391    if (BackedgeTakenCount)
1392      delete BackedgeTakenCount; // Delete once, if in Value2VPValue or not.
1393    for (VPValue *Def : VPExternalDefs)
1394      delete Def;
1395    for (VPValue *CBV : VPCBVs)
1396      delete CBV;
1397  }
1398
1399  /// Generate the IR code for this VPlan.
1400  void execute(struct VPTransformState *State);
1401
1402  VPBlockBase *getEntry() { return Entry; }
1403  const VPBlockBase *getEntry() const { return Entry; }
1404
1405  VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; }
1406
1407  /// The backedge taken count of the original loop.
1408  VPValue *getOrCreateBackedgeTakenCount() {
1409    if (!BackedgeTakenCount)
1410      BackedgeTakenCount = new VPValue();
1411    return BackedgeTakenCount;
1412  }
1413
1414  void addVF(unsigned VF) { VFs.insert(VF); }
1415
1416  bool hasVF(unsigned VF) { return VFs.count(VF); }
1417
1418  const std::string &getName() const { return Name; }
1419
1420  void setName(const Twine &newName) { Name = newName.str(); }
1421
1422  /// Add \p VPVal to the pool of external definitions if it's not already
1423  /// in the pool.
1424  void addExternalDef(VPValue *VPVal) {
1425    VPExternalDefs.insert(VPVal);
1426  }
1427
1428  /// Add \p CBV to the vector of condition bit values.
1429  void addCBV(VPValue *CBV) {
1430    VPCBVs.push_back(CBV);
1431  }
1432
1433  void addVPValue(Value *V) {
1434    assert(V && "Trying to add a null Value to VPlan");
1435    assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1436    Value2VPValue[V] = new VPValue();
1437  }
1438
1439  VPValue *getVPValue(Value *V) {
1440    assert(V && "Trying to get the VPValue of a null Value");
1441    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
1442    return Value2VPValue[V];
1443  }
1444
1445  VPValue *getOrAddVPValue(Value *V) {
1446    assert(V && "Trying to get or add the VPValue of a null Value");
1447    if (!Value2VPValue.count(V))
1448      addVPValue(V);
1449    return getVPValue(V);
1450  }
1451
1452  /// Return the VPLoopInfo analysis for this VPlan.
1453  VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
1454  const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
1455
1456  /// Dump the plan to stderr (for debugging).
1457  void dump() const;
1458
1459private:
1460  /// Add to the given dominator tree the header block and every new basic block
1461  /// that was created between it and the latch block, inclusive.
1462  static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
1463                                  BasicBlock *LoopPreHeaderBB,
1464                                  BasicBlock *LoopExitBB);
1465};
1466
1467/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
1468/// indented and follows the dot format.
1469class VPlanPrinter {
1470  friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
1471  friend inline raw_ostream &operator<<(raw_ostream &OS,
1472                                        const struct VPlanIngredient &I);
1473
1474private:
1475  raw_ostream &OS;
1476  const VPlan &Plan;
1477  unsigned Depth = 0;
1478  unsigned TabWidth = 2;
1479  std::string Indent;
1480  unsigned BID = 0;
1481  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
1482
1483  VPlanPrinter(raw_ostream &O, const VPlan &P) : OS(O), Plan(P) {}
1484
1485  /// Handle indentation.
1486  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
1487
1488  /// Print a given \p Block of the Plan.
1489  void dumpBlock(const VPBlockBase *Block);
1490
1491  /// Print the information related to the CFG edges going out of a given
1492  /// \p Block, followed by printing the successor blocks themselves.
1493  void dumpEdges(const VPBlockBase *Block);
1494
1495  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
1496  /// its successor blocks.
1497  void dumpBasicBlock(const VPBasicBlock *BasicBlock);
1498
1499  /// Print a given \p Region of the Plan.
1500  void dumpRegion(const VPRegionBlock *Region);
1501
1502  unsigned getOrCreateBID(const VPBlockBase *Block) {
1503    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
1504  }
1505
1506  const Twine getOrCreateName(const VPBlockBase *Block);
1507
1508  const Twine getUID(const VPBlockBase *Block);
1509
1510  /// Print the information related to a CFG edge between two VPBlockBases.
1511  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
1512                const Twine &Label);
1513
1514  void dump();
1515
1516  static void printAsIngredient(raw_ostream &O, Value *V);
1517};
1518
1519struct VPlanIngredient {
1520  Value *V;
1521
1522  VPlanIngredient(Value *V) : V(V) {}
1523};
1524
1525inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
1526  VPlanPrinter::printAsIngredient(OS, I.V);
1527  return OS;
1528}
1529
1530inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
1531  VPlanPrinter Printer(OS, Plan);
1532  Printer.dump();
1533  return OS;
1534}
1535
1536//===----------------------------------------------------------------------===//
1537// VPlan Utilities
1538//===----------------------------------------------------------------------===//
1539
1540/// Class that provides utilities for VPBlockBases in VPlan.
1541class VPBlockUtils {
1542public:
1543  VPBlockUtils() = delete;
1544
1545  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
1546  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
1547  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
1548  /// has more than one successor, its conditional bit is propagated to \p
1549  /// NewBlock. \p NewBlock must have neither successors nor predecessors.
1550  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
1551    assert(NewBlock->getSuccessors().empty() &&
1552           "Can't insert new block with successors.");
1553    // TODO: move successors from BlockPtr to NewBlock when this functionality
1554    // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
1555    // already has successors.
1556    BlockPtr->setOneSuccessor(NewBlock);
1557    NewBlock->setPredecessors({BlockPtr});
1558    NewBlock->setParent(BlockPtr->getParent());
1559  }
1560
1561  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
1562  /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
1563  /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
1564  /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
1565  /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
1566  /// must have neither successors nor predecessors.
1567  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
1568                                   VPValue *Condition, VPBlockBase *BlockPtr) {
1569    assert(IfTrue->getSuccessors().empty() &&
1570           "Can't insert IfTrue with successors.");
1571    assert(IfFalse->getSuccessors().empty() &&
1572           "Can't insert IfFalse with successors.");
1573    BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
1574    IfTrue->setPredecessors({BlockPtr});
1575    IfFalse->setPredecessors({BlockPtr});
1576    IfTrue->setParent(BlockPtr->getParent());
1577    IfFalse->setParent(BlockPtr->getParent());
1578  }
1579
1580  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
1581  /// the successors of \p From and \p From to the predecessors of \p To. Both
1582  /// VPBlockBases must have the same parent, which can be null. Both
1583  /// VPBlockBases can be already connected to other VPBlockBases.
1584  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
1585    assert((From->getParent() == To->getParent()) &&
1586           "Can't connect two block with different parents");
1587    assert(From->getNumSuccessors() < 2 &&
1588           "Blocks can't have more than two successors.");
1589    From->appendSuccessor(To);
1590    To->appendPredecessor(From);
1591  }
1592
1593  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
1594  /// from the successors of \p From and \p From from the predecessors of \p To.
1595  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
1596    assert(To && "Successor to disconnect is null.");
1597    From->removeSuccessor(To);
1598    To->removePredecessor(From);
1599  }
1600
1601  /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1602  static bool isBackEdge(const VPBlockBase *FromBlock,
1603                         const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1604    assert(FromBlock->getParent() == ToBlock->getParent() &&
1605           FromBlock->getParent() && "Must be in same region");
1606    const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1607    const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1608    if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1609      return false;
1610
1611    // A back-edge is a branch from the loop latch to its header.
1612    return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
1613  }
1614
1615  /// Returns true if \p Block is a loop latch
1616  static bool blockIsLoopLatch(const VPBlockBase *Block,
1617                               const VPLoopInfo *VPLInfo) {
1618    if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1619      return ParentVPL->isLoopLatch(Block);
1620
1621    return false;
1622  }
1623
1624  /// Count and return the number of succesors of \p PredBlock excluding any
1625  /// backedges.
1626  static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
1627                                      VPLoopInfo *VPLI) {
1628    unsigned Count = 0;
1629    for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
1630      if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
1631        Count++;
1632    }
1633    return Count;
1634  }
1635};
1636
1637class VPInterleavedAccessInfo {
1638private:
1639  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
1640      InterleaveGroupMap;
1641
1642  /// Type for mapping of instruction based interleave groups to VPInstruction
1643  /// interleave groups
1644  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
1645                             InterleaveGroup<VPInstruction> *>;
1646
1647  /// Recursively \p Region and populate VPlan based interleave groups based on
1648  /// \p IAI.
1649  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
1650                   InterleavedAccessInfo &IAI);
1651  /// Recursively traverse \p Block and populate VPlan based interleave groups
1652  /// based on \p IAI.
1653  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1654                  InterleavedAccessInfo &IAI);
1655
1656public:
1657  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
1658
1659  ~VPInterleavedAccessInfo() {
1660    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
1661    // Avoid releasing a pointer twice.
1662    for (auto &I : InterleaveGroupMap)
1663      DelSet.insert(I.second);
1664    for (auto *Ptr : DelSet)
1665      delete Ptr;
1666  }
1667
1668  /// Get the interleave group that \p Instr belongs to.
1669  ///
1670  /// \returns nullptr if doesn't have such group.
1671  InterleaveGroup<VPInstruction> *
1672  getInterleaveGroup(VPInstruction *Instr) const {
1673    if (InterleaveGroupMap.count(Instr))
1674      return InterleaveGroupMap.find(Instr)->second;
1675    return nullptr;
1676  }
1677};
1678
1679/// Class that maps (parts of) an existing VPlan to trees of combined
1680/// VPInstructions.
1681class VPlanSlp {
1682private:
1683  enum class OpMode { Failed, Load, Opcode };
1684
1685  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
1686  /// DenseMap keys.
1687  struct BundleDenseMapInfo {
1688    static SmallVector<VPValue *, 4> getEmptyKey() {
1689      return {reinterpret_cast<VPValue *>(-1)};
1690    }
1691
1692    static SmallVector<VPValue *, 4> getTombstoneKey() {
1693      return {reinterpret_cast<VPValue *>(-2)};
1694    }
1695
1696    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
1697      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1698    }
1699
1700    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
1701                        const SmallVector<VPValue *, 4> &RHS) {
1702      return LHS == RHS;
1703    }
1704  };
1705
1706  /// Mapping of values in the original VPlan to a combined VPInstruction.
1707  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
1708      BundleToCombined;
1709
1710  VPInterleavedAccessInfo &IAI;
1711
1712  /// Basic block to operate on. For now, only instructions in a single BB are
1713  /// considered.
1714  const VPBasicBlock &BB;
1715
1716  /// Indicates whether we managed to combine all visited instructions or not.
1717  bool CompletelySLP = true;
1718
1719  /// Width of the widest combined bundle in bits.
1720  unsigned WidestBundleBits = 0;
1721
1722  using MultiNodeOpTy =
1723      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
1724
1725  // Input operand bundles for the current multi node. Each multi node operand
1726  // bundle contains values not matching the multi node's opcode. They will
1727  // be reordered in reorderMultiNodeOps, once we completed building a
1728  // multi node.
1729  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
1730
1731  /// Indicates whether we are building a multi node currently.
1732  bool MultiNodeActive = false;
1733
1734  /// Check if we can vectorize Operands together.
1735  bool areVectorizable(ArrayRef<VPValue *> Operands) const;
1736
1737  /// Add combined instruction \p New for the bundle \p Operands.
1738  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
1739
1740  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
1741  VPInstruction *markFailed();
1742
1743  /// Reorder operands in the multi node to maximize sequential memory access
1744  /// and commutative operations.
1745  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
1746
1747  /// Choose the best candidate to use for the lane after \p Last. The set of
1748  /// candidates to choose from are values with an opcode matching \p Last's
1749  /// or loads consecutive to \p Last.
1750  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
1751                                       SmallPtrSetImpl<VPValue *> &Candidates,
1752                                       VPInterleavedAccessInfo &IAI);
1753
1754  /// Print bundle \p Values to dbgs().
1755  void dumpBundle(ArrayRef<VPValue *> Values);
1756
1757public:
1758  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
1759
1760  ~VPlanSlp() {
1761    for (auto &KV : BundleToCombined)
1762      delete KV.second;
1763  }
1764
1765  /// Tries to build an SLP tree rooted at \p Operands and returns a
1766  /// VPInstruction combining \p Operands, if they can be combined.
1767  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
1768
1769  /// Return the width of the widest combined bundle in bits.
1770  unsigned getWidestBundleBits() const { return WidestBundleBits; }
1771
1772  /// Return true if all visited instruction can be combined.
1773  bool isCompletelySLP() const { return CompletelySLP; }
1774};
1775} // end namespace llvm
1776
1777#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
1778