1//===- LoopVectorize.h ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
10// and generates target-independent LLVM-IR.
11// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
12// of instructions in order to estimate the profitability of vectorization.
13//
14// The loop vectorizer combines consecutive loop iterations into a single
15// 'wide' iteration. After this transformation the index is incremented
16// by the SIMD vector width, and not by one.
17//
18// This pass has four parts:
19// 1. The main loop pass that drives the different parts.
20// 2. LoopVectorizationLegality - A unit that checks for the legality
21//    of the vectorization.
22// 3. InnerLoopVectorizer - A unit that performs the actual
23//    widening of instructions.
24// 4. LoopVectorizationCostModel - A unit that checks for the profitability
25//    of vectorization. It decides on the optimal vector width, which
26//    can be one, if vectorization is not profitable.
27//
// There is a development effort going on to migrate the loop vectorizer to
// the VPlan infrastructure and to introduce outer loop vectorization support
// (see docs/VectorizationPlan.rst and
// http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this
// purpose, we temporarily introduced the VPlan-native vectorization path: an
// alternative vectorization path that is natively implemented on top of the
// VPlan infrastructure. See EnableVPlanNativePath for enabling.
35//
36//===----------------------------------------------------------------------===//
37//
38// The reduction-variable vectorization is based on the paper:
39//  D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
40//
41// Variable uniformity checks are inspired by:
42//  Karrenberg, R. and Hack, S. Whole Function Vectorization.
43//
44// The interleaved access vectorization is based on the paper:
45//  Dorit Nuzman, Ira Rosen and Ayal Zaks.  Auto-Vectorization of Interleaved
46//  Data for SIMD
47//
48// Other ideas/concepts are from:
49//  A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
50//
51//  S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua.  An Evaluation of
52//  Vectorizing Compilers.
53//
54//===----------------------------------------------------------------------===//
55
56#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
57#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
58
59#include "llvm/IR/PassManager.h"
60#include "llvm/Support/CommandLine.h"
61#include <functional>
62
63namespace llvm {
64
65class AssumptionCache;
66class BlockFrequencyInfo;
67class DemandedBits;
68class DominatorTree;
69class Function;
70class Loop;
71class LoopAccessInfoManager;
72class LoopInfo;
73class OptimizationRemarkEmitter;
74class ProfileSummaryInfo;
75class ScalarEvolution;
76class TargetLibraryInfo;
77class TargetTransformInfo;
78
79extern cl::opt<bool> EnableLoopInterleaving;
80extern cl::opt<bool> EnableLoopVectorization;
81
82/// A marker to determine if extra passes after loop vectorization should be
83/// run.
84struct ShouldRunExtraVectorPasses
85    : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> {
86  static AnalysisKey Key;
87  struct Result {
88    bool invalidate(Function &F, const PreservedAnalyses &PA,
89                    FunctionAnalysisManager::Invalidator &) {
90      // Check whether the analysis has been explicitly invalidated. Otherwise,
91      // it remains preserved.
92      auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>();
93      return !PAC.preservedWhenStateless();
94    }
95  };
96
97  Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
98};
99
100/// A pass manager to run a set of extra function simplification passes after
101/// vectorization, if requested. LoopVectorize caches the
102/// ShouldRunExtraVectorPasses analysis to request extra simplifications, if
103/// they could be beneficial.
104struct ExtraVectorPassManager : public FunctionPassManager {
105  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
106    auto PA = PreservedAnalyses::all();
107    if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F))
108      PA.intersect(FunctionPassManager::run(F, AM));
109    PA.abandon<ShouldRunExtraVectorPasses>();
110    return PA;
111  }
112};
113
/// Options controlling which loops the loop vectorizer considers for
/// interleaving and vectorization.
struct LoopVectorizeOptions {
  /// If false, consider all loops for interleaving.
  /// If true, only loops that explicitly request interleaving are considered.
  bool InterleaveOnlyWhenForced;

  /// If false, consider all loops for vectorization.
  /// If true, only loops that explicitly request vectorization are considered.
  bool VectorizeOnlyWhenForced;

  /// Default construction leaves both "only when forced" flags cleared, which
  /// corresponds to EnableLoopInterleaving = true and
  /// EnableLoopVectorization = true. The interleaving default is therefore
  /// consistent with the cl::opt flag, while the vectorization default is not.
  /// FIXME: The default for EnableLoopVectorization in the cl::opt should be
  /// set to true, and the corresponding change to account for this be made in
  /// opt.cpp. The initializations below will become:
  /// InterleaveOnlyWhenForced(!EnableLoopInterleaving)
  /// VectorizeOnlyWhenForced(!EnableLoopVectorization).
  LoopVectorizeOptions() : LoopVectorizeOptions(false, false) {}

  /// Constructs the options with both flags given explicitly.
  LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
                       bool VectorizeOnlyWhenForced)
      : InterleaveOnlyWhenForced{InterleaveOnlyWhenForced},
        VectorizeOnlyWhenForced{VectorizeOnlyWhenForced} {}

  /// Sets InterleaveOnlyWhenForced to \p Value and returns this object so
  /// that setter calls can be chained.
  LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
    this->InterleaveOnlyWhenForced = Value;
    return *this;
  }

  /// Sets VectorizeOnlyWhenForced to \p Value and returns this object so
  /// that setter calls can be chained.
  LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
    this->VectorizeOnlyWhenForced = Value;
    return *this;
  }
};
149
/// Records which kinds of changes were made.
struct LoopVectorizeResult {
  /// True if any change was made.
  bool MadeAnyChange;
  /// True if the control-flow graph was changed.
  bool MadeCFGChange;

  /// Stores both flags exactly as given.
  LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange)
      : MadeAnyChange{MadeAnyChange}, MadeCFGChange{MadeCFGChange} {}
};
158
159/// The LoopVectorize Pass.
160struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
161private:
162  /// If false, consider all loops for interleaving.
163  /// If true, only loops that explicitly request interleaving are considered.
164  bool InterleaveOnlyWhenForced;
165
166  /// If false, consider all loops for vectorization.
167  /// If true, only loops that explicitly request vectorization are considered.
168  bool VectorizeOnlyWhenForced;
169
170public:
171  LoopVectorizePass(LoopVectorizeOptions Opts = {});
172
173  ScalarEvolution *SE;
174  LoopInfo *LI;
175  TargetTransformInfo *TTI;
176  DominatorTree *DT;
177  BlockFrequencyInfo *BFI;
178  TargetLibraryInfo *TLI;
179  DemandedBits *DB;
180  AssumptionCache *AC;
181  LoopAccessInfoManager *LAIs;
182  OptimizationRemarkEmitter *ORE;
183  ProfileSummaryInfo *PSI;
184
185  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
186  void printPipeline(raw_ostream &OS,
187                     function_ref<StringRef(StringRef)> MapClassName2PassName);
188
189  // Shim for old PM.
190  LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_,
191                              TargetTransformInfo &TTI_, DominatorTree &DT_,
192                              BlockFrequencyInfo *BFI_, TargetLibraryInfo *TLI_,
193                              DemandedBits &DB_, AssumptionCache &AC_,
194                              LoopAccessInfoManager &LAIs_,
195                              OptimizationRemarkEmitter &ORE_,
196                              ProfileSummaryInfo *PSI_);
197
198  bool processLoop(Loop *L);
199};
200
/// Reports a vectorization failure: print \p DebugMsg for debugging
/// purposes along with the corresponding optimization remark, identified by
/// \p ORETag and carrying the message \p OREMsg.
/// If \p I is passed, it is an instruction that prevents vectorization.
/// Otherwise, the loop \p TheLoop is used for the location of the remark.
205void reportVectorizationFailure(const StringRef DebugMsg,
206    const StringRef OREMsg, const StringRef ORETag,
207    OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);
208
/// Reports an informative message: print \p OREMsg for debugging purposes as
/// well as an optimization remark identified by \p ORETag. Uses either \p I as
/// location of the remark, or otherwise \p TheLoop.
212void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag,
213                             OptimizationRemarkEmitter *ORE, Loop *TheLoop,
214                             Instruction *I = nullptr);
215
216} // end namespace llvm
217
218#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
219