1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13///    information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/IR/FMF.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/AtomicOrdering.h"
30#include "llvm/Support/BranchProbability.h"
31#include "llvm/Support/InstructionCost.h"
32#include <functional>
33#include <optional>
34#include <utility>
35
36namespace llvm {
37
38namespace Intrinsic {
39typedef unsigned ID;
40}
41
42class AllocaInst;
43class AssumptionCache;
44class BlockFrequencyInfo;
45class DominatorTree;
46class BranchInst;
47class CallBase;
48class Function;
49class GlobalValue;
50class InstCombiner;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
53class IntrinsicInst;
54class LoadInst;
55class Loop;
56class LoopInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
60class SCEV;
61class ScalarEvolution;
62class StoreInst;
63class SwitchInst;
64class TargetLibraryInfo;
65class Type;
66class User;
67class Value;
68class VPIntrinsic;
69struct KnownBits;
70
71/// Information about a load/store intrinsic defined by the target.
72struct MemIntrinsicInfo {
73  /// This is the pointer that the intrinsic is loading from or storing to.
74  /// If this is non-null, then analysis/optimization passes can assume that
75  /// this intrinsic is functionally equivalent to a load/store from this
76  /// pointer.
77  Value *PtrVal = nullptr;
78
79  // Ordering for atomic operations.
80  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
81
82  // Same Id is set by the target for corresponding load/store intrinsics.
83  unsigned short MatchingId = 0;
84
85  bool ReadMem = false;
86  bool WriteMem = false;
87  bool IsVolatile = false;
88
89  bool isUnordered() const {
90    return (Ordering == AtomicOrdering::NotAtomic ||
91            Ordering == AtomicOrdering::Unordered) &&
92           !IsVolatile;
93  }
94};
95
96/// Attributes of a target dependent hardware loop.
97struct HardwareLoopInfo {
98  HardwareLoopInfo() = delete;
99  HardwareLoopInfo(Loop *L);
100  Loop *L = nullptr;
101  BasicBlock *ExitBlock = nullptr;
102  BranchInst *ExitBranch = nullptr;
103  const SCEV *ExitCount = nullptr;
104  IntegerType *CountType = nullptr;
105  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
106                                  // value in every iteration.
107  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
108                                  // another hardware loop?
109  bool CounterInReg = false;      // Should loop counter be updated in
110                                  // the loop via a phi?
111  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
112                                  // icmp ne zero on the loop counter value and
113                                  // produces an i1 to guard the loop entry.
114  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
115                               DominatorTree &DT, bool ForceNestedLoop = false,
116                               bool ForceHardwareLoopPHI = false);
117  bool canAnalyze(LoopInfo &LI);
118};
119
120class IntrinsicCostAttributes {
121  const IntrinsicInst *II = nullptr;
122  Type *RetTy = nullptr;
123  Intrinsic::ID IID;
124  SmallVector<Type *, 4> ParamTys;
125  SmallVector<const Value *, 4> Arguments;
126  FastMathFlags FMF;
127  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
128  // arguments and the return value will be computed based on types.
129  InstructionCost ScalarizationCost = InstructionCost::getInvalid();
130
131public:
132  IntrinsicCostAttributes(
133      Intrinsic::ID Id, const CallBase &CI,
134      InstructionCost ScalarCost = InstructionCost::getInvalid(),
135      bool TypeBasedOnly = false);
136
137  IntrinsicCostAttributes(
138      Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
139      FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
140      InstructionCost ScalarCost = InstructionCost::getInvalid());
141
142  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
143                          ArrayRef<const Value *> Args);
144
145  IntrinsicCostAttributes(
146      Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
147      ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
148      const IntrinsicInst *I = nullptr,
149      InstructionCost ScalarCost = InstructionCost::getInvalid());
150
151  Intrinsic::ID getID() const { return IID; }
152  const IntrinsicInst *getInst() const { return II; }
153  Type *getReturnType() const { return RetTy; }
154  FastMathFlags getFlags() const { return FMF; }
155  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
156  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
157  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
158
159  bool isTypeBasedOnly() const {
160    return Arguments.empty();
161  }
162
163  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
164};
165
166enum class TailFoldingStyle {
167  /// Don't use tail folding
168  None,
169  /// Use predicate only to mask operations on data in the loop.
170  /// When the VL is not known to be a power-of-2, this method requires a
171  /// runtime overflow check for the i + VL in the loop because it compares the
172  /// scalar induction variable against the tripcount rounded up by VL which may
173  /// overflow. When the VL is a power-of-2, both the increment and uprounded
174  /// tripcount will overflow to 0, which does not require a runtime check
175  /// since the loop is exited when the loop induction variable equals the
176  /// uprounded trip-count, which are both 0.
177  Data,
178  /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
179  /// calculate the mask and instead implements this with a
180  /// splat/stepvector/cmp.
181  /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
182  /// active.lane.mask intrinsic when it is not natively supported?
183  DataWithoutLaneMask,
184  /// Use predicate to control both data and control flow.
185  /// This method always requires a runtime overflow check for the i + VL
186  /// increment inside the loop, because it uses the result direclty in the
187  /// active.lane.mask to calculate the mask for the next iteration. If the
188  /// increment overflows, the mask is no longer correct.
189  DataAndControlFlow,
190  /// Use predicate to control both data and control flow, but modify
191  /// the trip count so that a runtime overflow check can be avoided
192  /// and such that the scalar epilogue loop can always be removed.
193  DataAndControlFlowWithoutRuntimeCheck
194};
195
196struct TailFoldingInfo {
197  TargetLibraryInfo *TLI;
198  LoopVectorizationLegality *LVL;
199  InterleavedAccessInfo *IAI;
200  TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
201                  InterleavedAccessInfo *IAI)
202      : TLI(TLI), LVL(LVL), IAI(IAI) {}
203};
204
205class TargetTransformInfo;
206typedef TargetTransformInfo TTI;
207
208/// This pass provides access to the codegen interfaces that are needed
209/// for IR-level transformations.
210class TargetTransformInfo {
211public:
212  /// Construct a TTI object using a type implementing the \c Concept
213  /// API below.
214  ///
215  /// This is used by targets to construct a TTI wrapping their target-specific
216  /// implementation that encodes appropriate costs for their target.
217  template <typename T> TargetTransformInfo(T Impl);
218
219  /// Construct a baseline TTI object using a minimal implementation of
220  /// the \c Concept API below.
221  ///
222  /// The TTI implementation will reflect the information in the DataLayout
223  /// provided if non-null.
224  explicit TargetTransformInfo(const DataLayout &DL);
225
226  // Provide move semantics.
227  TargetTransformInfo(TargetTransformInfo &&Arg);
228  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
229
230  // We need to define the destructor out-of-line to define our sub-classes
231  // out-of-line.
232  ~TargetTransformInfo();
233
234  /// Handle the invalidation of this information.
235  ///
236  /// When used as a result of \c TargetIRAnalysis this method will be called
237  /// when the function this was computed for changes. When it returns false,
238  /// the information is preserved across those changes.
239  bool invalidate(Function &, const PreservedAnalyses &,
240                  FunctionAnalysisManager::Invalidator &) {
241    // FIXME: We should probably in some way ensure that the subtarget
242    // information for a function hasn't changed.
243    return false;
244  }
245
246  /// \name Generic Target Information
247  /// @{
248
249  /// The kind of cost model.
250  ///
251  /// There are several different cost models that can be customized by the
252  /// target. The normalization of each cost model may be target specific.
253  /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
254  /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
255  enum TargetCostKind {
256    TCK_RecipThroughput, ///< Reciprocal throughput.
257    TCK_Latency,         ///< The latency of instruction.
258    TCK_CodeSize,        ///< Instruction code size.
259    TCK_SizeAndLatency   ///< The weighted sum of size and latency.
260  };
261
262  /// Underlying constants for 'cost' values in this interface.
263  ///
264  /// Many APIs in this interface return a cost. This enum defines the
265  /// fundamental values that should be used to interpret (and produce) those
266  /// costs. The costs are returned as an int rather than a member of this
267  /// enumeration because it is expected that the cost of one IR instruction
268  /// may have a multiplicative factor to it or otherwise won't fit directly
269  /// into the enum. Moreover, it is common to sum or average costs which works
270  /// better as simple integral values. Thus this enum only provides constants.
271  /// Also note that the returned costs are signed integers to make it natural
272  /// to add, subtract, and test with zero (a common boundary condition). It is
273  /// not expected that 2^32 is a realistic cost to be modeling at any point.
274  ///
275  /// Note that these costs should usually reflect the intersection of code-size
276  /// cost and execution cost. A free instruction is typically one that folds
277  /// into another instruction. For example, reg-to-reg moves can often be
278  /// skipped by renaming the registers in the CPU, but they still are encoded
279  /// and thus wouldn't be considered 'free' here.
280  enum TargetCostConstants {
281    TCC_Free = 0,     ///< Expected to fold away in lowering.
282    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
283    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
284  };
285
286  /// Estimate the cost of a GEP operation when lowered.
287  ///
288  /// \p PointeeType is the source element type of the GEP.
289  /// \p Ptr is the base pointer operand.
290  /// \p Operands is the list of indices following the base pointer.
291  ///
292  /// \p AccessType is a hint as to what type of memory might be accessed by
293  /// users of the GEP. getGEPCost will use it to determine if the GEP can be
294  /// folded into the addressing mode of a load/store. If AccessType is null,
295  /// then the resulting target type based off of PointeeType will be used as an
296  /// approximation.
297  InstructionCost
298  getGEPCost(Type *PointeeType, const Value *Ptr,
299             ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
300             TargetCostKind CostKind = TCK_SizeAndLatency) const;
301
302  /// Describe known properties for a set of pointers.
303  struct PointersChainInfo {
304    /// All the GEPs in a set have same base address.
305    unsigned IsSameBaseAddress : 1;
306    /// These properties only valid if SameBaseAddress is set.
307    /// True if all pointers are separated by a unit stride.
308    unsigned IsUnitStride : 1;
309    /// True if distance between any two neigbouring pointers is a known value.
310    unsigned IsKnownStride : 1;
311    unsigned Reserved : 29;
312
313    bool isSameBase() const { return IsSameBaseAddress; }
314    bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
315    bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
316
317    static PointersChainInfo getUnitStride() {
318      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
319              /*IsKnownStride=*/1, 0};
320    }
321    static PointersChainInfo getKnownStride() {
322      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
323              /*IsKnownStride=*/1, 0};
324    }
325    static PointersChainInfo getUnknownStride() {
326      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
327              /*IsKnownStride=*/0, 0};
328    }
329  };
330  static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
331
332  /// Estimate the cost of a chain of pointers (typically pointer operands of a
333  /// chain of loads or stores within same block) operations set when lowered.
334  /// \p AccessTy is the type of the loads/stores that will ultimately use the
335  /// \p Ptrs.
336  InstructionCost
337  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
338                       const PointersChainInfo &Info, Type *AccessTy,
339                       TargetCostKind CostKind = TTI::TCK_RecipThroughput
340
341  ) const;
342
343  /// \returns A value by which our inlining threshold should be multiplied.
344  /// This is primarily used to bump up the inlining threshold wholesale on
345  /// targets where calls are unusually expensive.
346  ///
347  /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
348  /// individual classes of instructions would be better.
349  unsigned getInliningThresholdMultiplier() const;
350
351  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
352  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
353
354  /// \returns A value to be added to the inlining threshold.
355  unsigned adjustInliningThreshold(const CallBase *CB) const;
356
357  /// \returns The cost of having an Alloca in the caller if not inlined, to be
358  /// added to the threshold
359  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
360
361  /// \returns Vector bonus in percent.
362  ///
363  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
364  /// and apply this bonus based on the percentage of vector instructions. A
365  /// bonus is applied if the vector instructions exceed 50% and half that
366  /// amount is applied if it exceeds 10%. Note that these bonuses are some what
367  /// arbitrary and evolved over time by accident as much as because they are
368  /// principled bonuses.
369  /// FIXME: It would be nice to base the bonus values on something more
370  /// scientific. A target may has no bonus on vector instructions.
371  int getInlinerVectorBonusPercent() const;
372
373  /// \return the expected cost of a memcpy, which could e.g. depend on the
374  /// source/destination type and alignment and the number of bytes copied.
375  InstructionCost getMemcpyCost(const Instruction *I) const;
376
377  /// Returns the maximum memset / memcpy size in bytes that still makes it
378  /// profitable to inline the call.
379  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const;
380
381  /// \return The estimated number of case clusters when lowering \p 'SI'.
382  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
383  /// table.
384  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
385                                            unsigned &JTSize,
386                                            ProfileSummaryInfo *PSI,
387                                            BlockFrequencyInfo *BFI) const;
388
389  /// Estimate the cost of a given IR user when lowered.
390  ///
391  /// This can estimate the cost of either a ConstantExpr or Instruction when
392  /// lowered.
393  ///
394  /// \p Operands is a list of operands which can be a result of transformations
395  /// of the current operands. The number of the operands on the list must equal
396  /// to the number of the current operands the IR user has. Their order on the
397  /// list must be the same as the order of the current operands the IR user
398  /// has.
399  ///
400  /// The returned cost is defined in terms of \c TargetCostConstants, see its
401  /// comments for a detailed explanation of the cost values.
402  InstructionCost getInstructionCost(const User *U,
403                                     ArrayRef<const Value *> Operands,
404                                     TargetCostKind CostKind) const;
405
406  /// This is a helper function which calls the three-argument
407  /// getInstructionCost with \p Operands which are the current operands U has.
408  InstructionCost getInstructionCost(const User *U,
409                                     TargetCostKind CostKind) const {
410    SmallVector<const Value *, 4> Operands(U->operand_values());
411    return getInstructionCost(U, Operands, CostKind);
412  }
413
414  /// If a branch or a select condition is skewed in one direction by more than
415  /// this factor, it is very likely to be predicted correctly.
416  BranchProbability getPredictableBranchThreshold() const;
417
418  /// Return true if branch divergence exists.
419  ///
420  /// Branch divergence has a significantly negative impact on GPU performance
421  /// when threads in the same wavefront take different paths due to conditional
422  /// branches.
423  ///
424  /// If \p F is passed, provides a context function. If \p F is known to only
425  /// execute in a single threaded environment, the target may choose to skip
426  /// uniformity analysis and assume all values are uniform.
427  bool hasBranchDivergence(const Function *F = nullptr) const;
428
429  /// Returns whether V is a source of divergence.
430  ///
431  /// This function provides the target-dependent information for
432  /// the target-independent UniformityAnalysis.
433  bool isSourceOfDivergence(const Value *V) const;
434
435  // Returns true for the target specific
436  // set of operations which produce uniform result
437  // even taking non-uniform arguments
438  bool isAlwaysUniform(const Value *V) const;
439
440  /// Query the target whether the specified address space cast from FromAS to
441  /// ToAS is valid.
442  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
443
444  /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
445  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
446
447  /// Returns the address space ID for a target's 'flat' address space. Note
448  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
449  /// refers to as the generic address space. The flat address space is a
450  /// generic address space that can be used access multiple segments of memory
451  /// with different address spaces. Access of a memory location through a
452  /// pointer with this address space is expected to be legal but slower
453  /// compared to the same memory location accessed through a pointer with a
454  /// different address space.
455  //
456  /// This is for targets with different pointer representations which can
457  /// be converted with the addrspacecast instruction. If a pointer is converted
458  /// to this address space, optimizations should attempt to replace the access
459  /// with the source address space.
460  ///
461  /// \returns ~0u if the target does not have such a flat address space to
462  /// optimize away.
463  unsigned getFlatAddressSpace() const;
464
465  /// Return any intrinsic address operand indexes which may be rewritten if
466  /// they use a flat address space pointer.
467  ///
468  /// \returns true if the intrinsic was handled.
469  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
470                                  Intrinsic::ID IID) const;
471
472  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
473
474  /// Return true if globals in this address space can have initializers other
475  /// than `undef`.
476  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
477
478  unsigned getAssumedAddrSpace(const Value *V) const;
479
480  bool isSingleThreaded() const;
481
482  std::pair<const Value *, unsigned>
483  getPredicatedAddrSpace(const Value *V) const;
484
485  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
486  /// NewV, which has a different address space. This should happen for every
487  /// operand index that collectFlatAddressOperands returned for the intrinsic.
488  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
489  /// new value (which may be the original \p II with modified operands).
490  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
491                                          Value *NewV) const;
492
493  /// Test whether calls to a function lower to actual program function
494  /// calls.
495  ///
496  /// The idea is to test whether the program is likely to require a 'call'
497  /// instruction or equivalent in order to call the given function.
498  ///
499  /// FIXME: It's not clear that this is a good or useful query API. Client's
500  /// should probably move to simpler cost metrics using the above.
501  /// Alternatively, we could split the cost interface into distinct code-size
502  /// and execution-speed costs. This would allow modelling the core of this
503  /// query more accurately as a call is a single small instruction, but
504  /// incurs significant execution cost.
505  bool isLoweredToCall(const Function *F) const;
506
507  struct LSRCost {
508    /// TODO: Some of these could be merged. Also, a lexical ordering
509    /// isn't always optimal.
510    unsigned Insns;
511    unsigned NumRegs;
512    unsigned AddRecCost;
513    unsigned NumIVMuls;
514    unsigned NumBaseAdds;
515    unsigned ImmCost;
516    unsigned SetupCost;
517    unsigned ScaleCost;
518  };
519
520  /// Parameters that control the generic loop unrolling transformation.
521  struct UnrollingPreferences {
522    /// The cost threshold for the unrolled loop. Should be relative to the
523    /// getInstructionCost values returned by this API, and the expectation is
524    /// that the unrolled loop's instructions when run through that interface
525    /// should not exceed this cost. However, this is only an estimate. Also,
526    /// specific loops may be unrolled even with a cost above this threshold if
527    /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
528    /// restriction.
529    unsigned Threshold;
530    /// If complete unrolling will reduce the cost of the loop, we will boost
531    /// the Threshold by a certain percent to allow more aggressive complete
532    /// unrolling. This value provides the maximum boost percentage that we
533    /// can apply to Threshold (The value should be no less than 100).
534    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
535    ///                                    MaxPercentThresholdBoost / 100)
536    /// E.g. if complete unrolling reduces the loop execution time by 50%
537    /// then we boost the threshold by the factor of 2x. If unrolling is not
538    /// expected to reduce the running time, then we do not increase the
539    /// threshold.
540    unsigned MaxPercentThresholdBoost;
541    /// The cost threshold for the unrolled loop when optimizing for size (set
542    /// to UINT_MAX to disable).
543    unsigned OptSizeThreshold;
544    /// The cost threshold for the unrolled loop, like Threshold, but used
545    /// for partial/runtime unrolling (set to UINT_MAX to disable).
546    unsigned PartialThreshold;
547    /// The cost threshold for the unrolled loop when optimizing for size, like
548    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
549    /// UINT_MAX to disable).
550    unsigned PartialOptSizeThreshold;
551    /// A forced unrolling factor (the number of concatenated bodies of the
552    /// original loop in the unrolled loop body). When set to 0, the unrolling
553    /// transformation will select an unrolling factor based on the current cost
554    /// threshold and other factors.
555    unsigned Count;
556    /// Default unroll count for loops with run-time trip count.
557    unsigned DefaultUnrollRuntimeCount;
558    // Set the maximum unrolling factor. The unrolling factor may be selected
559    // using the appropriate cost threshold, but may not exceed this number
560    // (set to UINT_MAX to disable). This does not apply in cases where the
561    // loop is being fully unrolled.
562    unsigned MaxCount;
563    /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
564    /// to be overrided by a target gives more flexiblity on certain cases.
565    /// By default, MaxUpperBound uses UnrollMaxUpperBound which value is 8.
566    unsigned MaxUpperBound;
567    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
568    /// applies even if full unrolling is selected. This allows a target to fall
569    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
570    unsigned FullUnrollMaxCount;
571    // Represents number of instructions optimized when "back edge"
572    // becomes "fall through" in unrolled loop.
573    // For now we count a conditional branch on a backedge and a comparison
574    // feeding it.
575    unsigned BEInsns;
576    /// Allow partial unrolling (unrolling of loops to expand the size of the
577    /// loop body, not only to eliminate small constant-trip-count loops).
578    bool Partial;
579    /// Allow runtime unrolling (unrolling of loops to expand the size of the
580    /// loop body even when the number of loop iterations is not known at
581    /// compile time).
582    bool Runtime;
583    /// Allow generation of a loop remainder (extra iterations after unroll).
584    bool AllowRemainder;
585    /// Allow emitting expensive instructions (such as divisions) when computing
586    /// the trip count of a loop for runtime unrolling.
587    bool AllowExpensiveTripCount;
588    /// Apply loop unroll on any kind of loop
589    /// (mainly to loops that fail runtime unrolling).
590    bool Force;
591    /// Allow using trip count upper bound to unroll loops.
592    bool UpperBound;
593    /// Allow unrolling of all the iterations of the runtime loop remainder.
594    bool UnrollRemainder;
595    /// Allow unroll and jam. Used to enable unroll and jam for the target.
596    bool UnrollAndJam;
597    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
598    /// value above is used during unroll and jam for the outer loop size.
599    /// This value is used in the same manner to limit the size of the inner
600    /// loop.
601    unsigned UnrollAndJamInnerLoopThreshold;
602    /// Don't allow loop unrolling to simulate more than this number of
603    /// iterations when checking full unroll profitability
604    unsigned MaxIterationsCountToAnalyze;
605    /// Don't disable runtime unroll for the loops which were vectorized.
606    bool UnrollVectorizedLoop = false;
607  };
608
609  /// Get target-customized preferences for the generic loop unrolling
610  /// transformation. The caller will initialize UP with the current
611  /// target-independent defaults.
612  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
613                               UnrollingPreferences &UP,
614                               OptimizationRemarkEmitter *ORE) const;
615
616  /// Query the target whether it would be profitable to convert the given loop
617  /// into a hardware loop.
618  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
619                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
620                                HardwareLoopInfo &HWLoopInfo) const;
621
622  /// Query the target whether it would be prefered to create a predicated
623  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
624  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
625
626  /// Query the target what the preferred style of tail folding is.
627  /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
628  /// may (or will never) overflow for the suggested VF/UF in the given loop.
629  /// Targets can use this information to select a more optimal tail folding
630  /// style. The value conservatively defaults to true, such that no assumptions
631  /// are made on overflow.
632  TailFoldingStyle
633  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
634
635  // Parameters that control the loop peeling transformation
636  struct PeelingPreferences {
637    /// A forced peeling factor (the number of bodied of the original loop
638    /// that should be peeled off before the loop body). When set to 0, the
639    /// a peeling factor based on profile information and other factors.
640    unsigned PeelCount;
641    /// Allow peeling off loop iterations.
642    bool AllowPeeling;
643    /// Allow peeling off loop iterations for loop nests.
644    bool AllowLoopNestsPeeling;
645    /// Allow peeling basing on profile. Uses to enable peeling off all
646    /// iterations basing on provided profile.
647    /// If the value is true the peeling cost model can decide to peel only
648    /// some iterations and in this case it will set this to false.
649    bool PeelProfiledIterations;
650  };
651
652  /// Get target-customized preferences for the generic loop peeling
653  /// transformation. The caller will initialize \p PP with the current
654  /// target-independent defaults with information from \p L and \p SE.
655  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
656                             PeelingPreferences &PP) const;
657
658  /// Targets can implement their own combinations for target-specific
659  /// intrinsics. This function will be called from the InstCombine pass every
660  /// time a target-specific intrinsic is encountered.
661  ///
662  /// \returns std::nullopt to not do anything target specific or a value that
663  /// will be returned from the InstCombiner. It is possible to return null and
664  /// stop further processing of the intrinsic by returning nullptr.
665  std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
666                                                    IntrinsicInst & II) const;
667  /// Can be used to implement target-specific instruction combining.
668  /// \see instCombineIntrinsic
669  std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
670      InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
671      KnownBits & Known, bool &KnownBitsComputed) const;
672  /// Can be used to implement target-specific instruction combining.
673  /// \see instCombineIntrinsic
674  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
675      InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
676      APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
677      std::function<void(Instruction *, unsigned, APInt, APInt &)>
678          SimplifyAndSetOp) const;
679  /// @}
680
681  /// \name Scalar Target Information
682  /// @{
683
684  /// Flags indicating the kind of support for population count.
685  ///
686  /// Compared to the SW implementation, HW support is supposed to
687  /// significantly boost the performance when the population is dense, and it
688  /// may or may not degrade performance if the population is sparse. A HW
689  /// support is considered as "Fast" if it can outperform, or is on a par
690  /// with, SW implementation when the population is sparse; otherwise, it is
691  /// considered as "Slow".
692  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
693
694  /// Return true if the specified immediate is legal add immediate, that
695  /// is the target has add instructions which can add a register with the
696  /// immediate without having to materialize the immediate into a register.
697  bool isLegalAddImmediate(int64_t Imm) const;
698
699  /// Return true if the specified immediate is legal icmp immediate,
700  /// that is the target has icmp instructions which can compare a register
701  /// against the immediate without having to materialize the immediate into a
702  /// register.
703  bool isLegalICmpImmediate(int64_t Imm) const;
704
705  /// Return true if the addressing mode represented by AM is legal for
706  /// this target, for a load/store of the specified type.
707  /// The type may be VoidTy, in which case only return true if the addressing
708  /// mode is legal for a load/store of any legal type.
709  /// If target returns true in LSRWithInstrQueries(), I may be valid.
710  /// TODO: Handle pre/postinc as well.
711  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
712                             bool HasBaseReg, int64_t Scale,
713                             unsigned AddrSpace = 0,
714                             Instruction *I = nullptr) const;
715
716  /// Return true if LSR cost of C1 is lower than C2.
717  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
718                     const TargetTransformInfo::LSRCost &C2) const;
719
720  /// Return true if LSR major cost is number of registers. Targets which
721  /// implement their own isLSRCostLess and unset number of registers as major
722  /// cost should return false, otherwise return true.
723  bool isNumRegsMajorCostOfLSR() const;
724
725  /// Return true if LSR should attempts to replace a use of an otherwise dead
726  /// primary IV in the latch condition with another IV available in the loop.
727  /// When successful, makes the primary IV dead.
728  bool shouldFoldTerminatingConditionAfterLSR() const;
729
730  /// \returns true if LSR should not optimize a chain that includes \p I.
731  bool isProfitableLSRChainElement(Instruction *I) const;
732
733  /// Return true if the target can fuse a compare and branch.
734  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
735  /// calculation for the instructions in a loop.
736  bool canMacroFuseCmp() const;
737
738  /// Return true if the target can save a compare for loop count, for example
739  /// hardware loop saves a compare.
740  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
741                  DominatorTree *DT, AssumptionCache *AC,
742                  TargetLibraryInfo *LibInfo) const;
743
744  enum AddressingModeKind {
745    AMK_PreIndexed,
746    AMK_PostIndexed,
747    AMK_None
748  };
749
750  /// Return the preferred addressing mode LSR should make efforts to generate.
751  AddressingModeKind getPreferredAddressingMode(const Loop *L,
752                                                ScalarEvolution *SE) const;
753
754  /// Return true if the target supports masked store.
755  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
756  /// Return true if the target supports masked load.
757  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
758
759  /// Return true if the target supports nontemporal store.
760  bool isLegalNTStore(Type *DataType, Align Alignment) const;
761  /// Return true if the target supports nontemporal load.
762  bool isLegalNTLoad(Type *DataType, Align Alignment) const;
763
764  /// \Returns true if the target supports broadcasting a load to a vector of
765  /// type <NumElements x ElementTy>.
766  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
767
768  /// Return true if the target supports masked scatter.
769  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
770  /// Return true if the target supports masked gather.
771  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
772  /// Return true if the target forces scalarizing of llvm.masked.gather
773  /// intrinsics.
774  bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
775  /// Return true if the target forces scalarizing of llvm.masked.scatter
776  /// intrinsics.
777  bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
778
779  /// Return true if the target supports masked compress store.
780  bool isLegalMaskedCompressStore(Type *DataType) const;
781  /// Return true if the target supports masked expand load.
782  bool isLegalMaskedExpandLoad(Type *DataType) const;
783
784  /// Return true if this is an alternating opcode pattern that can be lowered
785  /// to a single instruction on the target. In X86 this is for the addsub
786  /// instruction which corrsponds to a Shuffle + Fadd + FSub pattern in IR.
787  /// This function expectes two opcodes: \p Opcode1 and \p Opcode2 being
788  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
789  /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
790  /// \p VecTy is the vector type of the instruction to be generated.
791  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
792                       const SmallBitVector &OpcodeMask) const;
793
794  /// Return true if we should be enabling ordered reductions for the target.
795  bool enableOrderedReductions() const;
796
797  /// Return true if the target has a unified operation to calculate division
798  /// and remainder. If so, the additional implicit multiplication and
799  /// subtraction required to calculate a remainder from division are free. This
800  /// can enable more aggressive transformations for division and remainder than
801  /// would typically be allowed using throughput or size cost models.
802  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
803
804  /// Return true if the given instruction (assumed to be a memory access
805  /// instruction) has a volatile variant. If that's the case then we can avoid
806  /// addrspacecast to generic AS for volatile loads/stores. Default
807  /// implementation returns false, which prevents address space inference for
808  /// volatile loads/stores.
809  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
810
811  /// Return true if target doesn't mind addresses in vectors.
812  bool prefersVectorizedAddressing() const;
813
814  /// Return the cost of the scaling factor used in the addressing
815  /// mode represented by AM for this target, for a load/store
816  /// of the specified type.
817  /// If the AM is supported, the return value must be >= 0.
818  /// If the AM is not supported, it returns a negative value.
819  /// TODO: Handle pre/postinc as well.
820  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
821                                       int64_t BaseOffset, bool HasBaseReg,
822                                       int64_t Scale,
823                                       unsigned AddrSpace = 0) const;
824
825  /// Return true if the loop strength reduce pass should make
826  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
827  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
828  /// immediate offset and no index register.
829  bool LSRWithInstrQueries() const;
830
831  /// Return true if it's free to truncate a value of type Ty1 to type
832  /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
833  /// by referencing its sub-register AX.
834  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
835
836  /// Return true if it is profitable to hoist instruction in the
837  /// then/else to before if.
838  bool isProfitableToHoist(Instruction *I) const;
839
840  bool useAA() const;
841
842  /// Return true if this type is legal.
843  bool isTypeLegal(Type *Ty) const;
844
845  /// Returns the estimated number of registers required to represent \p Ty.
846  unsigned getRegUsageForType(Type *Ty) const;
847
848  /// Return true if switches should be turned into lookup tables for the
849  /// target.
850  bool shouldBuildLookupTables() const;
851
852  /// Return true if switches should be turned into lookup tables
853  /// containing this constant value for the target.
854  bool shouldBuildLookupTablesForConstant(Constant *C) const;
855
856  /// Return true if lookup tables should be turned into relative lookup tables.
857  bool shouldBuildRelLookupTables() const;
858
859  /// Return true if the input function which is cold at all call sites,
860  ///  should use coldcc calling convention.
861  bool useColdCCForColdCall(Function &F) const;
862
863  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
864  /// are set if the demanded result elements need to be inserted and/or
865  /// extracted from vectors.
866  InstructionCost getScalarizationOverhead(VectorType *Ty,
867                                           const APInt &DemandedElts,
868                                           bool Insert, bool Extract,
869                                           TTI::TargetCostKind CostKind) const;
870
871  /// Estimate the overhead of scalarizing an instructions unique
872  /// non-constant operands. The (potentially vector) types to use for each of
873  /// argument are passes via Tys.
874  InstructionCost
875  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
876                                   ArrayRef<Type *> Tys,
877                                   TTI::TargetCostKind CostKind) const;
878
879  /// If target has efficient vector element load/store instructions, it can
880  /// return true here so that insertion/extraction costs are not added to
881  /// the scalarization cost of a load/store.
882  bool supportsEfficientVectorElementLoadStore() const;
883
884  /// If the target supports tail calls.
885  bool supportsTailCalls() const;
886
887  /// If target supports tail call on \p CB
888  bool supportsTailCallFor(const CallBase *CB) const;
889
890  /// Don't restrict interleaved unrolling to small loops.
891  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
892
893  /// Returns options for expansion of memcmp. IsZeroCmp is
894  // true if this is the expansion of memcmp(p1, p2, s) == 0.
895  struct MemCmpExpansionOptions {
896    // Return true if memcmp expansion is enabled.
897    operator bool() const { return MaxNumLoads > 0; }
898
899    // Maximum number of load operations.
900    unsigned MaxNumLoads = 0;
901
902    // The list of available load sizes (in bytes), sorted in decreasing order.
903    SmallVector<unsigned, 8> LoadSizes;
904
905    // For memcmp expansion when the memcmp result is only compared equal or
906    // not-equal to 0, allow up to this number of load pairs per block. As an
907    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
908    //   a0 = load2bytes &a[0]
909    //   b0 = load2bytes &b[0]
910    //   a2 = load1byte  &a[2]
911    //   b2 = load1byte  &b[2]
912    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
913    unsigned NumLoadsPerBlock = 1;
914
915    // Set to true to allow overlapping loads. For example, 7-byte compares can
916    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
917    // requires all loads in LoadSizes to be doable in an unaligned way.
918    bool AllowOverlappingLoads = false;
919
920    // Sometimes, the amount of data that needs to be compared is smaller than
921    // the standard register size, but it cannot be loaded with just one load
922    // instruction. For example, if the size of the memory comparison is 6
923    // bytes, we can handle it more efficiently by loading all 6 bytes in a
924    // single block and generating an 8-byte number, instead of generating two
925    // separate blocks with conditional jumps for 4 and 2 byte loads. This
926    // approach simplifies the process and produces the comparison result as
927    // normal. This array lists the allowed sizes of memcmp tails that can be
928    // merged into one block
929    SmallVector<unsigned, 4> AllowedTailExpansions;
930  };
931  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
932                                               bool IsZeroCmp) const;
933
934  /// Should the Select Optimization pass be enabled and ran.
935  bool enableSelectOptimize() const;
936
937  /// Should the Select Optimization pass treat the given instruction like a
938  /// select, potentially converting it to a conditional branch. This can
939  /// include select-like instructions like or(zext(c), x) that can be converted
940  /// to selects.
941  bool shouldTreatInstructionLikeSelect(const Instruction *I) const;
942
943  /// Enable matching of interleaved access groups.
944  bool enableInterleavedAccessVectorization() const;
945
946  /// Enable matching of interleaved access groups that contain predicated
947  /// accesses or gaps and therefore vectorized using masked
948  /// vector loads/stores.
949  bool enableMaskedInterleavedAccessVectorization() const;
950
951  /// Indicate that it is potentially unsafe to automatically vectorize
952  /// floating-point operations because the semantics of vector and scalar
953  /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
954  /// does not support IEEE-754 denormal numbers, while depending on the
955  /// platform, scalar floating-point math does.
956  /// This applies to floating-point math operations and calls, not memory
957  /// operations, shuffles, or casts.
958  bool isFPVectorizationPotentiallyUnsafe() const;
959
960  /// Determine if the target supports unaligned memory accesses.
961  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
962                                      unsigned AddressSpace = 0,
963                                      Align Alignment = Align(1),
964                                      unsigned *Fast = nullptr) const;
965
966  /// Return hardware support for population count.
967  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
968
969  /// Return true if the hardware has a fast square-root instruction.
970  bool haveFastSqrt(Type *Ty) const;
971
972  /// Return true if the cost of the instruction is too high to speculatively
973  /// execute and should be kept behind a branch.
974  /// This normally just wraps around a getInstructionCost() call, but some
975  /// targets might report a low TCK_SizeAndLatency value that is incompatible
976  /// with the fixed TCC_Expensive value.
977  /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
978  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;
979
980  /// Return true if it is faster to check if a floating-point value is NaN
981  /// (or not-NaN) versus a comparison against a constant FP zero value.
982  /// Targets should override this if materializing a 0.0 for comparison is
983  /// generally as cheap as checking for ordered/unordered.
984  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
985
986  /// Return the expected cost of supporting the floating point operation
987  /// of the specified type.
988  InstructionCost getFPOpCost(Type *Ty) const;
989
990  /// Return the expected cost of materializing for the given integer
991  /// immediate of the specified type.
992  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
993                                TargetCostKind CostKind) const;
994
995  /// Return the expected cost of materialization for the given integer
996  /// immediate of the specified type for a given instruction. The cost can be
997  /// zero if the immediate can be folded into the specified instruction.
998  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
999                                    const APInt &Imm, Type *Ty,
1000                                    TargetCostKind CostKind,
1001                                    Instruction *Inst = nullptr) const;
1002  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1003                                      const APInt &Imm, Type *Ty,
1004                                      TargetCostKind CostKind) const;
1005
1006  /// Return the expected cost for the given integer when optimising
1007  /// for size. This is different than the other integer immediate cost
1008  /// functions in that it is subtarget agnostic. This is useful when you e.g.
1009  /// target one ISA such as Aarch32 but smaller encodings could be possible
1010  /// with another such as Thumb. This return value is used as a penalty when
1011  /// the total costs for a constant is calculated (the bigger the cost, the
1012  /// more beneficial constant hoisting is).
1013  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1014                                        const APInt &Imm, Type *Ty) const;
1015
1016  /// It can be advantageous to detach complex constants from their uses to make
1017  /// their generation cheaper. This hook allows targets to report when such
1018  /// transformations might negatively effect the code generation of the
1019  /// underlying operation. The motivating example is divides whereby hoisting
1020  /// constants prevents the code generator's ability to transform them into
1021  /// combinations of simpler operations.
1022  bool preferToKeepConstantsAttached(const Instruction &Inst,
1023                                     const Function &Fn) const;
1024
1025  /// @}
1026
1027  /// \name Vector Target Information
1028  /// @{
1029
1030  /// The various kinds of shuffle patterns for vector queries.
1031  enum ShuffleKind {
1032    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
1033    SK_Reverse,          ///< Reverse the order of the vector.
1034    SK_Select,           ///< Selects elements from the corresponding lane of
1035                         ///< either source operand. This is equivalent to a
1036                         ///< vector select with a constant condition operand.
1037    SK_Transpose,        ///< Transpose two vectors.
1038    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
1039    SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1040    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
1041                         ///< with any shuffle mask.
1042    SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1043                         ///< shuffle mask.
1044    SK_Splice            ///< Concatenates elements from the first input vector
1045                         ///< with elements of the second input vector. Returning
1046                         ///< a vector of the same type as the input vectors.
1047                         ///< Index indicates start offset in first input vector.
1048  };
1049
1050  /// Additional information about an operand's possible values.
1051  enum OperandValueKind {
1052    OK_AnyValue,               // Operand can have any value.
1053    OK_UniformValue,           // Operand is uniform (splat of a value).
1054    OK_UniformConstantValue,   // Operand is uniform constant.
1055    OK_NonUniformConstantValue // Operand is a non uniform constant value.
1056  };
1057
1058  /// Additional properties of an operand's values.
1059  enum OperandValueProperties {
1060    OP_None = 0,
1061    OP_PowerOf2 = 1,
1062    OP_NegatedPowerOf2 = 2,
1063  };
1064
1065  // Describe the values an operand can take.  We're in the process
1066  // of migrating uses of OperandValueKind and OperandValueProperties
1067  // to use this class, and then will change the internal representation.
1068  struct OperandValueInfo {
1069    OperandValueKind Kind = OK_AnyValue;
1070    OperandValueProperties Properties = OP_None;
1071
1072    bool isConstant() const {
1073      return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
1074    }
1075    bool isUniform() const {
1076      return Kind == OK_UniformConstantValue || Kind == OK_UniformValue;
1077    }
1078    bool isPowerOf2() const {
1079      return Properties == OP_PowerOf2;
1080    }
1081    bool isNegatedPowerOf2() const {
1082      return Properties == OP_NegatedPowerOf2;
1083    }
1084
1085    OperandValueInfo getNoProps() const {
1086      return {Kind, OP_None};
1087    }
1088  };
1089
1090  /// \return the number of registers in the target-provided register class.
1091  unsigned getNumberOfRegisters(unsigned ClassID) const;
1092
1093  /// \return the target-provided register class ID for the provided type,
1094  /// accounting for type promotion and other type-legalization techniques that
1095  /// the target might apply. However, it specifically does not account for the
1096  /// scalarization or splitting of vector types. Should a vector type require
1097  /// scalarization or splitting into multiple underlying vector registers, that
1098  /// type should be mapped to a register class containing no registers.
1099  /// Specifically, this is designed to provide a simple, high-level view of the
1100  /// register allocation later performed by the backend. These register classes
1101  /// don't necessarily map onto the register classes used by the backend.
1102  /// FIXME: It's not currently possible to determine how many registers
1103  /// are used by the provided type.
1104  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1105
1106  /// \return the target-provided register class name
1107  const char *getRegisterClassName(unsigned ClassID) const;
1108
1109  enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
1110
1111  /// \return The width of the largest scalar or vector register type.
1112  TypeSize getRegisterBitWidth(RegisterKind K) const;
1113
1114  /// \return The width of the smallest vector register type.
1115  unsigned getMinVectorRegisterBitWidth() const;
1116
1117  /// \return The maximum value of vscale if the target specifies an
1118  ///  architectural maximum vector length, and std::nullopt otherwise.
1119  std::optional<unsigned> getMaxVScale() const;
1120
1121  /// \return the value of vscale to tune the cost model for.
1122  std::optional<unsigned> getVScaleForTuning() const;
1123
1124  /// \return true if vscale is known to be a power of 2
1125  bool isVScaleKnownToBeAPowerOfTwo() const;
1126
1127  /// \return True if the vectorization factor should be chosen to
1128  /// make the vector of the smallest element type match the size of a
1129  /// vector register. For wider element types, this could result in
1130  /// creating vectors that span multiple vector registers.
1131  /// If false, the vectorization factor will be chosen based on the
1132  /// size of the widest element type.
1133  /// \p K Register Kind for vectorization.
1134  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
1135
1136  /// \return The minimum vectorization factor for types of given element
1137  /// bit width, or 0 if there is no minimum VF. The returned value only
1138  /// applies when shouldMaximizeVectorBandwidth returns true.
1139  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1140  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1141
1142  /// \return The maximum vectorization factor for types of given element
1143  /// bit width and opcode, or 0 if there is no maximum VF.
1144  /// Currently only used by the SLP vectorizer.
1145  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1146
1147  /// \return The minimum vectorization factor for the store instruction. Given
1148  /// the initial estimation of the minimum vector factor and store value type,
1149  /// it tries to find possible lowest VF, which still might be profitable for
1150  /// the vectorization.
1151  /// \param VF Initial estimation of the minimum vector factor.
1152  /// \param ScalarMemTy Scalar memory type of the store operation.
1153  /// \param ScalarValTy Scalar type of the stored value.
1154  /// Currently only used by the SLP vectorizer.
1155  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1156                             Type *ScalarValTy) const;
1157
1158  /// \return True if it should be considered for address type promotion.
1159  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1160  /// profitable without finding other extensions fed by the same input.
1161  bool shouldConsiderAddressTypePromotion(
1162      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1163
1164  /// \return The size of a cache line in bytes.
1165  unsigned getCacheLineSize() const;
1166
1167  /// The possible cache levels
1168  enum class CacheLevel {
1169    L1D, // The L1 data cache
1170    L2D, // The L2 data cache
1171
1172    // We currently do not model L3 caches, as their sizes differ widely between
1173    // microarchitectures. Also, we currently do not have a use for L3 cache
1174    // size modeling yet.
1175  };
1176
1177  /// \return The size of the cache level in bytes, if available.
1178  std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1179
1180  /// \return The associativity of the cache level, if available.
1181  std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1182
1183  /// \return The minimum architectural page size for the target.
1184  std::optional<unsigned> getMinPageSize() const;
1185
1186  /// \return How much before a load we should place the prefetch
1187  /// instruction.  This is currently measured in number of
1188  /// instructions.
1189  unsigned getPrefetchDistance() const;
1190
1191  /// Some HW prefetchers can handle accesses up to a certain constant stride.
1192  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1193  /// and the arguments provided are meant to serve as a basis for deciding this
1194  /// for a particular loop.
1195  ///
1196  /// \param NumMemAccesses        Number of memory accesses in the loop.
1197  /// \param NumStridedMemAccesses Number of the memory accesses that
1198  ///                              ScalarEvolution could find a known stride
1199  ///                              for.
1200  /// \param NumPrefetches         Number of software prefetches that will be
1201  ///                              emitted as determined by the addresses
1202  ///                              involved and the cache line size.
1203  /// \param HasCall               True if the loop contains a call.
1204  ///
1205  /// \return This is the minimum stride in bytes where it makes sense to start
1206  ///         adding SW prefetches. The default is 1, i.e. prefetch with any
1207  ///         stride.
1208  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1209                                unsigned NumStridedMemAccesses,
1210                                unsigned NumPrefetches, bool HasCall) const;
1211
1212  /// \return The maximum number of iterations to prefetch ahead.  If
1213  /// the required number of iterations is more than this number, no
1214  /// prefetching is performed.
1215  unsigned getMaxPrefetchIterationsAhead() const;
1216
1217  /// \return True if prefetching should also be done for writes.
1218  bool enableWritePrefetching() const;
1219
1220  /// \return if target want to issue a prefetch in address space \p AS.
1221  bool shouldPrefetchAddressSpace(unsigned AS) const;
1222
1223  /// \return The maximum interleave factor that any transform should try to
1224  /// perform for this target. This number depends on the level of parallelism
1225  /// and the number of execution units in the CPU.
1226  unsigned getMaxInterleaveFactor(ElementCount VF) const;
1227
1228  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1229  static OperandValueInfo getOperandInfo(const Value *V);
1230
1231  /// This is an approximation of reciprocal throughput of a math/logic op.
1232  /// A higher cost indicates less expected throughput.
1233  /// From Agner Fog's guides, reciprocal throughput is "the average number of
1234  /// clock cycles per instruction when the instructions are not part of a
1235  /// limiting dependency chain."
1236  /// Therefore, costs should be scaled to account for multiple execution units
1237  /// on the target that can process this type of instruction. For example, if
1238  /// there are 5 scalar integer units and 2 vector integer units that can
1239  /// calculate an 'add' in a single cycle, this model should indicate that the
1240  /// cost of the vector add instruction is 2.5 times the cost of the scalar
1241  /// add instruction.
1242  /// \p Args is an optional argument which holds the instruction operands
1243  /// values so the TTI can analyze those values searching for special
1244  /// cases or optimizations based on those values.
1245  /// \p CxtI is the optional original context instruction, if one exists, to
1246  /// provide even more information.
1247  InstructionCost getArithmeticInstrCost(
1248      unsigned Opcode, Type *Ty,
1249      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1250      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
1251      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1252      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
1253      const Instruction *CxtI = nullptr) const;
1254
1255  /// Returns the cost estimation for alternating opcode pattern that can be
1256  /// lowered to a single instruction on the target. In X86 this is for the
1257  /// addsub instruction which corrsponds to a Shuffle + Fadd + FSub pattern in
1258  /// IR. This function expects two opcodes: \p Opcode1 and \p Opcode2 being
1259  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1260  /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
1261  /// \p VecTy is the vector type of the instruction to be generated.
1262  InstructionCost getAltInstrCost(
1263      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1264      const SmallBitVector &OpcodeMask,
1265      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1266
1267  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1268  /// The exact mask may be passed as Mask, or else the array will be empty.
1269  /// The index and subtype parameters are used by the subvector insertion and
1270  /// extraction shuffle kinds to show the insert/extract point and the type of
1271  /// the subvector being inserted/extracted. The operands of the shuffle can be
1272  /// passed through \p Args, which helps improve the cost estimation in some
1273  /// cases, like in broadcast loads.
1274  /// NOTE: For subvector extractions Tp represents the source type.
1275  InstructionCost
1276  getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1277                 ArrayRef<int> Mask = std::nullopt,
1278                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1279                 int Index = 0, VectorType *SubTp = nullptr,
1280                 ArrayRef<const Value *> Args = std::nullopt) const;
1281
1282  /// Represents a hint about the context in which a cast is used.
1283  ///
1284  /// For zext/sext, the context of the cast is the operand, which must be a
1285  /// load of some kind. For trunc, the context is of the cast is the single
1286  /// user of the instruction, which must be a store of some kind.
1287  ///
1288  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1289  /// type of cast it's dealing with, as not every cast is equal. For instance,
1290  /// the zext of a load may be free, but the zext of an interleaving load can
1291  //// be (very) expensive!
1292  ///
1293  /// See \c getCastContextHint to compute a CastContextHint from a cast
1294  /// Instruction*. Callers can use it if they don't need to override the
1295  /// context and just want it to be calculated from the instruction.
1296  ///
1297  /// FIXME: This handles the types of load/store that the vectorizer can
1298  /// produce, which are the cases where the context instruction is most
1299  /// likely to be incorrect. There are other situations where that can happen
1300  /// too, which might be handled here but in the long run a more general
1301  /// solution of costing multiple instructions at the same times may be better.
1302  enum class CastContextHint : uint8_t {
1303    None,          ///< The cast is not used with a load/store of any kind.
1304    Normal,        ///< The cast is used with a normal load/store.
1305    Masked,        ///< The cast is used with a masked load/store.
1306    GatherScatter, ///< The cast is used with a gather/scatter.
1307    Interleave,    ///< The cast is used with an interleaved load/store.
1308    Reversed,      ///< The cast is used with a reversed load/store.
1309  };
1310
1311  /// Calculates a CastContextHint from \p I.
1312  /// This should be used by callers of getCastInstrCost if they wish to
1313  /// determine the context from some instruction.
1314  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1315  /// or if it's another type of cast.
1316  static CastContextHint getCastContextHint(const Instruction *I);
1317
1318  /// \return The expected cost of cast instructions, such as bitcast, trunc,
1319  /// zext, etc. If there is an existing instruction that holds Opcode, it
1320  /// may be passed in the 'I' parameter.
1321  InstructionCost
1322  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1323                   TTI::CastContextHint CCH,
1324                   TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1325                   const Instruction *I = nullptr) const;
1326
1327  /// \return The expected cost of a sign- or zero-extended vector extract. Use
1328  /// Index = -1 to indicate that there is no information about the index value.
1329  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1330                                           VectorType *VecTy,
1331                                           unsigned Index) const;
1332
1333  /// \return The expected cost of control-flow related instructions such as
1334  /// Phi, Ret, Br, Switch.
1335  InstructionCost
1336  getCFInstrCost(unsigned Opcode,
1337                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1338                 const Instruction *I = nullptr) const;
1339
1340  /// \returns The expected cost of compare and select instructions. If there
1341  /// is an existing instruction that holds Opcode, it may be passed in the
1342  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1343  /// is using a compare with the specified predicate as condition. When vector
1344  /// types are passed, \p VecPred must be used for all lanes.
1345  InstructionCost
1346  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1347                     CmpInst::Predicate VecPred,
1348                     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1349                     const Instruction *I = nullptr) const;
1350
1351  /// \return The expected cost of vector Insert and Extract.
1352  /// Use -1 to indicate that there is no information on the index value.
1353  /// This is used when the instruction is not available; a typical use
1354  /// case is to provision the cost of vectorization/scalarization in
1355  /// vectorizer passes.
1356  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1357                                     TTI::TargetCostKind CostKind,
1358                                     unsigned Index = -1, Value *Op0 = nullptr,
1359                                     Value *Op1 = nullptr) const;
1360
1361  /// \return The expected cost of vector Insert and Extract.
1362  /// This is used when instruction is available, and implementation
1363  /// asserts 'I' is not nullptr.
1364  ///
1365  /// A typical suitable use case is cost estimation when vector instruction
1366  /// exists (e.g., from basic blocks during transformation).
1367  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1368                                     TTI::TargetCostKind CostKind,
1369                                     unsigned Index = -1) const;
1370
1371  /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1372  /// \p ReplicationFactor times.
1373  ///
1374  /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1375  ///   <0,0,0,1,1,1,2,2,2,3,3,3>
1376  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1377                                            int VF,
1378                                            const APInt &DemandedDstElts,
1379                                            TTI::TargetCostKind CostKind);
1380
1381  /// \return The cost of Load and Store instructions.
1382  InstructionCost
1383  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1384                  unsigned AddressSpace,
1385                  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1386                  OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1387                  const Instruction *I = nullptr) const;
1388
1389  /// \return The cost of VP Load and Store instructions.
1390  InstructionCost
1391  getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1392                    unsigned AddressSpace,
1393                    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1394                    const Instruction *I = nullptr) const;
1395
1396  /// \return The cost of masked Load and Store instructions.
1397  InstructionCost getMaskedMemoryOpCost(
1398      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1399      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1400
1401  /// \return The cost of Gather or Scatter operation
1402  /// \p Opcode - is a type of memory access Load or Store
1403  /// \p DataTy - a vector type of the data to be loaded or stored
1404  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1405  /// \p VariableMask - true when the memory access is predicated with a mask
1406  ///                   that is not a compile-time constant
1407  /// \p Alignment - alignment of single element
1408  /// \p I - the optional original context instruction, if one exists, e.g. the
1409  ///        load/store to transform or the call to the gather/scatter intrinsic
1410  InstructionCost getGatherScatterOpCost(
1411      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1412      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1413      const Instruction *I = nullptr) const;
1414
1415  /// \return The cost of the interleaved memory operation.
1416  /// \p Opcode is the memory operation code
1417  /// \p VecTy is the vector type of the interleaved access.
1418  /// \p Factor is the interleave factor
1419  /// \p Indices is the indices for interleaved load members (as interleaved
1420  ///    load allows gaps)
1421  /// \p Alignment is the alignment of the memory operation
1422  /// \p AddressSpace is address space of the pointer.
1423  /// \p UseMaskForCond indicates if the memory access is predicated.
1424  /// \p UseMaskForGaps indicates if gaps should be masked.
1425  InstructionCost getInterleavedMemoryOpCost(
1426      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1427      Align Alignment, unsigned AddressSpace,
1428      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1429      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1430
1431  /// A helper function to determine the type of reduction algorithm used
1432  /// for a given \p Opcode and set of FastMathFlags \p FMF.
1433  static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1434    return FMF && !(*FMF).allowReassoc();
1435  }
1436
1437  /// Calculate the cost of vector reduction intrinsics.
1438  ///
1439  /// This is the cost of reducing the vector value of type \p Ty to a scalar
1440  /// value using the operation denoted by \p Opcode. The FastMathFlags
1441  /// parameter \p FMF indicates what type of reduction we are performing:
1442  ///   1. Tree-wise. This is the typical 'fast' reduction performed that
1443  ///   involves successively splitting a vector into half and doing the
1444  ///   operation on the pair of halves until you have a scalar value. For
1445  ///   example:
1446  ///     (v0, v1, v2, v3)
1447  ///     ((v0+v2), (v1+v3), undef, undef)
1448  ///     ((v0+v2+v1+v3), undef, undef, undef)
1449  ///   This is the default behaviour for integer operations, whereas for
1450  ///   floating point we only do this if \p FMF indicates that
1451  ///   reassociation is allowed.
1452  ///   2. Ordered. For a vector with N elements this involves performing N
1453  ///   operations in lane order, starting with an initial scalar value, i.e.
1454  ///     result = InitVal + v0
1455  ///     result = result + v1
1456  ///     result = result + v2
1457  ///     result = result + v3
1458  ///   This is only the case for FP operations and when reassociation is not
1459  ///   allowed.
1460  ///
1461  InstructionCost getArithmeticReductionCost(
1462      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1463      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1464
1465  InstructionCost getMinMaxReductionCost(
1466      Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
1467      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1468
1469  /// Calculate the cost of an extended reduction pattern, similar to
1470  /// getArithmeticReductionCost of an Add reduction with multiply and optional
1471  /// extensions. This is the cost of as:
1472  /// ResTy vecreduce.add(mul (A, B)).
1473  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)).
1474  InstructionCost getMulAccReductionCost(
1475      bool IsUnsigned, Type *ResTy, VectorType *Ty,
1476      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1477
1478  /// Calculate the cost of an extended reduction pattern, similar to
1479  /// getArithmeticReductionCost of a reduction with an extension.
1480  /// This is the cost of as:
1481  /// ResTy vecreduce.opcode(ext(Ty A)).
1482  InstructionCost getExtendedReductionCost(
1483      unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1484      FastMathFlags FMF,
1485      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1486
1487  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1488  /// Three cases are handled: 1. scalar instruction 2. vector instruction
1489  /// 3. scalar instruction which is to be vectorized.
1490  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1491                                        TTI::TargetCostKind CostKind) const;
1492
1493  /// \returns The cost of Call instructions.
1494  InstructionCost getCallInstrCost(
1495      Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1496      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1497
1498  /// \returns The number of pieces into which the provided type must be
1499  /// split during legalization. Zero is returned when the answer is unknown.
1500  unsigned getNumberOfParts(Type *Tp) const;
1501
1502  /// \returns The cost of the address computation. For most targets this can be
1503  /// merged into the instruction indexing mode. Some targets might want to
1504  /// distinguish between address computation for memory operations on vector
1505  /// types and scalar types. Such targets should override this function.
1506  /// The 'SE' parameter holds pointer for the scalar evolution object which
1507  /// is used in order to get the Ptr step value in case of constant stride.
1508  /// The 'Ptr' parameter holds SCEV of the access pointer.
1509  InstructionCost getAddressComputationCost(Type *Ty,
1510                                            ScalarEvolution *SE = nullptr,
1511                                            const SCEV *Ptr = nullptr) const;
1512
1513  /// \returns The cost, if any, of keeping values of the given types alive
1514  /// over a callsite.
1515  ///
1516  /// Some types may require the use of register classes that do not have
1517  /// any callee-saved registers, so would require a spill and fill.
1518  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1519
1520  /// \returns True if the intrinsic is a supported memory intrinsic.  Info
1521  /// will contain additional information - whether the intrinsic may write
1522  /// or read to memory, volatility and the pointer.  Info is undefined
1523  /// if false is returned.
1524  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1525
1526  /// \returns The maximum element size, in bytes, for an element
1527  /// unordered-atomic memory intrinsic.
1528  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1529
1530  /// \returns A value which is the result of the given memory intrinsic.  New
1531  /// instructions may be created to extract the result from the given intrinsic
1532  /// memory operation.  Returns nullptr if the target cannot create a result
1533  /// from the given intrinsic.
1534  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1535                                           Type *ExpectedType) const;
1536
1537  /// \returns The type to use in a loop expansion of a memcpy call.
1538  Type *getMemcpyLoopLoweringType(
1539      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1540      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1541      std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1542
1543  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1544  /// \param RemainingBytes The number of bytes to copy.
1545  ///
1546  /// Calculates the operand types to use when copying \p RemainingBytes of
1547  /// memory, where source and destination alignments are \p SrcAlign and
1548  /// \p DestAlign respectively.
1549  void getMemcpyLoopResidualLoweringType(
1550      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1551      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1552      unsigned SrcAlign, unsigned DestAlign,
1553      std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1554
1555  /// \returns True if the two functions have compatible attributes for inlining
1556  /// purposes.
1557  bool areInlineCompatible(const Function *Caller,
1558                           const Function *Callee) const;
1559
1560  /// Returns a penalty for invoking call \p Call in \p F.
1561  /// For example, if a function F calls a function G, which in turn calls
1562  /// function H, then getInlineCallPenalty(F, H()) would return the
1563  /// penalty of calling H from F, e.g. after inlining G into F.
1564  /// \p DefaultCallPenalty is passed to give a default penalty that
1565  /// the target can amend or override.
1566  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1567                                unsigned DefaultCallPenalty) const;
1568
1569  /// \returns True if the caller and callee agree on how \p Types will be
1570  /// passed to or returned from the callee.
1571  /// to the callee.
1572  /// \param Types List of types to check.
1573  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1574                             const ArrayRef<Type *> &Types) const;
1575
1576  /// The type of load/store indexing.
1577  enum MemIndexedMode {
1578    MIM_Unindexed, ///< No indexing.
1579    MIM_PreInc,    ///< Pre-incrementing.
1580    MIM_PreDec,    ///< Pre-decrementing.
1581    MIM_PostInc,   ///< Post-incrementing.
1582    MIM_PostDec    ///< Post-decrementing.
1583  };
1584
1585  /// \returns True if the specified indexed load for the given type is legal.
1586  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1587
1588  /// \returns True if the specified indexed store for the given type is legal.
1589  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1590
1591  /// \returns The bitwidth of the largest vector type that should be used to
1592  /// load/store in the given address space.
1593  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1594
1595  /// \returns True if the load instruction is legal to vectorize.
1596  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1597
1598  /// \returns True if the store instruction is legal to vectorize.
1599  bool isLegalToVectorizeStore(StoreInst *SI) const;
1600
1601  /// \returns True if it is legal to vectorize the given load chain.
1602  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1603                                   unsigned AddrSpace) const;
1604
1605  /// \returns True if it is legal to vectorize the given store chain.
1606  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1607                                    unsigned AddrSpace) const;
1608
1609  /// \returns True if it is legal to vectorize the given reduction kind.
1610  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1611                                   ElementCount VF) const;
1612
1613  /// \returns True if the given type is supported for scalable vectors
1614  bool isElementTypeLegalForScalableVector(Type *Ty) const;
1615
1616  /// \returns The new vector factor value if the target doesn't support \p
1617  /// SizeInBytes loads or has a better vector factor.
1618  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1619                               unsigned ChainSizeInBytes,
1620                               VectorType *VecTy) const;
1621
1622  /// \returns The new vector factor value if the target doesn't support \p
1623  /// SizeInBytes stores or has a better vector factor.
1624  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1625                                unsigned ChainSizeInBytes,
1626                                VectorType *VecTy) const;
1627
1628  /// Flags describing the kind of vector reduction.
1629  struct ReductionFlags {
1630    ReductionFlags() = default;
1631    bool IsMaxOp =
1632        false; ///< If the op a min/max kind, true if it's a max operation.
1633    bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1634    bool NoNaN =
1635        false; ///< If op is an fp min/max, whether NaNs may be present.
1636  };
1637
1638  /// \returns True if the target prefers reductions in loop.
1639  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1640                             ReductionFlags Flags) const;
1641
1642  /// \returns True if the target prefers reductions select kept in the loop
1643  /// when tail folding. i.e.
1644  /// loop:
1645  ///   p = phi (0, s)
1646  ///   a = add (p, x)
1647  ///   s = select (mask, a, p)
1648  /// vecreduce.add(s)
1649  ///
1650  /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1651  /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1652  /// by the target, this can lead to cleaner code generation.
1653  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1654                                       ReductionFlags Flags) const;
1655
1656  /// Return true if the loop vectorizer should consider vectorizing an
1657  /// otherwise scalar epilogue loop.
1658  bool preferEpilogueVectorization() const;
1659
1660  /// \returns True if the target wants to expand the given reduction intrinsic
1661  /// into a shuffle sequence.
1662  bool shouldExpandReduction(const IntrinsicInst *II) const;
1663
1664  /// \returns the size cost of rematerializing a GlobalValue address relative
1665  /// to a stack reload.
1666  unsigned getGISelRematGlobalCost() const;
1667
1668  /// \returns the lower bound of a trip count to decide on vectorization
1669  /// while tail-folding.
1670  unsigned getMinTripCountTailFoldingThreshold() const;
1671
1672  /// \returns True if the target supports scalable vectors.
1673  bool supportsScalableVectors() const;
1674
1675  /// \return true when scalable vectorization is preferred.
1676  bool enableScalableVectorization() const;
1677
1678  /// \name Vector Predication Information
1679  /// @{
1680  /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1681  /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1682  /// Reference - "Vector Predication Intrinsics").
1683  /// Use of %evl is discouraged when that is not the case.
1684  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1685                             Align Alignment) const;
1686
1687  struct VPLegalization {
1688    enum VPTransform {
1689      // keep the predicating parameter
1690      Legal = 0,
1691      // where legal, discard the predicate parameter
1692      Discard = 1,
1693      // transform into something else that is also predicating
1694      Convert = 2
1695    };
1696
1697    // How to transform the EVL parameter.
1698    // Legal:   keep the EVL parameter as it is.
1699    // Discard: Ignore the EVL parameter where it is safe to do so.
1700    // Convert: Fold the EVL into the mask parameter.
1701    VPTransform EVLParamStrategy;
1702
1703    // How to transform the operator.
1704    // Legal:   The target supports this operator.
1705    // Convert: Convert this to a non-VP operation.
1706    // The 'Discard' strategy is invalid.
1707    VPTransform OpStrategy;
1708
1709    bool shouldDoNothing() const {
1710      return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1711    }
1712    VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1713        : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1714  };
1715
1716  /// \returns How the target needs this vector-predicated operation to be
1717  /// transformed.
1718  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1719  /// @}
1720
1721  /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1722  /// state.
1723  ///
1724  /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1725  /// node containing a jump table in a format suitable for the target, so it
1726  /// needs to know what format of jump table it can legally use.
1727  ///
1728  /// For non-Arm targets, this function isn't used. It defaults to returning
1729  /// false, but it shouldn't matter what it returns anyway.
1730  bool hasArmWideBranch(bool Thumb) const;
1731
1732  /// \return The maximum number of function arguments the target supports.
1733  unsigned getMaxNumArgs() const;
1734
1735  /// @}
1736
1737private:
1738  /// The abstract base class used to type erase specific TTI
1739  /// implementations.
1740  class Concept;
1741
1742  /// The template model for the base class which wraps a concrete
1743  /// implementation in a type erased interface.
1744  template <typename T> class Model;
1745
1746  std::unique_ptr<Concept> TTIImpl;
1747};
1748
1749class TargetTransformInfo::Concept {
1750public:
1751  virtual ~Concept() = 0;
1752  virtual const DataLayout &getDataLayout() const = 0;
1753  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1754                                     ArrayRef<const Value *> Operands,
1755                                     Type *AccessType,
1756                                     TTI::TargetCostKind CostKind) = 0;
1757  virtual InstructionCost
1758  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
1759                       const TTI::PointersChainInfo &Info, Type *AccessTy,
1760                       TTI::TargetCostKind CostKind) = 0;
1761  virtual unsigned getInliningThresholdMultiplier() const = 0;
1762  virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
1763  virtual unsigned
1764  getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
1765  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1766  virtual int getInlinerVectorBonusPercent() const = 0;
1767  virtual unsigned getCallerAllocaCost(const CallBase *CB,
1768                                       const AllocaInst *AI) const = 0;
1769  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1770  virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const = 0;
1771  virtual unsigned
1772  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1773                                   ProfileSummaryInfo *PSI,
1774                                   BlockFrequencyInfo *BFI) = 0;
1775  virtual InstructionCost getInstructionCost(const User *U,
1776                                             ArrayRef<const Value *> Operands,
1777                                             TargetCostKind CostKind) = 0;
1778  virtual BranchProbability getPredictableBranchThreshold() = 0;
1779  virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1780  virtual bool isSourceOfDivergence(const Value *V) = 0;
1781  virtual bool isAlwaysUniform(const Value *V) = 0;
1782  virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1783  virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1784  virtual unsigned getFlatAddressSpace() = 0;
1785  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1786                                          Intrinsic::ID IID) const = 0;
1787  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1788  virtual bool
1789  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1790  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1791  virtual bool isSingleThreaded() const = 0;
1792  virtual std::pair<const Value *, unsigned>
1793  getPredicatedAddrSpace(const Value *V) const = 0;
1794  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1795                                                  Value *OldV,
1796                                                  Value *NewV) const = 0;
1797  virtual bool isLoweredToCall(const Function *F) = 0;
1798  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1799                                       UnrollingPreferences &UP,
1800                                       OptimizationRemarkEmitter *ORE) = 0;
1801  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1802                                     PeelingPreferences &PP) = 0;
1803  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1804                                        AssumptionCache &AC,
1805                                        TargetLibraryInfo *LibInfo,
1806                                        HardwareLoopInfo &HWLoopInfo) = 0;
1807  virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0;
1808  virtual TailFoldingStyle
1809  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1810  virtual std::optional<Instruction *> instCombineIntrinsic(
1811      InstCombiner &IC, IntrinsicInst &II) = 0;
1812  virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1813      InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1814      KnownBits & Known, bool &KnownBitsComputed) = 0;
1815  virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1816      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1817      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1818      std::function<void(Instruction *, unsigned, APInt, APInt &)>
1819          SimplifyAndSetOp) = 0;
1820  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1821  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1822  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1823                                     int64_t BaseOffset, bool HasBaseReg,
1824                                     int64_t Scale, unsigned AddrSpace,
1825                                     Instruction *I) = 0;
1826  virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1827                             const TargetTransformInfo::LSRCost &C2) = 0;
1828  virtual bool isNumRegsMajorCostOfLSR() = 0;
1829  virtual bool shouldFoldTerminatingConditionAfterLSR() const = 0;
1830  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1831  virtual bool canMacroFuseCmp() = 0;
1832  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1833                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1834                          TargetLibraryInfo *LibInfo) = 0;
1835  virtual AddressingModeKind
1836    getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1837  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1838  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1839  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1840  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1841  virtual bool isLegalBroadcastLoad(Type *ElementTy,
1842                                    ElementCount NumElements) const = 0;
1843  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1844  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1845  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
1846                                          Align Alignment) = 0;
1847  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
1848                                           Align Alignment) = 0;
1849  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1850  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1851  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1852                               unsigned Opcode1,
1853                               const SmallBitVector &OpcodeMask) const = 0;
1854  virtual bool enableOrderedReductions() = 0;
1855  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1856  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1857  virtual bool prefersVectorizedAddressing() = 0;
1858  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1859                                               int64_t BaseOffset,
1860                                               bool HasBaseReg, int64_t Scale,
1861                                               unsigned AddrSpace) = 0;
1862  virtual bool LSRWithInstrQueries() = 0;
1863  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1864  virtual bool isProfitableToHoist(Instruction *I) = 0;
1865  virtual bool useAA() = 0;
1866  virtual bool isTypeLegal(Type *Ty) = 0;
1867  virtual unsigned getRegUsageForType(Type *Ty) = 0;
1868  virtual bool shouldBuildLookupTables() = 0;
1869  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1870  virtual bool shouldBuildRelLookupTables() = 0;
1871  virtual bool useColdCCForColdCall(Function &F) = 0;
1872  virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
1873                                                   const APInt &DemandedElts,
1874                                                   bool Insert, bool Extract,
1875                                                   TargetCostKind CostKind) = 0;
1876  virtual InstructionCost
1877  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1878                                   ArrayRef<Type *> Tys,
1879                                   TargetCostKind CostKind) = 0;
1880  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1881  virtual bool supportsTailCalls() = 0;
1882  virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1883  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1884  virtual MemCmpExpansionOptions
1885  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1886  virtual bool enableSelectOptimize() = 0;
1887  virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) = 0;
1888  virtual bool enableInterleavedAccessVectorization() = 0;
1889  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1890  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1891  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1892                                              unsigned BitWidth,
1893                                              unsigned AddressSpace,
1894                                              Align Alignment,
1895                                              unsigned *Fast) = 0;
1896  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1897  virtual bool haveFastSqrt(Type *Ty) = 0;
1898  virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
1899  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1900  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1901  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1902                                                const APInt &Imm, Type *Ty) = 0;
1903  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1904                                        TargetCostKind CostKind) = 0;
1905  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1906                                            const APInt &Imm, Type *Ty,
1907                                            TargetCostKind CostKind,
1908                                            Instruction *Inst = nullptr) = 0;
1909  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1910                                              const APInt &Imm, Type *Ty,
1911                                              TargetCostKind CostKind) = 0;
1912  virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
1913                                             const Function &Fn) const = 0;
1914  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1915  virtual unsigned getRegisterClassForType(bool Vector,
1916                                           Type *Ty = nullptr) const = 0;
1917  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1918  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1919  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1920  virtual std::optional<unsigned> getMaxVScale() const = 0;
1921  virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1922  virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1923  virtual bool
1924  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
1925  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1926                                    bool IsScalable) const = 0;
1927  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1928  virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1929                                     Type *ScalarValTy) const = 0;
1930  virtual bool shouldConsiderAddressTypePromotion(
1931      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1932  virtual unsigned getCacheLineSize() const = 0;
1933  virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1934  virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1935      const = 0;
1936  virtual std::optional<unsigned> getMinPageSize() const = 0;
1937
1938  /// \return How much before a load we should place the prefetch
1939  /// instruction.  This is currently measured in number of
1940  /// instructions.
1941  virtual unsigned getPrefetchDistance() const = 0;
1942
1943  /// \return Some HW prefetchers can handle accesses up to a certain
1944  /// constant stride.  This is the minimum stride in bytes where it
1945  /// makes sense to start adding SW prefetches.  The default is 1,
1946  /// i.e. prefetch with any stride.  Sometimes prefetching is beneficial
1947  /// even below the HW prefetcher limit, and the arguments provided are
1948  /// meant to serve as a basis for deciding this for a particular loop.
1949  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1950                                        unsigned NumStridedMemAccesses,
1951                                        unsigned NumPrefetches,
1952                                        bool HasCall) const = 0;
1953
1954  /// \return The maximum number of iterations to prefetch ahead.  If
1955  /// the required number of iterations is more than this number, no
1956  /// prefetching is performed.
1957  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1958
1959  /// \return True if prefetching should also be done for writes.
1960  virtual bool enableWritePrefetching() const = 0;
1961
1962  /// \return if target want to issue a prefetch in address space \p AS.
1963  virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1964
1965  virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
1966  virtual InstructionCost getArithmeticInstrCost(
1967      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1968      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
1969      ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1970  virtual InstructionCost getAltInstrCost(
1971      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1972      const SmallBitVector &OpcodeMask,
1973      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;
1974
1975  virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1976                                         ArrayRef<int> Mask,
1977                                         TTI::TargetCostKind CostKind,
1978                                         int Index, VectorType *SubTp,
1979                                         ArrayRef<const Value *> Args) = 0;
1980  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1981                                           Type *Src, CastContextHint CCH,
1982                                           TTI::TargetCostKind CostKind,
1983                                           const Instruction *I) = 0;
1984  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1985                                                   VectorType *VecTy,
1986                                                   unsigned Index) = 0;
1987  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1988                                         TTI::TargetCostKind CostKind,
1989                                         const Instruction *I = nullptr) = 0;
1990  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1991                                             Type *CondTy,
1992                                             CmpInst::Predicate VecPred,
1993                                             TTI::TargetCostKind CostKind,
1994                                             const Instruction *I) = 0;
1995  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1996                                             TTI::TargetCostKind CostKind,
1997                                             unsigned Index, Value *Op0,
1998                                             Value *Op1) = 0;
1999  virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2000                                             TTI::TargetCostKind CostKind,
2001                                             unsigned Index) = 0;
2002
2003  virtual InstructionCost
2004  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2005                            const APInt &DemandedDstElts,
2006                            TTI::TargetCostKind CostKind) = 0;
2007
2008  virtual InstructionCost
2009  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2010                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
2011                  OperandValueInfo OpInfo, const Instruction *I) = 0;
2012  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2013                                            Align Alignment,
2014                                            unsigned AddressSpace,
2015                                            TTI::TargetCostKind CostKind,
2016                                            const Instruction *I) = 0;
2017  virtual InstructionCost
2018  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2019                        unsigned AddressSpace,
2020                        TTI::TargetCostKind CostKind) = 0;
2021  virtual InstructionCost
2022  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2023                         bool VariableMask, Align Alignment,
2024                         TTI::TargetCostKind CostKind,
2025                         const Instruction *I = nullptr) = 0;
2026
2027  virtual InstructionCost getInterleavedMemoryOpCost(
2028      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2029      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2030      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2031  virtual InstructionCost
2032  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2033                             std::optional<FastMathFlags> FMF,
2034                             TTI::TargetCostKind CostKind) = 0;
2035  virtual InstructionCost
2036  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2037                         TTI::TargetCostKind CostKind) = 0;
2038  virtual InstructionCost getExtendedReductionCost(
2039      unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2040      FastMathFlags FMF,
2041      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
2042  virtual InstructionCost getMulAccReductionCost(
2043      bool IsUnsigned, Type *ResTy, VectorType *Ty,
2044      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
2045  virtual InstructionCost
2046  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2047                        TTI::TargetCostKind CostKind) = 0;
2048  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2049                                           ArrayRef<Type *> Tys,
2050                                           TTI::TargetCostKind CostKind) = 0;
2051  virtual unsigned getNumberOfParts(Type *Tp) = 0;
2052  virtual InstructionCost
2053  getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
2054  virtual InstructionCost
2055  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
2056  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2057                                  MemIntrinsicInfo &Info) = 0;
2058  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2059  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2060                                                   Type *ExpectedType) = 0;
2061  virtual Type *getMemcpyLoopLoweringType(
2062      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2063      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2064      std::optional<uint32_t> AtomicElementSize) const = 0;
2065
2066  virtual void getMemcpyLoopResidualLoweringType(
2067      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2068      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2069      unsigned SrcAlign, unsigned DestAlign,
2070      std::optional<uint32_t> AtomicCpySize) const = 0;
2071  virtual bool areInlineCompatible(const Function *Caller,
2072                                   const Function *Callee) const = 0;
2073  virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2074                                        unsigned DefaultCallPenalty) const = 0;
2075  virtual bool areTypesABICompatible(const Function *Caller,
2076                                     const Function *Callee,
2077                                     const ArrayRef<Type *> &Types) const = 0;
2078  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2079  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2080  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2081  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2082  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2083  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2084                                           Align Alignment,
2085                                           unsigned AddrSpace) const = 0;
2086  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2087                                            Align Alignment,
2088                                            unsigned AddrSpace) const = 0;
2089  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2090                                           ElementCount VF) const = 0;
2091  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2092  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2093                                       unsigned ChainSizeInBytes,
2094                                       VectorType *VecTy) const = 0;
2095  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2096                                        unsigned ChainSizeInBytes,
2097                                        VectorType *VecTy) const = 0;
2098  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2099                                     ReductionFlags) const = 0;
2100  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2101                                               ReductionFlags) const = 0;
2102  virtual bool preferEpilogueVectorization() const = 0;
2103
2104  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2105  virtual unsigned getGISelRematGlobalCost() const = 0;
2106  virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2107  virtual bool enableScalableVectorization() const = 0;
2108  virtual bool supportsScalableVectors() const = 0;
2109  virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2110                                     Align Alignment) const = 0;
2111  virtual VPLegalization
2112  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
2113  virtual bool hasArmWideBranch(bool Thumb) const = 0;
2114  virtual unsigned getMaxNumArgs() const = 0;
2115};
2116
2117template <typename T>
2118class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2119  T Impl;
2120
2121public:
2122  Model(T Impl) : Impl(std::move(Impl)) {}
2123  ~Model() override = default;
2124
2125  const DataLayout &getDataLayout() const override {
2126    return Impl.getDataLayout();
2127  }
2128
2129  InstructionCost
2130  getGEPCost(Type *PointeeType, const Value *Ptr,
2131             ArrayRef<const Value *> Operands, Type *AccessType,
2132             TargetTransformInfo::TargetCostKind CostKind) override {
2133    return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2134  }
2135  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2136                                       const Value *Base,
2137                                       const PointersChainInfo &Info,
2138                                       Type *AccessTy,
2139                                       TargetCostKind CostKind) override {
2140    return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2141  }
2142  unsigned getInliningThresholdMultiplier() const override {
2143    return Impl.getInliningThresholdMultiplier();
2144  }
2145  unsigned adjustInliningThreshold(const CallBase *CB) override {
2146    return Impl.adjustInliningThreshold(CB);
2147  }
2148  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2149    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2150  }
2151  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2152    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2153  }
2154  int getInlinerVectorBonusPercent() const override {
2155    return Impl.getInlinerVectorBonusPercent();
2156  }
2157  unsigned getCallerAllocaCost(const CallBase *CB,
2158                               const AllocaInst *AI) const override {
2159    return Impl.getCallerAllocaCost(CB, AI);
2160  }
2161  InstructionCost getMemcpyCost(const Instruction *I) override {
2162    return Impl.getMemcpyCost(I);
2163  }
2164
2165  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2166    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2167  }
2168
2169  InstructionCost getInstructionCost(const User *U,
2170                                     ArrayRef<const Value *> Operands,
2171                                     TargetCostKind CostKind) override {
2172    return Impl.getInstructionCost(U, Operands, CostKind);
2173  }
2174  BranchProbability getPredictableBranchThreshold() override {
2175    return Impl.getPredictableBranchThreshold();
2176  }
2177  bool hasBranchDivergence(const Function *F = nullptr) override {
2178    return Impl.hasBranchDivergence(F);
2179  }
2180  bool isSourceOfDivergence(const Value *V) override {
2181    return Impl.isSourceOfDivergence(V);
2182  }
2183
2184  bool isAlwaysUniform(const Value *V) override {
2185    return Impl.isAlwaysUniform(V);
2186  }
2187
2188  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2189    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2190  }
2191
2192  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2193    return Impl.addrspacesMayAlias(AS0, AS1);
2194  }
2195
2196  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2197
2198  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2199                                  Intrinsic::ID IID) const override {
2200    return Impl.collectFlatAddressOperands(OpIndexes, IID);
2201  }
2202
2203  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2204    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2205  }
2206
2207  bool
2208  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2209    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2210  }
2211
2212  unsigned getAssumedAddrSpace(const Value *V) const override {
2213    return Impl.getAssumedAddrSpace(V);
2214  }
2215
2216  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2217
2218  std::pair<const Value *, unsigned>
2219  getPredicatedAddrSpace(const Value *V) const override {
2220    return Impl.getPredicatedAddrSpace(V);
2221  }
2222
2223  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2224                                          Value *NewV) const override {
2225    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2226  }
2227
2228  bool isLoweredToCall(const Function *F) override {
2229    return Impl.isLoweredToCall(F);
2230  }
2231  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2232                               UnrollingPreferences &UP,
2233                               OptimizationRemarkEmitter *ORE) override {
2234    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2235  }
2236  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2237                             PeelingPreferences &PP) override {
2238    return Impl.getPeelingPreferences(L, SE, PP);
2239  }
2240  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2241                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2242                                HardwareLoopInfo &HWLoopInfo) override {
2243    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2244  }
2245  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2246    return Impl.preferPredicateOverEpilogue(TFI);
2247  }
2248  TailFoldingStyle
2249  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2250    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2251  }
2252  std::optional<Instruction *>
2253  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2254    return Impl.instCombineIntrinsic(IC, II);
2255  }
2256  std::optional<Value *>
2257  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2258                                   APInt DemandedMask, KnownBits &Known,
2259                                   bool &KnownBitsComputed) override {
2260    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2261                                                 KnownBitsComputed);
2262  }
2263  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2264      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2265      APInt &UndefElts2, APInt &UndefElts3,
2266      std::function<void(Instruction *, unsigned, APInt, APInt &)>
2267          SimplifyAndSetOp) override {
2268    return Impl.simplifyDemandedVectorEltsIntrinsic(
2269        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2270        SimplifyAndSetOp);
2271  }
2272  bool isLegalAddImmediate(int64_t Imm) override {
2273    return Impl.isLegalAddImmediate(Imm);
2274  }
2275  bool isLegalICmpImmediate(int64_t Imm) override {
2276    return Impl.isLegalICmpImmediate(Imm);
2277  }
2278  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2279                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2280                             Instruction *I) override {
2281    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2282                                      AddrSpace, I);
2283  }
2284  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2285                     const TargetTransformInfo::LSRCost &C2) override {
2286    return Impl.isLSRCostLess(C1, C2);
2287  }
2288  bool isNumRegsMajorCostOfLSR() override {
2289    return Impl.isNumRegsMajorCostOfLSR();
2290  }
2291  bool shouldFoldTerminatingConditionAfterLSR() const override {
2292    return Impl.shouldFoldTerminatingConditionAfterLSR();
2293  }
2294  bool isProfitableLSRChainElement(Instruction *I) override {
2295    return Impl.isProfitableLSRChainElement(I);
2296  }
2297  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2298  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2299                  DominatorTree *DT, AssumptionCache *AC,
2300                  TargetLibraryInfo *LibInfo) override {
2301    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2302  }
2303  AddressingModeKind
2304    getPreferredAddressingMode(const Loop *L,
2305                               ScalarEvolution *SE) const override {
2306    return Impl.getPreferredAddressingMode(L, SE);
2307  }
2308  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2309    return Impl.isLegalMaskedStore(DataType, Alignment);
2310  }
2311  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2312    return Impl.isLegalMaskedLoad(DataType, Alignment);
2313  }
2314  bool isLegalNTStore(Type *DataType, Align Alignment) override {
2315    return Impl.isLegalNTStore(DataType, Alignment);
2316  }
2317  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2318    return Impl.isLegalNTLoad(DataType, Alignment);
2319  }
2320  bool isLegalBroadcastLoad(Type *ElementTy,
2321                            ElementCount NumElements) const override {
2322    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2323  }
2324  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2325    return Impl.isLegalMaskedScatter(DataType, Alignment);
2326  }
2327  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2328    return Impl.isLegalMaskedGather(DataType, Alignment);
2329  }
2330  bool forceScalarizeMaskedGather(VectorType *DataType,
2331                                  Align Alignment) override {
2332    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2333  }
2334  bool forceScalarizeMaskedScatter(VectorType *DataType,
2335                                   Align Alignment) override {
2336    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2337  }
2338  bool isLegalMaskedCompressStore(Type *DataType) override {
2339    return Impl.isLegalMaskedCompressStore(DataType);
2340  }
2341  bool isLegalMaskedExpandLoad(Type *DataType) override {
2342    return Impl.isLegalMaskedExpandLoad(DataType);
2343  }
2344  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2345                       const SmallBitVector &OpcodeMask) const override {
2346    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2347  }
2348  bool enableOrderedReductions() override {
2349    return Impl.enableOrderedReductions();
2350  }
2351  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2352    return Impl.hasDivRemOp(DataType, IsSigned);
2353  }
2354  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2355    return Impl.hasVolatileVariant(I, AddrSpace);
2356  }
2357  bool prefersVectorizedAddressing() override {
2358    return Impl.prefersVectorizedAddressing();
2359  }
2360  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2361                                       int64_t BaseOffset, bool HasBaseReg,
2362                                       int64_t Scale,
2363                                       unsigned AddrSpace) override {
2364    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2365                                     AddrSpace);
2366  }
2367  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2368  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2369    return Impl.isTruncateFree(Ty1, Ty2);
2370  }
2371  bool isProfitableToHoist(Instruction *I) override {
2372    return Impl.isProfitableToHoist(I);
2373  }
2374  bool useAA() override { return Impl.useAA(); }
2375  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2376  unsigned getRegUsageForType(Type *Ty) override {
2377    return Impl.getRegUsageForType(Ty);
2378  }
2379  bool shouldBuildLookupTables() override {
2380    return Impl.shouldBuildLookupTables();
2381  }
2382  bool shouldBuildLookupTablesForConstant(Constant *C) override {
2383    return Impl.shouldBuildLookupTablesForConstant(C);
2384  }
2385  bool shouldBuildRelLookupTables() override {
2386    return Impl.shouldBuildRelLookupTables();
2387  }
2388  bool useColdCCForColdCall(Function &F) override {
2389    return Impl.useColdCCForColdCall(F);
2390  }
2391
2392  InstructionCost getScalarizationOverhead(VectorType *Ty,
2393                                           const APInt &DemandedElts,
2394                                           bool Insert, bool Extract,
2395                                           TargetCostKind CostKind) override {
2396    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2397                                         CostKind);
2398  }
2399  InstructionCost
2400  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2401                                   ArrayRef<Type *> Tys,
2402                                   TargetCostKind CostKind) override {
2403    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2404  }
2405
2406  bool supportsEfficientVectorElementLoadStore() override {
2407    return Impl.supportsEfficientVectorElementLoadStore();
2408  }
2409
2410  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2411  bool supportsTailCallFor(const CallBase *CB) override {
2412    return Impl.supportsTailCallFor(CB);
2413  }
2414
2415  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2416    return Impl.enableAggressiveInterleaving(LoopHasReductions);
2417  }
2418  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2419                                               bool IsZeroCmp) const override {
2420    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2421  }
2422  bool enableSelectOptimize() override {
2423    return Impl.enableSelectOptimize();
2424  }
2425  bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
2426    return Impl.shouldTreatInstructionLikeSelect(I);
2427  }
2428  bool enableInterleavedAccessVectorization() override {
2429    return Impl.enableInterleavedAccessVectorization();
2430  }
2431  bool enableMaskedInterleavedAccessVectorization() override {
2432    return Impl.enableMaskedInterleavedAccessVectorization();
2433  }
2434  bool isFPVectorizationPotentiallyUnsafe() override {
2435    return Impl.isFPVectorizationPotentiallyUnsafe();
2436  }
2437  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2438                                      unsigned AddressSpace, Align Alignment,
2439                                      unsigned *Fast) override {
2440    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2441                                               Alignment, Fast);
2442  }
2443  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2444    return Impl.getPopcntSupport(IntTyWidthInBit);
2445  }
2446  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2447
2448  bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2449    return Impl.isExpensiveToSpeculativelyExecute(I);
2450  }
2451
2452  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2453    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2454  }
2455
2456  InstructionCost getFPOpCost(Type *Ty) override {
2457    return Impl.getFPOpCost(Ty);
2458  }
2459
2460  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2461                                        const APInt &Imm, Type *Ty) override {
2462    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2463  }
2464  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2465                                TargetCostKind CostKind) override {
2466    return Impl.getIntImmCost(Imm, Ty, CostKind);
2467  }
2468  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2469                                    const APInt &Imm, Type *Ty,
2470                                    TargetCostKind CostKind,
2471                                    Instruction *Inst = nullptr) override {
2472    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2473  }
2474  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2475                                      const APInt &Imm, Type *Ty,
2476                                      TargetCostKind CostKind) override {
2477    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2478  }
2479  bool preferToKeepConstantsAttached(const Instruction &Inst,
2480                                     const Function &Fn) const override {
2481    return Impl.preferToKeepConstantsAttached(Inst, Fn);
2482  }
2483  unsigned getNumberOfRegisters(unsigned ClassID) const override {
2484    return Impl.getNumberOfRegisters(ClassID);
2485  }
2486  unsigned getRegisterClassForType(bool Vector,
2487                                   Type *Ty = nullptr) const override {
2488    return Impl.getRegisterClassForType(Vector, Ty);
2489  }
2490  const char *getRegisterClassName(unsigned ClassID) const override {
2491    return Impl.getRegisterClassName(ClassID);
2492  }
2493  TypeSize getRegisterBitWidth(RegisterKind K) const override {
2494    return Impl.getRegisterBitWidth(K);
2495  }
2496  unsigned getMinVectorRegisterBitWidth() const override {
2497    return Impl.getMinVectorRegisterBitWidth();
2498  }
2499  std::optional<unsigned> getMaxVScale() const override {
2500    return Impl.getMaxVScale();
2501  }
2502  std::optional<unsigned> getVScaleForTuning() const override {
2503    return Impl.getVScaleForTuning();
2504  }
2505  bool isVScaleKnownToBeAPowerOfTwo() const override {
2506    return Impl.isVScaleKnownToBeAPowerOfTwo();
2507  }
2508  bool shouldMaximizeVectorBandwidth(
2509      TargetTransformInfo::RegisterKind K) const override {
2510    return Impl.shouldMaximizeVectorBandwidth(K);
2511  }
2512  ElementCount getMinimumVF(unsigned ElemWidth,
2513                            bool IsScalable) const override {
2514    return Impl.getMinimumVF(ElemWidth, IsScalable);
2515  }
2516  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2517    return Impl.getMaximumVF(ElemWidth, Opcode);
2518  }
2519  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2520                             Type *ScalarValTy) const override {
2521    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2522  }
2523  bool shouldConsiderAddressTypePromotion(
2524      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2525    return Impl.shouldConsiderAddressTypePromotion(
2526        I, AllowPromotionWithoutCommonHeader);
2527  }
2528  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2529  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2530    return Impl.getCacheSize(Level);
2531  }
2532  std::optional<unsigned>
2533  getCacheAssociativity(CacheLevel Level) const override {
2534    return Impl.getCacheAssociativity(Level);
2535  }
2536
2537  std::optional<unsigned> getMinPageSize() const override {
2538    return Impl.getMinPageSize();
2539  }
2540
2541  /// Return the preferred prefetch distance in terms of instructions.
2542  ///
2543  unsigned getPrefetchDistance() const override {
2544    return Impl.getPrefetchDistance();
2545  }
2546
2547  /// Return the minimum stride necessary to trigger software
2548  /// prefetching.
2549  ///
2550  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2551                                unsigned NumStridedMemAccesses,
2552                                unsigned NumPrefetches,
2553                                bool HasCall) const override {
2554    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2555                                     NumPrefetches, HasCall);
2556  }
2557
2558  /// Return the maximum prefetch distance in terms of loop
2559  /// iterations.
2560  ///
2561  unsigned getMaxPrefetchIterationsAhead() const override {
2562    return Impl.getMaxPrefetchIterationsAhead();
2563  }
2564
2565  /// \return True if prefetching should also be done for writes.
2566  bool enableWritePrefetching() const override {
2567    return Impl.enableWritePrefetching();
2568  }
2569
2570  /// \return if target want to issue a prefetch in address space \p AS.
2571  bool shouldPrefetchAddressSpace(unsigned AS) const override {
2572    return Impl.shouldPrefetchAddressSpace(AS);
2573  }
2574
2575  unsigned getMaxInterleaveFactor(ElementCount VF) override {
2576    return Impl.getMaxInterleaveFactor(VF);
2577  }
2578  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2579                                            unsigned &JTSize,
2580                                            ProfileSummaryInfo *PSI,
2581                                            BlockFrequencyInfo *BFI) override {
2582    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2583  }
2584  InstructionCost getArithmeticInstrCost(
2585      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2586      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2587      ArrayRef<const Value *> Args,
2588      const Instruction *CxtI = nullptr) override {
2589    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2590                                       Args, CxtI);
2591  }
2592  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2593                                  unsigned Opcode1,
2594                                  const SmallBitVector &OpcodeMask,
2595                                  TTI::TargetCostKind CostKind) const override {
2596    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2597  }
2598
2599  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2600                                 ArrayRef<int> Mask,
2601                                 TTI::TargetCostKind CostKind, int Index,
2602                                 VectorType *SubTp,
2603                                 ArrayRef<const Value *> Args) override {
2604    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2605  }
2606  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2607                                   CastContextHint CCH,
2608                                   TTI::TargetCostKind CostKind,
2609                                   const Instruction *I) override {
2610    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2611  }
2612  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2613                                           VectorType *VecTy,
2614                                           unsigned Index) override {
2615    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2616  }
2617  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2618                                 const Instruction *I = nullptr) override {
2619    return Impl.getCFInstrCost(Opcode, CostKind, I);
2620  }
2621  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2622                                     CmpInst::Predicate VecPred,
2623                                     TTI::TargetCostKind CostKind,
2624                                     const Instruction *I) override {
2625    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2626  }
2627  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2628                                     TTI::TargetCostKind CostKind,
2629                                     unsigned Index, Value *Op0,
2630                                     Value *Op1) override {
2631    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2632  }
2633  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2634                                     TTI::TargetCostKind CostKind,
2635                                     unsigned Index) override {
2636    return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2637  }
2638  InstructionCost
2639  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2640                            const APInt &DemandedDstElts,
2641                            TTI::TargetCostKind CostKind) override {
2642    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2643                                          DemandedDstElts, CostKind);
2644  }
2645  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2646                                  unsigned AddressSpace,
2647                                  TTI::TargetCostKind CostKind,
2648                                  OperandValueInfo OpInfo,
2649                                  const Instruction *I) override {
2650    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2651                                OpInfo, I);
2652  }
2653  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2654                                    unsigned AddressSpace,
2655                                    TTI::TargetCostKind CostKind,
2656                                    const Instruction *I) override {
2657    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2658                                  CostKind, I);
2659  }
2660  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2661                                        Align Alignment, unsigned AddressSpace,
2662                                        TTI::TargetCostKind CostKind) override {
2663    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2664                                      CostKind);
2665  }
2666  InstructionCost
2667  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2668                         bool VariableMask, Align Alignment,
2669                         TTI::TargetCostKind CostKind,
2670                         const Instruction *I = nullptr) override {
2671    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2672                                       Alignment, CostKind, I);
2673  }
2674  InstructionCost getInterleavedMemoryOpCost(
2675      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2676      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2677      bool UseMaskForCond, bool UseMaskForGaps) override {
2678    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2679                                           Alignment, AddressSpace, CostKind,
2680                                           UseMaskForCond, UseMaskForGaps);
2681  }
2682  InstructionCost
2683  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2684                             std::optional<FastMathFlags> FMF,
2685                             TTI::TargetCostKind CostKind) override {
2686    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2687  }
2688  InstructionCost
2689  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2690                         TTI::TargetCostKind CostKind) override {
2691    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
2692  }
2693  InstructionCost
2694  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2695                           VectorType *Ty, FastMathFlags FMF,
2696                           TTI::TargetCostKind CostKind) override {
2697    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2698                                         CostKind);
2699  }
2700  InstructionCost
2701  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2702                         TTI::TargetCostKind CostKind) override {
2703    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2704  }
2705  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2706                                        TTI::TargetCostKind CostKind) override {
2707    return Impl.getIntrinsicInstrCost(ICA, CostKind);
2708  }
2709  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2710                                   ArrayRef<Type *> Tys,
2711                                   TTI::TargetCostKind CostKind) override {
2712    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2713  }
2714  unsigned getNumberOfParts(Type *Tp) override {
2715    return Impl.getNumberOfParts(Tp);
2716  }
2717  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2718                                            const SCEV *Ptr) override {
2719    return Impl.getAddressComputationCost(Ty, SE, Ptr);
2720  }
2721  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2722    return Impl.getCostOfKeepingLiveOverCall(Tys);
2723  }
2724  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2725                          MemIntrinsicInfo &Info) override {
2726    return Impl.getTgtMemIntrinsic(Inst, Info);
2727  }
2728  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2729    return Impl.getAtomicMemIntrinsicMaxElementSize();
2730  }
2731  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2732                                           Type *ExpectedType) override {
2733    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2734  }
2735  Type *getMemcpyLoopLoweringType(
2736      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2737      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2738      std::optional<uint32_t> AtomicElementSize) const override {
2739    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2740                                          DestAddrSpace, SrcAlign, DestAlign,
2741                                          AtomicElementSize);
2742  }
2743  void getMemcpyLoopResidualLoweringType(
2744      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2745      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2746      unsigned SrcAlign, unsigned DestAlign,
2747      std::optional<uint32_t> AtomicCpySize) const override {
2748    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2749                                           SrcAddrSpace, DestAddrSpace,
2750                                           SrcAlign, DestAlign, AtomicCpySize);
2751  }
2752  bool areInlineCompatible(const Function *Caller,
2753                           const Function *Callee) const override {
2754    return Impl.areInlineCompatible(Caller, Callee);
2755  }
2756  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2757                                unsigned DefaultCallPenalty) const override {
2758    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
2759  }
2760  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2761                             const ArrayRef<Type *> &Types) const override {
2762    return Impl.areTypesABICompatible(Caller, Callee, Types);
2763  }
2764  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2765    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2766  }
2767  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2768    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2769  }
2770  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2771    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2772  }
2773  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2774    return Impl.isLegalToVectorizeLoad(LI);
2775  }
2776  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2777    return Impl.isLegalToVectorizeStore(SI);
2778  }
2779  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2780                                   unsigned AddrSpace) const override {
2781    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2782                                            AddrSpace);
2783  }
2784  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2785                                    unsigned AddrSpace) const override {
2786    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2787                                             AddrSpace);
2788  }
2789  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2790                                   ElementCount VF) const override {
2791    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2792  }
2793  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2794    return Impl.isElementTypeLegalForScalableVector(Ty);
2795  }
2796  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2797                               unsigned ChainSizeInBytes,
2798                               VectorType *VecTy) const override {
2799    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2800  }
2801  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2802                                unsigned ChainSizeInBytes,
2803                                VectorType *VecTy) const override {
2804    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2805  }
2806  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2807                             ReductionFlags Flags) const override {
2808    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2809  }
2810  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2811                                       ReductionFlags Flags) const override {
2812    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2813  }
2814  bool preferEpilogueVectorization() const override {
2815    return Impl.preferEpilogueVectorization();
2816  }
2817
2818  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2819    return Impl.shouldExpandReduction(II);
2820  }
2821
2822  unsigned getGISelRematGlobalCost() const override {
2823    return Impl.getGISelRematGlobalCost();
2824  }
2825
2826  unsigned getMinTripCountTailFoldingThreshold() const override {
2827    return Impl.getMinTripCountTailFoldingThreshold();
2828  }
2829
2830  bool supportsScalableVectors() const override {
2831    return Impl.supportsScalableVectors();
2832  }
2833
2834  bool enableScalableVectorization() const override {
2835    return Impl.enableScalableVectorization();
2836  }
2837
2838  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2839                             Align Alignment) const override {
2840    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2841  }
2842
2843  VPLegalization
2844  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2845    return Impl.getVPLegalizationStrategy(PI);
2846  }
2847
2848  bool hasArmWideBranch(bool Thumb) const override {
2849    return Impl.hasArmWideBranch(Thumb);
2850  }
2851
2852  unsigned getMaxNumArgs() const override {
2853    return Impl.getMaxNumArgs();
2854  }
2855};
2856
2857template <typename T>
2858TargetTransformInfo::TargetTransformInfo(T Impl)
2859    : TTIImpl(new Model<T>(Impl)) {}
2860
2861/// Analysis pass providing the \c TargetTransformInfo.
2862///
2863/// The core idea of the TargetIRAnalysis is to expose an interface through
2864/// which LLVM targets can analyze and provide information about the middle
2865/// end's target-independent IR. This supports use cases such as target-aware
2866/// cost modeling of IR constructs.
2867///
2868/// This is a function analysis because much of the cost modeling for targets
2869/// is done in a subtarget specific way and LLVM supports compiling different
2870/// functions targeting different subtargets in order to support runtime
2871/// dispatch according to the observed subtarget.
2872class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2873public:
2874  typedef TargetTransformInfo Result;
2875
2876  /// Default construct a target IR analysis.
2877  ///
2878  /// This will use the module's datalayout to construct a baseline
2879  /// conservative TTI result.
2880  TargetIRAnalysis();
2881
2882  /// Construct an IR analysis pass around a target-provide callback.
2883  ///
2884  /// The callback will be called with a particular function for which the TTI
2885  /// is needed and must return a TTI object for that function.
2886  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2887
2888  // Value semantics. We spell out the constructors for MSVC.
2889  TargetIRAnalysis(const TargetIRAnalysis &Arg)
2890      : TTICallback(Arg.TTICallback) {}
2891  TargetIRAnalysis(TargetIRAnalysis &&Arg)
2892      : TTICallback(std::move(Arg.TTICallback)) {}
2893  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2894    TTICallback = RHS.TTICallback;
2895    return *this;
2896  }
2897  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2898    TTICallback = std::move(RHS.TTICallback);
2899    return *this;
2900  }
2901
2902  Result run(const Function &F, FunctionAnalysisManager &);
2903
2904private:
2905  friend AnalysisInfoMixin<TargetIRAnalysis>;
2906  static AnalysisKey Key;
2907
2908  /// The callback used to produce a result.
2909  ///
2910  /// We use a completely opaque callback so that targets can provide whatever
2911  /// mechanism they desire for constructing the TTI for a given function.
2912  ///
2913  /// FIXME: Should we really use std::function? It's relatively inefficient.
2914  /// It might be possible to arrange for even stateful callbacks to outlive
2915  /// the analysis and thus use a function_ref which would be lighter weight.
2916  /// This may also be less error prone as the callback is likely to reference
2917  /// the external TargetMachine, and that reference needs to never dangle.
2918  std::function<Result(const Function &)> TTICallback;
2919
2920  /// Helper function used as the callback in the default constructor.
2921  static Result getDefaultTTI(const Function &F);
2922};
2923
2924/// Wrapper pass for TargetTransformInfo.
2925///
2926/// This pass can be constructed from a TTI object which it stores internally
2927/// and is queried by passes.
2928class TargetTransformInfoWrapperPass : public ImmutablePass {
2929  TargetIRAnalysis TIRA;
2930  std::optional<TargetTransformInfo> TTI;
2931
2932  virtual void anchor();
2933
2934public:
2935  static char ID;
2936
2937  /// We must provide a default constructor for the pass but it should
2938  /// never be used.
2939  ///
2940  /// Use the constructor below or call one of the creation routines.
2941  TargetTransformInfoWrapperPass();
2942
2943  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2944
2945  TargetTransformInfo &getTTI(const Function &F);
2946};
2947
2948/// Create an analysis pass wrapper around a TTI object.
2949///
2950/// This analysis pass just holds the TTI instance and makes it available to
2951/// clients.
2952ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2953
2954} // namespace llvm
2955
2956#endif
2957