1//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the WebAssembly-specific TargetTransformInfo
11/// implementation.
12///
13//===----------------------------------------------------------------------===//
14
15#include "WebAssemblyTargetTransformInfo.h"
16#include "llvm/CodeGen/CostTable.h"
17#include "llvm/Support/Debug.h"
18using namespace llvm;
19
20#define DEBUG_TYPE "wasmtti"
21
22TargetTransformInfo::PopcntSupportKind
23WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
24  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
25  return TargetTransformInfo::PSK_FastHardware;
26}
27
28unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
29  unsigned Result = BaseT::getNumberOfRegisters(ClassID);
30
31  // For SIMD, use at least 16 registers, as a rough guess.
32  bool Vector = (ClassID == 1);
33  if (Vector)
34    Result = std::max(Result, 16u);
35
36  return Result;
37}
38
39TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
40    TargetTransformInfo::RegisterKind K) const {
41  switch (K) {
42  case TargetTransformInfo::RGK_Scalar:
43    return TypeSize::getFixed(64);
44  case TargetTransformInfo::RGK_FixedWidthVector:
45    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
46  case TargetTransformInfo::RGK_ScalableVector:
47    return TypeSize::getScalable(0);
48  }
49
50  llvm_unreachable("Unsupported register kind");
51}
52
/// Compute the cost of a vector or scalar arithmetic instruction, starting
/// from the generic BasicTTI cost and then correcting for WebAssembly's
/// SIMD128 shift limitation (scalar-only shift counts).
InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {

  // Baseline from the generic implementation; adjusted below if needed.
  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation: per element, one extract (TCC_Basic), the scalar shift
      // (recursive cost query on the element type), and one insert
      // (TCC_Basic). Only applies when the shift amount is non-uniform; a
      // uniform (splat) count can use the native vector shift.
      if (!Op2Info.isUniform())
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}
82
83InstructionCost
84WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
85                                       TTI::TargetCostKind CostKind,
86                                       unsigned Index, Value *Op0, Value *Op1) {
87  InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
88      Opcode, Val, CostKind, Index, Op0, Op1);
89
90  // SIMD128's insert/extract currently only take constant indices.
91  if (Index == -1u)
92    return Cost + 25 * TargetTransformInfo::TCC_Expensive;
93
94  return Cost;
95}
96
97bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
98                                             const Function *Callee) const {
99  // Allow inlining only when the Callee has a subset of the Caller's
100  // features. In principle, we should be able to inline regardless of any
101  // features because WebAssembly supports features at module granularity, not
102  // function granularity, but without this restriction it would be possible for
103  // a module to "forget" about features if all the functions that used them
104  // were inlined.
105  const TargetMachine &TM = getTLI()->getTargetMachine();
106
107  const FeatureBitset &CallerBits =
108      TM.getSubtargetImpl(*Caller)->getFeatureBits();
109  const FeatureBitset &CalleeBits =
110      TM.getSubtargetImpl(*Callee)->getFeatureBits();
111
112  return (CallerBits & CalleeBits) == CalleeBits;
113}
114
115void WebAssemblyTTIImpl::getUnrollingPreferences(
116    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
117    OptimizationRemarkEmitter *ORE) const {
118  // Scan the loop: don't unroll loops with calls. This is a standard approach
119  // for most (all?) targets.
120  for (BasicBlock *BB : L->blocks())
121    for (Instruction &I : *BB)
122      if (isa<CallInst>(I) || isa<InvokeInst>(I))
123        if (const Function *F = cast<CallBase>(I).getCalledFunction())
124          if (isLoweredToCall(F))
125            return;
126
127  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
128  // the various microarchitectures that use the BasicTTI implementation and
129  // has been selected through heuristics across multiple cores and runtimes.
130  UP.Partial = UP.Runtime = UP.UpperBound = true;
131  UP.PartialThreshold = 30;
132
133  // Avoid unrolling when optimizing for size.
134  UP.OptSizeThreshold = 0;
135  UP.PartialOptSizeThreshold = 0;
136
137  // Set number of instructions optimized when "back edge"
138  // becomes "fall through" to default value of 2.
139  UP.BEInsns = 2;
140}
141
142bool WebAssemblyTTIImpl::supportsTailCalls() const {
143  return getST()->hasTailCall();
144}
145