1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPUCombinerHelper.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
20#include "llvm/CodeGen/GlobalISel/Combiner.h"
21#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
25#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26#include "llvm/CodeGen/MachineDominators.h"
27#include "llvm/CodeGen/TargetPassConfig.h"
28#include "llvm/Target/TargetMachine.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
// Combiner implementation used by the AMDGPU pre-legalizer combiner pass.
//
// It combines the rule machinery pulled in from
// AMDGPUGenPreLegalizeGICombiner.inc with the hand-written match/apply pair
// for the i64 -> i16 clamp pattern declared below.
class AMDGPUPreLegalizerCombinerImpl : public Combiner {
protected:
  // Rule configuration; held by reference, so the object passed to the
  // constructor must outlive this combiner.
  const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
  // Subtarget handed to the constructor; also held by reference.
  const GCNSubtarget &STI;
  // TODO: Make CombinerHelper methods const.
  // Declared mutable so the const combine entry points below can still call
  // the helper's non-const methods.
  mutable AMDGPUCombinerHelper Helper;

public:
  // Stores the references above and constructs Helper; MDT and LI are
  // forwarded to the helper.
  AMDGPUPreLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }

  // Not defined in this file by hand; presumably provided by the
  // GET_GICOMBINER_IMPL section of the generated include.
  bool tryCombineAllImpl(MachineInstr &MI) const;
  // Tries tryCombineAllImpl first, then the G_CONCAT_VECTORS /
  // G_SHUFFLE_VECTOR helper combines. Returns true if MI was combined.
  bool tryCombineAll(MachineInstr &I) const override;

  // Data handed from matchClampI64ToI16 to applyClampI64ToI16.
  struct ClampI64ToI16MatchInfo {
    // Constant operand of the outer min/max (the one feeding the G_TRUNC).
    int64_t Cmp1 = 0;
    // Constant operand of the inner min/max.
    int64_t Cmp2 = 0;
    // Non-constant operand of the inner min/max, i.e. the value being clamped.
    Register Origin;
  };

  // Returns true if MI is an s64 -> s16 G_TRUNC of a G_SMIN / G_SMAX pair
  // with constant bounds, and fills in MatchInfo.
  bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
                          const MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo) const;

  // Replaces a matched clamp with the CVT_PK_I16_I32 + SMED3 sequence and
  // erases MI.
  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo) const;

private:
  // The generated class-member section refers to AMDGPUSubtarget; it is
  // mapped to GCNSubtarget only for the duration of the include.
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};
84
85#define GET_GICOMBINER_IMPL
86#define AMDGPUSubtarget GCNSubtarget
87#include "AMDGPUGenPreLegalizeGICombiner.inc"
88#undef AMDGPUSubtarget
89#undef GET_GICOMBINER_IMPL
90
// Constructs the combiner implementation.
//
// MF, CInfo, TPC, KB and CSEInfo are forwarded to the Combiner base class.
// RuleConfig and STI are stored by reference. Helper is created in
// pre-legalization mode and additionally receives KB, MDT and LI.
//
// The initializer list deliberately ends with a trailing comma: the remaining
// member initializers are supplied by the GET_GICOMBINER_CONSTRUCTOR_INITS
// section of the generated include that follows.
AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
103
104bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
105  if (tryCombineAllImpl(MI))
106    return true;
107
108  switch (MI.getOpcode()) {
109  case TargetOpcode::G_CONCAT_VECTORS:
110    return Helper.tryCombineConcatVectors(MI);
111  case TargetOpcode::G_SHUFFLE_VECTOR:
112    return Helper.tryCombineShuffleVector(MI);
113  }
114
115  return false;
116}
117
118bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
119    MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
120    ClampI64ToI16MatchInfo &MatchInfo) const {
121  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
122
123  // Try to find a pattern where an i64 value should get clamped to short.
124  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
125  if (SrcType != LLT::scalar(64))
126    return false;
127
128  const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
129  if (DstType != LLT::scalar(16))
130    return false;
131
132  Register Base;
133
134  auto IsApplicableForCombine = [&MatchInfo]() -> bool {
135    const auto Cmp1 = MatchInfo.Cmp1;
136    const auto Cmp2 = MatchInfo.Cmp2;
137    const auto Diff = std::abs(Cmp2 - Cmp1);
138
139    // If the difference between both comparison values is 0 or 1, there is no
140    // need to clamp.
141    if (Diff == 0 || Diff == 1)
142      return false;
143
144    const int64_t Min = std::numeric_limits<int16_t>::min();
145    const int64_t Max = std::numeric_limits<int16_t>::max();
146
147    // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
148    return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
149            (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
150  };
151
152  // Try to match a combination of min / max MIR opcodes.
153  if (mi_match(MI.getOperand(1).getReg(), MRI,
154               m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
155    if (mi_match(Base, MRI,
156                 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
157      return IsApplicableForCombine();
158    }
159  }
160
161  if (mi_match(MI.getOperand(1).getReg(), MRI,
162               m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
163    if (mi_match(Base, MRI,
164                 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
165      return IsApplicableForCombine();
166    }
167  }
168
169  return false;
170}
171
172// We want to find a combination of instructions that
173// gets generated when an i64 gets clamped to i16.
174// The corresponding pattern is:
175// G_MAX / G_MAX for i16 <= G_TRUNC i64.
176// This can be efficiently written as following:
177// v_cvt_pk_i16_i32 v0, v0, v1
178// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
179void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
180    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
181
182  Register Src = MatchInfo.Origin;
183  assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
184         LLT::scalar(64));
185  const LLT S32 = LLT::scalar(32);
186
187  B.setInstrAndDebugLoc(MI);
188
189  auto Unmerge = B.buildUnmerge(S32, Src);
190
191  assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
192
193  const LLT V2S16 = LLT::fixed_vector(2, 16);
194  auto CvtPk =
195      B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
196                   {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
197
198  auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
199  auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
200  auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
201  auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
202
203  auto Bitcast = B.buildBitcast({S32}, CvtPk);
204
205  auto Med3 = B.buildInstr(
206      AMDGPU::G_AMDGPU_SMED3, {S32},
207      {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
208      MI.getFlags());
209
210  B.buildTrunc(MI.getOperand(0).getReg(), Med3);
211
212  MI.eraseFromParent();
213}
214
215// Pass boilerplate
216// ================
217
218class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
219public:
220  static char ID;
221
222  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
223
224  StringRef getPassName() const override {
225    return "AMDGPUPreLegalizerCombiner";
226  }
227
228  bool runOnMachineFunction(MachineFunction &MF) override;
229
230  void getAnalysisUsage(AnalysisUsage &AU) const override;
231
232private:
233  bool IsOptNone;
234  AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
235};
236} // end anonymous namespace
237
238void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
239  AU.addRequired<TargetPassConfig>();
240  AU.setPreservesCFG();
241  getSelectionDAGFallbackAnalysisUsage(AU);
242  AU.addRequired<GISelKnownBitsAnalysis>();
243  AU.addPreserved<GISelKnownBitsAnalysis>();
244  if (!IsOptNone) {
245    AU.addRequired<MachineDominatorTree>();
246    AU.addPreserved<MachineDominatorTree>();
247  }
248
249  AU.addRequired<GISelCSEAnalysisWrapperPass>();
250  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
251  MachineFunctionPass::getAnalysisUsage(AU);
252}
253
254AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
255    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
256  initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
257
258  if (!RuleConfig.parseCommandLineOption())
259    report_fatal_error("Invalid rule identifier");
260}
261
262bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
263  if (MF.getProperties().hasProperty(
264          MachineFunctionProperties::Property::FailedISel))
265    return false;
266  auto *TPC = &getAnalysis<TargetPassConfig>();
267  const Function &F = MF.getFunction();
268  bool EnableOpt =
269      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
270  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
271
272  // Enable CSE.
273  GISelCSEAnalysisWrapper &Wrapper =
274      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
275  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
276
277  const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
278  MachineDominatorTree *MDT =
279      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
280  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
281                     nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
282  AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
283                                      STI, MDT, STI.getLegalizerInfo());
284  return Impl.combineMachineInstrs();
285}
286
287char AMDGPUPreLegalizerCombiner::ID = 0;
288INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
289                      "Combine AMDGPU machine instrs before legalization",
290                      false, false)
291INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
292INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
293INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
294                    "Combine AMDGPU machine instrs before legalization", false,
295                    false)
296
297namespace llvm {
298FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
299  return new AMDGPUPreLegalizerCombiner(IsOptNone);
300}
301} // end namespace llvm
302