//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

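// Post-legalizer combiner implementation. tryCombineAllImpl() runs the
// TableGen-erated rule set from AMDGPUGenPostLegalizeGICombiner.inc; the
// match/apply methods declared below implement the hand-written combines.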
class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and sign extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
  // Replace the s_mul_u64 instruction with G_AMDGPU_S_MUL_I64_I32 if the
  // higher 33 bits are sign-extended and with G_AMDGPU_S_MUL_U64_U32 if the
  // higher 32 bits are zero-extended.
  void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

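  // Only the ordering predicates (olt/ole/ogt/oge and their unordered
  // counterparts) map onto fmin_legacy/fmax_legacy; the equality and
  // ordered/unordered tests rejected below do not.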
  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);
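  // Replace the select with a two-operand legacy min/max, reusing the
  // select's result register and fast-math flags.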
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
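    // The conversion only reads the low 8 bits of the source, so the combine
    // is valid only when every higher bit is known to be zero.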
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
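  // G_AMDGPU_CVT_F32_UBYTE0 expects a 32-bit source; since only the low byte
  // is read, an anyext (or trunc) is sufficient.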
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

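// Fold rcp(sqrt(x)) and sqrt(rcp(x)) into a single amdgcn.rsq intrinsic. Both
// instructions must carry the contract fast-math flag for the fold to be
// allowed.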
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

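    // Taking byte N of (x >> S) is byte N + S/8 of x; for (x << S) it is byte
    // N - S/8. Fold the shift into the byte index as long as the result stays
    // byte-aligned and within the low 32 bits.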
    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

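// G_FCANONICALIZE is a no-op when its input is already in canonical form, so
// in that case it can be replaced with its source register.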
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
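  // MI is the sign extension (G_SEXT_INREG); the fold is only exact when its
  // width immediate matches the memory width of the unsigned load.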
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

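  // If the upper 32 bits of both operands are known zero, the 64-bit product
  // can be computed as an unsigned 32 x 32 -> 64 multiply; with at least 33
  // sign bits on both sides, as a signed one.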
  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Helper.replaceOpcodeWith(MI, NewOpcode);
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();

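  // Post-legalizer combines must not introduce illegal operations, so the
  // combiner is asked to legalize anything it creates.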
  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm