//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9#include "llvm/CodeGen/GlobalISel/Combiner.h"
10#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15#include "llvm/CodeGen/GlobalISel/Utils.h"
16#include "llvm/CodeGen/MachineDominators.h"
17#include "llvm/CodeGen/MachineFrameInfo.h"
18#include "llvm/CodeGen/MachineInstr.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetInstrInfo.h"
21#include "llvm/CodeGen/TargetLowering.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Target/TargetMachine.h"
24
25#define DEBUG_TYPE "gi-combiner"
26
27using namespace llvm;
28using namespace MIPatternMatch;
29
30// Option to allow testing of the combiner while no targets know about indexed
31// addressing.
32static cl::opt<bool>
33    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
34                       cl::desc("Force all indexed operations to be "
35                                "legal for the GlobalISel combiner"));
36
37
38CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
39                               MachineIRBuilder &B, GISelKnownBits *KB,
40                               MachineDominatorTree *MDT,
41                               const LegalizerInfo *LI)
42    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
43      KB(KB), MDT(MDT), LI(LI) {
44  (void)this->KB;
45}
46
47void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
48                                    Register ToReg) const {
49  Observer.changingAllUsesOfReg(MRI, FromReg);
50
51  if (MRI.constrainRegAttrs(ToReg, FromReg))
52    MRI.replaceRegWith(FromReg, ToReg);
53  else
54    Builder.buildCopy(ToReg, FromReg);
55
56  Observer.finishedChangingAllUsesOfReg();
57}
58
59void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
60                                      MachineOperand &FromRegOp,
61                                      Register ToReg) const {
62  assert(FromRegOp.getParent() && "Expected an operand in an MI");
63  Observer.changingInstr(*FromRegOp.getParent());
64
65  FromRegOp.setReg(ToReg);
66
67  Observer.changedInstr(*FromRegOp.getParent());
68}
69
70bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
71  if (matchCombineCopy(MI)) {
72    applyCombineCopy(MI);
73    return true;
74  }
75  return false;
76}
77bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
78  if (MI.getOpcode() != TargetOpcode::COPY)
79    return false;
80  Register DstReg = MI.getOperand(0).getReg();
81  Register SrcReg = MI.getOperand(1).getReg();
82  return canReplaceReg(DstReg, SrcReg, MRI);
83}
84void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
85  Register DstReg = MI.getOperand(0).getReg();
86  Register SrcReg = MI.getOperand(1).getReg();
87  MI.eraseFromParent();
88  replaceRegWith(MRI, DstReg, SrcReg);
89}
90
91bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
92  bool IsUndef = false;
93  SmallVector<Register, 4> Ops;
94  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
95    applyCombineConcatVectors(MI, IsUndef, Ops);
96    return true;
97  }
98  return false;
99}
100
101bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
102                                               SmallVectorImpl<Register> &Ops) {
103  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
104         "Invalid instruction");
105  IsUndef = true;
106  MachineInstr *Undef = nullptr;
107
108  // Walk over all the operands of concat vectors and check if they are
109  // build_vector themselves or undef.
110  // Then collect their operands in Ops.
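  // For illustration (the register names here are hypothetical), this lets
  //   %x:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
  //   %y:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %z:_(<4 x s32>) = G_CONCAT_VECTORS %x(<2 x s32>), %y(<2 x s32>)
  // be flattened into
  //   %u:_(s32) = G_IMPLICIT_DEF
  //   %z:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %u, %u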
111  for (const MachineOperand &MO : MI.uses()) {
112    Register Reg = MO.getReg();
113    MachineInstr *Def = MRI.getVRegDef(Reg);
114    assert(Def && "Operand not defined");
115    switch (Def->getOpcode()) {
116    case TargetOpcode::G_BUILD_VECTOR:
117      IsUndef = false;
118      // Remember the operands of the build_vector to fold
119      // them into the yet-to-build flattened concat vectors.
120      for (const MachineOperand &BuildVecMO : Def->uses())
121        Ops.push_back(BuildVecMO.getReg());
122      break;
123    case TargetOpcode::G_IMPLICIT_DEF: {
124      LLT OpType = MRI.getType(Reg);
125      // Keep one undef value for all the undef operands.
126      if (!Undef) {
127        Builder.setInsertPt(*MI.getParent(), MI);
128        Undef = Builder.buildUndef(OpType.getScalarType());
129      }
130      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
131                 OpType.getScalarType() &&
132             "All undefs should have the same type");
133      // Break the undef vector in as many scalar elements as needed
134      // for the flattening.
135      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
136           EltIdx != EltEnd; ++EltIdx)
137        Ops.push_back(Undef->getOperand(0).getReg());
138      break;
139    }
140    default:
141      return false;
142    }
143  }
144  return true;
145}
146void CombinerHelper::applyCombineConcatVectors(
147    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
148  // We determined that the concat_vectors can be flatten.
149  // Generate the flattened build_vector.
150  Register DstReg = MI.getOperand(0).getReg();
151  Builder.setInsertPt(*MI.getParent(), MI);
152  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
153
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef.  Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up.  For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
160  if (IsUndef)
161    Builder.buildUndef(NewDstReg);
162  else
163    Builder.buildBuildVector(NewDstReg, Ops);
164  MI.eraseFromParent();
165  replaceRegWith(MRI, DstReg, NewDstReg);
166}
167
168bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
169  SmallVector<Register, 4> Ops;
170  if (matchCombineShuffleVector(MI, Ops)) {
171    applyCombineShuffleVector(MI, Ops);
172    return true;
173  }
174  return false;
175}
176
177bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
178                                               SmallVectorImpl<Register> &Ops) {
179  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
180         "Invalid instruction kind");
181  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
182  Register Src1 = MI.getOperand(1).getReg();
183  LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce a
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
187  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
188  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
189
190  // If the resulting vector is smaller than the size of the source
191  // vectors being concatenated, we won't be able to replace the
192  // shuffle vector into a concat_vectors.
193  //
194  // Note: We may still be able to produce a concat_vectors fed by
195  //       extract_vector_elt and so on. It is less clear that would
196  //       be better though, so don't bother for now.
197  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // only work if the source and destination have the same size, but
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  //       we could still emit an extract vector element in that case.
205  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
206    return false;
207
208  // Check that the shuffle mask can be broken evenly between the
209  // different sources.
210  if (DstNumElts % SrcNumElts != 0)
211    return false;
212
213  // Mask length is a multiple of the source vector length.
214  // Check if the shuffle is some kind of concatenation of the input
215  // vectors.
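  // For example (with hypothetical registers), two <2 x s32> sources and a
  // mask of <0, 1, 2, 3> concatenate Src1 and Src2, while a mask of
  // <2, 3, 2, 3> would be rewritten as G_CONCAT_VECTORS Src2, Src2.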
216  unsigned NumConcat = DstNumElts / SrcNumElts;
217  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
218  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
219  for (unsigned i = 0; i != DstNumElts; ++i) {
220    int Idx = Mask[i];
221    // Undef value.
222    if (Idx < 0)
223      continue;
224    // Ensure the indices in each SrcType sized piece are sequential and that
225    // the same source is used for the whole piece.
226    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
227        (ConcatSrcs[i / SrcNumElts] >= 0 &&
228         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
229      return false;
230    // Remember which source this index came from.
231    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
232  }
233
234  // The shuffle is concatenating multiple vectors together.
235  // Collect the different operands for that.
236  Register UndefReg;
237  Register Src2 = MI.getOperand(2).getReg();
238  for (auto Src : ConcatSrcs) {
239    if (Src < 0) {
240      if (!UndefReg) {
241        Builder.setInsertPt(*MI.getParent(), MI);
242        UndefReg = Builder.buildUndef(SrcType).getReg(0);
243      }
244      Ops.push_back(UndefReg);
245    } else if (Src == 0)
246      Ops.push_back(Src1);
247    else
248      Ops.push_back(Src2);
249  }
250  return true;
251}
252
253void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
254                                               const ArrayRef<Register> Ops) {
255  Register DstReg = MI.getOperand(0).getReg();
256  Builder.setInsertPt(*MI.getParent(), MI);
257  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
258
259  if (Ops.size() == 1)
260    Builder.buildCopy(NewDstReg, Ops[0]);
261  else
262    Builder.buildMerge(NewDstReg, Ops);
263
264  MI.eraseFromParent();
265  replaceRegWith(MRI, DstReg, NewDstReg);
266}
267
268namespace {
269
/// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
272PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
273                                  const LLT TyForCandidate,
274                                  unsigned OpcodeForCandidate,
275                                  MachineInstr *MIForCandidate) {
276  if (!CurrentUse.Ty.isValid()) {
277    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
278        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
279      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
280    return CurrentUse;
281  }
282
283  // We permit the extend to hoist through basic blocks but this is only
284  // sensible if the target has extending loads. If you end up lowering back
285  // into a load and extend during the legalizer then the end result is
286  // hoisting the extend up to the load.
287
288  // Prefer defined extensions to undefined extensions as these are more
289  // likely to reduce the number of instructions.
290  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
291      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
292    return CurrentUse;
293  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
294           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
295    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
296
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive, so folding them into the load is more profitable.
299  if (CurrentUse.Ty == TyForCandidate) {
300    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
301        OpcodeForCandidate == TargetOpcode::G_ZEXT)
302      return CurrentUse;
303    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
304             OpcodeForCandidate == TargetOpcode::G_SEXT)
305      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
306  }
307
  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have fewer larger registers than smaller registers and this
  // choice potentially increases the live-range for the larger value.
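  // For example, between a G_SEXT to s32 and a G_ZEXT to s64 (hypothetical
  // candidates), the G_ZEXT to s64 wins because it is the wider type; the
  // sext-over-zext preference above only breaks ties between equal types.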
313  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
314    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
315  }
316  return CurrentUse;
317}
318
319/// Find a suitable place to insert some instructions and insert them. This
320/// function accounts for special cases like inserting before a PHI node.
321/// The current strategy for inserting before PHI's is to duplicate the
322/// instructions for each predecessor. However, while that's ok for G_TRUNC
323/// on most targets since it generally requires no code, other targets/cases may
324/// want to try harder to find a dominating block.
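/// Concretely, for a use that is a PHI operand the instructions are inserted
/// at the start (first non-PHI position) of the corresponding predecessor
/// block, or immediately after DefMI when that predecessor is also DefMI's
/// block.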
325static void InsertInsnsWithoutSideEffectsBeforeUse(
326    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
327    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
328                       MachineOperand &UseMO)>
329        Inserter) {
330  MachineInstr &UseMI = *UseMO.getParent();
331
332  MachineBasicBlock *InsertBB = UseMI.getParent();
333
334  // If the use is a PHI then we want the predecessor block instead.
335  if (UseMI.isPHI()) {
336    MachineOperand *PredBB = std::next(&UseMO);
337    InsertBB = PredBB->getMBB();
338  }
339
340  // If the block is the same block as the def then we want to insert just after
341  // the def instead of at the start of the block.
342  if (InsertBB == DefMI.getParent()) {
343    MachineBasicBlock::iterator InsertPt = &DefMI;
344    Inserter(InsertBB, std::next(InsertPt), UseMO);
345    return;
346  }
347
348  // Otherwise we want the start of the BB
349  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
350}
351} // end anonymous namespace
352
353bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
354  PreferredTuple Preferred;
355  if (matchCombineExtendingLoads(MI, Preferred)) {
356    applyCombineExtendingLoads(MI, Preferred);
357    return true;
358  }
359  return false;
360}
361
362bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
363                                                PreferredTuple &Preferred) {
364  // We match the loads and follow the uses to the extend instead of matching
365  // the extends and following the def to the load. This is because the load
366  // must remain in the same position for correctness (unless we also add code
367  // to find a safe place to sink it) whereas the extend is freely movable.
368  // It also prevents us from duplicating the load for the volatile case or just
369  // for performance.
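  // As a simple illustration (types and registers are hypothetical), the
  // combine turns
  //   %v:_(s8) = G_LOAD %ptr
  //   %e:_(s32) = G_SEXT %v(s8)
  // into
  //   %e:_(s32) = G_SEXTLOAD %ptr
  // with any remaining uses of %v rewritten through a G_TRUNC of %e.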
370
371  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
372      MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
373      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
374    return false;
375
376  auto &LoadValue = MI.getOperand(0);
377  assert(LoadValue.isReg() && "Result wasn't a register?");
378
379  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
380  if (!LoadValueTy.isScalar())
381    return false;
382
383  // Most architectures are going to legalize <s8 loads into at least a 1 byte
384  // load, and the MMOs can only describe memory accesses in multiples of bytes.
385  // If we try to perform extload combining on those, we can end up with
386  // %a(s8) = extload %ptr (load 1 byte from %ptr)
387  // ... which is an illegal extload instruction.
388  if (LoadValueTy.getSizeInBits() < 8)
389    return false;
390
391  // For non power-of-2 types, they will very likely be legalized into multiple
392  // loads. Don't bother trying to match them into extending loads.
393  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
394    return false;
395
  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
401  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
402                                 ? TargetOpcode::G_ANYEXT
403                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
404                                       ? TargetOpcode::G_SEXT
405                                       : TargetOpcode::G_ZEXT;
406  Preferred = {LLT(), PreferredOpcode, nullptr};
407  for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
411      // Check for legality.
412      if (LI) {
413        LegalityQuery::MemDesc MMDesc;
414        const auto &MMO = **MI.memoperands_begin();
415        MMDesc.SizeInBits = MMO.getSizeInBits();
416        MMDesc.AlignInBits = MMO.getAlign().value() * 8;
417        MMDesc.Ordering = MMO.getOrdering();
418        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
419        LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
420        if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
421            LegalizeActions::Legal)
422          continue;
423      }
424      Preferred = ChoosePreferredUse(Preferred,
425                                     MRI.getType(UseMI.getOperand(0).getReg()),
426                                     UseMI.getOpcode(), &UseMI);
427    }
428  }
429
  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
436
437  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
438  return true;
439}
440
441void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
442                                                PreferredTuple &Preferred) {
443  // Rewrite the load to the chosen extending load.
444  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
445
446  // Inserter to insert a truncate back to the original type at a given point
447  // with some basic CSE to limit truncate duplication to one per BB.
448  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
449  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
450                           MachineBasicBlock::iterator InsertBefore,
451                           MachineOperand &UseMO) {
452    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
453    if (PreviouslyEmitted) {
454      Observer.changingInstr(*UseMO.getParent());
455      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
456      Observer.changedInstr(*UseMO.getParent());
457      return;
458    }
459
460    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
461    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
462    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
463    EmittedInsns[InsertIntoBB] = NewMI;
464    replaceRegOpWith(MRI, UseMO, NewDstReg);
465  };
466
467  Observer.changingInstr(MI);
468  MI.setDesc(
469      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
470                               ? TargetOpcode::G_SEXTLOAD
471                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
472                                     ? TargetOpcode::G_ZEXTLOAD
473                                     : TargetOpcode::G_LOAD));
474
475  // Rewrite all the uses to fix up the types.
476  auto &LoadValue = MI.getOperand(0);
477  SmallVector<MachineOperand *, 4> Uses;
478  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
479    Uses.push_back(&UseMO);
480
481  for (auto *UseMO : Uses) {
482    MachineInstr *UseMI = UseMO->getParent();
483
484    // If the extend is compatible with the preferred extend then we should fix
485    // up the type and extend so that it uses the preferred use.
486    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
487        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
488      Register UseDstReg = UseMI->getOperand(0).getReg();
489      MachineOperand &UseSrcMO = UseMI->getOperand(1);
490      const LLT UseDstTy = MRI.getType(UseDstReg);
491      if (UseDstReg != ChosenDstReg) {
492        if (Preferred.Ty == UseDstTy) {
493          // If the use has the same type as the preferred use, then merge
494          // the vregs and erase the extend. For example:
495          //    %1:_(s8) = G_LOAD ...
496          //    %2:_(s32) = G_SEXT %1(s8)
497          //    %3:_(s32) = G_ANYEXT %1(s8)
498          //    ... = ... %3(s32)
499          // rewrites to:
500          //    %2:_(s32) = G_SEXTLOAD ...
501          //    ... = ... %2(s32)
502          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
503          Observer.erasingInstr(*UseMO->getParent());
504          UseMO->getParent()->eraseFromParent();
505        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
506          // If the preferred size is smaller, then keep the extend but extend
507          // from the result of the extending load. For example:
508          //    %1:_(s8) = G_LOAD ...
509          //    %2:_(s32) = G_SEXT %1(s8)
510          //    %3:_(s64) = G_ANYEXT %1(s8)
511          //    ... = ... %3(s64)
          // rewrites to:
513          //    %2:_(s32) = G_SEXTLOAD ...
514          //    %3:_(s64) = G_ANYEXT %2:_(s32)
515          //    ... = ... %3(s64)
516          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
517        } else {
518          // If the preferred size is large, then insert a truncate. For
519          // example:
520          //    %1:_(s8) = G_LOAD ...
521          //    %2:_(s64) = G_SEXT %1(s8)
522          //    %3:_(s32) = G_ZEXT %1(s8)
523          //    ... = ... %3(s32)
524          /// rewrites to:
525          //    %2:_(s64) = G_SEXTLOAD ...
526          //    %4:_(s8) = G_TRUNC %2:_(s32)
527          //    %3:_(s64) = G_ZEXT %2:_(s8)
528          //    ... = ... %3(s64)
529          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
530                                                 InsertTruncAt);
531        }
532        continue;
533      }
534      // The use is (one of) the uses of the preferred use we chose earlier.
535      // We're going to update the load to def this value later so just erase
536      // the old extend.
537      Observer.erasingInstr(*UseMO->getParent());
538      UseMO->getParent()->eraseFromParent();
539      continue;
540    }
541
542    // The use isn't an extend. Truncate back to the type we originally loaded.
543    // This is free on many targets.
544    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
545  }
546
547  MI.getOperand(0).setReg(ChosenDstReg);
548  Observer.changedInstr(MI);
549}
550
551bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
552                                   const MachineInstr &UseMI) {
553  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
554         "shouldn't consider debug uses");
555  assert(DefMI.getParent() == UseMI.getParent());
556  if (&DefMI == &UseMI)
557    return false;
558
  // Loop through the basic block until we find one of the instructions.
  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
  for (; &*I != &DefMI && &*I != &UseMI; ++I)
    ;
  return &*I == &DefMI;
565}
566
567bool CombinerHelper::dominates(const MachineInstr &DefMI,
568                               const MachineInstr &UseMI) {
569  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
570         "shouldn't consider debug uses");
571  if (MDT)
572    return MDT->dominates(&DefMI, &UseMI);
573  else if (DefMI.getParent() != UseMI.getParent())
574    return false;
575
576  return isPredecessor(DefMI, UseMI);
577}
578
579bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
580  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
581  Register SrcReg = MI.getOperand(1).getReg();
582  unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
583  unsigned NumSextBits =
584      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
585      MI.getOperand(2).getImm();
586  return SrcSignBits >= NumSextBits;
587}
588
589bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
590  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
591  MachineIRBuilder MIB(MI);
592  MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
593  MI.eraseFromParent();
594  return true;
595}
596
597bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
598                                            Register &Base, Register &Offset) {
599  auto &MF = *MI.getParent()->getParent();
600  const auto &TLI = *MF.getSubtarget().getTargetLowering();
601
602#ifndef NDEBUG
603  unsigned Opcode = MI.getOpcode();
604  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
605         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
606#endif
607
608  Base = MI.getOperand(1).getReg();
609  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
610  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
611    return false;
612
613  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
614
615  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
616    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
617      continue;
618
619    Offset = Use.getOperand(2).getReg();
620    if (!ForceLegalIndexing &&
621        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
622      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
623                        << Use);
624      continue;
625    }
626
627    // Make sure the offset calculation is before the potentially indexed op.
628    // FIXME: we really care about dependency here. The offset calculation might
629    // be movable.
630    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
631    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
632      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
633                        << Use);
634      continue;
635    }
636
637    // FIXME: check whether all uses of Base are load/store with foldable
638    // addressing modes. If so, using the normal addr-modes is better than
639    // forming an indexed one.
640
641    bool MemOpDominatesAddrUses = true;
642    for (auto &PtrAddUse :
643         MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
644      if (!dominates(MI, PtrAddUse)) {
645        MemOpDominatesAddrUses = false;
646        break;
647      }
648    }
649
650    if (!MemOpDominatesAddrUses) {
651      LLVM_DEBUG(
652          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
653                 << Use);
654      continue;
655    }
656
657    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
658    Addr = Use.getOperand(0).getReg();
659    return true;
660  }
661
662  return false;
663}
664
665bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
666                                           Register &Base, Register &Offset) {
667  auto &MF = *MI.getParent()->getParent();
668  const auto &TLI = *MF.getSubtarget().getTargetLowering();
669
670#ifndef NDEBUG
671  unsigned Opcode = MI.getOpcode();
672  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
673         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
674#endif
675
676  Addr = MI.getOperand(1).getReg();
677  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
678  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
679    return false;
680
681  Base = AddrDef->getOperand(1).getReg();
682  Offset = AddrDef->getOperand(2).getReg();
683
684  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
685
686  if (!ForceLegalIndexing &&
687      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
688    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
689    return false;
690  }
691
692  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
693  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
694    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
695    return false;
696  }
697
698  if (MI.getOpcode() == TargetOpcode::G_STORE) {
699    // Would require a copy.
700    if (Base == MI.getOperand(0).getReg()) {
701      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
702      return false;
703    }
704
705    // We're expecting one use of Addr in MI, but it could also be the
706    // value stored, which isn't actually dominated by the instruction.
707    if (MI.getOperand(0).getReg() == Addr) {
708      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
709      return false;
710    }
711  }
712
713  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
714  // That might allow us to end base's liveness here by adjusting the constant.
715
716  for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
717    if (!dominates(MI, UseMI)) {
718      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
719      return false;
720    }
721  }
722
723  return true;
724}
725
726bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
727  IndexedLoadStoreMatchInfo MatchInfo;
728  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
729    applyCombineIndexedLoadStore(MI, MatchInfo);
730    return true;
731  }
732  return false;
733}
734
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
736  unsigned Opcode = MI.getOpcode();
737  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
738      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
739    return false;
740
741  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
742                                          MatchInfo.Offset);
743  if (!MatchInfo.IsPre &&
744      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
745                              MatchInfo.Offset))
746    return false;
747
748  return true;
749}
750
751void CombinerHelper::applyCombineIndexedLoadStore(
752    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
753  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
754  MachineIRBuilder MIRBuilder(MI);
755  unsigned Opcode = MI.getOpcode();
756  bool IsStore = Opcode == TargetOpcode::G_STORE;
757  unsigned NewOpcode;
758  switch (Opcode) {
759  case TargetOpcode::G_LOAD:
760    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
761    break;
762  case TargetOpcode::G_SEXTLOAD:
763    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
764    break;
765  case TargetOpcode::G_ZEXTLOAD:
766    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
767    break;
768  case TargetOpcode::G_STORE:
769    NewOpcode = TargetOpcode::G_INDEXED_STORE;
770    break;
771  default:
772    llvm_unreachable("Unknown load/store opcode");
773  }
774
775  auto MIB = MIRBuilder.buildInstr(NewOpcode);
776  if (IsStore) {
777    MIB.addDef(MatchInfo.Addr);
778    MIB.addUse(MI.getOperand(0).getReg());
779  } else {
780    MIB.addDef(MI.getOperand(0).getReg());
781    MIB.addDef(MatchInfo.Addr);
782  }
783
784  MIB.addUse(MatchInfo.Base);
785  MIB.addUse(MatchInfo.Offset);
786  MIB.addImm(MatchInfo.IsPre);
787  MI.eraseFromParent();
788  AddrDef.eraseFromParent();
789
  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
791}
792
793bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
794  if (MI.getOpcode() != TargetOpcode::G_BR)
795    return false;
796
797  // Try to match the following:
798  // bb1:
799  //   %c(s32) = G_ICMP pred, %a, %b
800  //   %c1(s1) = G_TRUNC %c(s32)
801  //   G_BRCOND %c1, %bb2
802  //   G_BR %bb3
803  // bb2:
804  // ...
805  // bb3:
806
  // The above pattern does not have a fall through to the successor bb2,
  // always resulting in a branch no matter which path is taken. Here we try
  // to find and replace that pattern with a conditional branch to bb3 and a
  // fallthrough to bb2.
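  // That is, the rewrite (sketched here) produces:
  // bb1:
  //   %c(s32) = G_ICMP inverted(pred), %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb3
  // bb2 (fallthrough):
  // ...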
811
812  MachineBasicBlock *MBB = MI.getParent();
813  MachineBasicBlock::iterator BrIt(MI);
814  if (BrIt == MBB->begin())
815    return false;
816  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
817
818  MachineInstr *BrCond = &*std::prev(BrIt);
819  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
820    return false;
821
822  // Check that the next block is the conditional branch target.
823  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
824    return false;
825
826  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
827  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
828      !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
829    return false;
830  return true;
831}
832
833bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
834  if (!matchElideBrByInvertingCond(MI))
835    return false;
836  applyElideBrByInvertingCond(MI);
837  return true;
838}
839
840void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
841  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
842  MachineBasicBlock::iterator BrIt(MI);
843  MachineInstr *BrCond = &*std::prev(BrIt);
844  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
845
846  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
847      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
848
849  // Invert the G_ICMP condition.
850  Observer.changingInstr(*CmpMI);
851  CmpMI->getOperand(1).setPredicate(InversePred);
852  Observer.changedInstr(*CmpMI);
853
854  // Change the conditional branch target.
855  Observer.changingInstr(*BrCond);
856  BrCond->getOperand(1).setMBB(BrTarget);
857  Observer.changedInstr(*BrCond);
858  MI.eraseFromParent();
859}
860
861static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
862  // On Darwin, -Os means optimize for size without hurting performance, so
863  // only really optimize for size when -Oz (MinSize) is used.
864  if (MF.getTarget().getTargetTriple().isOSDarwin())
865    return MF.getFunction().hasMinSize();
866  return MF.getFunction().hasOptSize();
867}
868
869// Returns a list of types to use for memory op lowering in MemOps. A partial
870// port of findOptimalMemOpLowering in TargetLowering.
871static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
872                                          unsigned Limit, const MemOp &Op,
873                                          unsigned DstAS, unsigned SrcAS,
874                                          const AttributeList &FuncAttributes,
875                                          const TargetLowering &TLI) {
876  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
877    return false;
878
879  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
880
881  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
885    Ty = LLT::scalar(64);
886    if (Op.isFixedDstAlign())
887      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
888             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
889        Ty = LLT::scalar(Ty.getSizeInBytes());
890    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
891    // FIXME: check for the largest legal type we can load/store to.
892  }
893
894  unsigned NumMemOps = 0;
895  uint64_t Size = Op.size();
896  while (Size) {
897    unsigned TySize = Ty.getSizeInBytes();
898    while (TySize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
900      LLT NewTy = Ty;
901      // FIXME: check for mem op safety and legality of the types. Not all of
902      // SDAGisms map cleanly to GISel concepts.
903      if (NewTy.isVector())
904        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
905      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
906      unsigned NewTySize = NewTy.getSizeInBytes();
907      assert(NewTySize > 0 && "Could not find appropriate type");
908
909      // If the new LLT cannot cover all of the remaining bits, then consider
910      // issuing a (or a pair of) unaligned and overlapping load / store.
911      bool Fast;
      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
913      MVT VT = getMVTForLLT(Ty);
914      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
915          TLI.allowsMisalignedMemoryAccesses(
916              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
917              MachineMemOperand::MONone, &Fast) &&
918          Fast)
919        TySize = Size;
920      else {
921        Ty = NewTy;
922        TySize = NewTySize;
923      }
924    }
925
926    if (++NumMemOps > Limit)
927      return false;
928
929    MemOps.push_back(Ty);
930    Size -= TySize;
931  }
932
933  return true;
934}
935
936static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
937  if (Ty.isVector())
938    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
939                                Ty.getNumElements());
940  return IntegerType::get(C, Ty.getSizeInBits());
941}
942
943// Get a vectorized representation of the memset value operand, GISel edition.
944static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
945  MachineRegisterInfo &MRI = *MIB.getMRI();
946  unsigned NumBits = Ty.getScalarSizeInBits();
947  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
948  if (!Ty.isVector() && ValVRegAndVal) {
949    unsigned KnownVal = ValVRegAndVal->Value;
950    APInt Scalar = APInt(8, KnownVal);
951    APInt SplatVal = APInt::getSplat(NumBits, Scalar);
952    return MIB.buildConstant(Ty, SplatVal).getReg(0);
953  }
954
955  // Extend the byte value to the larger type, and then multiply by a magic
956  // value 0x010101... in order to replicate it across every byte.
957  // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
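  // For example, splatting the byte value 0xAB across an s32 computes
  // zext(0xAB) * 0x01010101 = 0xABABABAB.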
958  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
959    return MIB.buildConstant(Ty, 0).getReg(0);
960  }
961
962  LLT ExtType = Ty.getScalarType();
963  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
964  if (NumBits > 8) {
965    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
966    auto MagicMI = MIB.buildConstant(ExtType, Magic);
967    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
968  }
969
970  // For vector types create a G_BUILD_VECTOR.
971  if (Ty.isVector())
972    Val = MIB.buildSplatVector(Ty, Val).getReg(0);
973
974  return Val;
975}
976
977bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
978                                    Register Val, unsigned KnownLen,
979                                    Align Alignment, bool IsVolatile) {
980  auto &MF = *MI.getParent()->getParent();
981  const auto &TLI = *MF.getSubtarget().getTargetLowering();
982  auto &DL = MF.getDataLayout();
983  LLVMContext &C = MF.getFunction().getContext();
984
  assert(KnownLen != 0 && "Have a zero length memset!");
986
987  bool DstAlignCanChange = false;
988  MachineFrameInfo &MFI = MF.getFrameInfo();
989  bool OptSize = shouldLowerMemFuncForSize(MF);
990
991  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
992  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
993    DstAlignCanChange = true;
994
995  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
996  std::vector<LLT> MemOps;
997
998  const auto &DstMMO = **MI.memoperands_begin();
999  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1000
1001  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1002  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1003
1004  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1005                                     MemOp::Set(KnownLen, DstAlignCanChange,
1006                                                Alignment,
1007                                                /*IsZeroMemset=*/IsZeroVal,
1008                                                /*IsVolatile=*/IsVolatile),
1009                                     DstPtrInfo.getAddrSpace(), ~0u,
1010                                     MF.getFunction().getAttributes(), TLI))
1011    return false;
1012
1013  if (DstAlignCanChange) {
1014    // Get an estimate of the type from the LLT.
1015    Type *IRTy = getTypeForLLT(MemOps[0], C);
1016    Align NewAlign = DL.getABITypeAlign(IRTy);
1017    if (NewAlign > Alignment) {
1018      Alignment = NewAlign;
1019      unsigned FI = FIDef->getOperand(1).getIndex();
1020      // Give the stack frame object a larger alignment if needed.
1021      if (MFI.getObjectAlign(FI) < Alignment)
1022        MFI.setObjectAlignment(FI, Alignment);
1023    }
1024  }
1025
1026  MachineIRBuilder MIB(MI);
1027  // Find the largest store and generate the bit pattern for it.
1028  LLT LargestTy = MemOps[0];
1029  for (unsigned i = 1; i < MemOps.size(); i++)
1030    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1031      LargestTy = MemOps[i];
1032
1033  // The memset stored value is always defined as an s8, so in order to make it
1034  // work with larger store types we need to repeat the bit pattern across the
1035  // wider type.
1036  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1037
1038  if (!MemSetValue)
1039    return false;
1040
1041  // Generate the stores. For each store type in the list, we generate the
1042  // matching store of that type to the destination address.
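  // As a sketch (the exact types chosen are target dependent), a 7-byte
  // memset might become an s32 store at offset 0, an s16 store at offset 4
  // and an s8 store at offset 6.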
1043  LLT PtrTy = MRI.getType(Dst);
1044  unsigned DstOff = 0;
1045  unsigned Size = KnownLen;
1046  for (unsigned I = 0; I < MemOps.size(); I++) {
1047    LLT Ty = MemOps[I];
1048    unsigned TySize = Ty.getSizeInBytes();
1049    if (TySize > Size) {
1050      // Issuing an unaligned load / store pair that overlaps with the previous
1051      // pair. Adjust the offset accordingly.
1052      assert(I == MemOps.size() - 1 && I != 0);
1053      DstOff -= TySize - Size;
1054    }
1055
1056    // If this store is smaller than the largest store see whether we can get
1057    // the smaller value for free with a truncate.
1058    Register Value = MemSetValue;
1059    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1060      MVT VT = getMVTForLLT(Ty);
1061      MVT LargestVT = getMVTForLLT(LargestTy);
1062      if (!LargestTy.isVector() && !Ty.isVector() &&
1063          TLI.isTruncateFree(LargestVT, VT))
1064        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1065      else
1066        Value = getMemsetValue(Val, Ty, MIB);
1067      if (!Value)
1068        return false;
1069    }
1070
1071    auto *StoreMMO =
1072        MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1073
1074    Register Ptr = Dst;
1075    if (DstOff != 0) {
1076      auto Offset =
1077          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1078      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1079    }
1080
1081    MIB.buildStore(Value, Ptr, *StoreMMO);
1082    DstOff += Ty.getSizeInBytes();
1083    Size -= TySize;
1084  }
1085
1086  MI.eraseFromParent();
1087  return true;
1088}
1089
1090bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1091                                    Register Src, unsigned KnownLen,
1092                                    Align DstAlign, Align SrcAlign,
1093                                    bool IsVolatile) {
1094  auto &MF = *MI.getParent()->getParent();
1095  const auto &TLI = *MF.getSubtarget().getTargetLowering();
1096  auto &DL = MF.getDataLayout();
1097  LLVMContext &C = MF.getFunction().getContext();
1098
  assert(KnownLen != 0 && "Have a zero length memcpy!");
1100
1101  bool DstAlignCanChange = false;
1102  MachineFrameInfo &MFI = MF.getFrameInfo();
1103  bool OptSize = shouldLowerMemFuncForSize(MF);
1104  Align Alignment = commonAlignment(DstAlign, SrcAlign);
1105
1106  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1107  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1108    DstAlignCanChange = true;
1109
1110  // FIXME: infer better src pointer alignment like SelectionDAG does here.
1111  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1112  // if the memcpy is in a tail call position.
1113
1114  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1115  std::vector<LLT> MemOps;
1116
1117  const auto &DstMMO = **MI.memoperands_begin();
1118  const auto &SrcMMO = **std::next(MI.memoperands_begin());
1119  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1120  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1121
1122  if (!findGISelOptimalMemOpLowering(
1123          MemOps, Limit,
1124          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1125                      IsVolatile),
1126          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1127          MF.getFunction().getAttributes(), TLI))
1128    return false;
1129
1130  if (DstAlignCanChange) {
1131    // Get an estimate of the type from the LLT.
1132    Type *IRTy = getTypeForLLT(MemOps[0], C);
1133    Align NewAlign = DL.getABITypeAlign(IRTy);
1134
1135    // Don't promote to an alignment that would require dynamic stack
1136    // realignment.
1137    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1138    if (!TRI->needsStackRealignment(MF))
1139      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1140        NewAlign = NewAlign / 2;
1141
1142    if (NewAlign > Alignment) {
1143      Alignment = NewAlign;
1144      unsigned FI = FIDef->getOperand(1).getIndex();
1145      // Give the stack frame object a larger alignment if needed.
1146      if (MFI.getObjectAlign(FI) < Alignment)
1147        MFI.setObjectAlignment(FI, Alignment);
1148    }
1149  }
1150
1151  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1152
1153  MachineIRBuilder MIB(MI);
  // Now we need to emit a pair of load and store for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest
  // buffer of that value loaded. This can result in a sequence of loads and
  // stores of mixed types, depending on what the target specifies as good
  // types to use.
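  // For instance (the types are whatever findGISelOptimalMemOpLowering
  // picked), a 12-byte copy could become an s64 load/store at offset 0
  // followed by an s32 load/store at offset 8.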
1159  unsigned CurrOffset = 0;
1160  LLT PtrTy = MRI.getType(Src);
1161  unsigned Size = KnownLen;
1162  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
1165    if (CopyTy.getSizeInBytes() > Size)
1166      CurrOffset -= CopyTy.getSizeInBytes() - Size;
1167
1168    // Construct MMOs for the accesses.
1169    auto *LoadMMO =
1170        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1171    auto *StoreMMO =
1172        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1173
1174    // Create the load.
1175    Register LoadPtr = Src;
1176    Register Offset;
1177    if (CurrOffset != 0) {
1178      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1179                   .getReg(0);
1180      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1181    }
1182    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1183
1184    // Create the store.
1185    Register StorePtr =
1186        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1187    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1188    CurrOffset += CopyTy.getSizeInBytes();
1189    Size -= CopyTy.getSizeInBytes();
1190  }
1191
1192  MI.eraseFromParent();
1193  return true;
1194}
1195
1196bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1197                                     Register Src, unsigned KnownLen,
1198                                     Align DstAlign, Align SrcAlign,
1199                                     bool IsVolatile) {
1200  auto &MF = *MI.getParent()->getParent();
1201  const auto &TLI = *MF.getSubtarget().getTargetLowering();
1202  auto &DL = MF.getDataLayout();
1203  LLVMContext &C = MF.getFunction().getContext();
1204
  assert(KnownLen != 0 && "Have a zero length memmove!");
1206
1207  bool DstAlignCanChange = false;
1208  MachineFrameInfo &MFI = MF.getFrameInfo();
1209  bool OptSize = shouldLowerMemFuncForSize(MF);
1210  Align Alignment = commonAlignment(DstAlign, SrcAlign);
1211
1212  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1213  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1214    DstAlignCanChange = true;
1215
1216  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1217  std::vector<LLT> MemOps;
1218
1219  const auto &DstMMO = **MI.memoperands_begin();
1220  const auto &SrcMMO = **std::next(MI.memoperands_begin());
1221  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1222  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1223
  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
1227  if (!findGISelOptimalMemOpLowering(
1228          MemOps, Limit,
1229          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1230                      /*IsVolatile*/ true),
1231          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1232          MF.getFunction().getAttributes(), TLI))
1233    return false;
1234
1235  if (DstAlignCanChange) {
1236    // Get an estimate of the type from the LLT.
1237    Type *IRTy = getTypeForLLT(MemOps[0], C);
1238    Align NewAlign = DL.getABITypeAlign(IRTy);
1239
1240    // Don't promote to an alignment that would require dynamic stack
1241    // realignment.
1242    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1243    if (!TRI->needsStackRealignment(MF))
1244      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1245        NewAlign = NewAlign / 2;
1246
1247    if (NewAlign > Alignment) {
1248      Alignment = NewAlign;
1249      unsigned FI = FIDef->getOperand(1).getIndex();
1250      // Give the stack frame object a larger alignment if needed.
1251      if (MFI.getObjectAlign(FI) < Alignment)
1252        MFI.setObjectAlignment(FI, Alignment);
1253    }
1254  }
1255
1256  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1257
1258  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the
  // stores, since the source and destination ranges may overlap. Apart from
  // that, this loop is pretty much doing the same thing as the memcpy codegen
  // function.
1262  unsigned CurrOffset = 0;
1263  LLT PtrTy = MRI.getType(Src);
1264  SmallVector<Register, 16> LoadVals;
1265  for (auto CopyTy : MemOps) {
1266    // Construct MMO for the load.
1267    auto *LoadMMO =
1268        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1269
1270    // Create the load.
1271    Register LoadPtr = Src;
1272    if (CurrOffset != 0) {
1273      auto Offset =
1274          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1275      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1276    }
1277    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1278    CurrOffset += CopyTy.getSizeInBytes();
1279  }
1280
1281  CurrOffset = 0;
1282  for (unsigned I = 0; I < MemOps.size(); ++I) {
1283    LLT CopyTy = MemOps[I];
1284    // Now store the values loaded.
1285    auto *StoreMMO =
1286        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1287
1288    Register StorePtr = Dst;
1289    if (CurrOffset != 0) {
1290      auto Offset =
1291          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1292      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1293    }
1294    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1295    CurrOffset += CopyTy.getSizeInBytes();
1296  }
1297  MI.eraseFromParent();
1298  return true;
1299}
1300
1301bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1302  // This combine is fairly complex so it's not written with a separate
1303  // matcher function.
1304  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1305  Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1306  assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1307          ID == Intrinsic::memset) &&
1308         "Expected a memcpy like intrinsic");
1309
1310  auto MMOIt = MI.memoperands_begin();
1311  const MachineMemOperand *MemOp = *MMOIt;
1312  bool IsVolatile = MemOp->isVolatile();
1313  // Don't try to optimize volatile.
1314  if (IsVolatile)
1315    return false;
1316
1317  Align DstAlign = MemOp->getBaseAlign();
1318  Align SrcAlign;
1319  Register Dst = MI.getOperand(1).getReg();
1320  Register Src = MI.getOperand(2).getReg();
1321  Register Len = MI.getOperand(3).getReg();
1322
1323  if (ID != Intrinsic::memset) {
1324    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1325    MemOp = *(++MMOIt);
1326    SrcAlign = MemOp->getBaseAlign();
1327  }
1328
  // See if this is a constant length copy.
1330  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1331  if (!LenVRegAndVal)
1332    return false; // Leave it to the legalizer to lower it to a libcall.
1333  unsigned KnownLen = LenVRegAndVal->Value;
1334
1335  if (KnownLen == 0) {
1336    MI.eraseFromParent();
1337    return true;
1338  }
1339
1340  if (MaxLen && KnownLen > MaxLen)
1341    return false;
1342
  if (ID == Intrinsic::memcpy)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                          IsVolatile);
  if (ID == Intrinsic::memmove)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
  if (ID == Intrinsic::memset)
    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1349  return false;
1350}
1351
1352bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1353                                           PtrAddChain &MatchInfo) {
1354  // We're trying to match the following pattern:
1355  //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1356  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1357  // -->
1358  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1359
1360  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1361    return false;
1362
1363  Register Add2 = MI.getOperand(1).getReg();
1364  Register Imm1 = MI.getOperand(2).getReg();
1365  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1366  if (!MaybeImmVal)
1367    return false;
1368
1369  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1370  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1371    return false;
1372
1373  Register Base = Add2Def->getOperand(1).getReg();
1374  Register Imm2 = Add2Def->getOperand(2).getReg();
1375  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1376  if (!MaybeImm2Val)
1377    return false;
1378
1379  // Pass the combined immediate to the apply function.
1380  MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1381  MatchInfo.Base = Base;
1382  return true;
1383}
1384
1385bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1386                                           PtrAddChain &MatchInfo) {
1387  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1388  MachineIRBuilder MIB(MI);
1389  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1390  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1391  Observer.changingInstr(MI);
1392  MI.getOperand(1).setReg(MatchInfo.Base);
1393  MI.getOperand(2).setReg(NewOffset.getReg(0));
1394  Observer.changedInstr(MI);
1395  return true;
1396}
1397
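// Fold a multiply by a power-of-two constant into a left shift. For example
// (illustrative MIR; register names are arbitrary):
//   %four:_(s32) = G_CONSTANT i32 4
//   %dst:_(s32) = G_MUL %x, %four
// -->
//   %two:_(s32) = G_CONSTANT i32 2
//   %dst:_(s32) = G_SHL %x, %two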
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
    return false;
  ShiftVal = Log2_64(MaybeImmVal->Value);
  return true;
}

bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

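// Match a constant shift of a scalar wider than TargetShiftSize whose shift
// amount is at least half the bit width. Such a shift only needs bits from
// one half of the source, so it can be rewritten with G_UNMERGE_VALUES and a
// narrower shift (see applyCombineShiftToUnmerge below).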
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO: Handle vector types.
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
    getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value;
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    //  dst = G_LSHR s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Narrowed, Zero });
  } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
    Register Narrowed = Unmerge.getReg(0);
    //  dst = G_SHL s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES 0, (G_SHL lo, C - 32)
    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
    auto Hi = Builder.buildAShr(
      HalfTy, Unmerge.getReg(1),
      Builder.buildConstant(HalfTy, HalfSize - 1));

    if (ShiftVal == HalfSize) {
      // (G_ASHR i64:x, 32) ->
      //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
    } else if (ShiftVal == Size - 1) {
      // Don't need a second shift.
      // (G_ASHR i64:x, 63) ->
      //   %narrowed = (G_ASHR hi_32(x), 31)
      //   G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMerge(DstReg, { Hi, Hi });
    } else {
      auto Lo = Builder.buildAShr(
        HalfTy, Unmerge.getReg(1),
        Builder.buildConstant(HalfTy, ShiftVal - HalfSize));

      // (G_ASHR i64:x, C) -> (for C >= 32)
      //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Lo, Hi });
    }
  }

  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }

  return false;
}

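// Return true if any explicit register use of MI is defined, possibly
// through copies, by a G_IMPLICIT_DEF.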
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
  return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

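// Return true if every explicit use of MI is either a non-register operand
// or a register defined, possibly through copies, by a G_IMPLICIT_DEF.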
bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
  return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

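// Return true if every element of the G_SHUFFLE_VECTOR mask is undef, i.e.
// encoded as a negative index.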
bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}

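// Return true if the value stored by this G_STORE (operand 0) is defined by
// a G_IMPLICIT_DEF.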
bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}

bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}

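// Return true if MOP1 and MOP2 are register operands that can be proven to
// hold the same value, looking through copies to their defining instructions.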
bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                    const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
  if (!I1)
    return false;
  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
  if (!I2)
    return false;

  // Handle a case like this:
  //
  // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  //
  // Even though %0 and %1 are produced by the same instruction, they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();

  // If we have an instruction which loads or stores, we can't guarantee that
  // it is identical.
  //
  // For example, we may have
  //
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
  // %x2 = G_LOAD %addr (load N from @somewhere)
  // ...
  // %or = G_OR %x1, %x2
  //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed not to
  // change).
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;

  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
  // %a = COPY $physreg
  // ...
  // SOMETHING implicit-def $physreg
  // ...
  // %b = COPY $physreg
  //
  // These copies are not equivalent.
  if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
    // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
    // From that, we know that they must have the same value, since they must
    // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }

  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
  // On the off-chance that there's some target instruction feeding into the
  // instruction, let's use produceSameValue instead of isIdenticalTo.
  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
}

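// Return true if MOP is a register operand whose value is the constant C,
// looking through extensions and truncations of a G_CONSTANT.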
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
  // MIPatternMatch doesn't let us look through G_ZEXT etc.
  auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
  return ValAndVReg && ValAndVReg->Value == C;
}

bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  Register Replacement = MI.getOperand(OpIdx).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Match (cond ? x : x)
  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}

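// Match a binary operation whose two source operands are the same value,
// e.g. G_AND %x, %x, so the destination can simply be replaced with %x.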
bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}

bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}

bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}

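// Try to rewrite an add of a negated value as a subtraction. For example
// (illustrative MIR; register names are arbitrary):
//   %zero:_(s32) = G_CONSTANT i32 0
//   %neg:_(s32) = G_SUB %zero, %a
//   %root:_(s32) = G_ADD %neg, %b
// -->
//   %root:_(s32) = G_SUB %b, %a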
bool CombinerHelper::matchSimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register &NewLHS = std::get<0>(MatchInfo);
  Register &NewRHS = std::get<1>(MatchInfo);

  // Helper lambda to check for opportunities for
  // ((0-A) + B) -> B - A
  // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    int64_t Cst;
    if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
        Cst != 0)
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };

  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}

bool CombinerHelper::applySimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register SubLHS, SubRHS;
  std::tie(SubLHS, SubRHS) = MatchInfo;
  Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
  return true;
}

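// Try each of the default combines in turn until one of them succeeds.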
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}
