1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
14#include "MCTargetDesc/PPCMCTargetDesc.h"
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCISelLowering.h"
18#include "PPCMachineFunctionInfo.h"
19#include "PPCSubtarget.h"
20#include "PPCTargetMachine.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SmallPtrSet.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/BranchProbabilityInfo.h"
28#include "llvm/CodeGen/FunctionLoweringInfo.h"
29#include "llvm/CodeGen/ISDOpcodes.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/CodeGen/SelectionDAGISel.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetInstrInfo.h"
38#include "llvm/CodeGen/TargetRegisterInfo.h"
39#include "llvm/CodeGen/ValueTypes.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/GlobalValue.h"
44#include "llvm/IR/InlineAsm.h"
45#include "llvm/IR/InstrTypes.h"
46#include "llvm/IR/Module.h"
47#include "llvm/Support/Casting.h"
48#include "llvm/Support/CodeGen.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/KnownBits.h"
54#include "llvm/Support/MachineValueType.h"
55#include "llvm/Support/MathExtras.h"
56#include "llvm/Support/raw_ostream.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <limits>
62#include <memory>
63#include <new>
64#include <tuple>
65#include <utility>
66
67using namespace llvm;
68
69#define DEBUG_TYPE "ppc-codegen"
70
71STATISTIC(NumSextSetcc,
72          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
73STATISTIC(NumZextSetcc,
74          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
75STATISTIC(SignExtensionsAdded,
76          "Number of sign extensions for compare inputs added.");
77STATISTIC(ZeroExtensionsAdded,
78          "Number of zero extensions for compare inputs added.");
79STATISTIC(NumLogicOpsOnComparison,
80          "Number of logical ops on i1 values calculated in GPR.");
81STATISTIC(OmittedForNonExtendUses,
82          "Number of compares not eliminated as they have non-extending uses.");
83STATISTIC(NumP9Setb,
84          "Number of compares lowered to setb.");
85
86// FIXME: Remove this once the bug has been fixed!
87cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
88cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
89
90static cl::opt<bool>
91    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
92                       cl::desc("use aggressive ppc isel for bit permutations"),
93                       cl::Hidden);
94static cl::opt<bool> BPermRewriterNoMasking(
95    "ppc-bit-perm-rewriter-stress-rotates",
96    cl::desc("stress rotate selection in aggressive ppc isel for "
97             "bit permutations"),
98    cl::Hidden);
99
100static cl::opt<bool> EnableBranchHint(
101  "ppc-use-branch-hint", cl::init(true),
102    cl::desc("Enable static hinting of branches on ppc"),
103    cl::Hidden);
104
105static cl::opt<bool> EnableTLSOpt(
106  "ppc-tls-opt", cl::init(true),
107    cl::desc("Enable tls optimization peephole"),
108    cl::Hidden);
109
// Selects which integer comparisons get GPR-only code; these are the values
// accepted by the -ppc-gpr-icmps option.
enum ICmpInGPRType {
  ICGPR_All,      // "all"
  ICGPR_None,     // "none"
  ICGPR_I32,      // "i32"
  ICGPR_I64,      // "i64"
  ICGPR_NonExtIn, // "nonextin"
  ICGPR_Zext,     // "zext"
  ICGPR_Sext,     // "sext"
  ICGPR_ZextI32,  // "zexti32"
  ICGPR_SextI32,  // "sexti32"
  ICGPR_ZextI64,  // "zexti64"
  ICGPR_SextI64   // "sexti64"
};
113
114static cl::opt<ICmpInGPRType> CmpInGPR(
115  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
116  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
117  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
118             clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
119             clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
120             clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
121             clEnumValN(ICGPR_NonExtIn, "nonextin",
122                        "Only comparisons where inputs don't need [sz]ext."),
123             clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
124             clEnumValN(ICGPR_ZextI32, "zexti32",
125                        "Only i32 comparisons with zext result."),
126             clEnumValN(ICGPR_ZextI64, "zexti64",
127                        "Only i64 comparisons with zext result."),
128             clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
129             clEnumValN(ICGPR_SextI32, "sexti32",
130                        "Only i32 comparisons with sext result."),
131             clEnumValN(ICGPR_SextI64, "sexti64",
132                        "Only i64 comparisons with sext result.")));
133namespace {
134
135  //===--------------------------------------------------------------------===//
136  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
137  /// instructions for SelectionDAG operations.
138  ///
139  class PPCDAGToDAGISel : public SelectionDAGISel {
140    const PPCTargetMachine &TM;
141    const PPCSubtarget *PPCSubTarget = nullptr;
142    const PPCTargetLowering *PPCLowering = nullptr;
143    unsigned GlobalBaseReg = 0;
144
145  public:
146    explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
147        : SelectionDAGISel(tm, OptLevel), TM(tm) {}
148
149    bool runOnMachineFunction(MachineFunction &MF) override {
150      // Make sure we re-emit a set of the global base reg if necessary
151      GlobalBaseReg = 0;
152      PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
153      PPCLowering = PPCSubTarget->getTargetLowering();
154      SelectionDAGISel::runOnMachineFunction(MF);
155
156      if (!PPCSubTarget->isSVR4ABI())
157        InsertVRSaveCode(MF);
158
159      return true;
160    }
161
162    void PreprocessISelDAG() override;
163    void PostprocessISelDAG() override;
164
165    /// getI16Imm - Return a target constant with the specified value, of type
166    /// i16.
167    inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
168      return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
169    }
170
171    /// getI32Imm - Return a target constant with the specified value, of type
172    /// i32.
173    inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
174      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
175    }
176
177    /// getI64Imm - Return a target constant with the specified value, of type
178    /// i64.
179    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
180      return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
181    }
182
183    /// getSmallIPtrImm - Return a target constant of pointer type.
184    inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
185      return CurDAG->getTargetConstant(
186          Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
187    }
188
189    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
190    /// rotate and mask opcode and mask operation.
191    static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
192                                unsigned &SH, unsigned &MB, unsigned &ME);
193
194    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
195    /// base register.  Return the virtual register that holds this value.
196    SDNode *getGlobalBaseReg();
197
198    void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
199
200    // Select - Convert the specified operand from a target-independent to a
201    // target-specific node if it hasn't already been changed.
202    void Select(SDNode *N) override;
203
204    bool tryBitfieldInsert(SDNode *N);
205    bool tryBitPermutation(SDNode *N);
206    bool tryIntCompareInGPR(SDNode *N);
207    bool tryAndWithMask(SDNode *N);
208
209    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
210    // an X-Form load instruction with the offset being a relocation coming from
211    // the PPCISD::ADD_TLS.
212    bool tryTLSXFormLoad(LoadSDNode *N);
213    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
214    // an X-Form store instruction with the offset being a relocation coming from
215    // the PPCISD::ADD_TLS.
216    bool tryTLSXFormStore(StoreSDNode *N);
217    /// SelectCC - Select a comparison of the specified values with the
218    /// specified condition code, returning the CR# of the expression.
219    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
220                     const SDLoc &dl);
221
222    /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
223    /// immediate field.  Note that the operand at this point is already the
224    /// result of a prior SelectAddressRegImm call.
225    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
226      if (N.getOpcode() == ISD::TargetConstant ||
227          N.getOpcode() == ISD::TargetGlobalAddress) {
228        Out = N;
229        return true;
230      }
231
232      return false;
233    }
234
235    /// SelectAddrIdx - Given the specified address, check to see if it can be
236    /// represented as an indexed [r+r] operation.
237    /// This is for xform instructions whose associated displacement form is D.
238    /// The last parameter \p 0 means associated D form has no requirment for 16
239    /// bit signed displacement.
240    /// Returns false if it can be represented by [r+imm], which are preferred.
241    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
242      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
243    }
244
245    /// SelectAddrIdx4 - Given the specified address, check to see if it can be
246    /// represented as an indexed [r+r] operation.
247    /// This is for xform instructions whose associated displacement form is DS.
248    /// The last parameter \p 4 means associated DS form 16 bit signed
249    /// displacement must be a multiple of 4.
250    /// Returns false if it can be represented by [r+imm], which are preferred.
251    bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
252      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
253    }
254
255    /// SelectAddrIdx16 - Given the specified address, check to see if it can be
256    /// represented as an indexed [r+r] operation.
257    /// This is for xform instructions whose associated displacement form is DQ.
258    /// The last parameter \p 16 means associated DQ form 16 bit signed
259    /// displacement must be a multiple of 16.
260    /// Returns false if it can be represented by [r+imm], which are preferred.
261    bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
262      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
263    }
264
265    /// SelectAddrIdxOnly - Given the specified address, force it to be
266    /// represented as an indexed [r+r] operation.
267    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
268      return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
269    }
270
271    /// SelectAddrImm - Returns true if the address N can be represented by
272    /// a base register plus a signed 16-bit displacement [r+imm].
273    /// The last parameter \p 0 means D form has no requirment for 16 bit signed
274    /// displacement.
275    bool SelectAddrImm(SDValue N, SDValue &Disp,
276                       SDValue &Base) {
277      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
278    }
279
280    /// SelectAddrImmX4 - Returns true if the address N can be represented by
281    /// a base register plus a signed 16-bit displacement that is a multiple of
282    /// 4 (last parameter). Suitable for use by STD and friends.
283    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
284      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
285    }
286
287    /// SelectAddrImmX16 - Returns true if the address N can be represented by
288    /// a base register plus a signed 16-bit displacement that is a multiple of
289    /// 16(last parameter). Suitable for use by STXV and friends.
290    bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
291      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
292    }
293
294    // Select an address into a single register.
295    bool SelectAddr(SDValue N, SDValue &Base) {
296      Base = N;
297      return true;
298    }
299
300    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
301    /// inline asm expressions.  It is always correct to compute the value into
302    /// a register.  The case of adding a (possibly relocatable) constant to a
303    /// register can be improved, but it is wrong to substitute Reg+Reg for
304    /// Reg in an asm, because the load or store opcode would have to change.
305    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
306                                      unsigned ConstraintID,
307                                      std::vector<SDValue> &OutOps) override {
308      switch(ConstraintID) {
309      default:
310        errs() << "ConstraintID: " << ConstraintID << "\n";
311        llvm_unreachable("Unexpected asm memory constraint");
312      case InlineAsm::Constraint_es:
313      case InlineAsm::Constraint_m:
314      case InlineAsm::Constraint_o:
315      case InlineAsm::Constraint_Q:
316      case InlineAsm::Constraint_Z:
317      case InlineAsm::Constraint_Zy:
318        // We need to make sure that this one operand does not end up in r0
319        // (because we might end up lowering this as 0(%op)).
320        const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
321        const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
322        SDLoc dl(Op);
323        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
324        SDValue NewOp =
325          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
326                                         dl, Op.getValueType(),
327                                         Op, RC), 0);
328
329        OutOps.push_back(NewOp);
330        return false;
331      }
332      return true;
333    }
334
335    void InsertVRSaveCode(MachineFunction &MF);
336
337    StringRef getPassName() const override {
338      return "PowerPC DAG->DAG Pattern Instruction Selection";
339    }
340
341// Include the pieces autogenerated from the target description.
342#include "PPCGenDAGISel.inc"
343
344private:
345    bool trySETCC(SDNode *N);
346
347    void PeepholePPC64();
348    void PeepholePPC64ZExt();
349    void PeepholeCROps();
350
351    SDValue combineToCMPB(SDNode *N);
352    void foldBoolExts(SDValue &Res, SDNode *&N);
353
354    bool AllUsersSelectZero(SDNode *N);
355    void SwapAllSelectUsers(SDNode *N);
356
357    bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
358    void transferMemOperands(SDNode *N, SDNode *Result);
359  };
360
361} // end anonymous namespace
362
363/// InsertVRSaveCode - Once the entire function has been instruction selected,
364/// all virtual registers are created and all machine instructions are built,
365/// check to see if we need to save/restore VRSAVE.  If so, do it.
366void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
367  // Check to see if this function uses vector registers, which means we have to
368  // save and restore the VRSAVE register and update it with the regs we use.
369  //
370  // In this case, there will be virtual registers of vector type created
371  // by the scheduler.  Detect them now.
372  bool HasVectorVReg = false;
373  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
374    unsigned Reg = Register::index2VirtReg(i);
375    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
376      HasVectorVReg = true;
377      break;
378    }
379  }
380  if (!HasVectorVReg) return;  // nothing to do.
381
382  // If we have a vector register, we want to emit code into the entry and exit
383  // blocks to save and restore the VRSAVE register.  We do this here (instead
384  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
385  //
386  // 1. This (trivially) reduces the load on the register allocator, by not
387  //    having to represent the live range of the VRSAVE register.
388  // 2. This (more significantly) allows us to create a temporary virtual
389  //    register to hold the saved VRSAVE value, allowing this temporary to be
390  //    register allocated, instead of forcing it to be spilled to the stack.
391
392  // Create two vregs - one to hold the VRSAVE register that is live-in to the
393  // function and one for the value after having bits or'd into it.
394  Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
395  Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
396
397  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
398  MachineBasicBlock &EntryBB = *Fn.begin();
399  DebugLoc dl;
400  // Emit the following code into the entry block:
401  // InVRSAVE = MFVRSAVE
402  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
403  // MTVRSAVE UpdatedVRSAVE
404  MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
405  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
406  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
407          UpdatedVRSAVE).addReg(InVRSAVE);
408  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
409
410  // Find all return blocks, outputting a restore in each epilog.
411  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
412    if (BB->isReturnBlock()) {
413      IP = BB->end(); --IP;
414
415      // Skip over all terminator instructions, which are part of the return
416      // sequence.
417      MachineBasicBlock::iterator I2 = IP;
418      while (I2 != BB->begin() && (--I2)->isTerminator())
419        IP = I2;
420
421      // Emit: MTVRSAVE InVRSave
422      BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
423    }
424  }
425}
426
427/// getGlobalBaseReg - Output the instructions required to put the
428/// base address to use for accessing globals into a register.
429///
430SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
431  if (!GlobalBaseReg) {
432    const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
433    // Insert the set of GlobalBaseReg into the first MBB of the function
434    MachineBasicBlock &FirstMBB = MF->front();
435    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
436    const Module *M = MF->getFunction().getParent();
437    DebugLoc dl;
438
439    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
440      if (PPCSubTarget->isTargetELF()) {
441        GlobalBaseReg = PPC::R30;
442        if (!PPCSubTarget->isSecurePlt() &&
443            M->getPICLevel() == PICLevel::SmallPIC) {
444          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
445          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
446          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
447        } else {
448          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
449          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
450          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
451          BuildMI(FirstMBB, MBBI, dl,
452                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
453                  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
454          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
455        }
456      } else {
457        GlobalBaseReg =
458          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
459        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
460        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
461      }
462    } else {
463      // We must ensure that this sequence is dominated by the prologue.
464      // FIXME: This is a bit of a big hammer since we don't get the benefits
465      // of shrink-wrapping whenever we emit this instruction. Considering
466      // this is used in any function where we emit a jump table, this may be
467      // a significant limitation. We should consider inserting this in the
468      // block where it is used and then commoning this sequence up if it
469      // appears in multiple places.
470      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
471      // MovePCtoLR8.
472      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
473      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
474      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
475      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
476    }
477  }
478  return CurDAG->getRegister(GlobalBaseReg,
479                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
480      .getNode();
481}
482
483/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
484/// operand. If so Imm will receive the 32-bit value.
485static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
486  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
487    Imm = cast<ConstantSDNode>(N)->getZExtValue();
488    return true;
489  }
490  return false;
491}
492
493/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
494/// operand.  If so Imm will receive the 64-bit value.
495static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
496  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
497    Imm = cast<ConstantSDNode>(N)->getZExtValue();
498    return true;
499  }
500  return false;
501}
502
503// isInt32Immediate - This method tests to see if a constant operand.
504// If so Imm will receive the 32 bit value.
505static bool isInt32Immediate(SDValue N, unsigned &Imm) {
506  return isInt32Immediate(N.getNode(), Imm);
507}
508
509/// isInt64Immediate - This method tests to see if the value is a 64-bit
510/// constant operand. If so Imm will receive the 64-bit value.
511static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
512  return isInt64Immediate(N.getNode(), Imm);
513}
514
515static unsigned getBranchHint(unsigned PCC,
516                              const FunctionLoweringInfo &FuncInfo,
517                              const SDValue &DestMBB) {
518  assert(isa<BasicBlockSDNode>(DestMBB));
519
520  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
521
522  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
523  const Instruction *BBTerm = BB->getTerminator();
524
525  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
526
527  const BasicBlock *TBB = BBTerm->getSuccessor(0);
528  const BasicBlock *FBB = BBTerm->getSuccessor(1);
529
530  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
531  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
532
533  // We only want to handle cases which are easy to predict at static time, e.g.
534  // C++ throw statement, that is very likely not taken, or calling never
535  // returned function, e.g. stdlib exit(). So we set Threshold to filter
536  // unwanted cases.
537  //
538  // Below is LLVM branch weight table, we only want to handle case 1, 2
539  //
540  // Case                  Taken:Nontaken  Example
541  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
542  // 2. Invoke-terminating 1:1048575
543  // 3. Coldblock          4:64            __builtin_expect
544  // 4. Loop Branch        124:4           For loop
545  // 5. PH/ZH/FPH          20:12
546  const uint32_t Threshold = 10000;
547
548  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
549    return PPC::BR_NO_HINT;
550
551  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
552                    << "::" << BB->getName() << "'\n"
553                    << " -> " << TBB->getName() << ": " << TProb << "\n"
554                    << " -> " << FBB->getName() << ": " << FProb << "\n");
555
556  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
557
558  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
559  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
560  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
561    std::swap(TProb, FProb);
562
563  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
564}
565
566// isOpcWithIntImmediate - This method tests to see if the node is a specific
567// opcode and that it has a immediate integer right operand.
568// If so Imm will receive the 32 bit value.
569static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
570  return N->getOpcode() == Opc
571         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
572}
573
574void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
575  SDLoc dl(SN);
576  int FI = cast<FrameIndexSDNode>(N)->getIndex();
577  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
578  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
579  if (SN->hasOneUse())
580    CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
581                         getSmallIPtrImm(Offset, dl));
582  else
583    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
584                                           getSmallIPtrImm(Offset, dl)));
585}
586
587bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
588                                      bool isShiftMask, unsigned &SH,
589                                      unsigned &MB, unsigned &ME) {
590  // Don't even go down this path for i64, since different logic will be
591  // necessary for rldicl/rldicr/rldimi.
592  if (N->getValueType(0) != MVT::i32)
593    return false;
594
595  unsigned Shift  = 32;
596  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
597  unsigned Opcode = N->getOpcode();
598  if (N->getNumOperands() != 2 ||
599      !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
600    return false;
601
602  if (Opcode == ISD::SHL) {
603    // apply shift left to mask if it comes first
604    if (isShiftMask) Mask = Mask << Shift;
605    // determine which bits are made indeterminant by shift
606    Indeterminant = ~(0xFFFFFFFFu << Shift);
607  } else if (Opcode == ISD::SRL) {
608    // apply shift right to mask if it comes first
609    if (isShiftMask) Mask = Mask >> Shift;
610    // determine which bits are made indeterminant by shift
611    Indeterminant = ~(0xFFFFFFFFu >> Shift);
612    // adjust for the left rotate
613    Shift = 32 - Shift;
614  } else if (Opcode == ISD::ROTL) {
615    Indeterminant = 0;
616  } else {
617    return false;
618  }
619
620  // if the mask doesn't intersect any Indeterminant bits
621  if (Mask && !(Mask & Indeterminant)) {
622    SH = Shift & 31;
623    // make sure the mask is still a mask (wrap arounds may not be)
624    return isRunOfOnes(Mask, MB, ME);
625  }
626  return false;
627}
628
629bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
630  SDValue Base = ST->getBasePtr();
631  if (Base.getOpcode() != PPCISD::ADD_TLS)
632    return false;
633  SDValue Offset = ST->getOffset();
634  if (!Offset.isUndef())
635    return false;
636
637  SDLoc dl(ST);
638  EVT MemVT = ST->getMemoryVT();
639  EVT RegVT = ST->getValue().getValueType();
640
641  unsigned Opcode;
642  switch (MemVT.getSimpleVT().SimpleTy) {
643    default:
644      return false;
645    case MVT::i8: {
646      Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
647      break;
648    }
649    case MVT::i16: {
650      Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
651      break;
652    }
653    case MVT::i32: {
654      Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
655      break;
656    }
657    case MVT::i64: {
658      Opcode = PPC::STDXTLS;
659      break;
660    }
661  }
662  SDValue Chain = ST->getChain();
663  SDVTList VTs = ST->getVTList();
664  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
665                   Chain};
666  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
667  transferMemOperands(ST, MN);
668  ReplaceNode(ST, MN);
669  return true;
670}
671
672bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
673  SDValue Base = LD->getBasePtr();
674  if (Base.getOpcode() != PPCISD::ADD_TLS)
675    return false;
676  SDValue Offset = LD->getOffset();
677  if (!Offset.isUndef())
678    return false;
679
680  SDLoc dl(LD);
681  EVT MemVT = LD->getMemoryVT();
682  EVT RegVT = LD->getValueType(0);
683  unsigned Opcode;
684  switch (MemVT.getSimpleVT().SimpleTy) {
685    default:
686      return false;
687    case MVT::i8: {
688      Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
689      break;
690    }
691    case MVT::i16: {
692      Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
693      break;
694    }
695    case MVT::i32: {
696      Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
697      break;
698    }
699    case MVT::i64: {
700      Opcode = PPC::LDXTLS;
701      break;
702    }
703  }
704  SDValue Chain = LD->getChain();
705  SDVTList VTs = LD->getVTList();
706  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
707  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
708  transferMemOperands(LD, MN);
709  ReplaceNode(LD, MN);
710  return true;
711}
712
713/// Turn an or of two masked values into the rotate left word immediate then
714/// mask insert (rlwimi) instruction.
715bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
716  SDValue Op0 = N->getOperand(0);
717  SDValue Op1 = N->getOperand(1);
718  SDLoc dl(N);
719
720  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
721  KnownBits RKnown = CurDAG->computeKnownBits(Op1);
722
723  unsigned TargetMask = LKnown.Zero.getZExtValue();
724  unsigned InsertMask = RKnown.Zero.getZExtValue();
725
726  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
727    unsigned Op0Opc = Op0.getOpcode();
728    unsigned Op1Opc = Op1.getOpcode();
729    unsigned Value, SH = 0;
730    TargetMask = ~TargetMask;
731    InsertMask = ~InsertMask;
732
733    // If the LHS has a foldable shift and the RHS does not, then swap it to the
734    // RHS so that we can fold the shift into the insert.
735    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
736      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
737          Op0.getOperand(0).getOpcode() == ISD::SRL) {
738        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
739            Op1.getOperand(0).getOpcode() != ISD::SRL) {
740          std::swap(Op0, Op1);
741          std::swap(Op0Opc, Op1Opc);
742          std::swap(TargetMask, InsertMask);
743        }
744      }
745    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
746      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
747          Op1.getOperand(0).getOpcode() != ISD::SRL) {
748        std::swap(Op0, Op1);
749        std::swap(Op0Opc, Op1Opc);
750        std::swap(TargetMask, InsertMask);
751      }
752    }
753
754    unsigned MB, ME;
755    if (isRunOfOnes(InsertMask, MB, ME)) {
756      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
757          isInt32Immediate(Op1.getOperand(1), Value)) {
758        Op1 = Op1.getOperand(0);
759        SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
760      }
761      if (Op1Opc == ISD::AND) {
762       // The AND mask might not be a constant, and we need to make sure that
763       // if we're going to fold the masking with the insert, all bits not
764       // know to be zero in the mask are known to be one.
765        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
766        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
767
768        unsigned SHOpc = Op1.getOperand(0).getOpcode();
769        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
770            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
771          // Note that Value must be in range here (less than 32) because
772          // otherwise there would not be any bits set in InsertMask.
773          Op1 = Op1.getOperand(0).getOperand(0);
774          SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
775        }
776      }
777
778      SH &= 31;
779      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
780                          getI32Imm(ME, dl) };
781      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
782      return true;
783    }
784  }
785  return false;
786}
787
788// Predict the number of instructions that would be generated by calling
789// selectI64Imm(N).
790static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
791  // Assume no remaining bits.
792  unsigned Remainder = 0;
793  // Assume no shift required.
794  unsigned Shift = 0;
795
796  // If it can't be represented as a 32 bit value.
797  if (!isInt<32>(Imm)) {
798    Shift = countTrailingZeros<uint64_t>(Imm);
799    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
800
801    // If the shifted value fits 32 bits.
802    if (isInt<32>(ImmSh)) {
803      // Go with the shifted value.
804      Imm = ImmSh;
805    } else {
806      // Still stuck with a 64 bit value.
807      Remainder = Imm;
808      Shift = 32;
809      Imm >>= 32;
810    }
811  }
812
813  // Intermediate operand.
814  unsigned Result = 0;
815
816  // Handle first 32 bits.
817  unsigned Lo = Imm & 0xFFFF;
818
819  // Simple value.
820  if (isInt<16>(Imm)) {
821    // Just the Lo bits.
822    ++Result;
823  } else if (Lo) {
824    // Handle the Hi bits and Lo bits.
825    Result += 2;
826  } else {
827    // Just the Hi bits.
828    ++Result;
829  }
830
831  // If no shift, we're done.
832  if (!Shift) return Result;
833
834  // If Hi word == Lo word,
835  // we can use rldimi to insert the Lo word into Hi word.
836  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
837    ++Result;
838    return Result;
839  }
840
841  // Shift for next step if the upper 32-bits were not zero.
842  if (Imm)
843    ++Result;
844
845  // Add in the last bits as required.
846  if ((Remainder >> 16) & 0xFFFF)
847    ++Result;
848  if (Remainder & 0xFFFF)
849    ++Result;
850
851  return Result;
852}
853
/// Rotate the 64-bit value \p Imm left by \p R bit positions.
///
/// The amount is reduced modulo 64, so R == 0 (or any multiple of 64) returns
/// \p Imm unchanged instead of evaluating a shift by 64, which is undefined
/// behavior in C++. For R in [1, 63] the result is the plain left rotation.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
857
858static unsigned selectI64ImmInstrCount(int64_t Imm) {
859  unsigned Count = selectI64ImmInstrCountDirect(Imm);
860
861  // If the instruction count is 1 or 2, we do not need further analysis
862  // since rotate + load constant requires at least 2 instructions.
863  if (Count <= 2)
864    return Count;
865
866  for (unsigned r = 1; r < 63; ++r) {
867    uint64_t RImm = Rot64(Imm, r);
868    unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
869    Count = std::min(Count, RCount);
870
871    // See comments in selectI64Imm for an explanation of the logic below.
872    unsigned LS = findLastSet(RImm);
873    if (LS != r-1)
874      continue;
875
876    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
877    uint64_t RImmWithOnes = RImm | OnesMask;
878
879    RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
880    Count = std::min(Count, RCount);
881  }
882
883  return Count;
884}
885
886// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
887// (above) needs to be kept in sync with this function.
888static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
889                                  int64_t Imm) {
890  // Assume no remaining bits.
891  unsigned Remainder = 0;
892  // Assume no shift required.
893  unsigned Shift = 0;
894
895  // If it can't be represented as a 32 bit value.
896  if (!isInt<32>(Imm)) {
897    Shift = countTrailingZeros<uint64_t>(Imm);
898    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
899
900    // If the shifted value fits 32 bits.
901    if (isInt<32>(ImmSh)) {
902      // Go with the shifted value.
903      Imm = ImmSh;
904    } else {
905      // Still stuck with a 64 bit value.
906      Remainder = Imm;
907      Shift = 32;
908      Imm >>= 32;
909    }
910  }
911
912  // Intermediate operand.
913  SDNode *Result;
914
915  // Handle first 32 bits.
916  unsigned Lo = Imm & 0xFFFF;
917  unsigned Hi = (Imm >> 16) & 0xFFFF;
918
919  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
920      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
921  };
922
923  // Simple value.
924  if (isInt<16>(Imm)) {
925    uint64_t SextImm = SignExtend64(Lo, 16);
926    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
927    // Just the Lo bits.
928    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
929  } else if (Lo) {
930    // Handle the Hi bits.
931    unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
932    Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
933    // And Lo bits.
934    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
935                                    SDValue(Result, 0), getI32Imm(Lo));
936  } else {
937    // Just the Hi bits.
938    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
939  }
940
941  // If no shift, we're done.
942  if (!Shift) return Result;
943
944  // If Hi word == Lo word,
945  // we can use rldimi to insert the Lo word into Hi word.
946  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
947    SDValue Ops[] =
948      { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
949    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
950  }
951
952  // Shift for next step if the upper 32-bits were not zero.
953  if (Imm) {
954    Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
955                                    SDValue(Result, 0),
956                                    getI32Imm(Shift),
957                                    getI32Imm(63 - Shift));
958  }
959
960  // Add in the last bits as required.
961  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
962    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
963                                    SDValue(Result, 0), getI32Imm(Hi));
964  }
965  if ((Lo = Remainder & 0xFFFF)) {
966    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
967                                    SDValue(Result, 0), getI32Imm(Lo));
968  }
969
970  return Result;
971}
972
973static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
974                            int64_t Imm) {
975  unsigned Count = selectI64ImmInstrCountDirect(Imm);
976
977  // If the instruction count is 1 or 2, we do not need further analysis
978  // since rotate + load constant requires at least 2 instructions.
979  if (Count <= 2)
980    return selectI64ImmDirect(CurDAG, dl, Imm);
981
982  unsigned RMin = 0;
983
984  int64_t MatImm;
985  unsigned MaskEnd;
986
987  for (unsigned r = 1; r < 63; ++r) {
988    uint64_t RImm = Rot64(Imm, r);
989    unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
990    if (RCount < Count) {
991      Count = RCount;
992      RMin = r;
993      MatImm = RImm;
994      MaskEnd = 63;
995    }
996
997    // If the immediate to generate has many trailing zeros, it might be
998    // worthwhile to generate a rotated value with too many leading ones
999    // (because that's free with li/lis's sign-extension semantics), and then
1000    // mask them off after rotation.
1001
1002    unsigned LS = findLastSet(RImm);
1003    // We're adding (63-LS) higher-order ones, and we expect to mask them off
1004    // after performing the inverse rotation by (64-r). So we need that:
1005    //   63-LS == 64-r => LS == r-1
1006    if (LS != r-1)
1007      continue;
1008
1009    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
1010    uint64_t RImmWithOnes = RImm | OnesMask;
1011
1012    RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
1013    if (RCount < Count) {
1014      Count = RCount;
1015      RMin = r;
1016      MatImm = RImmWithOnes;
1017      MaskEnd = LS;
1018    }
1019  }
1020
1021  if (!RMin)
1022    return selectI64ImmDirect(CurDAG, dl, Imm);
1023
1024  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1025      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1026  };
1027
1028  SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
1029  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
1030                                getI32Imm(64 - RMin), getI32Imm(MaskEnd));
1031}
1032
1033static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
1034  unsigned MaxTruncation = 0;
1035  // Cannot use range-based for loop here as we need the actual use (i.e. we
1036  // need the operand number corresponding to the use). A range-based for
1037  // will unbox the use and provide an SDNode*.
1038  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
1039       Use != UseEnd; ++Use) {
1040    unsigned Opc =
1041      Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
1042    switch (Opc) {
1043    default: return 0;
1044    case ISD::TRUNCATE:
1045      if (Use->isMachineOpcode())
1046        return 0;
1047      MaxTruncation =
1048        std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
1049      continue;
1050    case ISD::STORE: {
1051      if (Use->isMachineOpcode())
1052        return 0;
1053      StoreSDNode *STN = cast<StoreSDNode>(*Use);
1054      unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
1055      if (MemVTSize == 64 || Use.getOperandNo() != 0)
1056        return 0;
1057      MaxTruncation = std::max(MaxTruncation, MemVTSize);
1058      continue;
1059    }
1060    case PPC::STW8:
1061    case PPC::STWX8:
1062    case PPC::STWU8:
1063    case PPC::STWUX8:
1064      if (Use.getOperandNo() != 0)
1065        return 0;
1066      MaxTruncation = std::max(MaxTruncation, 32u);
1067      continue;
1068    case PPC::STH8:
1069    case PPC::STHX8:
1070    case PPC::STHU8:
1071    case PPC::STHUX8:
1072      if (Use.getOperandNo() != 0)
1073        return 0;
1074      MaxTruncation = std::max(MaxTruncation, 16u);
1075      continue;
1076    case PPC::STB8:
1077    case PPC::STBX8:
1078    case PPC::STBU8:
1079    case PPC::STBUX8:
1080      if (Use.getOperandNo() != 0)
1081        return 0;
1082      MaxTruncation = std::max(MaxTruncation, 8u);
1083      continue;
1084    }
1085  }
1086  return MaxTruncation;
1087}
1088
1089// Select a 64-bit constant.
1090static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1091  SDLoc dl(N);
1092
1093  // Get 64 bit value.
1094  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1095  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1096    uint64_t SextImm = SignExtend64(Imm, MinSize);
1097    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1098    if (isInt<16>(SextImm))
1099      return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1100  }
1101  return selectI64Imm(CurDAG, dl, Imm);
1102}
1103
1104namespace {
1105
1106class BitPermutationSelector {
1107  struct ValueBit {
1108    SDValue V;
1109
1110    // The bit number in the value, using a convention where bit 0 is the
1111    // lowest-order bit.
1112    unsigned Idx;
1113
1114    // ConstZero means a bit we need to mask off.
1115    // Variable is a bit comes from an input variable.
1116    // VariableKnownToBeZero is also a bit comes from an input variable,
1117    // but it is known to be already zero. So we do not need to mask them.
1118    enum Kind {
1119      ConstZero,
1120      Variable,
1121      VariableKnownToBeZero
1122    } K;
1123
1124    ValueBit(SDValue V, unsigned I, Kind K = Variable)
1125      : V(V), Idx(I), K(K) {}
1126    ValueBit(Kind K = Variable)
1127      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
1128
1129    bool isZero() const {
1130      return K == ConstZero || K == VariableKnownToBeZero;
1131    }
1132
1133    bool hasValue() const {
1134      return K == Variable || K == VariableKnownToBeZero;
1135    }
1136
1137    SDValue getValue() const {
1138      assert(hasValue() && "Cannot get the value of a constant bit");
1139      return V;
1140    }
1141
1142    unsigned getValueBitIndex() const {
1143      assert(hasValue() && "Cannot get the value bit index of a constant bit");
1144      return Idx;
1145    }
1146  };
1147
1148  // A bit group has the same underlying value and the same rotate factor.
1149  struct BitGroup {
1150    SDValue V;
1151    unsigned RLAmt;
1152    unsigned StartIdx, EndIdx;
1153
1154    // This rotation amount assumes that the lower 32 bits of the quantity are
1155    // replicated in the high 32 bits by the rotation operator (which is done
1156    // by rlwinm and friends in 64-bit mode).
1157    bool Repl32;
1158    // Did converting to Repl32 == true change the rotation factor? If it did,
1159    // it decreased it by 32.
1160    bool Repl32CR;
1161    // Was this group coalesced after setting Repl32 to true?
1162    bool Repl32Coalesced;
1163
1164    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1165      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1166        Repl32Coalesced(false) {
1167      LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1168                        << " [" << S << ", " << E << "]\n");
1169    }
1170  };
1171
1172  // Information on each (Value, RLAmt) pair (like the number of groups
1173  // associated with each) used to choose the lowering method.
1174  struct ValueRotInfo {
1175    SDValue V;
1176    unsigned RLAmt = std::numeric_limits<unsigned>::max();
1177    unsigned NumGroups = 0;
1178    unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1179    bool Repl32 = false;
1180
1181    ValueRotInfo() = default;
1182
1183    // For sorting (in reverse order) by NumGroups, and then by
1184    // FirstGroupStartIdx.
1185    bool operator < (const ValueRotInfo &Other) const {
1186      // We need to sort so that the non-Repl32 come first because, when we're
1187      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1188      // masking operation.
1189      if (Repl32 < Other.Repl32)
1190        return true;
1191      else if (Repl32 > Other.Repl32)
1192        return false;
1193      else if (NumGroups > Other.NumGroups)
1194        return true;
1195      else if (NumGroups < Other.NumGroups)
1196        return false;
1197      else if (RLAmt == 0 && Other.RLAmt != 0)
1198        return true;
1199      else if (RLAmt != 0 && Other.RLAmt == 0)
1200        return false;
1201      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1202        return true;
1203      return false;
1204    }
1205  };
1206
  // Memoized result of getValueBits for one value: the "interesting" flag
  // paired with the per-bit description of that value.
  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  // Maps each analyzed SDValue to its separately allocated memoized result.
  using ValueBitsMemoizer =
      DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  // Cache that getValueBits consults first and fills on a miss.
  ValueBitsMemoizer Memoizer;
1211
1212  // Return a pair of bool and a SmallVector pointer to a memoization entry.
1213  // The bool is true if something interesting was deduced, otherwise if we're
1214  // providing only a generic representation of V (or something else likewise
1215  // uninteresting for instruction selection) through the SmallVector.
1216  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1217                                                            unsigned NumBits) {
1218    auto &ValueEntry = Memoizer[V];
1219    if (ValueEntry)
1220      return std::make_pair(ValueEntry->first, &ValueEntry->second);
1221    ValueEntry.reset(new ValueBitsMemoizedValue());
1222    bool &Interesting = ValueEntry->first;
1223    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1224    Bits.resize(NumBits);
1225
1226    switch (V.getOpcode()) {
1227    default: break;
1228    case ISD::ROTL:
1229      if (isa<ConstantSDNode>(V.getOperand(1))) {
1230        unsigned RotAmt = V.getConstantOperandVal(1);
1231
1232        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1233
1234        for (unsigned i = 0; i < NumBits; ++i)
1235          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1236
1237        return std::make_pair(Interesting = true, &Bits);
1238      }
1239      break;
1240    case ISD::SHL:
1241      if (isa<ConstantSDNode>(V.getOperand(1))) {
1242        unsigned ShiftAmt = V.getConstantOperandVal(1);
1243
1244        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1245
1246        for (unsigned i = ShiftAmt; i < NumBits; ++i)
1247          Bits[i] = LHSBits[i - ShiftAmt];
1248
1249        for (unsigned i = 0; i < ShiftAmt; ++i)
1250          Bits[i] = ValueBit(ValueBit::ConstZero);
1251
1252        return std::make_pair(Interesting = true, &Bits);
1253      }
1254      break;
1255    case ISD::SRL:
1256      if (isa<ConstantSDNode>(V.getOperand(1))) {
1257        unsigned ShiftAmt = V.getConstantOperandVal(1);
1258
1259        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1260
1261        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1262          Bits[i] = LHSBits[i + ShiftAmt];
1263
1264        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1265          Bits[i] = ValueBit(ValueBit::ConstZero);
1266
1267        return std::make_pair(Interesting = true, &Bits);
1268      }
1269      break;
1270    case ISD::AND:
1271      if (isa<ConstantSDNode>(V.getOperand(1))) {
1272        uint64_t Mask = V.getConstantOperandVal(1);
1273
1274        const SmallVector<ValueBit, 64> *LHSBits;
1275        // Mark this as interesting, only if the LHS was also interesting. This
1276        // prevents the overall procedure from matching a single immediate 'and'
1277        // (which is non-optimal because such an and might be folded with other
1278        // things if we don't select it here).
1279        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1280
1281        for (unsigned i = 0; i < NumBits; ++i)
1282          if (((Mask >> i) & 1) == 1)
1283            Bits[i] = (*LHSBits)[i];
1284          else {
1285            // AND instruction masks this bit. If the input is already zero,
1286            // we have nothing to do here. Otherwise, make the bit ConstZero.
1287            if ((*LHSBits)[i].isZero())
1288              Bits[i] = (*LHSBits)[i];
1289            else
1290              Bits[i] = ValueBit(ValueBit::ConstZero);
1291          }
1292
1293        return std::make_pair(Interesting, &Bits);
1294      }
1295      break;
1296    case ISD::OR: {
1297      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1298      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1299
1300      bool AllDisjoint = true;
1301      SDValue LastVal = SDValue();
1302      unsigned LastIdx = 0;
1303      for (unsigned i = 0; i < NumBits; ++i) {
1304        if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1305          // If both inputs are known to be zero and one is ConstZero and
1306          // another is VariableKnownToBeZero, we can select whichever
1307          // we like. To minimize the number of bit groups, we select
1308          // VariableKnownToBeZero if this bit is the next bit of the same
1309          // input variable from the previous bit. Otherwise, we select
1310          // ConstZero.
1311          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1312              LHSBits[i].getValueBitIndex() == LastIdx + 1)
1313            Bits[i] = LHSBits[i];
1314          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1315                   RHSBits[i].getValueBitIndex() == LastIdx + 1)
1316            Bits[i] = RHSBits[i];
1317          else
1318            Bits[i] = ValueBit(ValueBit::ConstZero);
1319        }
1320        else if (LHSBits[i].isZero())
1321          Bits[i] = RHSBits[i];
1322        else if (RHSBits[i].isZero())
1323          Bits[i] = LHSBits[i];
1324        else {
1325          AllDisjoint = false;
1326          break;
1327        }
1328        // We remember the value and bit index of this bit.
1329        if (Bits[i].hasValue()) {
1330          LastVal = Bits[i].getValue();
1331          LastIdx = Bits[i].getValueBitIndex();
1332        }
1333        else {
1334          if (LastVal) LastVal = SDValue();
1335          LastIdx = 0;
1336        }
1337      }
1338
1339      if (!AllDisjoint)
1340        break;
1341
1342      return std::make_pair(Interesting = true, &Bits);
1343    }
1344    case ISD::ZERO_EXTEND: {
1345      // We support only the case with zero extension from i32 to i64 so far.
1346      if (V.getValueType() != MVT::i64 ||
1347          V.getOperand(0).getValueType() != MVT::i32)
1348        break;
1349
1350      const SmallVector<ValueBit, 64> *LHSBits;
1351      const unsigned NumOperandBits = 32;
1352      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1353                                                    NumOperandBits);
1354
1355      for (unsigned i = 0; i < NumOperandBits; ++i)
1356        Bits[i] = (*LHSBits)[i];
1357
1358      for (unsigned i = NumOperandBits; i < NumBits; ++i)
1359        Bits[i] = ValueBit(ValueBit::ConstZero);
1360
1361      return std::make_pair(Interesting, &Bits);
1362    }
1363    case ISD::TRUNCATE: {
1364      EVT FromType = V.getOperand(0).getValueType();
1365      EVT ToType = V.getValueType();
1366      // We support only the case with truncate from i64 to i32.
1367      if (FromType != MVT::i64 || ToType != MVT::i32)
1368        break;
1369      const unsigned NumAllBits = FromType.getSizeInBits();
1370      SmallVector<ValueBit, 64> *InBits;
1371      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1372                                                    NumAllBits);
1373      const unsigned NumValidBits = ToType.getSizeInBits();
1374
1375      // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1376      // So, we cannot include this truncate.
1377      bool UseUpper32bit = false;
1378      for (unsigned i = 0; i < NumValidBits; ++i)
1379        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1380          UseUpper32bit = true;
1381          break;
1382        }
1383      if (UseUpper32bit)
1384        break;
1385
1386      for (unsigned i = 0; i < NumValidBits; ++i)
1387        Bits[i] = (*InBits)[i];
1388
1389      return std::make_pair(Interesting, &Bits);
1390    }
1391    case ISD::AssertZext: {
1392      // For AssertZext, we look through the operand and
1393      // mark the bits known to be zero.
1394      const SmallVector<ValueBit, 64> *LHSBits;
1395      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1396                                                    NumBits);
1397
1398      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1399      const unsigned NumValidBits = FromType.getSizeInBits();
1400      for (unsigned i = 0; i < NumValidBits; ++i)
1401        Bits[i] = (*LHSBits)[i];
1402
1403      // These bits are known to be zero but the AssertZext may be from a value
1404      // that already has some constant zero bits (i.e. from a masking and).
1405      for (unsigned i = NumValidBits; i < NumBits; ++i)
1406        Bits[i] = (*LHSBits)[i].hasValue()
1407                      ? ValueBit((*LHSBits)[i].getValue(),
1408                                 (*LHSBits)[i].getValueBitIndex(),
1409                                 ValueBit::VariableKnownToBeZero)
1410                      : ValueBit(ValueBit::ConstZero);
1411
1412      return std::make_pair(Interesting, &Bits);
1413    }
1414    case ISD::LOAD:
1415      LoadSDNode *LD = cast<LoadSDNode>(V);
1416      if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1417        EVT VT = LD->getMemoryVT();
1418        const unsigned NumValidBits = VT.getSizeInBits();
1419
1420        for (unsigned i = 0; i < NumValidBits; ++i)
1421          Bits[i] = ValueBit(V, i);
1422
1423        // These bits are known to be zero.
1424        for (unsigned i = NumValidBits; i < NumBits; ++i)
1425          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1426
1427        // Zero-extending load itself cannot be optimized. So, it is not
1428        // interesting by itself though it gives useful information.
1429        return std::make_pair(Interesting = false, &Bits);
1430      }
1431      break;
1432    }
1433
1434    for (unsigned i = 0; i < NumBits; ++i)
1435      Bits[i] = ValueBit(V, i);
1436
1437    return std::make_pair(Interesting = false, &Bits);
1438  }
1439
1440  // For each value (except the constant ones), compute the left-rotate amount
1441  // to get it from its original to final position.
1442  void computeRotationAmounts() {
1443    NeedMask = false;
1444    RLAmt.resize(Bits.size());
1445    for (unsigned i = 0; i < Bits.size(); ++i)
1446      if (Bits[i].hasValue()) {
1447        unsigned VBI = Bits[i].getValueBitIndex();
1448        if (i >= VBI)
1449          RLAmt[i] = i - VBI;
1450        else
1451          RLAmt[i] = Bits.size() - (VBI - i);
1452      } else if (Bits[i].isZero()) {
1453        NeedMask = true;
1454        RLAmt[i] = UINT32_MAX;
1455      } else {
1456        llvm_unreachable("Unknown value bit type");
1457      }
1458  }
1459
1460  // Collect groups of consecutive bits with the same underlying value and
1461  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1462  // they break up groups.
1463  void collectBitGroups(bool LateMask) {
1464    BitGroups.clear();
1465
1466    unsigned LastRLAmt = RLAmt[0];
1467    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1468    unsigned LastGroupStartIdx = 0;
1469    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1470    for (unsigned i = 1; i < Bits.size(); ++i) {
1471      unsigned ThisRLAmt = RLAmt[i];
1472      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1473      if (LateMask && !ThisValue) {
1474        ThisValue = LastValue;
1475        ThisRLAmt = LastRLAmt;
1476        // If we're doing late masking, then the first bit group always starts
1477        // at zero (even if the first bits were zero).
1478        if (BitGroups.empty())
1479          LastGroupStartIdx = 0;
1480      }
1481
1482      // If this bit is known to be zero and the current group is a bit group
1483      // of zeros, we do not need to terminate the current bit group even the
1484      // Value or RLAmt does not match here. Instead, we terminate this group
1485      // when the first non-zero bit appears later.
1486      if (IsGroupOfZeros && Bits[i].isZero())
1487        continue;
1488
1489      // If this bit has the same underlying value and the same rotate factor as
1490      // the last one, then they're part of the same group.
1491      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1492        // We cannot continue the current group if this bits is not known to
1493        // be zero in a bit group of zeros.
1494        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1495          continue;
1496
1497      if (LastValue.getNode())
1498        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1499                                     i-1));
1500      LastRLAmt = ThisRLAmt;
1501      LastValue = ThisValue;
1502      LastGroupStartIdx = i;
1503      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1504    }
1505    if (LastValue.getNode())
1506      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1507                                   Bits.size()-1));
1508
1509    if (BitGroups.empty())
1510      return;
1511
1512    // We might be able to combine the first and last groups.
1513    if (BitGroups.size() > 1) {
1514      // If the first and last groups are the same, then remove the first group
1515      // in favor of the last group, making the ending index of the last group
1516      // equal to the ending index of the to-be-removed first group.
1517      if (BitGroups[0].StartIdx == 0 &&
1518          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1519          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1520          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1521        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1522        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1523        BitGroups.erase(BitGroups.begin());
1524      }
1525    }
1526  }
1527
1528  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1529  // associated with each. If the number of groups are same, we prefer a group
1530  // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1531  // instruction. If there is a degeneracy, pick the one that occurs
1532  // first (in the final value).
1533  void collectValueRotInfo() {
1534    ValueRots.clear();
1535
1536    for (auto &BG : BitGroups) {
1537      unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1538      ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1539      VRI.V = BG.V;
1540      VRI.RLAmt = BG.RLAmt;
1541      VRI.Repl32 = BG.Repl32;
1542      VRI.NumGroups += 1;
1543      VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1544    }
1545
1546    // Now that we've collected the various ValueRotInfo instances, we need to
1547    // sort them.
1548    ValueRotsVec.clear();
1549    for (auto &I : ValueRots) {
1550      ValueRotsVec.push_back(I.second);
1551    }
1552    llvm::sort(ValueRotsVec);
1553  }
1554
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32 bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can represent, as a single bit group, what would
  // otherwise be separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible. This function returns nothing; converted groups are
  // marked by setting their Repl32 flag.
1563  void assignRepl32BitGroups() {
1564    // If we have bits like this:
1565    //
1566    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
1567    // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
1568    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
1569    //
1570    // But, making use of a 32-bit operation that replicates the low-order 32
1571    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1572    // of 8.
1573
1574    auto IsAllLow32 = [this](BitGroup & BG) {
1575      if (BG.StartIdx <= BG.EndIdx) {
1576        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1577          if (!Bits[i].hasValue())
1578            continue;
1579          if (Bits[i].getValueBitIndex() >= 32)
1580            return false;
1581        }
1582      } else {
1583        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1584          if (!Bits[i].hasValue())
1585            continue;
1586          if (Bits[i].getValueBitIndex() >= 32)
1587            return false;
1588        }
1589        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1590          if (!Bits[i].hasValue())
1591            continue;
1592          if (Bits[i].getValueBitIndex() >= 32)
1593            return false;
1594        }
1595      }
1596
1597      return true;
1598    };
1599
1600    for (auto &BG : BitGroups) {
1601      // If this bit group has RLAmt of 0 and will not be merged with
1602      // another bit group, we don't benefit from Repl32. We don't mark
1603      // such group to give more freedom for later instruction selection.
1604      if (BG.RLAmt == 0) {
1605        auto PotentiallyMerged = [this](BitGroup & BG) {
1606          for (auto &BG2 : BitGroups)
1607            if (&BG != &BG2 && BG.V == BG2.V &&
1608                (BG2.RLAmt == 0 || BG2.RLAmt == 32))
1609              return true;
1610          return false;
1611        };
1612        if (!PotentiallyMerged(BG))
1613          continue;
1614      }
1615      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1616        if (IsAllLow32(BG)) {
1617          if (BG.RLAmt >= 32) {
1618            BG.RLAmt -= 32;
1619            BG.Repl32CR = true;
1620          }
1621
1622          BG.Repl32 = true;
1623
1624          LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1625                            << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
1626                            << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1627        }
1628      }
1629    }
1630
1631    // Now walk through the bit groups, consolidating where possible.
1632    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1633      // We might want to remove this bit group by merging it with the previous
1634      // group (which might be the ending group).
1635      auto IP = (I == BitGroups.begin()) ?
1636                std::prev(BitGroups.end()) : std::prev(I);
1637      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1638          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1639
1640        LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1641                          << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
1642                          << I->StartIdx << ", " << I->EndIdx
1643                          << "] with group with range [" << IP->StartIdx << ", "
1644                          << IP->EndIdx << "]\n");
1645
1646        IP->EndIdx = I->EndIdx;
1647        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1648        IP->Repl32Coalesced = true;
1649        I = BitGroups.erase(I);
1650        continue;
1651      } else {
1652        // There is a special case worth handling: If there is a single group
1653        // covering the entire upper 32 bits, and it can be merged with both
1654        // the next and previous groups (which might be the same group), then
1655        // do so. If it is the same group (so there will be only one group in
1656        // total), then we need to reverse the order of the range so that it
1657        // covers the entire 64 bits.
1658        if (I->StartIdx == 32 && I->EndIdx == 63) {
1659          assert(std::next(I) == BitGroups.end() &&
1660                 "bit group ends at index 63 but there is another?");
1661          auto IN = BitGroups.begin();
1662
1663          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1664              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1665              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1666              IsAllLow32(*I)) {
1667
1668            LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
1669                              << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
1670                              << ", " << I->EndIdx
1671                              << "] with 32-bit replicated groups with ranges ["
1672                              << IP->StartIdx << ", " << IP->EndIdx << "] and ["
1673                              << IN->StartIdx << ", " << IN->EndIdx << "]\n");
1674
1675            if (IP == IN) {
1676              // There is only one other group; change it to cover the whole
1677              // range (backward, so that it can still be Repl32 but cover the
1678              // whole 64-bit range).
1679              IP->StartIdx = 31;
1680              IP->EndIdx = 30;
1681              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1682              IP->Repl32Coalesced = true;
1683              I = BitGroups.erase(I);
1684            } else {
1685              // There are two separate groups, one before this group and one
1686              // after us (at the beginning). We're going to remove this group,
1687              // but also the group at the very beginning.
1688              IP->EndIdx = IN->EndIdx;
1689              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1690              IP->Repl32Coalesced = true;
1691              I = BitGroups.erase(I);
1692              BitGroups.erase(BitGroups.begin());
1693            }
1694
1695            // This must be the last group in the vector (and we might have
1696            // just invalidated the iterator above), so break here.
1697            break;
1698          }
1699        }
1700      }
1701
1702      ++I;
1703    }
1704  }
1705
1706  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1707    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1708  }
1709
1710  uint64_t getZerosMask() {
1711    uint64_t Mask = 0;
1712    for (unsigned i = 0; i < Bits.size(); ++i) {
1713      if (Bits[i].hasValue())
1714        continue;
1715      Mask |= (UINT64_C(1) << i);
1716    }
1717
1718    return ~Mask;
1719  }
1720
1721  // This method extends an input value to 64 bit if input is 32-bit integer.
1722  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1723  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1724  // In such case, we extend it to 64 bit to be consistent with other values.
1725  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1726    if (V.getValueSizeInBits() == 64)
1727      return V;
1728
1729    assert(V.getValueSizeInBits() == 32);
1730    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1731    SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1732                                                   MVT::i64), 0);
1733    SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1734                                                    MVT::i64, ImDef, V,
1735                                                    SubRegIdx), 0);
1736    return ExtVal;
1737  }
1738
1739  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
1740    if (V.getValueSizeInBits() == 32)
1741      return V;
1742
1743    assert(V.getValueSizeInBits() == 64);
1744    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1745    SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1746                                                    MVT::i32, V, SubRegIdx), 0);
1747    return SubVal;
1748  }
1749
  // Depending on the number of groups for a particular value, it might be
  // better to rotate, mask explicitly (using andi/andis), and then or the
  // result. Select this part of the result first.
  //
  // dl      - debug location used for every node created here.
  // Res     - in/out: the partial result so far. Each selected
  //           (value, rotation) part is OR'ed into it, or assigned to it if
  //           it is still empty.
  // InstCnt - optional; when non-null it is increased by the number of
  //           instructions accounted for each part selected here.
  //
  // Bit groups handled here are erased from BitGroups.
  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    // This strategy is skipped entirely when BPermRewriterNoMasking is set.
    if (BPermRewriterNoMasking)
      return;

    for (ValueRotInfo &VRI : ValueRotsVec) {
      // Collect every result bit that comes from VRI.V with rotation amount
      // VRI.RLAmt.
      unsigned Mask = 0;
      for (unsigned i = 0; i < Bits.size(); ++i) {
        if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
          continue;
        if (RLAmt[i] != VRI.RLAmt)
          continue;
        Mask |= (1u << i);
      }

      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in mask for value bit groups");
      bool NeedsRotate = VRI.RLAmt != 0;

      // We're trying to minimize the number of instructions. If we have one
      // group, using one of andi/andis can break even.  If we have three
      // groups, we can use both andi and andis and break even (to use both
      // andi and andis we also need to or the results together). We need four
      // groups if we also need to rotate. To use andi/andis we need to do more
      // than break even because rotate-and-mask instructions tend to be easier
      // to schedule.

      // FIXME: We've biased here against using andi/andis, which is right for
      // POWER cores, but not optimal everywhere. For example, on the A2,
      // andi/andis have single-cycle latency whereas the rotate-and-mask
      // instructions take two cycles, and it would be better to bias toward
      // andi/andis in break-even cases.

      // Cost of the masking approach: optional rotate, one instruction per
      // non-zero half of the mask, an or to join the halves when both are
      // used, and an or to merge into an already non-empty Res.
      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (ANDIMask != 0) +
                             (unsigned) (ANDISMask != 0) +
                             (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
                             (unsigned) (bool) Res;

      LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
                        << " RL: " << VRI.RLAmt << ":"
                        << "\n\t\t\tisel using masking: " << NumAndInsts
                        << " using rotates: " << VRI.NumGroups << "\n");

      // The rotate-based approach is costed at one instruction per group;
      // masking is used only when it is strictly cheaper.
      if (NumAndInsts >= VRI.NumGroups)
        continue;

      LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      // Rotate the whole value (full 0..31 mask) if a rotation is required;
      // otherwise use the value directly.
      SDValue VRot;
      if (VRI.RLAmt) {
        SDValue Ops[] =
          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
            getI32Imm(0, dl), getI32Imm(31, dl) };
        VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                              Ops), 0);
      } else {
        VRot = TruncateToInt32(VRI.V, dl);
      }

      // Mask the low and high 16-bit halves separately, as needed.
      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
                                                 VRot, getI32Imm(ANDIMask, dl)),
                          0);
      if (ANDISMask != 0)
        ANDISVal =
            SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
                                           getI32Imm(ANDISMask, dl)),
                    0);

      // Combine the halves: use whichever one exists, or or them together.
      SDValue TotalVal;
      if (!ANDIVal)
        TotalVal = ANDISVal;
      else if (!ANDISVal)
        TotalVal = ANDIVal;
      else
        TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                             ANDIVal, ANDISVal), 0);

      // Merge this part into the running result.
      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                        Res, TotalVal), 0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }
  }
1849
1850  // Instruction selection for the 32-bit case.
1851  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1852    SDLoc dl(N);
1853    SDValue Res;
1854
1855    if (InstCnt) *InstCnt = 0;
1856
1857    // Take care of cases that should use andi/andis first.
1858    SelectAndParts32(dl, Res, InstCnt);
1859
1860    // If we've not yet selected a 'starting' instruction, and we have no zeros
1861    // to fill in, select the (Value, RLAmt) with the highest priority (largest
1862    // number of groups), and start with this rotated value.
1863    if ((!NeedMask || LateMask) && !Res) {
1864      ValueRotInfo &VRI = ValueRotsVec[0];
1865      if (VRI.RLAmt) {
1866        if (InstCnt) *InstCnt += 1;
1867        SDValue Ops[] =
1868          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
1869            getI32Imm(0, dl), getI32Imm(31, dl) };
1870        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1871                      0);
1872      } else {
1873        Res = TruncateToInt32(VRI.V, dl);
1874      }
1875
1876      // Now, remove all groups with this underlying value and rotation factor.
1877      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1878        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1879      });
1880    }
1881
1882    if (InstCnt) *InstCnt += BitGroups.size();
1883
1884    // Insert the other groups (one at a time).
1885    for (auto &BG : BitGroups) {
1886      if (!Res) {
1887        SDValue Ops[] =
1888          { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
1889            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1890            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1891        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1892      } else {
1893        SDValue Ops[] =
1894          { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
1895              getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1896            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1897        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1898      }
1899    }
1900
1901    if (LateMask) {
1902      unsigned Mask = (unsigned) getZerosMask();
1903
1904      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1905      assert((ANDIMask != 0 || ANDISMask != 0) &&
1906             "No set bits in zeros mask?");
1907
1908      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1909                               (unsigned) (ANDISMask != 0) +
1910                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1911
1912      SDValue ANDIVal, ANDISVal;
1913      if (ANDIMask != 0)
1914        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
1915                                                 Res, getI32Imm(ANDIMask, dl)),
1916                          0);
1917      if (ANDISMask != 0)
1918        ANDISVal =
1919            SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
1920                                           getI32Imm(ANDISMask, dl)),
1921                    0);
1922
1923      if (!ANDIVal)
1924        Res = ANDISVal;
1925      else if (!ANDISVal)
1926        Res = ANDIVal;
1927      else
1928        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1929                        ANDIVal, ANDISVal), 0);
1930    }
1931
1932    return Res.getNode();
1933  }
1934
1935  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
1936                                unsigned MaskStart, unsigned MaskEnd,
1937                                bool IsIns) {
1938    // In the notation used by the instructions, 'start' and 'end' are reversed
1939    // because bits are counted from high to low order.
1940    unsigned InstMaskStart = 64 - MaskEnd - 1,
1941             InstMaskEnd   = 64 - MaskStart - 1;
1942
1943    if (Repl32)
1944      return 1;
1945
1946    if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
1947        InstMaskEnd == 63 - RLAmt)
1948      return 1;
1949
1950    return 2;
1951  }
1952
1953  // For 64-bit values, not all combinations of rotates and masks are
1954  // available. Produce one if it is available.
1955  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
1956                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
1957                          unsigned *InstCnt = nullptr) {
1958    // In the notation used by the instructions, 'start' and 'end' are reversed
1959    // because bits are counted from high to low order.
1960    unsigned InstMaskStart = 64 - MaskEnd - 1,
1961             InstMaskEnd   = 64 - MaskStart - 1;
1962
1963    if (InstCnt) *InstCnt += 1;
1964
1965    if (Repl32) {
1966      // This rotation amount assumes that the lower 32 bits of the quantity
1967      // are replicated in the high 32 bits by the rotation operator (which is
1968      // done by rlwinm and friends).
1969      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1970      assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1971      SDValue Ops[] =
1972        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1973          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1974      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1975                                            Ops), 0);
1976    }
1977
1978    if (InstMaskEnd == 63) {
1979      SDValue Ops[] =
1980        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1981          getI32Imm(InstMaskStart, dl) };
1982      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1983    }
1984
1985    if (InstMaskStart == 0) {
1986      SDValue Ops[] =
1987        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1988          getI32Imm(InstMaskEnd, dl) };
1989      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1990    }
1991
1992    if (InstMaskEnd == 63 - RLAmt) {
1993      SDValue Ops[] =
1994        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1995          getI32Imm(InstMaskStart, dl) };
1996      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1997    }
1998
1999    // We cannot do this with a single instruction, so we'll use two. The
2000    // problem is that we're not free to choose both a rotation amount and mask
2001    // start and end independently. We can choose an arbitrary mask start and
2002    // end, but then the rotation amount is fixed. Rotation, however, can be
2003    // inverted, and so by applying an "inverse" rotation first, we can get the
2004    // desired result.
2005    if (InstCnt) *InstCnt += 1;
2006
2007    // The rotation mask for the second instruction must be MaskStart.
2008    unsigned RLAmt2 = MaskStart;
2009    // The first instruction must rotate V so that the overall rotation amount
2010    // is RLAmt.
2011    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2012    if (RLAmt1)
2013      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2014    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2015  }
2016
2017  // For 64-bit values, not all combinations of rotates and masks are
2018  // available. Produce a rotate-mask-and-insert if one is available.
2019  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2020                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
2021                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2022    // In the notation used by the instructions, 'start' and 'end' are reversed
2023    // because bits are counted from high to low order.
2024    unsigned InstMaskStart = 64 - MaskEnd - 1,
2025             InstMaskEnd   = 64 - MaskStart - 1;
2026
2027    if (InstCnt) *InstCnt += 1;
2028
2029    if (Repl32) {
2030      // This rotation amount assumes that the lower 32 bits of the quantity
2031      // are replicated in the high 32 bits by the rotation operator (which is
2032      // done by rlwinm and friends).
2033      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2034      assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
2035      SDValue Ops[] =
2036        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2037          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2038      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2039                                            Ops), 0);
2040    }
2041
2042    if (InstMaskEnd == 63 - RLAmt) {
2043      SDValue Ops[] =
2044        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2045          getI32Imm(InstMaskStart, dl) };
2046      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2047    }
2048
2049    // We cannot do this with a single instruction, so we'll use two. The
2050    // problem is that we're not free to choose both a rotation amount and mask
2051    // start and end independently. We can choose an arbitrary mask start and
2052    // end, but then the rotation amount is fixed. Rotation, however, can be
2053    // inverted, and so by applying an "inverse" rotation first, we can get the
2054    // desired result.
2055    if (InstCnt) *InstCnt += 1;
2056
2057    // The rotation mask for the second instruction must be MaskStart.
2058    unsigned RLAmt2 = MaskStart;
2059    // The first instruction must rotate V so that the overall rotation amount
2060    // is RLAmt.
2061    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2062    if (RLAmt1)
2063      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2064    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2065  }
2066
  // 64-bit counterpart of SelectAndParts32. For each (value, rotation) entry
  // in ValueRotsVec, decide whether rotating once and masking explicitly
  // (andi/andis when the mask fits in 32 bits, otherwise a materialized 64-bit
  // constant plus an and) is preferable to per-group rotate-and-mask
  // instructions.
  //
  // dl      - debug location used for every node created here.
  // Res     - in/out: the partial result so far. Each selected part is OR'ed
  //           into it, or assigned to it if it is still empty.
  // InstCnt - optional; when non-null it is increased by the masking cost of
  //           each part selected here.
  //
  // Bit groups handled here are erased from BitGroups.
  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    // This strategy is skipped entirely when BPermRewriterNoMasking is set.
    if (BPermRewriterNoMasking)
      return;

    // The idea here is the same as in the 32-bit version, but with additional
    // complications from the fact that Repl32 might be true. Because we
    // aggressively convert bit groups to Repl32 form (which, for small
    // rotation factors, involves no other change), and then coalesce, it might
    // be the case that a single 64-bit masking operation could handle both
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
    // form allowed coalescing, then we must use a 32-bit rotation in order to
    // completely capture the new combined bit group.

    for (ValueRotInfo &VRI : ValueRotsVec) {
      uint64_t Mask = 0;

      // We need to add to the mask all bits from the associated bit groups.
      // If Repl32 is false, we need to add bits from bit groups that have
      // Repl32 true, but are trivially convertible to Repl32 false. Such a
      // group is trivially convertible if it overlaps only with the lower 32
      // bits, and the group has not been coalesced.
      auto MatchingBG = [VRI](const BitGroup &BG) {
        if (VRI.V != BG.V)
          return false;

        unsigned EffRLAmt = BG.RLAmt;
        if (!VRI.Repl32 && BG.Repl32) {
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
              !BG.Repl32Coalesced) {
            // Repl32CR marks a group whose rotation amount had 32 subtracted
            // when it was converted; add it back for the comparison.
            if (BG.Repl32CR)
              EffRLAmt += 32;
          } else {
            return false;
          }
        } else if (VRI.Repl32 != BG.Repl32) {
          return false;
        }

        return VRI.RLAmt == EffRLAmt;
      };

      // Set the mask bits covered by every matching group. A group with
      // StartIdx > EndIdx wraps around the end of Bits.
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG))
          continue;

        if (BG.StartIdx <= BG.EndIdx) {
          for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        } else {
          for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
            Mask |= (UINT64_C(1) << i);
          for (unsigned i = 0; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        }
      }

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

      // Cost of the masking approach: optional rotate, an or to merge into an
      // already non-empty Res, and either the andi/andis sequence or the
      // count reported by selectI64ImmInstrCount for the mask plus the and.
      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (bool) Res;
      if (Use32BitInsts)
        NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
                       (unsigned) (ANDIMask != 0 && ANDISMask != 0);
      else
        NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;

      // Cost of the rotate approach: sum the per-group counts. Only the first
      // matching group is counted as a plain rotate-and-mask; the rest are
      // counted as inserts. MoreBG records whether any non-matching group
      // exists.
      unsigned NumRLInsts = 0;
      bool FirstBG = true;
      bool MoreBG = false;
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG)) {
          MoreBG = true;
          continue;
        }
        NumRLInsts +=
          SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                               !FirstBG);
        FirstBG = false;
      }

      LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
                        << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
                        << "\n\t\t\tisel using masking: " << NumAndInsts
                        << " using rotates: " << NumRLInsts << "\n");

      // When we'd use andi/andis, we bias toward using the rotates (andi only
      // has a record form, and is cracked on POWER cores). However, when using
      // general 64-bit constant formation, bias toward the constant form,
      // because that exposes more opportunities for CSE.
      if (NumAndInsts > NumRLInsts)
        continue;
      // When merging multiple bit groups, instruction or is used.
      // But when rotate is used, rldimi can insert the rotated value into any
      // register, so instruction or can be avoided.
      if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
        continue;

      LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      SDValue VRot;
      // We actually need to generate a rotation if we have a non-zero rotation
      // factor or, in the Repl32 case, if we care about any of the
      // higher-order replicated bits. In the latter case, we generate a mask
      // backward so that it actually includes the entire 64 bits.
      if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
        VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
      else
        VRot = VRI.V;

      SDValue TotalVal;
      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // Mask the low and high 16-bit halves separately, as needed.
        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                                   ExtendToInt64(VRot, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal =
              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                             ExtendToInt64(VRot, dl),
                                             getI32Imm(ANDISMask, dl)),
                      0);

        // Combine the halves: use whichever one exists, or or them together.
        if (!ANDIVal)
          TotalVal = ANDISVal;
        else if (!ANDISVal)
          TotalVal = ANDIVal;
        else
          TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                               ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: build the 64-bit mask with selectI64Imm and and it
        // with the (rotated) value.
        TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
        TotalVal =
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(VRot, dl), TotalVal),
                  0);
     }

      // Merge this part into the running result.
      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), TotalVal),
                      0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups(MatchingBG);
    }
  }
2232
2233  // Instruction selection for the 64-bit case.
2234  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2235    SDLoc dl(N);
2236    SDValue Res;
2237
2238    if (InstCnt) *InstCnt = 0;
2239
2240    // Take care of cases that should use andi/andis first.
2241    SelectAndParts64(dl, Res, InstCnt);
2242
2243    // If we've not yet selected a 'starting' instruction, and we have no zeros
2244    // to fill in, select the (Value, RLAmt) with the highest priority (largest
2245    // number of groups), and start with this rotated value.
2246    if ((!NeedMask || LateMask) && !Res) {
2247      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2248      // groups will come first, and so the VRI representing the largest number
2249      // of groups might not be first (it might be the first Repl32 groups).
2250      unsigned MaxGroupsIdx = 0;
2251      if (!ValueRotsVec[0].Repl32) {
2252        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2253          if (ValueRotsVec[i].Repl32) {
2254            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2255              MaxGroupsIdx = i;
2256            break;
2257          }
2258      }
2259
2260      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2261      bool NeedsRotate = false;
2262      if (VRI.RLAmt) {
2263        NeedsRotate = true;
2264      } else if (VRI.Repl32) {
2265        for (auto &BG : BitGroups) {
2266          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2267              BG.Repl32 != VRI.Repl32)
2268            continue;
2269
2270          // We don't need a rotate if the bit group is confined to the lower
2271          // 32 bits.
2272          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2273            continue;
2274
2275          NeedsRotate = true;
2276          break;
2277        }
2278      }
2279
2280      if (NeedsRotate)
2281        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2282                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2283                              InstCnt);
2284      else
2285        Res = VRI.V;
2286
2287      // Now, remove all groups with this underlying value and rotation factor.
2288      if (Res)
2289        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2290          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2291                 BG.Repl32 == VRI.Repl32;
2292        });
2293    }
2294
2295    // Because 64-bit rotates are more flexible than inserts, we might have a
2296    // preference regarding which one we do first (to save one instruction).
2297    if (!Res)
2298      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2299        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2300                                false) <
2301            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2302                                true)) {
2303          if (I != BitGroups.begin()) {
2304            BitGroup BG = *I;
2305            BitGroups.erase(I);
2306            BitGroups.insert(BitGroups.begin(), BG);
2307          }
2308
2309          break;
2310        }
2311      }
2312
2313    // Insert the other groups (one at a time).
2314    for (auto &BG : BitGroups) {
2315      if (!Res)
2316        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2317                              BG.EndIdx, InstCnt);
2318      else
2319        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2320                                 BG.StartIdx, BG.EndIdx, InstCnt);
2321    }
2322
2323    if (LateMask) {
2324      uint64_t Mask = getZerosMask();
2325
2326      // We can use the 32-bit andi/andis technique if the mask does not
2327      // require any higher-order bits. This can save an instruction compared
2328      // to always using the general 64-bit technique.
2329      bool Use32BitInsts = isUInt<32>(Mask);
2330      // Compute the masks for andi/andis that would be necessary.
2331      unsigned ANDIMask = (Mask & UINT16_MAX),
2332               ANDISMask = (Mask >> 16) & UINT16_MAX;
2333
2334      if (Use32BitInsts) {
2335        assert((ANDIMask != 0 || ANDISMask != 0) &&
2336               "No set bits in mask when using 32-bit ands for 64-bit value");
2337
2338        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2339                                 (unsigned) (ANDISMask != 0) +
2340                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2341
2342        SDValue ANDIVal, ANDISVal;
2343        if (ANDIMask != 0)
2344          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2345                                                   ExtendToInt64(Res, dl),
2346                                                   getI32Imm(ANDIMask, dl)),
2347                            0);
2348        if (ANDISMask != 0)
2349          ANDISVal =
2350              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2351                                             ExtendToInt64(Res, dl),
2352                                             getI32Imm(ANDISMask, dl)),
2353                      0);
2354
2355        if (!ANDIVal)
2356          Res = ANDISVal;
2357        else if (!ANDISVal)
2358          Res = ANDIVal;
2359        else
2360          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2361                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2362      } else {
2363        if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
2364
2365        SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2366        Res =
2367          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2368                                         ExtendToInt64(Res, dl), MaskVal), 0);
2369      }
2370    }
2371
2372    return Res.getNode();
2373  }
2374
2375  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2376    // Fill in BitGroups.
2377    collectBitGroups(LateMask);
2378    if (BitGroups.empty())
2379      return nullptr;
2380
2381    // For 64-bit values, figure out when we can use 32-bit instructions.
2382    if (Bits.size() == 64)
2383      assignRepl32BitGroups();
2384
2385    // Fill in ValueRotsVec.
2386    collectValueRotInfo();
2387
2388    if (Bits.size() == 32) {
2389      return Select32(N, LateMask, InstCnt);
2390    } else {
2391      assert(Bits.size() == 64 && "Not 64 bits here?");
2392      return Select64(N, LateMask, InstCnt);
2393    }
2394
2395    return nullptr;
2396  }
2397
2398  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2399    BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
2400  }
2401
  // Per-bit description of the value being selected. The public Select()
  // moves the result of getValueBits() into this vector; its size (32 or 64)
  // decides whether Select32() or Select64() is used.
  SmallVector<ValueBit, 64> Bits;

  // Set by computeRotationAmounts(). When true, the public Select() tries
  // both early and late masking and keeps the cheaper result.
  bool NeedMask = false;
  // Filled in by computeRotationAmounts().
  // NOTE(review): presumably one rotation amount per entry of Bits - confirm.
  SmallVector<unsigned, 64> RLAmt;

  // Filled in by collectBitGroups(); consumed (and partially erased) by the
  // width-specific selectors.
  SmallVector<BitGroup, 16> BitGroups;

  // NOTE(review): the key looks like (underlying value, rotation amount) -
  // confirm against collectValueRotInfo().
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  // Filled in by collectValueRotInfo().
  SmallVector<ValueRotInfo, 16> ValueRotsVec;

  // The DAG used to create machine nodes; set by the constructor.
  SelectionDAG *CurDAG = nullptr;
2413
2414public:
2415  BitPermutationSelector(SelectionDAG *DAG)
2416    : CurDAG(DAG) {}
2417
2418  // Here we try to match complex bit permutations into a set of
2419  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2420  // known to produce optimal code for common cases (like i32 byte swapping).
2421  SDNode *Select(SDNode *N) {
2422    Memoizer.clear();
2423    auto Result =
2424        getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2425    if (!Result.first)
2426      return nullptr;
2427    Bits = std::move(*Result.second);
2428
2429    LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2430                         " selection for:    ");
2431    LLVM_DEBUG(N->dump(CurDAG));
2432
2433    // Fill it RLAmt and set NeedMask.
2434    computeRotationAmounts();
2435
2436    if (!NeedMask)
2437      return Select(N, false);
2438
2439    // We currently have two techniques for handling results with zeros: early
2440    // masking (the default) and late masking. Late masking is sometimes more
2441    // efficient, but because the structure of the bit groups is different, it
2442    // is hard to tell without generating both and comparing the results. With
2443    // late masking, we ignore zeros in the resulting value when inserting each
2444    // set of bit groups, and then mask in the zeros at the end. With early
2445    // masking, we only insert the non-zero parts of the result at every step.
2446
2447    unsigned InstCnt = 0, InstCntLateMask = 0;
2448    LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2449    SDNode *RN = Select(N, false, &InstCnt);
2450    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2451
2452    LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2453    SDNode *RNLM = Select(N, true, &InstCntLateMask);
2454    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2455                      << " instructions\n");
2456
2457    if (InstCnt <= InstCntLateMask) {
2458      LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2459      return RN;
2460    }
2461
2462    LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2463    return RNLM;
2464  }
2465};
2466
2467class IntegerCompareEliminator {
2468  SelectionDAG *CurDAG;
2469  PPCDAGToDAGISel *S;
2470  // Conversion type for interpreting results of a 32-bit instruction as
2471  // a 64-bit value or vice versa.
2472  enum ExtOrTruncConversion { Ext, Trunc };
2473
2474  // Modifiers to guide how an ISD::SETCC node's result is to be computed
2475  // in a GPR.
2476  // ZExtOrig - use the original condition code, zero-extend value
2477  // ZExtInvert - invert the condition code, zero-extend value
2478  // SExtOrig - use the original condition code, sign-extend value
2479  // SExtInvert - invert the condition code, sign-extend value
2480  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2481
2482  // Comparisons against zero to emit GPR code sequences for. Each of these
2483  // sequences may need to be emitted for two or more equivalent patterns.
2484  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2485  // matters as well as the extension type: sext (-1/0), zext (1/0).
2486  // GEZExt - (zext (LHS >= 0))
2487  // GESExt - (sext (LHS >= 0))
2488  // LEZExt - (zext (LHS <= 0))
2489  // LESExt - (sext (LHS <= 0))
2490  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2491
2492  SDNode *tryEXTEND(SDNode *N);
2493  SDNode *tryLogicOpOfCompares(SDNode *N);
2494  SDValue computeLogicOpInGPR(SDValue LogicOp);
2495  SDValue signExtendInputIfNeeded(SDValue Input);
2496  SDValue zeroExtendInputIfNeeded(SDValue Input);
2497  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2498  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2499                                        ZeroCompare CmpTy);
2500  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2501                              int64_t RHSValue, SDLoc dl);
2502 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2503                              int64_t RHSValue, SDLoc dl);
2504  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2505                              int64_t RHSValue, SDLoc dl);
2506  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2507                              int64_t RHSValue, SDLoc dl);
2508  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2509
2510public:
2511  IntegerCompareEliminator(SelectionDAG *DAG,
2512                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2513    assert(CurDAG->getTargetLoweringInfo()
2514           .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2515           "Only expecting to use this on 64 bit targets.");
2516  }
2517  SDNode *Select(SDNode *N) {
2518    if (CmpInGPR == ICGPR_None)
2519      return nullptr;
2520    switch (N->getOpcode()) {
2521    default: break;
2522    case ISD::ZERO_EXTEND:
2523      if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2524          CmpInGPR == ICGPR_SextI64)
2525        return nullptr;
2526      LLVM_FALLTHROUGH;
2527    case ISD::SIGN_EXTEND:
2528      if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2529          CmpInGPR == ICGPR_ZextI64)
2530        return nullptr;
2531      return tryEXTEND(N);
2532    case ISD::AND:
2533    case ISD::OR:
2534    case ISD::XOR:
2535      return tryLogicOpOfCompares(N);
2536    }
2537    return nullptr;
2538  }
2539};
2540
2541static bool isLogicOp(unsigned Opc) {
2542  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2543}
2544// The obvious case for wanting to keep the value in a GPR. Namely, the
2545// result of the comparison is actually needed in a GPR.
2546SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2547  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2548          N->getOpcode() == ISD::SIGN_EXTEND) &&
2549         "Expecting a zero/sign extend node!");
2550  SDValue WideRes;
2551  // If we are zero-extending the result of a logical operation on i1
2552  // values, we can keep the values in GPRs.
2553  if (isLogicOp(N->getOperand(0).getOpcode()) &&
2554      N->getOperand(0).getValueType() == MVT::i1 &&
2555      N->getOpcode() == ISD::ZERO_EXTEND)
2556    WideRes = computeLogicOpInGPR(N->getOperand(0));
2557  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2558    return nullptr;
2559  else
2560    WideRes =
2561      getSETCCInGPR(N->getOperand(0),
2562                    N->getOpcode() == ISD::SIGN_EXTEND ?
2563                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2564
2565  if (!WideRes)
2566    return nullptr;
2567
2568  SDLoc dl(N);
2569  bool Input32Bit = WideRes.getValueType() == MVT::i32;
2570  bool Output32Bit = N->getValueType(0) == MVT::i32;
2571
2572  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2573  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2574
2575  SDValue ConvOp = WideRes;
2576  if (Input32Bit != Output32Bit)
2577    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2578                           ExtOrTruncConversion::Trunc);
2579  return ConvOp.getNode();
2580}
2581
2582// Attempt to perform logical operations on the results of comparisons while
2583// keeping the values in GPRs. Without doing so, these would end up being
2584// lowered to CR-logical operations which suffer from significant latency and
2585// low ILP.
2586SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2587  if (N->getValueType(0) != MVT::i1)
2588    return nullptr;
2589  assert(isLogicOp(N->getOpcode()) &&
2590         "Expected a logic operation on setcc results.");
2591  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2592  if (!LoweredLogical)
2593    return nullptr;
2594
2595  SDLoc dl(N);
2596  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
2597  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2598  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2599  SDValue LHS = LoweredLogical.getOperand(0);
2600  SDValue RHS = LoweredLogical.getOperand(1);
2601  SDValue WideOp;
2602  SDValue OpToConvToRecForm;
2603
2604  // Look through any 32-bit to 64-bit implicit extend nodes to find the
2605  // opcode that is input to the XORI.
2606  if (IsBitwiseNegate &&
2607      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
2608    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
2609  else if (IsBitwiseNegate)
2610    // If the input to the XORI isn't an extension, that's what we're after.
2611    OpToConvToRecForm = LoweredLogical.getOperand(0);
2612  else
2613    // If this is not an XORI, it is a reg-reg logical op and we can convert
2614    // it to record-form.
2615    OpToConvToRecForm = LoweredLogical;
2616
2617  // Get the record-form version of the node we're looking to use to get the
2618  // CR result from.
2619  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
2620  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2621
2622  // Convert the right node to record-form. This is either the logical we're
2623  // looking at or it is the input node to the negation (if we're looking at
2624  // a bitwise negation).
2625  if (NewOpc != -1 && IsBitwiseNegate) {
2626    // The input to the XORI has a record-form. Use it.
2627    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
2628           "Expected a PPC::XORI8 only for bitwise negation.");
2629    // Emit the record-form instruction.
2630    std::vector<SDValue> Ops;
2631    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
2632      Ops.push_back(OpToConvToRecForm.getOperand(i));
2633
2634    WideOp =
2635      SDValue(CurDAG->getMachineNode(NewOpc, dl,
2636                                     OpToConvToRecForm.getValueType(),
2637                                     MVT::Glue, Ops), 0);
2638  } else {
2639    assert((NewOpc != -1 || !IsBitwiseNegate) &&
2640           "No record form available for AND8/OR8/XOR8?");
2641    WideOp =
2642        SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
2643                                       dl, MVT::i64, MVT::Glue, LHS, RHS),
2644                0);
2645  }
2646
2647  // Select this node to a single bit from CR0 set by the record-form node
2648  // just created. For bitwise negation, use the EQ bit which is the equivalent
2649  // of negating the result (i.e. it is a bit set when the result of the
2650  // operation is zero).
2651  SDValue SRIdxVal =
2652    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
2653  SDValue CRBit =
2654    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2655                                   MVT::i1, CR0Reg, SRIdxVal,
2656                                   WideOp.getValue(1)), 0);
2657  return CRBit.getNode();
2658}
2659
2660// Lower a logical operation on i1 values into a GPR sequence if possible.
2661// The result can be kept in a GPR if requested.
2662// Three types of inputs can be handled:
2663// - SETCC
2664// - TRUNCATE
2665// - Logical operation (AND/OR/XOR)
2666// There is also a special case that is handled (namely a complement operation
2667// achieved with xor %a, -1).
2668SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
2669  assert(isLogicOp(LogicOp.getOpcode()) &&
2670        "Can only handle logic operations here.");
2671  assert(LogicOp.getValueType() == MVT::i1 &&
2672         "Can only handle logic operations on i1 values here.");
2673  SDLoc dl(LogicOp);
2674  SDValue LHS, RHS;
2675
2676 // Special case: xor %a, -1
2677  bool IsBitwiseNegation = isBitwiseNot(LogicOp);
2678
2679  // Produces a GPR sequence for each operand of the binary logic operation.
2680  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2681  // the value in a GPR and for logic operations, it will recursively produce
2682  // a GPR sequence for the operation.
2683 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
2684    unsigned OperandOpcode = Operand.getOpcode();
2685    if (OperandOpcode == ISD::SETCC)
2686      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2687    else if (OperandOpcode == ISD::TRUNCATE) {
2688      SDValue InputOp = Operand.getOperand(0);
2689     EVT InVT = InputOp.getValueType();
2690      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
2691                                            PPC::RLDICL, dl, InVT, InputOp,
2692                                            S->getI64Imm(0, dl),
2693                                            S->getI64Imm(63, dl)), 0);
2694    } else if (isLogicOp(OperandOpcode))
2695      return computeLogicOpInGPR(Operand);
2696    return SDValue();
2697  };
2698  LHS = getLogicOperand(LogicOp.getOperand(0));
2699  RHS = getLogicOperand(LogicOp.getOperand(1));
2700
2701  // If a GPR sequence can't be produced for the LHS we can't proceed.
2702  // Not producing a GPR sequence for the RHS is only a problem if this isn't
2703  // a bitwise negation operation.
2704  if (!LHS || (!RHS && !IsBitwiseNegation))
2705    return SDValue();
2706
2707  NumLogicOpsOnComparison++;
2708
2709  // We will use the inputs as 64-bit values.
2710  if (LHS.getValueType() == MVT::i32)
2711    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
2712  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
2713    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
2714
2715  unsigned NewOpc;
2716  switch (LogicOp.getOpcode()) {
2717  default: llvm_unreachable("Unknown logic operation.");
2718  case ISD::AND: NewOpc = PPC::AND8; break;
2719  case ISD::OR:  NewOpc = PPC::OR8;  break;
2720  case ISD::XOR: NewOpc = PPC::XOR8; break;
2721  }
2722
2723  if (IsBitwiseNegation) {
2724    RHS = S->getI64Imm(1, dl);
2725    NewOpc = PPC::XORI8;
2726  }
2727
2728  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
2729
2730}
2731
2732/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2733/// Otherwise just reinterpret it as a 64-bit value.
2734/// Useful when emitting comparison code for 32-bit values without using
2735/// the compare instruction (which only considers the lower 32-bits).
2736SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2737  assert(Input.getValueType() == MVT::i32 &&
2738         "Can only sign-extend 32-bit values here.");
2739  unsigned Opc = Input.getOpcode();
2740
2741  // The value was sign extended and then truncated to 32-bits. No need to
2742  // sign extend it again.
2743  if (Opc == ISD::TRUNCATE &&
2744      (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2745       Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2746    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2747
2748  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2749  // The input is a sign-extending load. All ppc sign-extending loads
2750  // sign-extend to the full 64-bits.
2751  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
2752    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2753
2754  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2755  // We don't sign-extend constants.
2756  if (InputConst)
2757    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2758
2759  SDLoc dl(Input);
2760  SignExtensionsAdded++;
2761  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
2762                                        MVT::i64, Input), 0);
2763}
2764
2765/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2766/// Otherwise just reinterpret it as a 64-bit value.
2767/// Useful when emitting comparison code for 32-bit values without using
2768/// the compare instruction (which only considers the lower 32-bits).
2769SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
2770  assert(Input.getValueType() == MVT::i32 &&
2771         "Can only zero-extend 32-bit values here.");
2772  unsigned Opc = Input.getOpcode();
2773
2774  // The only condition under which we can omit the actual extend instruction:
2775  // - The value is a positive constant
2776  // - The value comes from a load that isn't a sign-extending load
2777  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2778  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
2779    (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
2780     Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
2781  if (IsTruncateOfZExt)
2782    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2783
2784  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2785  if (InputConst && InputConst->getSExtValue() >= 0)
2786    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2787
2788  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2789  // The input is a load that doesn't sign-extend (it will be zero-extended).
2790  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
2791    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2792
2793  // None of the above, need to zero-extend.
2794  SDLoc dl(Input);
2795  ZeroExtensionsAdded++;
2796  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
2797                                        S->getI64Imm(0, dl),
2798                                        S->getI64Imm(32, dl)), 0);
2799}
2800
2801// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2802// course not actual zero/sign extensions that will generate machine code,
2803// they're just a way to reinterpret a 32 bit value in a register as a
2804// 64 bit value and vice-versa.
2805SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
2806                                                ExtOrTruncConversion Conv) {
2807  SDLoc dl(NatWidthRes);
2808
2809  // For reinterpreting 32-bit values as 64 bit values, we generate
2810  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2811  if (Conv == ExtOrTruncConversion::Ext) {
2812    SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
2813    SDValue SubRegIdx =
2814      CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2815    return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
2816                                          ImDef, NatWidthRes, SubRegIdx), 0);
2817  }
2818
2819  assert(Conv == ExtOrTruncConversion::Trunc &&
2820         "Unknown convertion between 32 and 64 bit values.");
2821  // For reinterpreting 64-bit values as 32-bit values, we just need to
2822  // EXTRACT_SUBREG (i.e. extract the low word).
2823  SDValue SubRegIdx =
2824    CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2825  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
2826                                        NatWidthRes, SubRegIdx), 0);
2827}
2828
2829// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2830// Handle both zero-extensions and sign-extensions.
2831SDValue
2832IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2833                                                         ZeroCompare CmpTy) {
2834  EVT InVT = LHS.getValueType();
2835  bool Is32Bit = InVT == MVT::i32;
2836  SDValue ToExtend;
2837
2838  // Produce the value that needs to be either zero or sign extended.
2839  switch (CmpTy) {
2840  case ZeroCompare::GEZExt:
2841  case ZeroCompare::GESExt:
2842    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
2843                                              dl, InVT, LHS, LHS), 0);
2844    break;
2845  case ZeroCompare::LEZExt:
2846  case ZeroCompare::LESExt: {
2847    if (Is32Bit) {
2848      // Upper 32 bits cannot be undefined for this sequence.
2849      LHS = signExtendInputIfNeeded(LHS);
2850      SDValue Neg =
2851        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2852      ToExtend =
2853        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2854                                       Neg, S->getI64Imm(1, dl),
2855                                       S->getI64Imm(63, dl)), 0);
2856    } else {
2857      SDValue Addi =
2858        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
2859                                       S->getI64Imm(~0ULL, dl)), 0);
2860      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2861                                                Addi, LHS), 0);
2862    }
2863    break;
2864  }
2865  }
2866
2867  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2868  if (!Is32Bit &&
2869      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
2870    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2871                                          ToExtend, S->getI64Imm(1, dl),
2872                                          S->getI64Imm(63, dl)), 0);
2873  if (!Is32Bit &&
2874      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
2875    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
2876                                          S->getI64Imm(63, dl)), 0);
2877
2878  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
2879  // For 32-bit sequences, the extensions differ between GE/LE cases.
2880  switch (CmpTy) {
2881  case ZeroCompare::GEZExt: {
2882    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2883                           S->getI32Imm(31, dl) };
2884    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2885                                          ShiftOps), 0);
2886  }
2887  case ZeroCompare::GESExt:
2888    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
2889                                          S->getI32Imm(31, dl)), 0);
2890  case ZeroCompare::LEZExt:
2891    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
2892                                          S->getI32Imm(1, dl)), 0);
2893  case ZeroCompare::LESExt:
2894    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
2895                                          S->getI32Imm(-1, dl)), 0);
2896  }
2897
2898  // The above case covers all the enumerators so it can't have a default clause
2899  // to avoid compiler warnings.
2900  llvm_unreachable("Unknown zero-comparison type.");
2901}
2902
2903/// Produces a zero-extended result of comparing two 32-bit values according to
2904/// the passed condition code.
2905SDValue
2906IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
2907                                              ISD::CondCode CC,
2908                                              int64_t RHSValue, SDLoc dl) {
2909  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2910      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
2911    return SDValue();
2912  bool IsRHSZero = RHSValue == 0;
2913  bool IsRHSOne = RHSValue == 1;
2914  bool IsRHSNegOne = RHSValue == -1LL;
2915  switch (CC) {
2916  default: return SDValue();
2917  case ISD::SETEQ: {
2918    // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2919    // (zext (setcc %a, 0, seteq))  -> (lshr (cntlzw %a), 5)
2920    SDValue Xor = IsRHSZero ? LHS :
2921      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2922    SDValue Clz =
2923      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2924    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2925      S->getI32Imm(31, dl) };
2926    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2927                                          ShiftOps), 0);
2928  }
2929  case ISD::SETNE: {
2930    // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2931    // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
2932    SDValue Xor = IsRHSZero ? LHS :
2933      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2934    SDValue Clz =
2935      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2936    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2937      S->getI32Imm(31, dl) };
2938    SDValue Shift =
2939      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2940    return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2941                                          S->getI32Imm(1, dl)), 0);
2942  }
2943  case ISD::SETGE: {
2944    // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2945    // (zext (setcc %a, 0, setge))  -> (lshr (~ %a), 31)
2946    if(IsRHSZero)
2947      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2948
2949    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2950    // by swapping inputs and falling through.
2951    std::swap(LHS, RHS);
2952    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2953    IsRHSZero = RHSConst && RHSConst->isNullValue();
2954    LLVM_FALLTHROUGH;
2955  }
2956  case ISD::SETLE: {
2957    if (CmpInGPR == ICGPR_NonExtIn)
2958      return SDValue();
2959    // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2960    // (zext (setcc %a, 0, setle))  -> (xor (lshr (- %a), 63), 1)
2961    if(IsRHSZero) {
2962      if (CmpInGPR == ICGPR_NonExtIn)
2963        return SDValue();
2964      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2965    }
2966
2967    // The upper 32-bits of the register can't be undefined for this sequence.
2968    LHS = signExtendInputIfNeeded(LHS);
2969    RHS = signExtendInputIfNeeded(RHS);
2970    SDValue Sub =
2971      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2972    SDValue Shift =
2973      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
2974                                     S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
2975              0);
2976    return
2977      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
2978                                     MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
2979  }
2980  case ISD::SETGT: {
2981    // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2982    // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2983    // (zext (setcc %a, 0, setgt))  -> (lshr (- %a), 63)
2984    // Handle SETLT -1 (which is equivalent to SETGE 0).
2985    if (IsRHSNegOne)
2986      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2987
2988    if (IsRHSZero) {
2989      if (CmpInGPR == ICGPR_NonExtIn)
2990        return SDValue();
2991      // The upper 32-bits of the register can't be undefined for this sequence.
2992      LHS = signExtendInputIfNeeded(LHS);
2993      RHS = signExtendInputIfNeeded(RHS);
2994      SDValue Neg =
2995        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2996      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2997                     Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
2998    }
2999    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3000    // (%b < %a) by swapping inputs and falling through.
3001    std::swap(LHS, RHS);
3002    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3003    IsRHSZero = RHSConst && RHSConst->isNullValue();
3004    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3005    LLVM_FALLTHROUGH;
3006  }
3007  case ISD::SETLT: {
3008    // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3009    // (zext (setcc %a, 1, setlt))  -> (xor (lshr (- %a), 63), 1)
3010    // (zext (setcc %a, 0, setlt))  -> (lshr %a, 31)
3011    // Handle SETLT 1 (which is equivalent to SETLE 0).
3012    if (IsRHSOne) {
3013      if (CmpInGPR == ICGPR_NonExtIn)
3014        return SDValue();
3015      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3016    }
3017
3018    if (IsRHSZero) {
3019      SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3020                             S->getI32Imm(31, dl) };
3021      return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3022                                            ShiftOps), 0);
3023    }
3024
3025    if (CmpInGPR == ICGPR_NonExtIn)
3026      return SDValue();
3027    // The upper 32-bits of the register can't be undefined for this sequence.
3028    LHS = signExtendInputIfNeeded(LHS);
3029    RHS = signExtendInputIfNeeded(RHS);
3030    SDValue SUBFNode =
3031      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3032    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3033                                    SUBFNode, S->getI64Imm(1, dl),
3034                                    S->getI64Imm(63, dl)), 0);
3035  }
3036  case ISD::SETUGE:
3037    // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3038    // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3039    std::swap(LHS, RHS);
3040    LLVM_FALLTHROUGH;
3041  case ISD::SETULE: {
3042    if (CmpInGPR == ICGPR_NonExtIn)
3043      return SDValue();
3044    // The upper 32-bits of the register can't be undefined for this sequence.
3045    LHS = zeroExtendInputIfNeeded(LHS);
3046    RHS = zeroExtendInputIfNeeded(RHS);
3047    SDValue Subtract =
3048      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3049    SDValue SrdiNode =
3050      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3051                                          Subtract, S->getI64Imm(1, dl),
3052                                          S->getI64Imm(63, dl)), 0);
3053    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3054                                            S->getI32Imm(1, dl)), 0);
3055  }
3056  case ISD::SETUGT:
3057    // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3058    // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3059    std::swap(LHS, RHS);
3060    LLVM_FALLTHROUGH;
3061  case ISD::SETULT: {
3062    if (CmpInGPR == ICGPR_NonExtIn)
3063      return SDValue();
3064    // The upper 32-bits of the register can't be undefined for this sequence.
3065    LHS = zeroExtendInputIfNeeded(LHS);
3066    RHS = zeroExtendInputIfNeeded(RHS);
3067    SDValue Subtract =
3068      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3069    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3070                                          Subtract, S->getI64Imm(1, dl),
3071                                          S->getI64Imm(63, dl)), 0);
3072  }
3073  }
3074}
3075
/// Produces a sign-extended result of comparing two 32-bit values according to
/// the passed condition code.
///
/// \param LHS      Left-hand operand of the comparison.
/// \param RHS      Right-hand operand of the comparison.
/// \param CC       Condition code to materialize. Codes without a case in the
///                 switch below yield an empty SDValue.
/// \param RHSValue Value used to detect the special cases RHS == 0, RHS == 1
///                 and RHS == -1.
/// \param dl       Debug location attached to every machine node created.
/// \returns The last machine node of the emitted sequence (typed i32 or i64,
///          depending on the case), or an empty SDValue when the CmpInGPR
///          setting rules the sequence out or CC is not handled.
SDValue
IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Nothing is emitted from this function for these CmpInGPR settings.
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  // Special-case flags. They are recomputed from the actual RHS node whenever
  // a case swaps the operands before falling through.
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;

  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (sext (setcc %a, %b, seteq)) ->
    //   (ashr (shl (ctlz (xor %a, %b)), 58), 63)
    // (sext (setcc %a, 0, seteq)) ->
    //   (ashr (shl (ctlz %a), 58), 63)
    // The nodes actually built are CNTLZW, RLWINM(27, 5, 31) and NEG; the xor
    // is skipped when RHS is zero.
    SDValue CountInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Cntlzw =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
    SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
                         S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Slwi =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  }
  case ISD::SETNE: {
    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
    // flip the bit, finally take 2's complement.
    // (sext (setcc %a, %b, setne)) ->
    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
    // Same as above, but the first xor is not needed.
    // (sext (setcc %a, 0, setne)) ->
    //   (neg (xor (lshr (ctlz %a), 5), 1))
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] =
      { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    SDValue Xori =
      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                     S->getI32Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  }
  case ISD::SETGE: {
    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the zero flag must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
    // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    // NOTE(review): this SUBF8 is created with an additional MVT::Glue result
    // that nothing below consumes, unlike the other SUBF8 nodes in this
    // function -- confirm whether that is intentional.
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 0);
    SDValue Srdi =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     SUBFNode, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETGT: {
    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
    // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
    // (%a > -1) is the same test as (%a >= 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      // NOTE(review): RHS is not referenced again on this path.
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
        return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
                                              S->getI64Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the special-case flags must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
    // (sext (setcc %a, 1, setlt))  -> (add (lshr (- %a), 63), -1)
    // (sext (setcc %a, 0, setlt))  -> (ashr %a, 31)
    // (%a < 1) is the same test as (%a <= 0).
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    }
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
                                            S->getI32Imm(31, dl)), 0);

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          SUBFNode, S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
    // setuge is handled as setule with the operands swapped.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
                                     S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
              0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETUGT:
    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
    // setugt is handled as setult with the operands swapped.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          Subtract, S->getI64Imm(63, dl)), 0);
  }
  }
}
3247
/// Produces a zero-extended result of comparing two 64-bit values according to
/// the passed condition code.
///
/// \param LHS      Left-hand operand of the comparison.
/// \param RHS      Right-hand operand of the comparison.
/// \param CC       Condition code to materialize. Codes without a case in the
///                 switch below yield an empty SDValue.
/// \param RHSValue Value used to detect the special cases RHS == 0, RHS == 1
///                 and RHS == -1.
/// \param dl       Debug location attached to every machine node created.
/// \returns The last i64 machine node of the emitted sequence, or an empty
///          SDValue when the CmpInGPR setting rules the sequence out or CC is
///          not handled.
SDValue
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Nothing is emitted from this function for these CmpInGPR settings.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  // Special-case flags. They are recomputed from the actual RHS node whenever
  // a case swaps the operands before falling through.
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
    // (zext (setcc %a, 0, seteq)) ->  (lshr (ctlz %a), 6)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
                                          S->getI64Imm(58, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETNE: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    // The ADDIC8 node's second result (glue) is passed on to the SUBFE8 below.
    SDValue AC =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
                                          Xor, AC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setge)) ->
    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    // Handle (%a >= %b) as (%b <= %a): swap the inputs, refresh the zero flag
    // for the new RHS and fall through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setle)) ->
    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
    // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    // Only result 1 (the glue) of the SUBFC8 node is used; it feeds the ADDE8.
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                          ShiftR, ShiftL, SubtractCarry), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setgt)) ->
    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
    // (%a > -1) is the same test as (%a >= 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    if (IsRHSZero) {
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a): swap the inputs, refresh the flags for
    // the new RHS and fall through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setlt)) ->
    //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
    // (%a < 1) is the same test as (%a <= 0).
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Only result 1 (the glue) of the SUBFC8 node is used; it feeds the ADDE8.
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                          ADDE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
    // setuge is handled as setule with the operands swapped.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue SUBFE8Node =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
                                     LHS, LHS, SUBFC8Carry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
                                          SUBFE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
    // setugt is handled as setult with the operands swapped.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                     LHS, LHS, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          ExtSub), 0);
  }
  }
}
3404
/// Produces a sign-extended result of comparing two 64-bit values according to
/// the passed condition code.
///
/// \param LHS      Left-hand operand of the comparison.
/// \param RHS      Right-hand operand of the comparison.
/// \param CC       Condition code to materialize. Codes without a case in the
///                 switch below yield an empty SDValue.
/// \param RHSValue Value used to detect the special cases RHS == 0, RHS == 1
///                 and RHS == -1.
/// \param dl       Debug location attached to every machine node created.
/// \returns The last i64 machine node of the emitted sequence, or an empty
///          SDValue when the CmpInGPR setting rules the sequence out or CC is
///          not handled.
SDValue
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Nothing is emitted from this function for these CmpInGPR settings.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  // Special-case flags. They are recomputed from the actual RHS node whenever
  // a case swaps the operands before falling through.
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue AddInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    // The ADDIC8 node's second result (glue) is passed on to the SUBFE8 below.
    SDValue Addic =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     AddInput, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
                                          Addic, Addic.getValue(1)), 0);
  }
  case ISD::SETNE: {
    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    // The SUBFIC8 node's second result (glue) is passed on to the SUBFE8 below.
    SDValue SC =
      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(0, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
                                          SC, SC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setge)) ->
    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
    // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    // Handle (%a >= %b) as (%b <= %a): swap the inputs, refresh the zero flag
    // for the new RHS and fall through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setle)) ->
    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
    // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Only result 1 (the glue) of the SUBFC8 node is used; it feeds the ADDE8.
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue Adde =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     ShiftR, ShiftL, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setgt)) ->
    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
    // (%a > -1) is the same test as (%a >= 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      SDValue Add =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(-1, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a): swap the inputs, refresh the flags for
    // the new RHS and fall through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setlt)) ->
    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
    // (%a < 1) is the same test as (%a <= 0).
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    if (IsRHSZero) {
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
                                            S->getI64Imm(63, dl)), 0);
    }
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Only result 1 (the glue) of the SUBFC8 node is used; it feeds the ADDE8.
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    SDValue XORI8Node =
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          XORI8Node), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
    // setuge is handled as setule with the operands swapped.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
                                     LHS, SubtractCarry), 0);
    // NOR8 of a value with itself is the bitwise complement in the comment
    // above.
    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
                                          ExtSub, ExtSub), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
    // setugt is handled as setult with the operands swapped.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
    SDValue SubCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                     LHS, LHS, SubCarry), 0);
  }
  }
}
3564
3565/// Do all uses of this SDValue need the result in a GPR?
3566/// This is meant to be used on values that have type i1 since
3567/// it is somewhat meaningless to ask if values of other types
3568/// should be kept in GPR's.
3569static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3570  assert(Compare.getOpcode() == ISD::SETCC &&
3571         "An ISD::SETCC node required here.");
3572
3573  // For values that have a single use, the caller should obviously already have
3574  // checked if that use is an extending use. We check the other uses here.
3575  if (Compare.hasOneUse())
3576    return true;
3577  // We want the value in a GPR if it is being extended, used for a select, or
3578  // used in logical operations.
3579  for (auto CompareUse : Compare.getNode()->uses())
3580    if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3581        CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3582        CompareUse->getOpcode() != ISD::SELECT &&
3583        !isLogicOp(CompareUse->getOpcode())) {
3584      OmittedForNonExtendUses++;
3585      return false;
3586    }
3587  return true;
3588}
3589
3590/// Returns an equivalent of a SETCC node but with the result the same width as
3591/// the inputs. This can also be used for SELECT_CC if either the true or false
3592/// values is a power of two while the other is zero.
3593SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3594                                                SetccInGPROpts ConvOpts) {
3595  assert((Compare.getOpcode() == ISD::SETCC ||
3596          Compare.getOpcode() == ISD::SELECT_CC) &&
3597         "An ISD::SETCC node required here.");
3598
3599  // Don't convert this comparison to a GPR sequence because there are uses
3600  // of the i1 result (i.e. uses that require the result in the CR).
3601  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3602    return SDValue();
3603
3604  SDValue LHS = Compare.getOperand(0);
3605  SDValue RHS = Compare.getOperand(1);
3606
3607  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3608  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3609  ISD::CondCode CC =
3610    cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3611  EVT InputVT = LHS.getValueType();
3612  if (InputVT != MVT::i32 && InputVT != MVT::i64)
3613    return SDValue();
3614
3615  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3616      ConvOpts == SetccInGPROpts::SExtInvert)
3617    CC = ISD::getSetCCInverse(CC, InputVT);
3618
3619  bool Inputs32Bit = InputVT == MVT::i32;
3620
3621  SDLoc dl(Compare);
3622  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3623  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3624  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3625    ConvOpts == SetccInGPROpts::SExtInvert;
3626
3627  if (IsSext && Inputs32Bit)
3628    return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3629  else if (Inputs32Bit)
3630    return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3631  else if (IsSext)
3632    return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3633  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3634}
3635
3636} // end anonymous namespace
3637
3638bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3639  if (N->getValueType(0) != MVT::i32 &&
3640      N->getValueType(0) != MVT::i64)
3641    return false;
3642
3643  // This optimization will emit code that assumes 64-bit registers
3644  // so we don't want to run it in 32-bit mode. Also don't run it
3645  // on functions that are not to be optimized.
3646  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3647    return false;
3648
3649  switch (N->getOpcode()) {
3650  default: break;
3651  case ISD::ZERO_EXTEND:
3652  case ISD::SIGN_EXTEND:
3653  case ISD::AND:
3654  case ISD::OR:
3655  case ISD::XOR: {
3656    IntegerCompareEliminator ICmpElim(CurDAG, this);
3657    if (SDNode *New = ICmpElim.Select(N)) {
3658      ReplaceNode(N, New);
3659      return true;
3660    }
3661  }
3662  }
3663  return false;
3664}
3665
3666bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3667  if (N->getValueType(0) != MVT::i32 &&
3668      N->getValueType(0) != MVT::i64)
3669    return false;
3670
3671  if (!UseBitPermRewriter)
3672    return false;
3673
3674  switch (N->getOpcode()) {
3675  default: break;
3676  case ISD::ROTL:
3677  case ISD::SHL:
3678  case ISD::SRL:
3679  case ISD::AND:
3680  case ISD::OR: {
3681    BitPermutationSelector BPS(CurDAG);
3682    if (SDNode *New = BPS.Select(N)) {
3683      ReplaceNode(N, New);
3684      return true;
3685    }
3686    return false;
3687  }
3688  }
3689
3690  return false;
3691}
3692
3693/// SelectCC - Select a comparison of the specified values with the specified
3694/// condition code, returning the CR# of the expression.
3695SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3696                                  const SDLoc &dl) {
3697  // Always select the LHS.
3698  unsigned Opc;
3699
3700  if (LHS.getValueType() == MVT::i32) {
3701    unsigned Imm;
3702    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3703      if (isInt32Immediate(RHS, Imm)) {
3704        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3705        if (isUInt<16>(Imm))
3706          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3707                                                getI32Imm(Imm & 0xFFFF, dl)),
3708                         0);
3709        // If this is a 16-bit signed immediate, fold it.
3710        if (isInt<16>((int)Imm))
3711          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3712                                                getI32Imm(Imm & 0xFFFF, dl)),
3713                         0);
3714
3715        // For non-equality comparisons, the default code would materialize the
3716        // constant, then compare against it, like this:
3717        //   lis r2, 4660
3718        //   ori r2, r2, 22136
3719        //   cmpw cr0, r3, r2
3720        // Since we are just comparing for equality, we can emit this instead:
3721        //   xoris r0,r3,0x1234
3722        //   cmplwi cr0,r0,0x5678
3723        //   beq cr0,L6
3724        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3725                                           getI32Imm(Imm >> 16, dl)), 0);
3726        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3727                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
3728      }
3729      Opc = PPC::CMPLW;
3730    } else if (ISD::isUnsignedIntSetCC(CC)) {
3731      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
3732        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3733                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
3734      Opc = PPC::CMPLW;
3735    } else {
3736      int16_t SImm;
3737      if (isIntS16Immediate(RHS, SImm))
3738        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3739                                              getI32Imm((int)SImm & 0xFFFF,
3740                                                        dl)),
3741                         0);
3742      Opc = PPC::CMPW;
3743    }
3744  } else if (LHS.getValueType() == MVT::i64) {
3745    uint64_t Imm;
3746    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3747      if (isInt64Immediate(RHS.getNode(), Imm)) {
3748        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3749        if (isUInt<16>(Imm))
3750          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3751                                                getI32Imm(Imm & 0xFFFF, dl)),
3752                         0);
3753        // If this is a 16-bit signed immediate, fold it.
3754        if (isInt<16>(Imm))
3755          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3756                                                getI32Imm(Imm & 0xFFFF, dl)),
3757                         0);
3758
3759        // For non-equality comparisons, the default code would materialize the
3760        // constant, then compare against it, like this:
3761        //   lis r2, 4660
3762        //   ori r2, r2, 22136
3763        //   cmpd cr0, r3, r2
3764        // Since we are just comparing for equality, we can emit this instead:
3765        //   xoris r0,r3,0x1234
3766        //   cmpldi cr0,r0,0x5678
3767        //   beq cr0,L6
3768        if (isUInt<32>(Imm)) {
3769          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
3770                                             getI64Imm(Imm >> 16, dl)), 0);
3771          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
3772                                                getI64Imm(Imm & 0xFFFF, dl)),
3773                         0);
3774        }
3775      }
3776      Opc = PPC::CMPLD;
3777    } else if (ISD::isUnsignedIntSetCC(CC)) {
3778      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
3779        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3780                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
3781      Opc = PPC::CMPLD;
3782    } else {
3783      int16_t SImm;
3784      if (isIntS16Immediate(RHS, SImm))
3785        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3786                                              getI64Imm(SImm & 0xFFFF, dl)),
3787                         0);
3788      Opc = PPC::CMPD;
3789    }
3790  } else if (LHS.getValueType() == MVT::f32) {
3791    if (PPCSubTarget->hasSPE()) {
3792      switch (CC) {
3793        default:
3794        case ISD::SETEQ:
3795        case ISD::SETNE:
3796          Opc = PPC::EFSCMPEQ;
3797          break;
3798        case ISD::SETLT:
3799        case ISD::SETGE:
3800        case ISD::SETOLT:
3801        case ISD::SETOGE:
3802        case ISD::SETULT:
3803        case ISD::SETUGE:
3804          Opc = PPC::EFSCMPLT;
3805          break;
3806        case ISD::SETGT:
3807        case ISD::SETLE:
3808        case ISD::SETOGT:
3809        case ISD::SETOLE:
3810        case ISD::SETUGT:
3811        case ISD::SETULE:
3812          Opc = PPC::EFSCMPGT;
3813          break;
3814      }
3815    } else
3816      Opc = PPC::FCMPUS;
3817  } else if (LHS.getValueType() == MVT::f64) {
3818    if (PPCSubTarget->hasSPE()) {
3819      switch (CC) {
3820        default:
3821        case ISD::SETEQ:
3822        case ISD::SETNE:
3823          Opc = PPC::EFDCMPEQ;
3824          break;
3825        case ISD::SETLT:
3826        case ISD::SETGE:
3827        case ISD::SETOLT:
3828        case ISD::SETOGE:
3829        case ISD::SETULT:
3830        case ISD::SETUGE:
3831          Opc = PPC::EFDCMPLT;
3832          break;
3833        case ISD::SETGT:
3834        case ISD::SETLE:
3835        case ISD::SETOGT:
3836        case ISD::SETOLE:
3837        case ISD::SETUGT:
3838        case ISD::SETULE:
3839          Opc = PPC::EFDCMPGT;
3840          break;
3841      }
3842    } else
3843      Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
3844  } else {
3845    assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
3846    assert(PPCSubTarget->hasVSX() && "__float128 requires VSX");
3847    Opc = PPC::XSCMPUQP;
3848  }
3849  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
3850}
3851
3852static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
3853                                           const PPCSubtarget *Subtarget) {
3854  // For SPE instructions, the result is in GT bit of the CR
3855  bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
3856
3857  switch (CC) {
3858  case ISD::SETUEQ:
3859  case ISD::SETONE:
3860  case ISD::SETOLE:
3861  case ISD::SETOGE:
3862    llvm_unreachable("Should be lowered by legalize!");
3863  default: llvm_unreachable("Unknown condition!");
3864  case ISD::SETOEQ:
3865  case ISD::SETEQ:
3866    return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
3867  case ISD::SETUNE:
3868  case ISD::SETNE:
3869    return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
3870  case ISD::SETOLT:
3871  case ISD::SETLT:
3872    return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
3873  case ISD::SETULE:
3874  case ISD::SETLE:
3875    return UseSPE ? PPC::PRED_LE : PPC::PRED_LE;
3876  case ISD::SETOGT:
3877  case ISD::SETGT:
3878    return UseSPE ? PPC::PRED_GT : PPC::PRED_GT;
3879  case ISD::SETUGE:
3880  case ISD::SETGE:
3881    return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
3882  case ISD::SETO:   return PPC::PRED_NU;
3883  case ISD::SETUO:  return PPC::PRED_UN;
3884    // These two are invalid for floating point.  Assume we have int.
3885  case ISD::SETULT: return PPC::PRED_LT;
3886  case ISD::SETUGT: return PPC::PRED_GT;
3887  }
3888}
3889
3890/// getCRIdxForSetCC - Return the index of the condition register field
3891/// associated with the SetCC condition, and whether or not the field is
3892/// treated as inverted.  That is, lt = 0; ge = 0 inverted.
3893static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
3894  Invert = false;
3895  switch (CC) {
3896  default: llvm_unreachable("Unknown condition!");
3897  case ISD::SETOLT:
3898  case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
3899  case ISD::SETOGT:
3900  case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
3901  case ISD::SETOEQ:
3902  case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
3903  case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
3904  case ISD::SETUGE:
3905  case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
3906  case ISD::SETULE:
3907  case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
3908  case ISD::SETUNE:
3909  case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
3910  case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
3911  case ISD::SETUEQ:
3912  case ISD::SETOGE:
3913  case ISD::SETOLE:
3914  case ISD::SETONE:
3915    llvm_unreachable("Invalid branch code: should be expanded by legalize");
3916  // These are invalid for floating point.  Assume integer.
3917  case ISD::SETULT: return 0;
3918  case ISD::SETUGT: return 1;
3919  }
3920}
3921
3922// getVCmpInst: return the vector compare instruction for the specified
3923// vector type and condition code. Since this is for altivec specific code,
3924// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
3925static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
3926                                bool HasVSX, bool &Swap, bool &Negate) {
3927  Swap = false;
3928  Negate = false;
3929
3930  if (VecVT.isFloatingPoint()) {
3931    /* Handle some cases by swapping input operands.  */
3932    switch (CC) {
3933      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
3934      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3935      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
3936      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
3937      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3938      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
3939      default: break;
3940    }
3941    /* Handle some cases by negating the result.  */
3942    switch (CC) {
3943      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3944      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
3945      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
3946      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
3947      default: break;
3948    }
3949    /* We have instructions implementing the remaining cases.  */
3950    switch (CC) {
3951      case ISD::SETEQ:
3952      case ISD::SETOEQ:
3953        if (VecVT == MVT::v4f32)
3954          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
3955        else if (VecVT == MVT::v2f64)
3956          return PPC::XVCMPEQDP;
3957        break;
3958      case ISD::SETGT:
3959      case ISD::SETOGT:
3960        if (VecVT == MVT::v4f32)
3961          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
3962        else if (VecVT == MVT::v2f64)
3963          return PPC::XVCMPGTDP;
3964        break;
3965      case ISD::SETGE:
3966      case ISD::SETOGE:
3967        if (VecVT == MVT::v4f32)
3968          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
3969        else if (VecVT == MVT::v2f64)
3970          return PPC::XVCMPGEDP;
3971        break;
3972      default:
3973        break;
3974    }
3975    llvm_unreachable("Invalid floating-point vector compare condition");
3976  } else {
3977    /* Handle some cases by swapping input operands.  */
3978    switch (CC) {
3979      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
3980      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3981      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3982      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
3983      default: break;
3984    }
3985    /* Handle some cases by negating the result.  */
3986    switch (CC) {
3987      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3988      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
3989      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
3990      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
3991      default: break;
3992    }
3993    /* We have instructions implementing the remaining cases.  */
3994    switch (CC) {
3995      case ISD::SETEQ:
3996      case ISD::SETUEQ:
3997        if (VecVT == MVT::v16i8)
3998          return PPC::VCMPEQUB;
3999        else if (VecVT == MVT::v8i16)
4000          return PPC::VCMPEQUH;
4001        else if (VecVT == MVT::v4i32)
4002          return PPC::VCMPEQUW;
4003        else if (VecVT == MVT::v2i64)
4004          return PPC::VCMPEQUD;
4005        break;
4006      case ISD::SETGT:
4007        if (VecVT == MVT::v16i8)
4008          return PPC::VCMPGTSB;
4009        else if (VecVT == MVT::v8i16)
4010          return PPC::VCMPGTSH;
4011        else if (VecVT == MVT::v4i32)
4012          return PPC::VCMPGTSW;
4013        else if (VecVT == MVT::v2i64)
4014          return PPC::VCMPGTSD;
4015        break;
4016      case ISD::SETUGT:
4017        if (VecVT == MVT::v16i8)
4018          return PPC::VCMPGTUB;
4019        else if (VecVT == MVT::v8i16)
4020          return PPC::VCMPGTUH;
4021        else if (VecVT == MVT::v4i32)
4022          return PPC::VCMPGTUW;
4023        else if (VecVT == MVT::v2i64)
4024          return PPC::VCMPGTUD;
4025        break;
4026      default:
4027        break;
4028    }
4029    llvm_unreachable("Invalid integer vector compare condition");
4030  }
4031}
4032
/// trySETCC - Attempt to select the setcc node \p N directly into machine
/// nodes.  Operand 0 and 1 of \p N are the values being compared and operand 2
/// carries the condition code.
///
/// Three strategies are tried in order:
///  1. When CR bits are not in use and the right-hand side is a 32-bit
///     immediate (per isInt32Immediate) equal to 0 or -1, a short GPR-only
///     sequence is emitted for SETEQ, SETNE, SETLT and SETGT.  The
///     carry-based sequences are skipped when pointers are 64 bits wide.
///  2. Vector compares become a single vector compare instruction, with the
///     operands swapped and/or the result complemented as directed by
///     getVCmpInst().
///  3. Otherwise the compare built by SelectCC() is copied into CR7, the
///     condition register is read with MFOCRF, and one bit of it is rotated
///     into the least-significant position (and flipped when the condition
///     is the complement of a CR bit).
///
/// \returns true if \p N was selected here; false when selection is left to
/// the caller (vector compares with QPX or SPE, or scalar compares while CR
/// bits are in use).
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);

  if (!PPCSubTarget->useCRBits() &&
      isInt32Immediate(N->getOperand(1), Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // op == 0: the leading-zero count is 32 only for a zero input, so
        // shifting it right by 5 (rlwinm 27, 5, 31) leaves exactly that bit.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        // op != 0: adding -1 carries out exactly when op is non-zero; the
        // subfe of (op - 1) from op then reduces to that carry.  The carry
        // travels from ADDIC to SUBFE through the glue result.
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // op < 0: rotate left by one and keep only the last bit, i.e. move
        // the sign bit into the least-significant position.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // op > 0: (-op & ~op) has its sign bit set only for positive op;
        // extract that sign bit as in the SETLT case.
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        // op == -1: adding 1 carries out only for an all-ones input; addze
        // of a zero register then yields that carry.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        // op != -1 is the same as ~op != 0: complement the input with NOR
        // and reuse the "!= 0" carry sequence from above.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // op < -1: both op and op + 1 must be negative, so the sign bit of
        // their AND is the answer.
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // op > -1 means op is non-negative: extract the sign bit and flip it.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  // None of the immediate shortcuts applied (or one of them bailed out).
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Altivec Vector compare instructions do not set any CR register by default
  // and vector compare operations return the same type as the operands.
  if (LHS.getValueType().isVector()) {
    // Vector compares are not handled here when QPX or SPE is available.
    if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
                                        PPCSubTarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    // The result has the operand's shape with integer elements.
    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Complement the compare result by NOR-ing it with itself.
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  // The remaining path is not used when CR bits are in use.
  if (PPCSubTarget->useCRBits())
    return false;

  // Idx is the CR-field bit that decides CC; Inv says whether the condition
  // is the complement of that bit.
  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
  SDValue IntCR;

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
  // The correct compare instruction is already set by SelectCC()
  if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
    Idx = 1;
  }

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
  // Keep the glue result of the copy so that the MFOCRF below is tied to it.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  // Rotate left by (29 + Idx) mod 32 and keep only the least-significant bit
  // of the rotated value (MB = ME = 31).
  // NOTE(review): this presumes the CR7 field occupies the low four bits of
  // the value read by MFOCRF -- confirm against the ISA.
  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                      getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  // The condition is the complement of that bit, so flip it.
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}
4190
4191/// Does this node represent a load/store node whose address can be represented
4192/// with a register plus an immediate that's a multiple of \p Val:
4193bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4194  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4195  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4196  SDValue AddrOp;
4197  if (LDN)
4198    AddrOp = LDN->getOperand(1);
4199  else if (STN)
4200    AddrOp = STN->getOperand(2);
4201
4202  // If the address points a frame object or a frame object with an offset,
4203  // we need to check the object alignment.
4204  short Imm = 0;
4205  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4206          AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4207                                           AddrOp)) {
4208    // If op0 is a frame index that is under aligned, we can't do it either,
4209    // because it is translated to r31 or r1 + slot + offset. We won't know the
4210    // slot number until the stack frame is finalized.
4211    const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4212    unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
4213    if ((SlotAlign % Val) != 0)
4214      return false;
4215
4216    // If we have an offset, we need further check on the offset.
4217    if (AddrOp.getOpcode() != ISD::ADD)
4218      return true;
4219  }
4220
4221  if (AddrOp.getOpcode() == ISD::ADD)
4222    return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4223
4224  // If the address comes from the outside, the offset will be zero.
4225  return AddrOp.getOpcode() == ISD::CopyFromReg;
4226}
4227
4228void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4229  // Transfer memoperands.
4230  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4231  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4232}
4233
/// Check whether the SELECT_CC node \p N matches one of the -1/0/1 producing
/// patterns listed in the body, i.e. whether it can be lowered to a SETB.
///
/// \param N           Node to inspect; asserted to be a SELECT_CC with an i32
///                    or i64 result.
/// \param CC          Condition code of \p N (the outer comparison).
/// \param DAG         Not referenced by this function.
/// \param NeedSwapOps [out] Written only on the paths that return true;
///                    presumably tells the caller to swap the compare
///                    operands -- confirm against the caller.
/// \param IsUnCmp     [in,out] Set to true when an unsigned comparison is
///                    involved (inner SETULT/SETUGT, or outer
///                    SETULT/SETUGT).  It is never cleared here and is also
///                    read before being written on some paths, so the caller
///                    has to initialise it.  It may have been set even when
///                    the function goes on to return false.
/// \returns true if \p N matched one of the patterns.
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
                         bool &NeedSwapOps, bool &IsUnCmp) {

  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue TrueRes = N->getOperand(2);
  SDValue FalseRes = N->getOperand(3);
  // The value selected when the outer comparison holds must be a constant.
  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  if (!TrueConst)
    return false;

  assert((N->getSimpleValueType(0) == MVT::i64 ||
          N->getSimpleValueType(0) == MVT::i32) &&
         "Expecting either i64 or i32 here.");

  // We are looking for any of:
  // (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs,  1, -1, cc2), seteq)
  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs, -1,  1, cc2), seteq)
  //
  // Reject anything whose true value is outside {-1, 0, 1} or whose false
  // operand does not have the opcode that goes with that true value.
  int64_t TrueResVal = TrueConst->getSExtValue();
  if ((TrueResVal < -1 || TrueResVal > 1) ||
      (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
      (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
      (TrueResVal == 0 &&
       (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
    return false;

  // The inner comparison is either the false operand itself (select_cc) or
  // the operand of the extension.
  bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
  SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
      SetOrSelCC.getOpcode() != ISD::SELECT_CC)
    return false;

  // Without this setb optimization, the outer SELECT_CC will be manually
  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
  // transforms pseudo instruction to isel instruction. When there are more than
  // one use for result like zext/sext, with current optimization we only see
  // isel is replaced by setb but can't see any significant gain. Since
  // setb has longer latency than original isel, we should avoid this. Another
  // point is that setb requires comparison always kept, it can break the
  // opportunity to get the comparison away if we have in future.
  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
    return false;

  SDValue InnerLHS = SetOrSelCC.getOperand(0);
  SDValue InnerRHS = SetOrSelCC.getOperand(1);
  // The condition code is operand 4 of a select_cc and operand 2 of a setcc.
  ISD::CondCode InnerCC =
      cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  // If the inner comparison is a select_cc, make sure the true/false values are
  // 1/-1 and canonicalize it if needed.
  if (InnerIsSel) {
    ConstantSDNode *SelCCTrueConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
    ConstantSDNode *SelCCFalseConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
    if (!SelCCTrueConst || !SelCCFalseConst)
      return false;
    int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
    int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
    // The values must be -1/1 (requiring a swap) or 1/-1.
    if (SelCCTVal == -1 && SelCCFVal == 1) {
      std::swap(InnerLHS, InnerRHS);
    } else if (SelCCTVal != 1 || SelCCFVal != -1)
      return false;
  }

  // Canonicalize unsigned case
  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
    IsUnCmp = true;
    InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  }

  // The inner comparison must use the same two operands as the outer one,
  // in either order.
  bool InnerSwapped = false;
  if (LHS == InnerRHS && RHS == InnerLHS)
    InnerSwapped = true;
  else if (LHS != InnerLHS || RHS != InnerRHS)
    return false;

  switch (CC) {
  // (select_cc lhs, rhs,  0,
  //     (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  case ISD::SETEQ:
    if (!InnerIsSel)
      return false;
    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
      return false;
    NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
    break;

  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  case ISD::SETULT:
    // An unsigned outer compare needs an unsigned inner compare, unless the
    // inner compare is setne.
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETLT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
        (InnerCC == ISD::SETLT && InnerSwapped))
      NeedSwapOps = (TrueResVal == 1);
    else
      return false;
    break;

  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  case ISD::SETUGT:
    // Same requirement as for SETULT above.
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETGT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
        (InnerCC == ISD::SETGT && InnerSwapped))
      NeedSwapOps = (TrueResVal == -1);
    else
      return false;
    break;

  default:
    return false;
  }

  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  LLVM_DEBUG(N->dump());

  return true;
}
4373
4374bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) {
4375  if (N->getOpcode() != ISD::AND)
4376    return false;
4377
4378  SDLoc dl(N);
4379  SDValue Val = N->getOperand(0);
4380  unsigned Imm, Imm2, SH, MB, ME;
4381  uint64_t Imm64;
4382
4383  // If this is an and of a value rotated between 0 and 31 bits and then and'd
4384  // with a mask, emit rlwinm
4385  if (isInt32Immediate(N->getOperand(1), Imm) &&
4386      isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
4387    SDValue Val = N->getOperand(0).getOperand(0);
4388    SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4389                      getI32Imm(ME, dl) };
4390    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4391    return true;
4392  }
4393
4394  // If this is just a masked value where the input is not handled, and
4395  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4396  if (isInt32Immediate(N->getOperand(1), Imm)) {
4397    if (isRunOfOnes(Imm, MB, ME) &&
4398        N->getOperand(0).getOpcode() != ISD::ROTL) {
4399      SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4400                        getI32Imm(ME, dl) };
4401      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4402      return true;
4403    }
4404    // AND X, 0 -> 0, not "rlwinm 32".
4405    if (Imm == 0) {
4406      ReplaceUses(SDValue(N, 0), N->getOperand(1));
4407      return true;
4408    }
4409
4410    // ISD::OR doesn't get all the bitfield insertion fun.
4411    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4412    // bitfield insert.
4413    if (N->getOperand(0).getOpcode() == ISD::OR &&
4414        isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
4415      // The idea here is to check whether this is equivalent to:
4416      //   (c1 & m) | (x & ~m)
4417      // where m is a run-of-ones mask. The logic here is that, for each bit in
4418      // c1 and c2:
4419      //  - if both are 1, then the output will be 1.
4420      //  - if both are 0, then the output will be 0.
4421      //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4422      //    come from x.
4423      //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4424      //    be 0.
4425      //  If that last condition is never the case, then we can form m from the
4426      //  bits that are the same between c1 and c2.
4427      unsigned MB, ME;
4428      if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
4429        SDValue Ops[] = { N->getOperand(0).getOperand(0),
4430                            N->getOperand(0).getOperand(1),
4431                            getI32Imm(0, dl), getI32Imm(MB, dl),
4432                            getI32Imm(ME, dl) };
4433        ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4434        return true;
4435      }
4436    }
4437  } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) {
4438    // If this is a 64-bit zero-extension mask, emit rldicl.
4439    if (isMask_64(Imm64)) {
4440      MB = 64 - countTrailingOnes(Imm64);
4441      SH = 0;
4442
4443      if (Val.getOpcode() == ISD::ANY_EXTEND) {
4444        auto Op0 = Val.getOperand(0);
4445        if ( Op0.getOpcode() == ISD::SRL &&
4446           isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
4447
4448           auto ResultType = Val.getNode()->getValueType(0);
4449           auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
4450                                               ResultType);
4451           SDValue IDVal (ImDef, 0);
4452
4453           Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
4454                         ResultType, IDVal, Op0.getOperand(0),
4455                         getI32Imm(1, dl)), 0);
4456           SH = 64 - Imm;
4457        }
4458      }
4459
4460      // If the operand is a logical right shift, we can fold it into this
4461      // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4462      // for n <= mb. The right shift is really a left rotate followed by a
4463      // mask, and this mask is a more-restrictive sub-mask of the mask implied
4464      // by the shift.
4465      if (Val.getOpcode() == ISD::SRL &&
4466          isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
4467        assert(Imm < 64 && "Illegal shift amount");
4468        Val = Val.getOperand(0);
4469        SH = 64 - Imm;
4470      }
4471
4472      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4473      CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4474      return true;
4475    } else if (isMask_64(~Imm64)) {
4476      // If this is a negated 64-bit zero-extension mask,
4477      // i.e. the immediate is a sequence of ones from the most significant
4478      // side and all zeros for the remainder, we should use rldicr.
4479      MB = 63 - countTrailingOnes(~Imm64);
4480      SH = 0;
4481      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4482      CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4483      return true;
4484    }
4485
4486    // The mask is not a 16-bit immediate, so an "and" would need at least two
4487    // instructions. Try to exploit rotate-and-mask instructions instead.
4488    if (isRunOfOnes64(Imm64, MB, ME)) {
4489      if (MB >= 32 && MB <= ME) {
4490        //                MB  ME
4491        // +----------------------+
4492        // |xxxxxxxxxxx00011111000|
4493        // +----------------------+
4494        //  0         32         64
4495        // We can only do it if MB is at least 32 and MB <= ME,
4496        // as RLWINM will replace the content of [0 - 32) with [32 - 64) even
4497        // if we don't rotate it.
4498        SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4499                          getI64Imm(ME - 32, dl) };
4500        CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
4501        return true;
4502      }
4503      // TODO - handle it with rldicl + rldicl
4504    }
4505  }
4506
4507  return false;
4508}
4509
4510// Select - Convert the specified operand from a target-independent to a
4511// target-specific node if it hasn't already been changed.
4512void PPCDAGToDAGISel::Select(SDNode *N) {
4513  SDLoc dl(N);
4514  if (N->isMachineOpcode()) {
4515    N->setNodeId(-1);
4516    return;   // Already selected.
4517  }
4518
4519  // In case any misguided DAG-level optimizations form an ADD with a
4520  // TargetConstant operand, crash here instead of miscompiling (by selecting
4521  // an r+r add instead of some kind of r+i add).
4522  if (N->getOpcode() == ISD::ADD &&
4523      N->getOperand(1).getOpcode() == ISD::TargetConstant)
4524    llvm_unreachable("Invalid ADD with TargetConstant operand");
4525
4526  // Try matching complex bit permutations before doing anything else.
4527  if (tryBitPermutation(N))
4528    return;
4529
4530  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4531  if (tryIntCompareInGPR(N))
4532    return;
4533
4534  switch (N->getOpcode()) {
4535  default: break;
4536
4537  case ISD::Constant:
4538    if (N->getValueType(0) == MVT::i64) {
4539      ReplaceNode(N, selectI64Imm(CurDAG, N));
4540      return;
4541    }
4542    break;
4543
4544  case ISD::SETCC:
4545    if (trySETCC(N))
4546      return;
4547    break;
4548  // These nodes will be transformed into GETtlsADDR32 node, which
4549  // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
4550  case PPCISD::ADDI_TLSLD_L_ADDR:
4551  case PPCISD::ADDI_TLSGD_L_ADDR: {
4552    const Module *Mod = MF->getFunction().getParent();
4553    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4554        !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
4555        Mod->getPICLevel() == PICLevel::SmallPIC)
4556      break;
4557    // Attach global base pointer on GETtlsADDR32 node in order to
4558    // generate secure plt code for TLS symbols.
4559    getGlobalBaseReg();
4560  } break;
4561  case PPCISD::CALL: {
4562    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4563        !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
4564        !PPCSubTarget->isTargetELF())
4565      break;
4566
4567    SDValue Op = N->getOperand(1);
4568
4569    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4570      if (GA->getTargetFlags() == PPCII::MO_PLT)
4571        getGlobalBaseReg();
4572    }
4573    else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
4574      if (ES->getTargetFlags() == PPCII::MO_PLT)
4575        getGlobalBaseReg();
4576    }
4577  }
4578    break;
4579
4580  case PPCISD::GlobalBaseReg:
4581    ReplaceNode(N, getGlobalBaseReg());
4582    return;
4583
4584  case ISD::FrameIndex:
4585    selectFrameIndex(N, N);
4586    return;
4587
4588  case PPCISD::MFOCRF: {
4589    SDValue InFlag = N->getOperand(1);
4590    ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
4591                                          N->getOperand(0), InFlag));
4592    return;
4593  }
4594
4595  case PPCISD::READ_TIME_BASE:
4596    ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
4597                                          MVT::Other, N->getOperand(0)));
4598    return;
4599
4600  case PPCISD::SRA_ADDZE: {
4601    SDValue N0 = N->getOperand(0);
4602    SDValue ShiftAmt =
4603      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
4604                                  getConstantIntValue(), dl,
4605                                  N->getValueType(0));
4606    if (N->getValueType(0) == MVT::i64) {
4607      SDNode *Op =
4608        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
4609                               N0, ShiftAmt);
4610      CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
4611                           SDValue(Op, 1));
4612      return;
4613    } else {
4614      assert(N->getValueType(0) == MVT::i32 &&
4615             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4616      SDNode *Op =
4617        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
4618                               N0, ShiftAmt);
4619      CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
4620                           SDValue(Op, 1));
4621      return;
4622    }
4623  }
4624
4625  case ISD::STORE: {
4626    // Change TLS initial-exec D-form stores to X-form stores.
4627    StoreSDNode *ST = cast<StoreSDNode>(N);
4628    if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
4629        ST->getAddressingMode() != ISD::PRE_INC)
4630      if (tryTLSXFormStore(ST))
4631        return;
4632    break;
4633  }
4634  case ISD::LOAD: {
4635    // Handle preincrement loads.
4636    LoadSDNode *LD = cast<LoadSDNode>(N);
4637    EVT LoadedVT = LD->getMemoryVT();
4638
4639    // Normal loads are handled by code generated from the .td file.
4640    if (LD->getAddressingMode() != ISD::PRE_INC) {
4641      // Change TLS initial-exec D-form loads to X-form loads.
4642      if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
4643        if (tryTLSXFormLoad(LD))
4644          return;
4645      break;
4646    }
4647
4648    SDValue Offset = LD->getOffset();
4649    if (Offset.getOpcode() == ISD::TargetConstant ||
4650        Offset.getOpcode() == ISD::TargetGlobalAddress) {
4651
4652      unsigned Opcode;
4653      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4654      if (LD->getValueType(0) != MVT::i64) {
4655        // Handle PPC32 integer and normal FP loads.
4656        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4657        switch (LoadedVT.getSimpleVT().SimpleTy) {
4658          default: llvm_unreachable("Invalid PPC load type!");
4659          case MVT::f64: Opcode = PPC::LFDU; break;
4660          case MVT::f32: Opcode = PPC::LFSU; break;
4661          case MVT::i32: Opcode = PPC::LWZU; break;
4662          case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
4663          case MVT::i1:
4664          case MVT::i8:  Opcode = PPC::LBZU; break;
4665        }
4666      } else {
4667        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4668        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4669        switch (LoadedVT.getSimpleVT().SimpleTy) {
4670          default: llvm_unreachable("Invalid PPC load type!");
4671          case MVT::i64: Opcode = PPC::LDU; break;
4672          case MVT::i32: Opcode = PPC::LWZU8; break;
4673          case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
4674          case MVT::i1:
4675          case MVT::i8:  Opcode = PPC::LBZU8; break;
4676        }
4677      }
4678
4679      SDValue Chain = LD->getChain();
4680      SDValue Base = LD->getBasePtr();
4681      SDValue Ops[] = { Offset, Base, Chain };
4682      SDNode *MN = CurDAG->getMachineNode(
4683          Opcode, dl, LD->getValueType(0),
4684          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4685      transferMemOperands(N, MN);
4686      ReplaceNode(N, MN);
4687      return;
4688    } else {
4689      unsigned Opcode;
4690      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4691      if (LD->getValueType(0) != MVT::i64) {
4692        // Handle PPC32 integer and normal FP loads.
4693        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4694        switch (LoadedVT.getSimpleVT().SimpleTy) {
4695          default: llvm_unreachable("Invalid PPC load type!");
4696          case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
4697          case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
4698          case MVT::f64: Opcode = PPC::LFDUX; break;
4699          case MVT::f32: Opcode = PPC::LFSUX; break;
4700          case MVT::i32: Opcode = PPC::LWZUX; break;
4701          case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
4702          case MVT::i1:
4703          case MVT::i8:  Opcode = PPC::LBZUX; break;
4704        }
4705      } else {
4706        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4707        assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
4708               "Invalid sext update load");
4709        switch (LoadedVT.getSimpleVT().SimpleTy) {
4710          default: llvm_unreachable("Invalid PPC load type!");
4711          case MVT::i64: Opcode = PPC::LDUX; break;
4712          case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
4713          case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
4714          case MVT::i1:
4715          case MVT::i8:  Opcode = PPC::LBZUX8; break;
4716        }
4717      }
4718
4719      SDValue Chain = LD->getChain();
4720      SDValue Base = LD->getBasePtr();
4721      SDValue Ops[] = { Base, Offset, Chain };
4722      SDNode *MN = CurDAG->getMachineNode(
4723          Opcode, dl, LD->getValueType(0),
4724          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4725      transferMemOperands(N, MN);
4726      ReplaceNode(N, MN);
4727      return;
4728    }
4729  }
4730
4731  case ISD::AND:
4732    // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
4733    if (tryAndWithMask(N))
4734      return;
4735
4736    // Other cases are autogenerated.
4737    break;
4738  case ISD::OR: {
4739    if (N->getValueType(0) == MVT::i32)
4740      if (tryBitfieldInsert(N))
4741        return;
4742
4743    int16_t Imm;
4744    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4745        isIntS16Immediate(N->getOperand(1), Imm)) {
4746      KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
4747
4748      // If this is equivalent to an add, then we can fold it with the
4749      // FrameIndex calculation.
4750      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4751        selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4752        return;
4753      }
4754    }
4755
4756    // OR with a 32-bit immediate can be handled by ori + oris
4757    // without creating an immediate in a GPR.
4758    uint64_t Imm64 = 0;
4759    bool IsPPC64 = PPCSubTarget->isPPC64();
4760    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4761        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4762      // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4763      uint64_t ImmHi = Imm64 >> 16;
4764      uint64_t ImmLo = Imm64 & 0xFFFF;
4765      if (ImmHi != 0 && ImmLo != 0) {
4766        SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4767                                            N->getOperand(0),
4768                                            getI16Imm(ImmLo, dl));
4769        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4770        CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4771        return;
4772      }
4773    }
4774
4775    // Other cases are autogenerated.
4776    break;
4777  }
4778  case ISD::XOR: {
4779    // XOR with a 32-bit immediate can be handled by xori + xoris
4780    // without creating an immediate in a GPR.
4781    uint64_t Imm64 = 0;
4782    bool IsPPC64 = PPCSubTarget->isPPC64();
4783    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4784        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4785      // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4786      uint64_t ImmHi = Imm64 >> 16;
4787      uint64_t ImmLo = Imm64 & 0xFFFF;
4788      if (ImmHi != 0 && ImmLo != 0) {
4789        SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4790                                            N->getOperand(0),
4791                                            getI16Imm(ImmLo, dl));
4792        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4793        CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4794        return;
4795      }
4796    }
4797
4798    break;
4799  }
4800  case ISD::ADD: {
4801    int16_t Imm;
4802    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4803        isIntS16Immediate(N->getOperand(1), Imm)) {
4804      selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4805      return;
4806    }
4807
4808    break;
4809  }
4810  case ISD::SHL: {
4811    unsigned Imm, SH, MB, ME;
4812    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4813        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4814      SDValue Ops[] = { N->getOperand(0).getOperand(0),
4815                          getI32Imm(SH, dl), getI32Imm(MB, dl),
4816                          getI32Imm(ME, dl) };
4817      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4818      return;
4819    }
4820
4821    // Other cases are autogenerated.
4822    break;
4823  }
4824  case ISD::SRL: {
4825    unsigned Imm, SH, MB, ME;
4826    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4827        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4828      SDValue Ops[] = { N->getOperand(0).getOperand(0),
4829                          getI32Imm(SH, dl), getI32Imm(MB, dl),
4830                          getI32Imm(ME, dl) };
4831      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4832      return;
4833    }
4834
4835    // Other cases are autogenerated.
4836    break;
4837  }
4838  // FIXME: Remove this once the ANDI glue bug is fixed:
4839  case PPCISD::ANDI_rec_1_EQ_BIT:
4840  case PPCISD::ANDI_rec_1_GT_BIT: {
4841    if (!ANDIGlueBug)
4842      break;
4843
4844    EVT InVT = N->getOperand(0).getValueType();
4845    assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4846           "Invalid input type for ANDI_rec_1_EQ_BIT");
4847
4848    unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
4849    SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4850                                        N->getOperand(0),
4851                                        CurDAG->getTargetConstant(1, dl, InVT)),
4852                 0);
4853    SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4854    SDValue SRIdxVal = CurDAG->getTargetConstant(
4855        N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
4856        dl, MVT::i32);
4857
4858    CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4859                         SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4860    return;
4861  }
4862  case ISD::SELECT_CC: {
4863    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4864    EVT PtrVT =
4865        CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4866    bool isPPC64 = (PtrVT == MVT::i64);
4867
4868    // If this is a select of i1 operands, we'll pattern match it.
4869    if (PPCSubTarget->useCRBits() &&
4870        N->getOperand(0).getValueType() == MVT::i1)
4871      break;
4872
4873    if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
4874      bool NeedSwapOps = false;
4875      bool IsUnCmp = false;
4876      if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
4877        SDValue LHS = N->getOperand(0);
4878        SDValue RHS = N->getOperand(1);
4879        if (NeedSwapOps)
4880          std::swap(LHS, RHS);
4881
4882        // Make use of SelectCC to generate the comparison to set CR bits, for
4883        // equality comparisons having one literal operand, SelectCC probably
4884        // doesn't need to materialize the whole literal and just use xoris to
4885        // check it first, it leads the following comparison result can't
4886        // exactly represent GT/LT relationship. So to avoid this we specify
4887        // SETGT/SETUGT here instead of SETEQ.
4888        SDValue GenCC =
4889            SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
4890        CurDAG->SelectNodeTo(
4891            N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
4892            N->getValueType(0), GenCC);
4893        NumP9Setb++;
4894        return;
4895      }
4896    }
4897
4898    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
4899    if (!isPPC64)
4900      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
4901        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
4902          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
4903            if (N1C->isNullValue() && N3C->isNullValue() &&
4904                N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
4905                // FIXME: Implement this optzn for PPC64.
4906                N->getValueType(0) == MVT::i32) {
4907              SDNode *Tmp =
4908                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4909                                       N->getOperand(0), getI32Imm(~0U, dl));
4910              CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
4911                                   N->getOperand(0), SDValue(Tmp, 1));
4912              return;
4913            }
4914
4915    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
4916
4917    if (N->getValueType(0) == MVT::i1) {
4918      // An i1 select is: (c & t) | (!c & f).
4919      bool Inv;
4920      unsigned Idx = getCRIdxForSetCC(CC, Inv);
4921
4922      unsigned SRI;
4923      switch (Idx) {
4924      default: llvm_unreachable("Invalid CC index");
4925      case 0: SRI = PPC::sub_lt; break;
4926      case 1: SRI = PPC::sub_gt; break;
4927      case 2: SRI = PPC::sub_eq; break;
4928      case 3: SRI = PPC::sub_un; break;
4929      }
4930
4931      SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
4932
4933      SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
4934                                              CCBit, CCBit), 0);
4935      SDValue C =    Inv ? NotCCBit : CCBit,
4936              NotC = Inv ? CCBit    : NotCCBit;
4937
4938      SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4939                                           C, N->getOperand(2)), 0);
4940      SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4941                                              NotC, N->getOperand(3)), 0);
4942
4943      CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
4944      return;
4945    }
4946
4947    unsigned BROpc =
4948        getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget);
4949
4950    unsigned SelectCCOp;
4951    if (N->getValueType(0) == MVT::i32)
4952      SelectCCOp = PPC::SELECT_CC_I4;
4953    else if (N->getValueType(0) == MVT::i64)
4954      SelectCCOp = PPC::SELECT_CC_I8;
4955    else if (N->getValueType(0) == MVT::f32) {
4956      if (PPCSubTarget->hasP8Vector())
4957        SelectCCOp = PPC::SELECT_CC_VSSRC;
4958      else if (PPCSubTarget->hasSPE())
4959        SelectCCOp = PPC::SELECT_CC_SPE4;
4960      else
4961        SelectCCOp = PPC::SELECT_CC_F4;
4962    } else if (N->getValueType(0) == MVT::f64) {
4963      if (PPCSubTarget->hasVSX())
4964        SelectCCOp = PPC::SELECT_CC_VSFRC;
4965      else if (PPCSubTarget->hasSPE())
4966        SelectCCOp = PPC::SELECT_CC_SPE;
4967      else
4968        SelectCCOp = PPC::SELECT_CC_F8;
4969    } else if (N->getValueType(0) == MVT::f128)
4970      SelectCCOp = PPC::SELECT_CC_F16;
4971    else if (PPCSubTarget->hasSPE())
4972      SelectCCOp = PPC::SELECT_CC_SPE;
4973    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
4974      SelectCCOp = PPC::SELECT_CC_QFRC;
4975    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
4976      SelectCCOp = PPC::SELECT_CC_QSRC;
4977    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
4978      SelectCCOp = PPC::SELECT_CC_QBRC;
4979    else if (N->getValueType(0) == MVT::v2f64 ||
4980             N->getValueType(0) == MVT::v2i64)
4981      SelectCCOp = PPC::SELECT_CC_VSRC;
4982    else
4983      SelectCCOp = PPC::SELECT_CC_VRRC;
4984
4985    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
4986                        getI32Imm(BROpc, dl) };
4987    CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
4988    return;
4989  }
4990  case ISD::VECTOR_SHUFFLE:
4991    if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
4992                                  N->getValueType(0) == MVT::v2i64)) {
4993      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4994
4995      SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
4996              Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
4997      unsigned DM[2];
4998
4999      for (int i = 0; i < 2; ++i)
5000        if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5001          DM[i] = 0;
5002        else
5003          DM[i] = 1;
5004
5005      if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5006          Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5007          isa<LoadSDNode>(Op1.getOperand(0))) {
5008        LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5009        SDValue Base, Offset;
5010
5011        if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5012            (LD->getMemoryVT() == MVT::f64 ||
5013             LD->getMemoryVT() == MVT::i64) &&
5014            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5015          SDValue Chain = LD->getChain();
5016          SDValue Ops[] = { Base, Offset, Chain };
5017          MachineMemOperand *MemOp = LD->getMemOperand();
5018          SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5019                                              N->getValueType(0), Ops);
5020          CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5021          return;
5022        }
5023      }
5024
5025      // For little endian, we must swap the input operands and adjust
5026      // the mask elements (reverse and invert them).
5027      if (PPCSubTarget->isLittleEndian()) {
5028        std::swap(Op1, Op2);
5029        unsigned tmp = DM[0];
5030        DM[0] = 1 - DM[1];
5031        DM[1] = 1 - tmp;
5032      }
5033
5034      SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
5035                                              MVT::i32);
5036      SDValue Ops[] = { Op1, Op2, DMV };
5037      CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
5038      return;
5039    }
5040
5041    break;
5042  case PPCISD::BDNZ:
5043  case PPCISD::BDZ: {
5044    bool IsPPC64 = PPCSubTarget->isPPC64();
5045    SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
5046    CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
5047                                ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
5048                                : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
5049                         MVT::Other, Ops);
5050    return;
5051  }
5052  case PPCISD::COND_BRANCH: {
5053    // Op #0 is the Chain.
5054    // Op #1 is the PPC::PRED_* number.
5055    // Op #2 is the CR#
5056    // Op #3 is the Dest MBB
5057    // Op #4 is the Flag.
5058    // Prevent PPC::PRED_* from being selected into LI.
5059    unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
5060    if (EnableBranchHint)
5061      PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
5062
5063    SDValue Pred = getI32Imm(PCC, dl);
5064    SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
5065      N->getOperand(0), N->getOperand(4) };
5066    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
5067    return;
5068  }
5069  case ISD::BR_CC: {
5070    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
5071    unsigned PCC =
5072        getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget);
5073
5074    if (N->getOperand(2).getValueType() == MVT::i1) {
5075      unsigned Opc;
5076      bool Swap;
5077      switch (PCC) {
5078      default: llvm_unreachable("Unexpected Boolean-operand predicate");
5079      case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
5080      case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
5081      case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
5082      case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
5083      case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
5084      case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
5085      }
5086
5087      // A signed comparison of i1 values produces the opposite result to an
5088      // unsigned one if the condition code includes less-than or greater-than.
5089      // This is because 1 is the most negative signed i1 number and the most
5090      // positive unsigned i1 number. The CR-logical operations used for such
5091      // comparisons are non-commutative so for signed comparisons vs. unsigned
5092      // ones, the input operands just need to be swapped.
5093      if (ISD::isSignedIntSetCC(CC))
5094        Swap = !Swap;
5095
5096      SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
5097                                             N->getOperand(Swap ? 3 : 2),
5098                                             N->getOperand(Swap ? 2 : 3)), 0);
5099      CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
5100                           N->getOperand(0));
5101      return;
5102    }
5103
5104    if (EnableBranchHint)
5105      PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
5106
5107    SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
5108    SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
5109                        N->getOperand(4), N->getOperand(0) };
5110    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
5111    return;
5112  }
5113  case ISD::BRIND: {
5114    // FIXME: Should custom lower this.
5115    SDValue Chain = N->getOperand(0);
5116    SDValue Target = N->getOperand(1);
5117    unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
5118    unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
5119    Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
5120                                           Chain), 0);
5121    CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
5122    return;
5123  }
5124  case PPCISD::TOC_ENTRY: {
5125    const bool isPPC64 = PPCSubTarget->isPPC64();
5126    const bool isELFABI = PPCSubTarget->isSVR4ABI();
5127    const bool isAIXABI = PPCSubTarget->isAIXABI();
5128
5129    assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct");
5130
5131    // PowerPC only supports the small, medium and large code models.
5132    const CodeModel::Model CModel = TM.getCodeModel();
5133    assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
5134           "PowerPC doesn't support tiny or kernel code models.");
5135
5136    if (isAIXABI && CModel == CodeModel::Medium)
5137      report_fatal_error("Medium code model is not supported on AIX.");
5138
5139    // For 64-bit small code model, we allow SelectCodeCommon to handle this,
5140    // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
5141    if (isPPC64 && CModel == CodeModel::Small)
5142      break;
5143
5144    // Handle 32-bit small code model.
5145    if (!isPPC64) {
5146      // Transforms the PPCISD::TOC_ENTRY node to a PPC::LWZtoc machine node.
5147      auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) {
5148        SDValue GA = TocEntry->getOperand(0);
5149        SDValue TocBase = TocEntry->getOperand(1);
5150        SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
5151                                            TocBase);
5152        transferMemOperands(TocEntry, MN);
5153        ReplaceNode(TocEntry, MN);
5154      };
5155
5156      if (isELFABI) {
5157        assert(TM.isPositionIndependent() &&
5158               "32-bit ELF can only have TOC entries in position independent"
5159               " code.");
5160        // 32-bit ELF always uses a small code model toc access.
5161        replaceWithLWZtoc(N);
5162        return;
5163      }
5164
5165      if (isAIXABI && CModel == CodeModel::Small) {
5166        replaceWithLWZtoc(N);
5167        return;
5168      }
5169    }
5170
5171    assert(CModel != CodeModel::Small && "All small code models handled.");
5172
5173    assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
5174           " ELF/AIX or 32-bit AIX in the following.");
5175
5176    // Transforms the PPCISD::TOC_ENTRY node for 32-bit AIX large code model
5177    // mode or 64-bit medium (ELF-only) or large (ELF and AIX) code model code.
5178    // We generate two instructions as described below. The first source operand
5179    // is a symbol reference. If it must be toc-referenced according to
5180    // PPCSubTarget, we generate:
5181    // [32-bit AIX]
5182    //   LWZtocL(@sym, ADDIStocHA(%r2, @sym))
5183    // [64-bit ELF/AIX]
5184    //   LDtocL(@sym, ADDIStocHA8(%x2, @sym))
5185    // Otherwise we generate:
5186    //   ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
5187    SDValue GA = N->getOperand(0);
5188    SDValue TOCbase = N->getOperand(1);
5189
5190    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5191    SDNode *Tmp = CurDAG->getMachineNode(
5192        isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
5193
5194    if (PPCLowering->isAccessedAsGotIndirect(GA)) {
5195      // If it is accessed as got-indirect, we need an extra LWZ/LD to load
5196      // the address.
5197      SDNode *MN = CurDAG->getMachineNode(
5198          isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
5199
5200      transferMemOperands(N, MN);
5201      ReplaceNode(N, MN);
5202      return;
5203    }
5204
5205    // Build the address relative to the TOC-pointer.
5206    ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
5207                                          SDValue(Tmp, 0), GA));
5208    return;
5209  }
5210  case PPCISD::PPC32_PICGOT:
5211    // Generate a PIC-safe GOT reference.
5212    assert(PPCSubTarget->is32BitELFABI() &&
5213           "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
5214    CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
5215                         PPCLowering->getPointerTy(CurDAG->getDataLayout()),
5216                         MVT::i32);
5217    return;
5218
5219  case PPCISD::VADD_SPLAT: {
5220    // This expands into one of three sequences, depending on whether
5221    // the first operand is odd or even, positive or negative.
5222    assert(isa<ConstantSDNode>(N->getOperand(0)) &&
5223           isa<ConstantSDNode>(N->getOperand(1)) &&
5224           "Invalid operand on VADD_SPLAT!");
5225
5226    int Elt     = N->getConstantOperandVal(0);
5227    int EltSize = N->getConstantOperandVal(1);
5228    unsigned Opc1, Opc2, Opc3;
5229    EVT VT;
5230
5231    if (EltSize == 1) {
5232      Opc1 = PPC::VSPLTISB;
5233      Opc2 = PPC::VADDUBM;
5234      Opc3 = PPC::VSUBUBM;
5235      VT = MVT::v16i8;
5236    } else if (EltSize == 2) {
5237      Opc1 = PPC::VSPLTISH;
5238      Opc2 = PPC::VADDUHM;
5239      Opc3 = PPC::VSUBUHM;
5240      VT = MVT::v8i16;
5241    } else {
5242      assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
5243      Opc1 = PPC::VSPLTISW;
5244      Opc2 = PPC::VADDUWM;
5245      Opc3 = PPC::VSUBUWM;
5246      VT = MVT::v4i32;
5247    }
5248
5249    if ((Elt & 1) == 0) {
5250      // Elt is even, in the range [-32,-18] + [16,30].
5251      //
5252      // Convert: VADD_SPLAT elt, size
5253      // Into:    tmp = VSPLTIS[BHW] elt
5254      //          VADDU[BHW]M tmp, tmp
5255      // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
5256      SDValue EltVal = getI32Imm(Elt >> 1, dl);
5257      SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5258      SDValue TmpVal = SDValue(Tmp, 0);
5259      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
5260      return;
5261    } else if (Elt > 0) {
5262      // Elt is odd and positive, in the range [17,31].
5263      //
5264      // Convert: VADD_SPLAT elt, size
5265      // Into:    tmp1 = VSPLTIS[BHW] elt-16
5266      //          tmp2 = VSPLTIS[BHW] -16
5267      //          VSUBU[BHW]M tmp1, tmp2
5268      SDValue EltVal = getI32Imm(Elt - 16, dl);
5269      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5270      EltVal = getI32Imm(-16, dl);
5271      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5272      ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
5273                                            SDValue(Tmp2, 0)));
5274      return;
5275    } else {
5276      // Elt is odd and negative, in the range [-31,-17].
5277      //
5278      // Convert: VADD_SPLAT elt, size
5279      // Into:    tmp1 = VSPLTIS[BHW] elt+16
5280      //          tmp2 = VSPLTIS[BHW] -16
5281      //          VADDU[BHW]M tmp1, tmp2
5282      SDValue EltVal = getI32Imm(Elt + 16, dl);
5283      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5284      EltVal = getI32Imm(-16, dl);
5285      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5286      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
5287                                            SDValue(Tmp2, 0)));
5288      return;
5289    }
5290  }
5291  }
5292
5293  SelectCode(N);
5294}
5295
5296// If the target supports the cmpb instruction, do the idiom recognition here.
5297// We don't do this as a DAG combine because we don't want to do it as nodes
5298// are being combined (because we might miss part of the eventual idiom). We
5299// don't want to do it during instruction selection because we want to reuse
5300// the logic for lowering the masking operations already part of the
5301// instruction selector.
5302SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
5303  SDLoc dl(N);
5304
5305  assert(N->getOpcode() == ISD::OR &&
5306         "Only OR nodes are supported for CMPB");
5307
5308  SDValue Res;
5309  if (!PPCSubTarget->hasCMPB())
5310    return Res;
5311
5312  if (N->getValueType(0) != MVT::i32 &&
5313      N->getValueType(0) != MVT::i64)
5314    return Res;
5315
5316  EVT VT = N->getValueType(0);
5317
5318  SDValue RHS, LHS;
5319  bool BytesFound[8] = {false, false, false, false, false, false, false, false};
5320  uint64_t Mask = 0, Alt = 0;
5321
5322  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
5323                               uint64_t &Mask, uint64_t &Alt,
5324                               SDValue &LHS, SDValue &RHS) {
5325    if (O.getOpcode() != ISD::SELECT_CC)
5326      return false;
5327    ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
5328
5329    if (!isa<ConstantSDNode>(O.getOperand(2)) ||
5330        !isa<ConstantSDNode>(O.getOperand(3)))
5331      return false;
5332
5333    uint64_t PM = O.getConstantOperandVal(2);
5334    uint64_t PAlt = O.getConstantOperandVal(3);
5335    for (b = 0; b < 8; ++b) {
5336      uint64_t Mask = UINT64_C(0xFF) << (8*b);
5337      if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
5338        break;
5339    }
5340
5341    if (b == 8)
5342      return false;
5343    Mask |= PM;
5344    Alt  |= PAlt;
5345
5346    if (!isa<ConstantSDNode>(O.getOperand(1)) ||
5347        O.getConstantOperandVal(1) != 0) {
5348      SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
5349      if (Op0.getOpcode() == ISD::TRUNCATE)
5350        Op0 = Op0.getOperand(0);
5351      if (Op1.getOpcode() == ISD::TRUNCATE)
5352        Op1 = Op1.getOperand(0);
5353
5354      if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
5355          Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
5356          isa<ConstantSDNode>(Op0.getOperand(1))) {
5357
5358        unsigned Bits = Op0.getValueSizeInBits();
5359        if (b != Bits/8-1)
5360          return false;
5361        if (Op0.getConstantOperandVal(1) != Bits-8)
5362          return false;
5363
5364        LHS = Op0.getOperand(0);
5365        RHS = Op1.getOperand(0);
5366        return true;
5367      }
5368
5369      // When we have small integers (i16 to be specific), the form present
5370      // post-legalization uses SETULT in the SELECT_CC for the
5371      // higher-order byte, depending on the fact that the
5372      // even-higher-order bytes are known to all be zero, for example:
5373      //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
5374      // (so when the second byte is the same, because all higher-order
5375      // bits from bytes 3 and 4 are known to be zero, the result of the
5376      // xor can be at most 255)
5377      if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
5378          isa<ConstantSDNode>(O.getOperand(1))) {
5379
5380        uint64_t ULim = O.getConstantOperandVal(1);
5381        if (ULim != (UINT64_C(1) << b*8))
5382          return false;
5383
5384        // Now we need to make sure that the upper bytes are known to be
5385        // zero.
5386        unsigned Bits = Op0.getValueSizeInBits();
5387        if (!CurDAG->MaskedValueIsZero(
5388                Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
5389          return false;
5390
5391        LHS = Op0.getOperand(0);
5392        RHS = Op0.getOperand(1);
5393        return true;
5394      }
5395
5396      return false;
5397    }
5398
5399    if (CC != ISD::SETEQ)
5400      return false;
5401
5402    SDValue Op = O.getOperand(0);
5403    if (Op.getOpcode() == ISD::AND) {
5404      if (!isa<ConstantSDNode>(Op.getOperand(1)))
5405        return false;
5406      if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
5407        return false;
5408
5409      SDValue XOR = Op.getOperand(0);
5410      if (XOR.getOpcode() == ISD::TRUNCATE)
5411        XOR = XOR.getOperand(0);
5412      if (XOR.getOpcode() != ISD::XOR)
5413        return false;
5414
5415      LHS = XOR.getOperand(0);
5416      RHS = XOR.getOperand(1);
5417      return true;
5418    } else if (Op.getOpcode() == ISD::SRL) {
5419      if (!isa<ConstantSDNode>(Op.getOperand(1)))
5420        return false;
5421      unsigned Bits = Op.getValueSizeInBits();
5422      if (b != Bits/8-1)
5423        return false;
5424      if (Op.getConstantOperandVal(1) != Bits-8)
5425        return false;
5426
5427      SDValue XOR = Op.getOperand(0);
5428      if (XOR.getOpcode() == ISD::TRUNCATE)
5429        XOR = XOR.getOperand(0);
5430      if (XOR.getOpcode() != ISD::XOR)
5431        return false;
5432
5433      LHS = XOR.getOperand(0);
5434      RHS = XOR.getOperand(1);
5435      return true;
5436    }
5437
5438    return false;
5439  };
5440
5441  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
5442  while (!Queue.empty()) {
5443    SDValue V = Queue.pop_back_val();
5444
5445    for (const SDValue &O : V.getNode()->ops()) {
5446      unsigned b = 0;
5447      uint64_t M = 0, A = 0;
5448      SDValue OLHS, ORHS;
5449      if (O.getOpcode() == ISD::OR) {
5450        Queue.push_back(O);
5451      } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
5452        if (!LHS) {
5453          LHS = OLHS;
5454          RHS = ORHS;
5455          BytesFound[b] = true;
5456          Mask |= M;
5457          Alt  |= A;
5458        } else if ((LHS == ORHS && RHS == OLHS) ||
5459                   (RHS == ORHS && LHS == OLHS)) {
5460          BytesFound[b] = true;
5461          Mask |= M;
5462          Alt  |= A;
5463        } else {
5464          return Res;
5465        }
5466      } else {
5467        return Res;
5468      }
5469    }
5470  }
5471
5472  unsigned LastB = 0, BCnt = 0;
5473  for (unsigned i = 0; i < 8; ++i)
5474    if (BytesFound[LastB]) {
5475      ++BCnt;
5476      LastB = i;
5477    }
5478
5479  if (!LastB || BCnt < 2)
5480    return Res;
5481
5482  // Because we'll be zero-extending the output anyway if don't have a specific
5483  // value for each input byte (via the Mask), we can 'anyext' the inputs.
5484  if (LHS.getValueType() != VT) {
5485    LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
5486    RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
5487  }
5488
5489  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
5490
5491  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
5492  if (NonTrivialMask && !Alt) {
5493    // Res = Mask & CMPB
5494    Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5495                          CurDAG->getConstant(Mask, dl, VT));
5496  } else if (Alt) {
5497    // Res = (CMPB & Mask) | (~CMPB & Alt)
5498    // Which, as suggested here:
5499    //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
5500    // can be written as:
5501    // Res = Alt ^ ((Alt ^ Mask) & CMPB)
5502    // useful because the (Alt ^ Mask) can be pre-computed.
5503    Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5504                          CurDAG->getConstant(Mask ^ Alt, dl, VT));
5505    Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
5506                          CurDAG->getConstant(Alt, dl, VT));
5507  }
5508
5509  return Res;
5510}
5511
5512// When CR bit registers are enabled, an extension of an i1 variable to a i32
5513// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5514// involves constant materialization of a 0 or a 1 or both. If the result of
5515// the extension is then operated upon by some operator that can be constant
5516// folded with a constant 0 or 1, and that constant can be materialized using
5517// only one instruction (like a zero or one), then we should fold in those
5518// operations with the select.
5519void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
5520  if (!PPCSubTarget->useCRBits())
5521    return;
5522
5523  if (N->getOpcode() != ISD::ZERO_EXTEND &&
5524      N->getOpcode() != ISD::SIGN_EXTEND &&
5525      N->getOpcode() != ISD::ANY_EXTEND)
5526    return;
5527
5528  if (N->getOperand(0).getValueType() != MVT::i1)
5529    return;
5530
5531  if (!N->hasOneUse())
5532    return;
5533
5534  SDLoc dl(N);
5535  EVT VT = N->getValueType(0);
5536  SDValue Cond = N->getOperand(0);
5537  SDValue ConstTrue =
5538    CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
5539  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
5540
5541  do {
5542    SDNode *User = *N->use_begin();
5543    if (User->getNumOperands() != 2)
5544      break;
5545
5546    auto TryFold = [this, N, User, dl](SDValue Val) {
5547      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
5548      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
5549      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
5550
5551      return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
5552                                            User->getValueType(0),
5553                                            O0.getNode(), O1.getNode());
5554    };
5555
5556    // FIXME: When the semantics of the interaction between select and undef
5557    // are clearly defined, it may turn out to be unnecessary to break here.
5558    SDValue TrueRes = TryFold(ConstTrue);
5559    if (!TrueRes || TrueRes.isUndef())
5560      break;
5561    SDValue FalseRes = TryFold(ConstFalse);
5562    if (!FalseRes || FalseRes.isUndef())
5563      break;
5564
5565    // For us to materialize these using one instruction, we must be able to
5566    // represent them as signed 16-bit integers.
5567    uint64_t True  = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
5568             False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
5569    if (!isInt<16>(True) || !isInt<16>(False))
5570      break;
5571
5572    // We can replace User with a new SELECT node, and try again to see if we
5573    // can fold the select with its user.
5574    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
5575    N = User;
5576    ConstTrue = TrueRes;
5577    ConstFalse = FalseRes;
5578  } while (N->hasOneUse());
5579}
5580
5581void PPCDAGToDAGISel::PreprocessISelDAG() {
5582  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
5583
5584  bool MadeChange = false;
5585  while (Position != CurDAG->allnodes_begin()) {
5586    SDNode *N = &*--Position;
5587    if (N->use_empty())
5588      continue;
5589
5590    SDValue Res;
5591    switch (N->getOpcode()) {
5592    default: break;
5593    case ISD::OR:
5594      Res = combineToCMPB(N);
5595      break;
5596    }
5597
5598    if (!Res)
5599      foldBoolExts(Res, N);
5600
5601    if (Res) {
5602      LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
5603      LLVM_DEBUG(N->dump(CurDAG));
5604      LLVM_DEBUG(dbgs() << "\nNew: ");
5605      LLVM_DEBUG(Res.getNode()->dump(CurDAG));
5606      LLVM_DEBUG(dbgs() << "\n");
5607
5608      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
5609      MadeChange = true;
5610    }
5611  }
5612
5613  if (MadeChange)
5614    CurDAG->RemoveDeadNodes();
5615}
5616
5617/// PostprocessISelDAG - Perform some late peephole optimizations
5618/// on the DAG representation.
5619void PPCDAGToDAGISel::PostprocessISelDAG() {
5620  // Skip peepholes at -O0.
5621  if (TM.getOptLevel() == CodeGenOpt::None)
5622    return;
5623
5624  PeepholePPC64();
5625  PeepholeCROps();
5626  PeepholePPC64ZExt();
5627}
5628
5629// Check if all users of this node will become isel where the second operand
5630// is the constant zero. If this is so, and if we can negate the condition,
5631// then we can flip the true and false operands. This will allow the zero to
5632// be folded with the isel so that we don't need to materialize a register
5633// containing zero.
5634bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
5635  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5636       UI != UE; ++UI) {
5637    SDNode *User = *UI;
5638    if (!User->isMachineOpcode())
5639      return false;
5640    if (User->getMachineOpcode() != PPC::SELECT_I4 &&
5641        User->getMachineOpcode() != PPC::SELECT_I8)
5642      return false;
5643
5644    SDNode *Op2 = User->getOperand(2).getNode();
5645    if (!Op2->isMachineOpcode())
5646      return false;
5647
5648    if (Op2->getMachineOpcode() != PPC::LI &&
5649        Op2->getMachineOpcode() != PPC::LI8)
5650      return false;
5651
5652    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
5653    if (!C)
5654      return false;
5655
5656    if (!C->isNullValue())
5657      return false;
5658  }
5659
5660  return true;
5661}
5662
5663void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
5664  SmallVector<SDNode *, 4> ToReplace;
5665  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5666       UI != UE; ++UI) {
5667    SDNode *User = *UI;
5668    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
5669            User->getMachineOpcode() == PPC::SELECT_I8) &&
5670           "Must have all select users");
5671    ToReplace.push_back(User);
5672  }
5673
5674  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
5675       UE = ToReplace.end(); UI != UE; ++UI) {
5676    SDNode *User = *UI;
5677    SDNode *ResNode =
5678      CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
5679                             User->getValueType(0), User->getOperand(0),
5680                             User->getOperand(2),
5681                             User->getOperand(1));
5682
5683    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
5684    LLVM_DEBUG(User->dump(CurDAG));
5685    LLVM_DEBUG(dbgs() << "\nNew: ");
5686    LLVM_DEBUG(ResNode->dump(CurDAG));
5687    LLVM_DEBUG(dbgs() << "\n");
5688
5689    ReplaceUses(User, ResNode);
5690  }
5691}
5692
5693void PPCDAGToDAGISel::PeepholeCROps() {
5694  bool IsModified;
5695  do {
5696    IsModified = false;
5697    for (SDNode &Node : CurDAG->allnodes()) {
5698      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
5699      if (!MachineNode || MachineNode->use_empty())
5700        continue;
5701      SDNode *ResNode = MachineNode;
5702
5703      bool Op1Set   = false, Op1Unset = false,
5704           Op1Not   = false,
5705           Op2Set   = false, Op2Unset = false,
5706           Op2Not   = false;
5707
5708      unsigned Opcode = MachineNode->getMachineOpcode();
5709      switch (Opcode) {
5710      default: break;
5711      case PPC::CRAND:
5712      case PPC::CRNAND:
5713      case PPC::CROR:
5714      case PPC::CRXOR:
5715      case PPC::CRNOR:
5716      case PPC::CREQV:
5717      case PPC::CRANDC:
5718      case PPC::CRORC: {
5719        SDValue Op = MachineNode->getOperand(1);
5720        if (Op.isMachineOpcode()) {
5721          if (Op.getMachineOpcode() == PPC::CRSET)
5722            Op2Set = true;
5723          else if (Op.getMachineOpcode() == PPC::CRUNSET)
5724            Op2Unset = true;
5725          else if (Op.getMachineOpcode() == PPC::CRNOR &&
5726                   Op.getOperand(0) == Op.getOperand(1))
5727            Op2Not = true;
5728        }
5729        LLVM_FALLTHROUGH;
5730      }
5731      case PPC::BC:
5732      case PPC::BCn:
5733      case PPC::SELECT_I4:
5734      case PPC::SELECT_I8:
5735      case PPC::SELECT_F4:
5736      case PPC::SELECT_F8:
5737      case PPC::SELECT_QFRC:
5738      case PPC::SELECT_QSRC:
5739      case PPC::SELECT_QBRC:
5740      case PPC::SELECT_SPE:
5741      case PPC::SELECT_SPE4:
5742      case PPC::SELECT_VRRC:
5743      case PPC::SELECT_VSFRC:
5744      case PPC::SELECT_VSSRC:
5745      case PPC::SELECT_VSRC: {
5746        SDValue Op = MachineNode->getOperand(0);
5747        if (Op.isMachineOpcode()) {
5748          if (Op.getMachineOpcode() == PPC::CRSET)
5749            Op1Set = true;
5750          else if (Op.getMachineOpcode() == PPC::CRUNSET)
5751            Op1Unset = true;
5752          else if (Op.getMachineOpcode() == PPC::CRNOR &&
5753                   Op.getOperand(0) == Op.getOperand(1))
5754            Op1Not = true;
5755        }
5756        }
5757        break;
5758      }
5759
5760      bool SelectSwap = false;
5761      switch (Opcode) {
5762      default: break;
5763      case PPC::CRAND:
5764        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5765          // x & x = x
5766          ResNode = MachineNode->getOperand(0).getNode();
5767        else if (Op1Set)
5768          // 1 & y = y
5769          ResNode = MachineNode->getOperand(1).getNode();
5770        else if (Op2Set)
5771          // x & 1 = x
5772          ResNode = MachineNode->getOperand(0).getNode();
5773        else if (Op1Unset || Op2Unset)
5774          // x & 0 = 0 & y = 0
5775          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5776                                           MVT::i1);
5777        else if (Op1Not)
5778          // ~x & y = andc(y, x)
5779          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5780                                           MVT::i1, MachineNode->getOperand(1),
5781                                           MachineNode->getOperand(0).
5782                                             getOperand(0));
5783        else if (Op2Not)
5784          // x & ~y = andc(x, y)
5785          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5786                                           MVT::i1, MachineNode->getOperand(0),
5787                                           MachineNode->getOperand(1).
5788                                             getOperand(0));
5789        else if (AllUsersSelectZero(MachineNode)) {
5790          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5791                                           MVT::i1, MachineNode->getOperand(0),
5792                                           MachineNode->getOperand(1));
5793          SelectSwap = true;
5794        }
5795        break;
5796      case PPC::CRNAND:
5797        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5798          // nand(x, x) -> nor(x, x)
5799          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5800                                           MVT::i1, MachineNode->getOperand(0),
5801                                           MachineNode->getOperand(0));
5802        else if (Op1Set)
5803          // nand(1, y) -> nor(y, y)
5804          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5805                                           MVT::i1, MachineNode->getOperand(1),
5806                                           MachineNode->getOperand(1));
5807        else if (Op2Set)
5808          // nand(x, 1) -> nor(x, x)
5809          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5810                                           MVT::i1, MachineNode->getOperand(0),
5811                                           MachineNode->getOperand(0));
5812        else if (Op1Unset || Op2Unset)
5813          // nand(x, 0) = nand(0, y) = 1
5814          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5815                                           MVT::i1);
5816        else if (Op1Not)
5817          // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5818          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5819                                           MVT::i1, MachineNode->getOperand(0).
5820                                                      getOperand(0),
5821                                           MachineNode->getOperand(1));
5822        else if (Op2Not)
5823          // nand(x, ~y) = ~x | y = orc(y, x)
5824          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5825                                           MVT::i1, MachineNode->getOperand(1).
5826                                                      getOperand(0),
5827                                           MachineNode->getOperand(0));
5828        else if (AllUsersSelectZero(MachineNode)) {
5829          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5830                                           MVT::i1, MachineNode->getOperand(0),
5831                                           MachineNode->getOperand(1));
5832          SelectSwap = true;
5833        }
5834        break;
5835      case PPC::CROR:
5836        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5837          // x | x = x
5838          ResNode = MachineNode->getOperand(0).getNode();
5839        else if (Op1Set || Op2Set)
5840          // x | 1 = 1 | y = 1
5841          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5842                                           MVT::i1);
5843        else if (Op1Unset)
5844          // 0 | y = y
5845          ResNode = MachineNode->getOperand(1).getNode();
5846        else if (Op2Unset)
5847          // x | 0 = x
5848          ResNode = MachineNode->getOperand(0).getNode();
5849        else if (Op1Not)
5850          // ~x | y = orc(y, x)
5851          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5852                                           MVT::i1, MachineNode->getOperand(1),
5853                                           MachineNode->getOperand(0).
5854                                             getOperand(0));
5855        else if (Op2Not)
5856          // x | ~y = orc(x, y)
5857          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5858                                           MVT::i1, MachineNode->getOperand(0),
5859                                           MachineNode->getOperand(1).
5860                                             getOperand(0));
5861        else if (AllUsersSelectZero(MachineNode)) {
5862          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5863                                           MVT::i1, MachineNode->getOperand(0),
5864                                           MachineNode->getOperand(1));
5865          SelectSwap = true;
5866        }
5867        break;
5868      case PPC::CRXOR:
5869        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5870          // xor(x, x) = 0
5871          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5872                                           MVT::i1);
5873        else if (Op1Set)
5874          // xor(1, y) -> nor(y, y)
5875          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5876                                           MVT::i1, MachineNode->getOperand(1),
5877                                           MachineNode->getOperand(1));
5878        else if (Op2Set)
5879          // xor(x, 1) -> nor(x, x)
5880          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5881                                           MVT::i1, MachineNode->getOperand(0),
5882                                           MachineNode->getOperand(0));
5883        else if (Op1Unset)
5884          // xor(0, y) = y
5885          ResNode = MachineNode->getOperand(1).getNode();
5886        else if (Op2Unset)
5887          // xor(x, 0) = x
5888          ResNode = MachineNode->getOperand(0).getNode();
5889        else if (Op1Not)
5890          // xor(~x, y) = eqv(x, y)
5891          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5892                                           MVT::i1, MachineNode->getOperand(0).
5893                                                      getOperand(0),
5894                                           MachineNode->getOperand(1));
5895        else if (Op2Not)
5896          // xor(x, ~y) = eqv(x, y)
5897          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5898                                           MVT::i1, MachineNode->getOperand(0),
5899                                           MachineNode->getOperand(1).
5900                                             getOperand(0));
5901        else if (AllUsersSelectZero(MachineNode)) {
5902          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5903                                           MVT::i1, MachineNode->getOperand(0),
5904                                           MachineNode->getOperand(1));
5905          SelectSwap = true;
5906        }
5907        break;
5908      case PPC::CRNOR:
5909        if (Op1Set || Op2Set)
5910          // nor(1, y) -> 0
5911          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5912                                           MVT::i1);
5913        else if (Op1Unset)
5914          // nor(0, y) = ~y -> nor(y, y)
5915          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5916                                           MVT::i1, MachineNode->getOperand(1),
5917                                           MachineNode->getOperand(1));
5918        else if (Op2Unset)
5919          // nor(x, 0) = ~x
5920          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5921                                           MVT::i1, MachineNode->getOperand(0),
5922                                           MachineNode->getOperand(0));
5923        else if (Op1Not)
5924          // nor(~x, y) = andc(x, y)
5925          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5926                                           MVT::i1, MachineNode->getOperand(0).
5927                                                      getOperand(0),
5928                                           MachineNode->getOperand(1));
5929        else if (Op2Not)
5930          // nor(x, ~y) = andc(y, x)
5931          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5932                                           MVT::i1, MachineNode->getOperand(1).
5933                                                      getOperand(0),
5934                                           MachineNode->getOperand(0));
5935        else if (AllUsersSelectZero(MachineNode)) {
5936          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5937                                           MVT::i1, MachineNode->getOperand(0),
5938                                           MachineNode->getOperand(1));
5939          SelectSwap = true;
5940        }
5941        break;
5942      case PPC::CREQV:
5943        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5944          // eqv(x, x) = 1
5945          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5946                                           MVT::i1);
5947        else if (Op1Set)
5948          // eqv(1, y) = y
5949          ResNode = MachineNode->getOperand(1).getNode();
5950        else if (Op2Set)
5951          // eqv(x, 1) = x
5952          ResNode = MachineNode->getOperand(0).getNode();
5953        else if (Op1Unset)
5954          // eqv(0, y) = ~y -> nor(y, y)
5955          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5956                                           MVT::i1, MachineNode->getOperand(1),
5957                                           MachineNode->getOperand(1));
5958        else if (Op2Unset)
5959          // eqv(x, 0) = ~x
5960          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5961                                           MVT::i1, MachineNode->getOperand(0),
5962                                           MachineNode->getOperand(0));
5963        else if (Op1Not)
5964          // eqv(~x, y) = xor(x, y)
5965          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5966                                           MVT::i1, MachineNode->getOperand(0).
5967                                                      getOperand(0),
5968                                           MachineNode->getOperand(1));
5969        else if (Op2Not)
5970          // eqv(x, ~y) = xor(x, y)
5971          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5972                                           MVT::i1, MachineNode->getOperand(0),
5973                                           MachineNode->getOperand(1).
5974                                             getOperand(0));
5975        else if (AllUsersSelectZero(MachineNode)) {
5976          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5977                                           MVT::i1, MachineNode->getOperand(0),
5978                                           MachineNode->getOperand(1));
5979          SelectSwap = true;
5980        }
5981        break;
5982      case PPC::CRANDC:
5983        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5984          // andc(x, x) = 0
5985          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5986                                           MVT::i1);
5987        else if (Op1Set)
5988          // andc(1, y) = ~y
5989          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5990                                           MVT::i1, MachineNode->getOperand(1),
5991                                           MachineNode->getOperand(1));
5992        else if (Op1Unset || Op2Set)
5993          // andc(0, y) = andc(x, 1) = 0
5994          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5995                                           MVT::i1);
5996        else if (Op2Unset)
5997          // andc(x, 0) = x
5998          ResNode = MachineNode->getOperand(0).getNode();
5999        else if (Op1Not)
6000          // andc(~x, y) = ~(x | y) = nor(x, y)
6001          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6002                                           MVT::i1, MachineNode->getOperand(0).
6003                                                      getOperand(0),
6004                                           MachineNode->getOperand(1));
6005        else if (Op2Not)
6006          // andc(x, ~y) = x & y
6007          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6008                                           MVT::i1, MachineNode->getOperand(0),
6009                                           MachineNode->getOperand(1).
6010                                             getOperand(0));
6011        else if (AllUsersSelectZero(MachineNode)) {
6012          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6013                                           MVT::i1, MachineNode->getOperand(1),
6014                                           MachineNode->getOperand(0));
6015          SelectSwap = true;
6016        }
6017        break;
6018      case PPC::CRORC:
6019        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6020          // orc(x, x) = 1
6021          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6022                                           MVT::i1);
6023        else if (Op1Set || Op2Unset)
6024          // orc(1, y) = orc(x, 0) = 1
6025          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6026                                           MVT::i1);
6027        else if (Op2Set)
6028          // orc(x, 1) = x
6029          ResNode = MachineNode->getOperand(0).getNode();
6030        else if (Op1Unset)
6031          // orc(0, y) = ~y
6032          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6033                                           MVT::i1, MachineNode->getOperand(1),
6034                                           MachineNode->getOperand(1));
6035        else if (Op1Not)
6036          // orc(~x, y) = ~(x & y) = nand(x, y)
6037          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6038                                           MVT::i1, MachineNode->getOperand(0).
6039                                                      getOperand(0),
6040                                           MachineNode->getOperand(1));
6041        else if (Op2Not)
6042          // orc(x, ~y) = x | y
6043          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
6044                                           MVT::i1, MachineNode->getOperand(0),
6045                                           MachineNode->getOperand(1).
6046                                             getOperand(0));
6047        else if (AllUsersSelectZero(MachineNode)) {
6048          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6049                                           MVT::i1, MachineNode->getOperand(1),
6050                                           MachineNode->getOperand(0));
6051          SelectSwap = true;
6052        }
6053        break;
6054      case PPC::SELECT_I4:
6055      case PPC::SELECT_I8:
6056      case PPC::SELECT_F4:
6057      case PPC::SELECT_F8:
6058      case PPC::SELECT_QFRC:
6059      case PPC::SELECT_QSRC:
6060      case PPC::SELECT_QBRC:
6061      case PPC::SELECT_SPE:
6062      case PPC::SELECT_SPE4:
6063      case PPC::SELECT_VRRC:
6064      case PPC::SELECT_VSFRC:
6065      case PPC::SELECT_VSSRC:
6066      case PPC::SELECT_VSRC:
6067        if (Op1Set)
6068          ResNode = MachineNode->getOperand(1).getNode();
6069        else if (Op1Unset)
6070          ResNode = MachineNode->getOperand(2).getNode();
6071        else if (Op1Not)
6072          ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
6073                                           SDLoc(MachineNode),
6074                                           MachineNode->getValueType(0),
6075                                           MachineNode->getOperand(0).
6076                                             getOperand(0),
6077                                           MachineNode->getOperand(2),
6078                                           MachineNode->getOperand(1));
6079        break;
6080      case PPC::BC:
6081      case PPC::BCn:
6082        if (Op1Not)
6083          ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
6084                                                               PPC::BC,
6085                                           SDLoc(MachineNode),
6086                                           MVT::Other,
6087                                           MachineNode->getOperand(0).
6088                                             getOperand(0),
6089                                           MachineNode->getOperand(1),
6090                                           MachineNode->getOperand(2));
6091        // FIXME: Handle Op1Set, Op1Unset here too.
6092        break;
6093      }
6094
6095      // If we're inverting this node because it is used only by selects that
6096      // we'd like to swap, then swap the selects before the node replacement.
6097      if (SelectSwap)
6098        SwapAllSelectUsers(MachineNode);
6099
6100      if (ResNode != MachineNode) {
6101        LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
6102        LLVM_DEBUG(MachineNode->dump(CurDAG));
6103        LLVM_DEBUG(dbgs() << "\nNew: ");
6104        LLVM_DEBUG(ResNode->dump(CurDAG));
6105        LLVM_DEBUG(dbgs() << "\n");
6106
6107        ReplaceUses(MachineNode, ResNode);
6108        IsModified = true;
6109      }
6110    }
6111    if (IsModified)
6112      CurDAG->RemoveDeadNodes();
6113  } while (IsModified);
6114}
6115
6116// Gather the set of 32-bit operations that are known to have their
6117// higher-order 32 bits zero, where ToPromote contains all such operations.
6118static bool PeepholePPC64ZExtGather(SDValue Op32,
6119                                    SmallPtrSetImpl<SDNode *> &ToPromote) {
6120  if (!Op32.isMachineOpcode())
6121    return false;
6122
6123  // First, check for the "frontier" instructions (those that will clear the
6124  // higher-order 32 bits.
6125
6126  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
6127  // around. If it does not, then these instructions will clear the
6128  // higher-order bits.
6129  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
6130       Op32.getMachineOpcode() == PPC::RLWNM) &&
6131      Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
6132    ToPromote.insert(Op32.getNode());
6133    return true;
6134  }
6135
6136  // SLW and SRW always clear the higher-order bits.
6137  if (Op32.getMachineOpcode() == PPC::SLW ||
6138      Op32.getMachineOpcode() == PPC::SRW) {
6139    ToPromote.insert(Op32.getNode());
6140    return true;
6141  }
6142
6143  // For LI and LIS, we need the immediate to be positive (so that it is not
6144  // sign extended).
6145  if (Op32.getMachineOpcode() == PPC::LI ||
6146      Op32.getMachineOpcode() == PPC::LIS) {
6147    if (!isUInt<15>(Op32.getConstantOperandVal(0)))
6148      return false;
6149
6150    ToPromote.insert(Op32.getNode());
6151    return true;
6152  }
6153
6154  // LHBRX and LWBRX always clear the higher-order bits.
6155  if (Op32.getMachineOpcode() == PPC::LHBRX ||
6156      Op32.getMachineOpcode() == PPC::LWBRX) {
6157    ToPromote.insert(Op32.getNode());
6158    return true;
6159  }
6160
6161  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
6162  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
6163      Op32.getMachineOpcode() == PPC::CNTTZW) {
6164    ToPromote.insert(Op32.getNode());
6165    return true;
6166  }
6167
6168  // Next, check for those instructions we can look through.
6169
6170  // Assuming the mask does not wrap around, then the higher-order bits are
6171  // taken directly from the first operand.
6172  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
6173      Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
6174    SmallPtrSet<SDNode *, 16> ToPromote1;
6175    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
6176      return false;
6177
6178    ToPromote.insert(Op32.getNode());
6179    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6180    return true;
6181  }
6182
6183  // For OR, the higher-order bits are zero if that is true for both operands.
6184  // For SELECT_I4, the same is true (but the relevant operand numbers are
6185  // shifted by 1).
6186  if (Op32.getMachineOpcode() == PPC::OR ||
6187      Op32.getMachineOpcode() == PPC::SELECT_I4) {
6188    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
6189    SmallPtrSet<SDNode *, 16> ToPromote1;
6190    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
6191      return false;
6192    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
6193      return false;
6194
6195    ToPromote.insert(Op32.getNode());
6196    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6197    return true;
6198  }
6199
6200  // For ORI and ORIS, we need the higher-order bits of the first operand to be
6201  // zero, and also for the constant to be positive (so that it is not sign
6202  // extended).
6203  if (Op32.getMachineOpcode() == PPC::ORI ||
6204      Op32.getMachineOpcode() == PPC::ORIS) {
6205    SmallPtrSet<SDNode *, 16> ToPromote1;
6206    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
6207      return false;
6208    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
6209      return false;
6210
6211    ToPromote.insert(Op32.getNode());
6212    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6213    return true;
6214  }
6215
6216  // The higher-order bits of AND are zero if that is true for at least one of
6217  // the operands.
6218  if (Op32.getMachineOpcode() == PPC::AND) {
6219    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
6220    bool Op0OK =
6221      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
6222    bool Op1OK =
6223      PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
6224    if (!Op0OK && !Op1OK)
6225      return false;
6226
6227    ToPromote.insert(Op32.getNode());
6228
6229    if (Op0OK)
6230      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6231
6232    if (Op1OK)
6233      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
6234
6235    return true;
6236  }
6237
6238  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
6239  // of the first operand, or if the second operand is positive (so that it is
6240  // not sign extended).
6241  if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
6242      Op32.getMachineOpcode() == PPC::ANDIS_rec) {
6243    SmallPtrSet<SDNode *, 16> ToPromote1;
6244    bool Op0OK =
6245      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
6246    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
6247    if (!Op0OK && !Op1OK)
6248      return false;
6249
6250    ToPromote.insert(Op32.getNode());
6251
6252    if (Op0OK)
6253      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6254
6255    return true;
6256  }
6257
6258  return false;
6259}
6260
6261void PPCDAGToDAGISel::PeepholePPC64ZExt() {
6262  if (!PPCSubTarget->isPPC64())
6263    return;
6264
6265  // When we zero-extend from i32 to i64, we use a pattern like this:
6266  // def : Pat<(i64 (zext i32:$in)),
6267  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
6268  //                   0, 32)>;
6269  // There are several 32-bit shift/rotate instructions, however, that will
6270  // clear the higher-order bits of their output, rendering the RLDICL
6271  // unnecessary. When that happens, we remove it here, and redefine the
6272  // relevant 32-bit operation to be a 64-bit operation.
6273
6274  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6275
6276  bool MadeChange = false;
6277  while (Position != CurDAG->allnodes_begin()) {
6278    SDNode *N = &*--Position;
6279    // Skip dead nodes and any non-machine opcodes.
6280    if (N->use_empty() || !N->isMachineOpcode())
6281      continue;
6282
6283    if (N->getMachineOpcode() != PPC::RLDICL)
6284      continue;
6285
6286    if (N->getConstantOperandVal(1) != 0 ||
6287        N->getConstantOperandVal(2) != 32)
6288      continue;
6289
6290    SDValue ISR = N->getOperand(0);
6291    if (!ISR.isMachineOpcode() ||
6292        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
6293      continue;
6294
6295    if (!ISR.hasOneUse())
6296      continue;
6297
6298    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
6299      continue;
6300
6301    SDValue IDef = ISR.getOperand(0);
6302    if (!IDef.isMachineOpcode() ||
6303        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
6304      continue;
6305
6306    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
6307    // can get rid of it.
6308
6309    SDValue Op32 = ISR->getOperand(1);
6310    if (!Op32.isMachineOpcode())
6311      continue;
6312
6313    // There are some 32-bit instructions that always clear the high-order 32
6314    // bits, there are also some instructions (like AND) that we can look
6315    // through.
6316    SmallPtrSet<SDNode *, 16> ToPromote;
6317    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
6318      continue;
6319
6320    // If the ToPromote set contains nodes that have uses outside of the set
6321    // (except for the original INSERT_SUBREG), then abort the transformation.
6322    bool OutsideUse = false;
6323    for (SDNode *PN : ToPromote) {
6324      for (SDNode *UN : PN->uses()) {
6325        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
6326          OutsideUse = true;
6327          break;
6328        }
6329      }
6330
6331      if (OutsideUse)
6332        break;
6333    }
6334    if (OutsideUse)
6335      continue;
6336
6337    MadeChange = true;
6338
6339    // We now know that this zero extension can be removed by promoting to
6340    // nodes in ToPromote to 64-bit operations, where for operations in the
6341    // frontier of the set, we need to insert INSERT_SUBREGs for their
6342    // operands.
6343    for (SDNode *PN : ToPromote) {
6344      unsigned NewOpcode;
6345      switch (PN->getMachineOpcode()) {
6346      default:
6347        llvm_unreachable("Don't know the 64-bit variant of this instruction");
6348      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
6349      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
6350      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
6351      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
6352      case PPC::LI:        NewOpcode = PPC::LI8; break;
6353      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
6354      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
6355      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
6356      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
6357      case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
6358      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
6359      case PPC::OR:        NewOpcode = PPC::OR8; break;
6360      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
6361      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
6362      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
6363      case PPC::AND:       NewOpcode = PPC::AND8; break;
6364      case PPC::ANDI_rec:
6365        NewOpcode = PPC::ANDI8_rec;
6366        break;
6367      case PPC::ANDIS_rec:
6368        NewOpcode = PPC::ANDIS8_rec;
6369        break;
6370      }
6371
6372      // Note: During the replacement process, the nodes will be in an
6373      // inconsistent state (some instructions will have operands with values
6374      // of the wrong type). Once done, however, everything should be right
6375      // again.
6376
6377      SmallVector<SDValue, 4> Ops;
6378      for (const SDValue &V : PN->ops()) {
6379        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
6380            !isa<ConstantSDNode>(V)) {
6381          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
6382          SDNode *ReplOp =
6383            CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
6384                                   ISR.getNode()->getVTList(), ReplOpOps);
6385          Ops.push_back(SDValue(ReplOp, 0));
6386        } else {
6387          Ops.push_back(V);
6388        }
6389      }
6390
6391      // Because all to-be-promoted nodes only have users that are other
6392      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
6393      // the i32 result value type with i64.
6394
6395      SmallVector<EVT, 2> NewVTs;
6396      SDVTList VTs = PN->getVTList();
6397      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
6398        if (VTs.VTs[i] == MVT::i32)
6399          NewVTs.push_back(MVT::i64);
6400        else
6401          NewVTs.push_back(VTs.VTs[i]);
6402
6403      LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
6404      LLVM_DEBUG(PN->dump(CurDAG));
6405
6406      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
6407
6408      LLVM_DEBUG(dbgs() << "\nNew: ");
6409      LLVM_DEBUG(PN->dump(CurDAG));
6410      LLVM_DEBUG(dbgs() << "\n");
6411    }
6412
6413    // Now we replace the original zero extend and its associated INSERT_SUBREG
6414    // with the value feeding the INSERT_SUBREG (which has now been promoted to
6415    // return an i64).
6416
6417    LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
6418    LLVM_DEBUG(N->dump(CurDAG));
6419    LLVM_DEBUG(dbgs() << "\nNew: ");
6420    LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
6421    LLVM_DEBUG(dbgs() << "\n");
6422
6423    ReplaceUses(N, Op32.getNode());
6424  }
6425
6426  if (MadeChange)
6427    CurDAG->RemoveDeadNodes();
6428}
6429
6430void PPCDAGToDAGISel::PeepholePPC64() {
6431  // These optimizations are currently supported only for 64-bit SVR4.
6432  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
6433    return;
6434
6435  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6436
6437  while (Position != CurDAG->allnodes_begin()) {
6438    SDNode *N = &*--Position;
6439    // Skip dead nodes and any non-machine opcodes.
6440    if (N->use_empty() || !N->isMachineOpcode())
6441      continue;
6442
6443    unsigned FirstOp;
6444    unsigned StorageOpcode = N->getMachineOpcode();
6445    bool RequiresMod4Offset = false;
6446
6447    switch (StorageOpcode) {
6448    default: continue;
6449
6450    case PPC::LWA:
6451    case PPC::LD:
6452    case PPC::DFLOADf64:
6453    case PPC::DFLOADf32:
6454      RequiresMod4Offset = true;
6455      LLVM_FALLTHROUGH;
6456    case PPC::LBZ:
6457    case PPC::LBZ8:
6458    case PPC::LFD:
6459    case PPC::LFS:
6460    case PPC::LHA:
6461    case PPC::LHA8:
6462    case PPC::LHZ:
6463    case PPC::LHZ8:
6464    case PPC::LWZ:
6465    case PPC::LWZ8:
6466      FirstOp = 0;
6467      break;
6468
6469    case PPC::STD:
6470    case PPC::DFSTOREf64:
6471    case PPC::DFSTOREf32:
6472      RequiresMod4Offset = true;
6473      LLVM_FALLTHROUGH;
6474    case PPC::STB:
6475    case PPC::STB8:
6476    case PPC::STFD:
6477    case PPC::STFS:
6478    case PPC::STH:
6479    case PPC::STH8:
6480    case PPC::STW:
6481    case PPC::STW8:
6482      FirstOp = 1;
6483      break;
6484    }
6485
6486    // If this is a load or store with a zero offset, or within the alignment,
6487    // we may be able to fold an add-immediate into the memory operation.
6488    // The check against alignment is below, as it can't occur until we check
6489    // the arguments to N
6490    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
6491      continue;
6492
6493    SDValue Base = N->getOperand(FirstOp + 1);
6494    if (!Base.isMachineOpcode())
6495      continue;
6496
6497    unsigned Flags = 0;
6498    bool ReplaceFlags = true;
6499
6500    // When the feeding operation is an add-immediate of some sort,
6501    // determine whether we need to add relocation information to the
6502    // target flags on the immediate operand when we fold it into the
6503    // load instruction.
6504    //
6505    // For something like ADDItocL, the relocation information is
6506    // inferred from the opcode; when we process it in the AsmPrinter,
6507    // we add the necessary relocation there.  A load, though, can receive
6508    // relocation from various flavors of ADDIxxx, so we need to carry
6509    // the relocation information in the target flags.
6510    switch (Base.getMachineOpcode()) {
6511    default: continue;
6512
6513    case PPC::ADDI8:
6514    case PPC::ADDI:
6515      // In some cases (such as TLS) the relocation information
6516      // is already in place on the operand, so copying the operand
6517      // is sufficient.
6518      ReplaceFlags = false;
6519      // For these cases, the immediate may not be divisible by 4, in
6520      // which case the fold is illegal for DS-form instructions.  (The
6521      // other cases provide aligned addresses and are always safe.)
6522      if (RequiresMod4Offset &&
6523          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
6524           Base.getConstantOperandVal(1) % 4 != 0))
6525        continue;
6526      break;
6527    case PPC::ADDIdtprelL:
6528      Flags = PPCII::MO_DTPREL_LO;
6529      break;
6530    case PPC::ADDItlsldL:
6531      Flags = PPCII::MO_TLSLD_LO;
6532      break;
6533    case PPC::ADDItocL:
6534      Flags = PPCII::MO_TOC_LO;
6535      break;
6536    }
6537
6538    SDValue ImmOpnd = Base.getOperand(1);
6539
6540    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
6541    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
6542    // we might have needed different @ha relocation values for the offset
6543    // pointers).
6544    int MaxDisplacement = 7;
6545    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6546      const GlobalValue *GV = GA->getGlobal();
6547      MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
6548    }
6549
6550    bool UpdateHBase = false;
6551    SDValue HBase = Base.getOperand(0);
6552
6553    int Offset = N->getConstantOperandVal(FirstOp);
6554    if (ReplaceFlags) {
6555      if (Offset < 0 || Offset > MaxDisplacement) {
6556        // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
6557        // one use, then we can do this for any offset, we just need to also
6558        // update the offset (i.e. the symbol addend) on the addis also.
6559        if (Base.getMachineOpcode() != PPC::ADDItocL)
6560          continue;
6561
6562        if (!HBase.isMachineOpcode() ||
6563            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
6564          continue;
6565
6566        if (!Base.hasOneUse() || !HBase.hasOneUse())
6567          continue;
6568
6569        SDValue HImmOpnd = HBase.getOperand(1);
6570        if (HImmOpnd != ImmOpnd)
6571          continue;
6572
6573        UpdateHBase = true;
6574      }
6575    } else {
6576      // If we're directly folding the addend from an addi instruction, then:
6577      //  1. In general, the offset on the memory access must be zero.
6578      //  2. If the addend is a constant, then it can be combined with a
6579      //     non-zero offset, but only if the result meets the encoding
6580      //     requirements.
6581      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
6582        Offset += C->getSExtValue();
6583
6584        if (RequiresMod4Offset && (Offset % 4) != 0)
6585          continue;
6586
6587        if (!isInt<16>(Offset))
6588          continue;
6589
6590        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
6591                                            ImmOpnd.getValueType());
6592      } else if (Offset != 0) {
6593        continue;
6594      }
6595    }
6596
6597    // We found an opportunity.  Reverse the operands from the add
6598    // immediate and substitute them into the load or store.  If
6599    // needed, update the target flags for the immediate operand to
6600    // reflect the necessary relocation information.
6601    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
6602    LLVM_DEBUG(Base->dump(CurDAG));
6603    LLVM_DEBUG(dbgs() << "\nN: ");
6604    LLVM_DEBUG(N->dump(CurDAG));
6605    LLVM_DEBUG(dbgs() << "\n");
6606
6607    // If the relocation information isn't already present on the
6608    // immediate operand, add it now.
6609    if (ReplaceFlags) {
6610      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6611        SDLoc dl(GA);
6612        const GlobalValue *GV = GA->getGlobal();
6613        // We can't perform this optimization for data whose alignment
6614        // is insufficient for the instruction encoding.
6615        if (GV->getAlignment() < 4 &&
6616            (RequiresMod4Offset || (Offset % 4) != 0)) {
6617          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
6618          continue;
6619        }
6620        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
6621      } else if (ConstantPoolSDNode *CP =
6622                 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
6623        const Constant *C = CP->getConstVal();
6624        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
6625                                                CP->getAlignment(),
6626                                                Offset, Flags);
6627      }
6628    }
6629
6630    if (FirstOp == 1) // Store
6631      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
6632                                       Base.getOperand(0), N->getOperand(3));
6633    else // Load
6634      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
6635                                       N->getOperand(2));
6636
6637    if (UpdateHBase)
6638      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
6639                                       ImmOpnd);
6640
6641    // The add-immediate may now be dead, in which case remove it.
6642    if (Base.getNode()->use_empty())
6643      CurDAG->RemoveDeadNode(Base.getNode());
6644  }
6645}
6646
6647/// createPPCISelDag - This pass converts a legalized DAG into a
6648/// PowerPC-specific DAG, ready for instruction scheduling.
6649///
6650FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
6651                                     CodeGenOpt::Level OptLevel) {
6652  return new PPCDAGToDAGISel(TM, OptLevel);
6653}
6654