1//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVInstrInfo.h"
14#include "MCTargetDesc/RISCVMatInt.h"
15#include "RISCV.h"
16#include "RISCVMachineFunctionInfo.h"
17#include "RISCVSubtarget.h"
18#include "RISCVTargetMachine.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Analysis/MemoryLocation.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/CodeGen/LiveIntervals.h"
24#include "llvm/CodeGen/LiveVariables.h"
25#include "llvm/CodeGen/MachineCombinerPattern.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/MachineTraceMetrics.h"
30#include "llvm/CodeGen/RegisterScavenging.h"
31#include "llvm/CodeGen/StackMaps.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/MC/MCInstBuilder.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/ErrorHandling.h"
36
37using namespace llvm;
38
39#define GEN_CHECK_COMPRESS_INSTR
40#include "RISCVGenCompressInstEmitter.inc"
41
42#define GET_INSTRINFO_CTOR_DTOR
43#define GET_INSTRINFO_NAMED_OPS
44#include "RISCVGenInstrInfo.inc"
45
46static cl::opt<bool> PreferWholeRegisterMove(
47    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
48    cl::desc("Prefer whole register move for vector registers."));
49
50static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
51    "riscv-force-machine-combiner-strategy", cl::Hidden,
52    cl::desc("Force machine combiner to use a specific strategy for machine "
53             "trace metrics evaluation."),
54    cl::init(MachineTraceStrategy::TS_NumStrategies),
55    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
56                          "Local strategy."),
57               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
58                          "MinInstrCount strategy.")));
59
60namespace llvm::RISCVVPseudosTable {
61
62using namespace RISCV;
63
64#define GET_RISCVVPseudosTable_IMPL
65#include "RISCVGenSearchableTables.inc"
66
67} // namespace llvm::RISCVVPseudosTable
68
69RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
70    : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
71      STI(STI) {}
72
73MCInst RISCVInstrInfo::getNop() const {
74  if (STI.hasStdExtCOrZca())
75    return MCInstBuilder(RISCV::C_NOP);
76  return MCInstBuilder(RISCV::ADDI)
77      .addReg(RISCV::X0)
78      .addReg(RISCV::X0)
79      .addImm(0);
80}
81
82unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
83                                             int &FrameIndex) const {
84  unsigned Dummy;
85  return isLoadFromStackSlot(MI, FrameIndex, Dummy);
86}
87
88unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
89                                             int &FrameIndex,
90                                             unsigned &MemBytes) const {
91  switch (MI.getOpcode()) {
92  default:
93    return 0;
94  case RISCV::LB:
95  case RISCV::LBU:
96    MemBytes = 1;
97    break;
98  case RISCV::LH:
99  case RISCV::LHU:
100  case RISCV::FLH:
101    MemBytes = 2;
102    break;
103  case RISCV::LW:
104  case RISCV::FLW:
105  case RISCV::LWU:
106    MemBytes = 4;
107    break;
108  case RISCV::LD:
109  case RISCV::FLD:
110    MemBytes = 8;
111    break;
112  }
113
114  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
115      MI.getOperand(2).getImm() == 0) {
116    FrameIndex = MI.getOperand(1).getIndex();
117    return MI.getOperand(0).getReg();
118  }
119
120  return 0;
121}
122
123unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
124                                            int &FrameIndex) const {
125  unsigned Dummy;
126  return isStoreToStackSlot(MI, FrameIndex, Dummy);
127}
128
129unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
130                                            int &FrameIndex,
131                                            unsigned &MemBytes) const {
132  switch (MI.getOpcode()) {
133  default:
134    return 0;
135  case RISCV::SB:
136    MemBytes = 1;
137    break;
138  case RISCV::SH:
139  case RISCV::FSH:
140    MemBytes = 2;
141    break;
142  case RISCV::SW:
143  case RISCV::FSW:
144    MemBytes = 4;
145    break;
146  case RISCV::SD:
147  case RISCV::FSD:
148    MemBytes = 8;
149    break;
150  }
151
152  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
153      MI.getOperand(2).getImm() == 0) {
154    FrameIndex = MI.getOperand(1).getIndex();
155    return MI.getOperand(0).getReg();
156  }
157
158  return 0;
159}
160
161static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
162                                        unsigned NumRegs) {
163  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
164}
165
166static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
167                                   const MachineBasicBlock &MBB,
168                                   MachineBasicBlock::const_iterator MBBI,
169                                   MachineBasicBlock::const_iterator &DefMBBI,
170                                   RISCVII::VLMUL LMul) {
171  if (PreferWholeRegisterMove)
172    return false;
173
174  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
175         "Unexpected COPY instruction.");
176  Register SrcReg = MBBI->getOperand(1).getReg();
177  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
178
179  bool FoundDef = false;
180  bool FirstVSetVLI = false;
181  unsigned FirstSEW = 0;
182  while (MBBI != MBB.begin()) {
183    --MBBI;
184    if (MBBI->isMetaInstruction())
185      continue;
186
187    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
188        MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
189        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
190      // There is a vsetvli between COPY and source define instruction.
191      // vy = def_vop ...  (producing instruction)
192      // ...
193      // vsetvli
194      // ...
195      // vx = COPY vy
196      if (!FoundDef) {
197        if (!FirstVSetVLI) {
198          FirstVSetVLI = true;
199          unsigned FirstVType = MBBI->getOperand(2).getImm();
200          RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
201          FirstSEW = RISCVVType::getSEW(FirstVType);
202          // The first encountered vsetvli must have the same lmul as the
203          // register class of COPY.
204          if (FirstLMul != LMul)
205            return false;
206        }
207        // Only permit `vsetvli x0, x0, vtype` between COPY and the source
208        // define instruction.
209        if (MBBI->getOperand(0).getReg() != RISCV::X0)
210          return false;
211        if (MBBI->getOperand(1).isImm())
212          return false;
213        if (MBBI->getOperand(1).getReg() != RISCV::X0)
214          return false;
215        continue;
216      }
217
218      // MBBI is the first vsetvli before the producing instruction.
219      unsigned VType = MBBI->getOperand(2).getImm();
220      // If there is a vsetvli between COPY and the producing instruction.
221      if (FirstVSetVLI) {
222        // If SEW is different, return false.
223        if (RISCVVType::getSEW(VType) != FirstSEW)
224          return false;
225      }
226
227      // If the vsetvli is tail undisturbed, keep the whole register move.
228      if (!RISCVVType::isTailAgnostic(VType))
229        return false;
230
231      // The checking is conservative. We only have register classes for
232      // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
233      // for fractional LMUL operations. However, we could not use the vsetvli
234      // lmul for widening operations. The result of widening operation is
235      // 2 x LMUL.
236      return LMul == RISCVVType::getVLMUL(VType);
237    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
238      return false;
239    } else if (MBBI->getNumDefs()) {
240      // Check all the instructions which will change VL.
241      // For example, vleff has implicit def VL.
242      if (MBBI->modifiesRegister(RISCV::VL))
243        return false;
244
245      // Only converting whole register copies to vmv.v.v when the defining
246      // value appears in the explicit operands.
247      for (const MachineOperand &MO : MBBI->explicit_operands()) {
248        if (!MO.isReg() || !MO.isDef())
249          continue;
250        if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
251          // We only permit the source of COPY has the same LMUL as the defined
252          // operand.
253          // There are cases we need to keep the whole register copy if the LMUL
254          // is different.
255          // For example,
256          // $x0 = PseudoVSETIVLI 4, 73   // vsetivli zero, 4, e16,m2,ta,m
257          // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
258          // # The COPY may be created by vlmul_trunc intrinsic.
259          // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
260          //
261          // After widening, the valid value will be 4 x e32 elements. If we
262          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
263          // FIXME: The COPY of subregister of Zvlsseg register will not be able
264          // to convert to vmv.v.[v|i] under the constraint.
265          if (MO.getReg() != SrcReg)
266            return false;
267
268          // In widening reduction instructions with LMUL_1 input vector case,
269          // only checking the LMUL is insufficient due to reduction result is
270          // always LMUL_1.
271          // For example,
272          // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
273          // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
274          // $v26 = COPY killed renamable $v8
275          // After widening, The valid value will be 1 x e16 elements. If we
276          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
277          uint64_t TSFlags = MBBI->getDesc().TSFlags;
278          if (RISCVII::isRVVWideningReduction(TSFlags))
279            return false;
280
281          // If the producing instruction does not depend on vsetvli, do not
282          // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
283          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
284            return false;
285
286          // Found the definition.
287          FoundDef = true;
288          DefMBBI = MBBI;
289          break;
290        }
291      }
292    }
293  }
294
295  return false;
296}
297
298void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
299                                       MachineBasicBlock::iterator MBBI,
300                                       const DebugLoc &DL, MCRegister DstReg,
301                                       MCRegister SrcReg, bool KillSrc,
302                                       unsigned Opc, unsigned NF) const {
303  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
304
305  RISCVII::VLMUL LMul;
306  unsigned SubRegIdx;
307  unsigned VVOpc, VIOpc;
308  switch (Opc) {
309  default:
310    llvm_unreachable("Impossible LMUL for vector register copy.");
311  case RISCV::VMV1R_V:
312    LMul = RISCVII::LMUL_1;
313    SubRegIdx = RISCV::sub_vrm1_0;
314    VVOpc = RISCV::PseudoVMV_V_V_M1;
315    VIOpc = RISCV::PseudoVMV_V_I_M1;
316    break;
317  case RISCV::VMV2R_V:
318    LMul = RISCVII::LMUL_2;
319    SubRegIdx = RISCV::sub_vrm2_0;
320    VVOpc = RISCV::PseudoVMV_V_V_M2;
321    VIOpc = RISCV::PseudoVMV_V_I_M2;
322    break;
323  case RISCV::VMV4R_V:
324    LMul = RISCVII::LMUL_4;
325    SubRegIdx = RISCV::sub_vrm4_0;
326    VVOpc = RISCV::PseudoVMV_V_V_M4;
327    VIOpc = RISCV::PseudoVMV_V_I_M4;
328    break;
329  case RISCV::VMV8R_V:
330    assert(NF == 1);
331    LMul = RISCVII::LMUL_8;
332    SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0.
333    VVOpc = RISCV::PseudoVMV_V_V_M8;
334    VIOpc = RISCV::PseudoVMV_V_I_M8;
335    break;
336  }
337
338  bool UseVMV_V_V = false;
339  bool UseVMV_V_I = false;
340  MachineBasicBlock::const_iterator DefMBBI;
341  if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
342    UseVMV_V_V = true;
343    Opc = VVOpc;
344
345    if (DefMBBI->getOpcode() == VIOpc) {
346      UseVMV_V_I = true;
347      Opc = VIOpc;
348    }
349  }
350
351  if (NF == 1) {
352    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
353    if (UseVMV_V_V)
354      MIB.addReg(DstReg, RegState::Undef);
355    if (UseVMV_V_I)
356      MIB = MIB.add(DefMBBI->getOperand(2));
357    else
358      MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
359    if (UseVMV_V_V) {
360      const MCInstrDesc &Desc = DefMBBI->getDesc();
361      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
362      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
363      MIB.addImm(0);                                            // tu, mu
364      MIB.addReg(RISCV::VL, RegState::Implicit);
365      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
366    }
367    return;
368  }
369
370  int I = 0, End = NF, Incr = 1;
371  unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
372  unsigned DstEncoding = TRI->getEncodingValue(DstReg);
373  unsigned LMulVal;
374  bool Fractional;
375  std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
376  assert(!Fractional && "It is impossible be fractional lmul here.");
377  if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
378    I = NF - 1;
379    End = -1;
380    Incr = -1;
381  }
382
383  for (; I != End; I += Incr) {
384    auto MIB =
385        BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I));
386    if (UseVMV_V_V)
387      MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef);
388    if (UseVMV_V_I)
389      MIB = MIB.add(DefMBBI->getOperand(2));
390    else
391      MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
392                       getKillRegState(KillSrc));
393    if (UseVMV_V_V) {
394      const MCInstrDesc &Desc = DefMBBI->getDesc();
395      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
396      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
397      MIB.addImm(0);                                            // tu, mu
398      MIB.addReg(RISCV::VL, RegState::Implicit);
399      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
400    }
401  }
402}
403
404void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
405                                 MachineBasicBlock::iterator MBBI,
406                                 const DebugLoc &DL, MCRegister DstReg,
407                                 MCRegister SrcReg, bool KillSrc) const {
408  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
409
410  if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
411    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
412        .addReg(SrcReg, getKillRegState(KillSrc))
413        .addImm(0);
414    return;
415  }
416
417  if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
418    // Emit an ADDI for both parts of GPRPair.
419    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
420            TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
421        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
422                getKillRegState(KillSrc))
423        .addImm(0);
424    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
425            TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
426        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
427                getKillRegState(KillSrc))
428        .addImm(0);
429    return;
430  }
431
432  // Handle copy from csr
433  if (RISCV::VCSRRegClass.contains(SrcReg) &&
434      RISCV::GPRRegClass.contains(DstReg)) {
435    BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
436        .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
437        .addReg(RISCV::X0);
438    return;
439  }
440
441  if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
442    unsigned Opc;
443    if (STI.hasStdExtZfh()) {
444      Opc = RISCV::FSGNJ_H;
445    } else {
446      assert(STI.hasStdExtF() &&
447             (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
448             "Unexpected extensions");
449      // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
450      DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
451                                        &RISCV::FPR32RegClass);
452      SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
453                                        &RISCV::FPR32RegClass);
454      Opc = RISCV::FSGNJ_S;
455    }
456    BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
457        .addReg(SrcReg, getKillRegState(KillSrc))
458        .addReg(SrcReg, getKillRegState(KillSrc));
459    return;
460  }
461
462  if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
463    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
464        .addReg(SrcReg, getKillRegState(KillSrc))
465        .addReg(SrcReg, getKillRegState(KillSrc));
466    return;
467  }
468
469  if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
470    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
471        .addReg(SrcReg, getKillRegState(KillSrc))
472        .addReg(SrcReg, getKillRegState(KillSrc));
473    return;
474  }
475
476  if (RISCV::FPR32RegClass.contains(DstReg) &&
477      RISCV::GPRRegClass.contains(SrcReg)) {
478    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
479        .addReg(SrcReg, getKillRegState(KillSrc));
480    return;
481  }
482
483  if (RISCV::GPRRegClass.contains(DstReg) &&
484      RISCV::FPR32RegClass.contains(SrcReg)) {
485    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
486        .addReg(SrcReg, getKillRegState(KillSrc));
487    return;
488  }
489
490  if (RISCV::FPR64RegClass.contains(DstReg) &&
491      RISCV::GPRRegClass.contains(SrcReg)) {
492    assert(STI.getXLen() == 64 && "Unexpected GPR size");
493    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
494        .addReg(SrcReg, getKillRegState(KillSrc));
495    return;
496  }
497
498  if (RISCV::GPRRegClass.contains(DstReg) &&
499      RISCV::FPR64RegClass.contains(SrcReg)) {
500    assert(STI.getXLen() == 64 && "Unexpected GPR size");
501    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
502        .addReg(SrcReg, getKillRegState(KillSrc));
503    return;
504  }
505
506  // VR->VR copies.
507  if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
508    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V);
509    return;
510  }
511
512  if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
513    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V);
514    return;
515  }
516
517  if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
518    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V);
519    return;
520  }
521
522  if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
523    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V);
524    return;
525  }
526
527  if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
528    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
529                      /*NF=*/2);
530    return;
531  }
532
533  if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
534    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
535                      /*NF=*/2);
536    return;
537  }
538
539  if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
540    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V,
541                      /*NF=*/2);
542    return;
543  }
544
545  if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
546    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
547                      /*NF=*/3);
548    return;
549  }
550
551  if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
552    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
553                      /*NF=*/3);
554    return;
555  }
556
557  if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
558    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
559                      /*NF=*/4);
560    return;
561  }
562
563  if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
564    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
565                      /*NF=*/4);
566    return;
567  }
568
569  if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
570    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
571                      /*NF=*/5);
572    return;
573  }
574
575  if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
576    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
577                      /*NF=*/6);
578    return;
579  }
580
581  if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
582    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
583                      /*NF=*/7);
584    return;
585  }
586
587  if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
588    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
589                      /*NF=*/8);
590    return;
591  }
592
593  llvm_unreachable("Impossible reg-to-reg copy");
594}
595
596void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
597                                         MachineBasicBlock::iterator I,
598                                         Register SrcReg, bool IsKill, int FI,
599                                         const TargetRegisterClass *RC,
600                                         const TargetRegisterInfo *TRI,
601                                         Register VReg) const {
602  MachineFunction *MF = MBB.getParent();
603  MachineFrameInfo &MFI = MF->getFrameInfo();
604
605  unsigned Opcode;
606  bool IsScalableVector = true;
607  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
608    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
609             RISCV::SW : RISCV::SD;
610    IsScalableVector = false;
611  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
612    Opcode = RISCV::PseudoRV32ZdinxSD;
613    IsScalableVector = false;
614  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
615    Opcode = RISCV::FSH;
616    IsScalableVector = false;
617  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
618    Opcode = RISCV::FSW;
619    IsScalableVector = false;
620  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
621    Opcode = RISCV::FSD;
622    IsScalableVector = false;
623  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
624    Opcode = RISCV::VS1R_V;
625  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
626    Opcode = RISCV::VS2R_V;
627  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
628    Opcode = RISCV::VS4R_V;
629  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
630    Opcode = RISCV::VS8R_V;
631  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
632    Opcode = RISCV::PseudoVSPILL2_M1;
633  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
634    Opcode = RISCV::PseudoVSPILL2_M2;
635  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
636    Opcode = RISCV::PseudoVSPILL2_M4;
637  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
638    Opcode = RISCV::PseudoVSPILL3_M1;
639  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
640    Opcode = RISCV::PseudoVSPILL3_M2;
641  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
642    Opcode = RISCV::PseudoVSPILL4_M1;
643  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
644    Opcode = RISCV::PseudoVSPILL4_M2;
645  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
646    Opcode = RISCV::PseudoVSPILL5_M1;
647  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
648    Opcode = RISCV::PseudoVSPILL6_M1;
649  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
650    Opcode = RISCV::PseudoVSPILL7_M1;
651  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
652    Opcode = RISCV::PseudoVSPILL8_M1;
653  else
654    llvm_unreachable("Can't store this register to stack slot");
655
656  if (IsScalableVector) {
657    MachineMemOperand *MMO = MF->getMachineMemOperand(
658        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
659        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
660
661    MFI.setStackID(FI, TargetStackID::ScalableVector);
662    BuildMI(MBB, I, DebugLoc(), get(Opcode))
663        .addReg(SrcReg, getKillRegState(IsKill))
664        .addFrameIndex(FI)
665        .addMemOperand(MMO);
666  } else {
667    MachineMemOperand *MMO = MF->getMachineMemOperand(
668        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
669        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
670
671    BuildMI(MBB, I, DebugLoc(), get(Opcode))
672        .addReg(SrcReg, getKillRegState(IsKill))
673        .addFrameIndex(FI)
674        .addImm(0)
675        .addMemOperand(MMO);
676  }
677}
678
679void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
680                                          MachineBasicBlock::iterator I,
681                                          Register DstReg, int FI,
682                                          const TargetRegisterClass *RC,
683                                          const TargetRegisterInfo *TRI,
684                                          Register VReg) const {
685  MachineFunction *MF = MBB.getParent();
686  MachineFrameInfo &MFI = MF->getFrameInfo();
687
688  unsigned Opcode;
689  bool IsScalableVector = true;
690  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
691    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
692             RISCV::LW : RISCV::LD;
693    IsScalableVector = false;
694  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
695    Opcode = RISCV::PseudoRV32ZdinxLD;
696    IsScalableVector = false;
697  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
698    Opcode = RISCV::FLH;
699    IsScalableVector = false;
700  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
701    Opcode = RISCV::FLW;
702    IsScalableVector = false;
703  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
704    Opcode = RISCV::FLD;
705    IsScalableVector = false;
706  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
707    Opcode = RISCV::VL1RE8_V;
708  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
709    Opcode = RISCV::VL2RE8_V;
710  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
711    Opcode = RISCV::VL4RE8_V;
712  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
713    Opcode = RISCV::VL8RE8_V;
714  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
715    Opcode = RISCV::PseudoVRELOAD2_M1;
716  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
717    Opcode = RISCV::PseudoVRELOAD2_M2;
718  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
719    Opcode = RISCV::PseudoVRELOAD2_M4;
720  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
721    Opcode = RISCV::PseudoVRELOAD3_M1;
722  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
723    Opcode = RISCV::PseudoVRELOAD3_M2;
724  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
725    Opcode = RISCV::PseudoVRELOAD4_M1;
726  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
727    Opcode = RISCV::PseudoVRELOAD4_M2;
728  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
729    Opcode = RISCV::PseudoVRELOAD5_M1;
730  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
731    Opcode = RISCV::PseudoVRELOAD6_M1;
732  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
733    Opcode = RISCV::PseudoVRELOAD7_M1;
734  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
735    Opcode = RISCV::PseudoVRELOAD8_M1;
736  else
737    llvm_unreachable("Can't load this register from stack slot");
738
739  if (IsScalableVector) {
740    MachineMemOperand *MMO = MF->getMachineMemOperand(
741        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
742        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
743
744    MFI.setStackID(FI, TargetStackID::ScalableVector);
745    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
746        .addFrameIndex(FI)
747        .addMemOperand(MMO);
748  } else {
749    MachineMemOperand *MMO = MF->getMachineMemOperand(
750        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
751        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
752
753    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
754        .addFrameIndex(FI)
755        .addImm(0)
756        .addMemOperand(MMO);
757  }
758}
759
760MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
761    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
762    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
763    VirtRegMap *VRM) const {
764  const MachineFrameInfo &MFI = MF.getFrameInfo();
765
766  // The below optimizations narrow the load so they are only valid for little
767  // endian.
768  // TODO: Support big endian by adding an offset into the frame object?
769  if (MF.getDataLayout().isBigEndian())
770    return nullptr;
771
772  // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
773  if (Ops.size() != 1 || Ops[0] != 1)
774   return nullptr;
775
776  unsigned LoadOpc;
777  switch (MI.getOpcode()) {
778  default:
779    if (RISCV::isSEXT_W(MI)) {
780      LoadOpc = RISCV::LW;
781      break;
782    }
783    if (RISCV::isZEXT_W(MI)) {
784      LoadOpc = RISCV::LWU;
785      break;
786    }
787    if (RISCV::isZEXT_B(MI)) {
788      LoadOpc = RISCV::LBU;
789      break;
790    }
791    return nullptr;
792  case RISCV::SEXT_H:
793    LoadOpc = RISCV::LH;
794    break;
795  case RISCV::SEXT_B:
796    LoadOpc = RISCV::LB;
797    break;
798  case RISCV::ZEXT_H_RV32:
799  case RISCV::ZEXT_H_RV64:
800    LoadOpc = RISCV::LHU;
801    break;
802  }
803
804  MachineMemOperand *MMO = MF.getMachineMemOperand(
805      MachinePointerInfo::getFixedStack(MF, FrameIndex),
806      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
807      MFI.getObjectAlign(FrameIndex));
808
809  Register DstReg = MI.getOperand(0).getReg();
810  return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
811                 DstReg)
812      .addFrameIndex(FrameIndex)
813      .addImm(0)
814      .addMemOperand(MMO);
815}
816
817void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
818                            MachineBasicBlock::iterator MBBI,
819                            const DebugLoc &DL, Register DstReg, uint64_t Val,
820                            MachineInstr::MIFlag Flag, bool DstRenamable,
821                            bool DstIsDead) const {
822  Register SrcReg = RISCV::X0;
823
824  if (!STI.is64Bit() && !isInt<32>(Val))
825    report_fatal_error("Should only materialize 32-bit constants for RV32");
826
827  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
828  assert(!Seq.empty());
829
830  bool SrcRenamable = false;
831  unsigned Num = 0;
832
833  for (const RISCVMatInt::Inst &Inst : Seq) {
834    bool LastItem = ++Num == Seq.size();
835    unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
836                           getRenamableRegState(DstRenamable);
837    unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
838                           getRenamableRegState(SrcRenamable);
839    switch (Inst.getOpndKind()) {
840    case RISCVMatInt::Imm:
841      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
842          .addReg(DstReg, RegState::Define | DstRegState)
843          .addImm(Inst.getImm())
844          .setMIFlag(Flag);
845      break;
846    case RISCVMatInt::RegX0:
847      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
848          .addReg(DstReg, RegState::Define | DstRegState)
849          .addReg(SrcReg, SrcRegState)
850          .addReg(RISCV::X0)
851          .setMIFlag(Flag);
852      break;
853    case RISCVMatInt::RegReg:
854      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
855          .addReg(DstReg, RegState::Define | DstRegState)
856          .addReg(SrcReg, SrcRegState)
857          .addReg(SrcReg, SrcRegState)
858          .setMIFlag(Flag);
859      break;
860    case RISCVMatInt::RegImm:
861      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
862          .addReg(DstReg, RegState::Define | DstRegState)
863          .addReg(SrcReg, SrcRegState)
864          .addImm(Inst.getImm())
865          .setMIFlag(Flag);
866      break;
867    }
868
869    // Only the first instruction has X0 as its source.
870    SrcReg = DstReg;
871    SrcRenamable = DstRenamable;
872  }
873}
874
875static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
876  switch (Opc) {
877  default:
878    return RISCVCC::COND_INVALID;
879  case RISCV::BEQ:
880    return RISCVCC::COND_EQ;
881  case RISCV::BNE:
882    return RISCVCC::COND_NE;
883  case RISCV::BLT:
884    return RISCVCC::COND_LT;
885  case RISCV::BGE:
886    return RISCVCC::COND_GE;
887  case RISCV::BLTU:
888    return RISCVCC::COND_LTU;
889  case RISCV::BGEU:
890    return RISCVCC::COND_GEU;
891  }
892}
893
894// The contents of values added to Cond are not examined outside of
895// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
896// push BranchOpcode, Reg1, Reg2.
897static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
898                            SmallVectorImpl<MachineOperand> &Cond) {
899  // Block ends with fall-through condbranch.
900  assert(LastInst.getDesc().isConditionalBranch() &&
901         "Unknown conditional branch");
902  Target = LastInst.getOperand(2).getMBB();
903  unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
904  Cond.push_back(MachineOperand::CreateImm(CC));
905  Cond.push_back(LastInst.getOperand(0));
906  Cond.push_back(LastInst.getOperand(1));
907}
908
909unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
910  switch (CC) {
911  default:
912    llvm_unreachable("Unknown condition code!");
913  case RISCVCC::COND_EQ:
914    return RISCV::BEQ;
915  case RISCVCC::COND_NE:
916    return RISCV::BNE;
917  case RISCVCC::COND_LT:
918    return RISCV::BLT;
919  case RISCVCC::COND_GE:
920    return RISCV::BGE;
921  case RISCVCC::COND_LTU:
922    return RISCV::BLTU;
923  case RISCVCC::COND_GEU:
924    return RISCV::BGEU;
925  }
926}
927
928const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
929  return get(RISCVCC::getBrCond(CC));
930}
931
932RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
933  switch (CC) {
934  default:
935    llvm_unreachable("Unrecognized conditional branch");
936  case RISCVCC::COND_EQ:
937    return RISCVCC::COND_NE;
938  case RISCVCC::COND_NE:
939    return RISCVCC::COND_EQ;
940  case RISCVCC::COND_LT:
941    return RISCVCC::COND_GE;
942  case RISCVCC::COND_GE:
943    return RISCVCC::COND_LT;
944  case RISCVCC::COND_LTU:
945    return RISCVCC::COND_GEU;
946  case RISCVCC::COND_GEU:
947    return RISCVCC::COND_LTU;
948  }
949}
950
951bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
952                                   MachineBasicBlock *&TBB,
953                                   MachineBasicBlock *&FBB,
954                                   SmallVectorImpl<MachineOperand> &Cond,
955                                   bool AllowModify) const {
956  TBB = FBB = nullptr;
957  Cond.clear();
958
959  // If the block has no terminators, it just falls into the block after it.
960  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
961  if (I == MBB.end() || !isUnpredicatedTerminator(*I))
962    return false;
963
964  // Count the number of terminators and find the first unconditional or
965  // indirect branch.
966  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
967  int NumTerminators = 0;
968  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
969       J++) {
970    NumTerminators++;
971    if (J->getDesc().isUnconditionalBranch() ||
972        J->getDesc().isIndirectBranch()) {
973      FirstUncondOrIndirectBr = J.getReverse();
974    }
975  }
976
977  // If AllowModify is true, we can erase any terminators after
978  // FirstUncondOrIndirectBR.
979  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
980    while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
981      std::next(FirstUncondOrIndirectBr)->eraseFromParent();
982      NumTerminators--;
983    }
984    I = FirstUncondOrIndirectBr;
985  }
986
987  // We can't handle blocks that end in an indirect branch.
988  if (I->getDesc().isIndirectBranch())
989    return true;
990
991  // We can't handle Generic branch opcodes from Global ISel.
992  if (I->isPreISelOpcode())
993    return true;
994
995  // We can't handle blocks with more than 2 terminators.
996  if (NumTerminators > 2)
997    return true;
998
999  // Handle a single unconditional branch.
1000  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
1001    TBB = getBranchDestBlock(*I);
1002    return false;
1003  }
1004
1005  // Handle a single conditional branch.
1006  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
1007    parseCondBranch(*I, TBB, Cond);
1008    return false;
1009  }
1010
1011  // Handle a conditional branch followed by an unconditional branch.
1012  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
1013      I->getDesc().isUnconditionalBranch()) {
1014    parseCondBranch(*std::prev(I), TBB, Cond);
1015    FBB = getBranchDestBlock(*I);
1016    return false;
1017  }
1018
1019  // Otherwise, we can't handle this.
1020  return true;
1021}
1022
1023unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
1024                                      int *BytesRemoved) const {
1025  if (BytesRemoved)
1026    *BytesRemoved = 0;
1027  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1028  if (I == MBB.end())
1029    return 0;
1030
1031  if (!I->getDesc().isUnconditionalBranch() &&
1032      !I->getDesc().isConditionalBranch())
1033    return 0;
1034
1035  // Remove the branch.
1036  if (BytesRemoved)
1037    *BytesRemoved += getInstSizeInBytes(*I);
1038  I->eraseFromParent();
1039
1040  I = MBB.end();
1041
1042  if (I == MBB.begin())
1043    return 1;
1044  --I;
1045  if (!I->getDesc().isConditionalBranch())
1046    return 1;
1047
1048  // Remove the branch.
1049  if (BytesRemoved)
1050    *BytesRemoved += getInstSizeInBytes(*I);
1051  I->eraseFromParent();
1052  return 2;
1053}
1054
1055// Inserts a branch into the end of the specific MachineBasicBlock, returning
1056// the number of instructions inserted.
1057unsigned RISCVInstrInfo::insertBranch(
1058    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
1059    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
1060  if (BytesAdded)
1061    *BytesAdded = 0;
1062
1063  // Shouldn't be a fall through.
1064  assert(TBB && "insertBranch must not be told to insert a fallthrough");
1065  assert((Cond.size() == 3 || Cond.size() == 0) &&
1066         "RISC-V branch conditions have two components!");
1067
1068  // Unconditional branch.
1069  if (Cond.empty()) {
1070    MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
1071    if (BytesAdded)
1072      *BytesAdded += getInstSizeInBytes(MI);
1073    return 1;
1074  }
1075
1076  // Either a one or two-way conditional branch.
1077  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1078  MachineInstr &CondMI =
1079      *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
1080  if (BytesAdded)
1081    *BytesAdded += getInstSizeInBytes(CondMI);
1082
1083  // One-way conditional branch.
1084  if (!FBB)
1085    return 1;
1086
1087  // Two-way conditional branch.
1088  MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
1089  if (BytesAdded)
1090    *BytesAdded += getInstSizeInBytes(MI);
1091  return 2;
1092}
1093
1094void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1095                                          MachineBasicBlock &DestBB,
1096                                          MachineBasicBlock &RestoreBB,
1097                                          const DebugLoc &DL, int64_t BrOffset,
1098                                          RegScavenger *RS) const {
1099  assert(RS && "RegScavenger required for long branching");
1100  assert(MBB.empty() &&
1101         "new block should be inserted for expanding unconditional branch");
1102  assert(MBB.pred_size() == 1);
1103  assert(RestoreBB.empty() &&
1104         "restore block should be inserted for restoring clobbered registers");
1105
1106  MachineFunction *MF = MBB.getParent();
1107  MachineRegisterInfo &MRI = MF->getRegInfo();
1108  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
1109  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1110
1111  if (!isInt<32>(BrOffset))
1112    report_fatal_error(
1113        "Branch offsets outside of the signed 32-bit range not supported");
1114
1115  // FIXME: A virtual register must be used initially, as the register
1116  // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1117  // uses the same workaround).
1118  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
1119  auto II = MBB.end();
1120  // We may also update the jump target to RestoreBB later.
1121  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
1122                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
1123                          .addMBB(&DestBB, RISCVII::MO_CALL);
1124
1125  RS->enterBasicBlockEnd(MBB);
1126  Register TmpGPR =
1127      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
1128                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
1129                                    /*AllowSpill=*/false);
1130  if (TmpGPR != RISCV::NoRegister)
1131    RS->setRegUsed(TmpGPR);
1132  else {
1133    // The case when there is no scavenged register needs special handling.
1134
1135    // Pick s11 because it doesn't make a difference.
1136    TmpGPR = RISCV::X27;
1137
1138    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
1139    if (FrameIndex == -1)
1140      report_fatal_error("underestimated function size");
1141
1142    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
1143                        &RISCV::GPRRegClass, TRI, Register());
1144    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
1145                             /*SpAdj=*/0, /*FIOperandNum=*/1);
1146
1147    MI.getOperand(1).setMBB(&RestoreBB);
1148
1149    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
1150                         &RISCV::GPRRegClass, TRI, Register());
1151    TRI->eliminateFrameIndex(RestoreBB.back(),
1152                             /*SpAdj=*/0, /*FIOperandNum=*/1);
1153  }
1154
1155  MRI.replaceRegWith(ScratchReg, TmpGPR);
1156  MRI.clearVirtRegs();
1157}
1158
1159bool RISCVInstrInfo::reverseBranchCondition(
1160    SmallVectorImpl<MachineOperand> &Cond) const {
1161  assert((Cond.size() == 3) && "Invalid branch condition!");
1162  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1163  Cond[0].setImm(getOppositeBranchCondition(CC));
1164  return false;
1165}
1166
1167bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
1168  MachineBasicBlock *MBB = MI.getParent();
1169  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1170
1171  MachineBasicBlock *TBB, *FBB;
1172  SmallVector<MachineOperand, 3> Cond;
1173  if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
1174    return false;
1175  (void)FBB;
1176
1177  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1178  assert(CC != RISCVCC::COND_INVALID);
1179
1180  if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
1181    return false;
1182
1183  // For two constants C0 and C1 from
1184  // ```
1185  // li Y, C0
1186  // li Z, C1
1187  // ```
1188  // 1. if C1 = C0 + 1
1189  // we can turn:
1190  //  (a) blt Y, X -> bge X, Z
1191  //  (b) bge Y, X -> blt X, Z
1192  //
1193  // 2. if C1 = C0 - 1
1194  // we can turn:
1195  //  (a) blt X, Y -> bge Z, X
1196  //  (b) bge X, Y -> blt Z, X
1197  //
1198  // To make sure this optimization is really beneficial, we only
1199  // optimize for cases where Y had only one use (i.e. only used by the branch).
1200
1201  // Right now we only care about LI (i.e. ADDI x0, imm)
1202  auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
1203    if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1204        MI->getOperand(1).getReg() == RISCV::X0) {
1205      Imm = MI->getOperand(2).getImm();
1206      return true;
1207    }
1208    return false;
1209  };
1210  // Either a load from immediate instruction or X0.
1211  auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
1212    if (!Op.isReg())
1213      return false;
1214    Register Reg = Op.getReg();
1215    if (Reg == RISCV::X0) {
1216      Imm = 0;
1217      return true;
1218    }
1219    if (!Reg.isVirtual())
1220      return false;
1221    return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm);
1222  };
1223
1224  MachineOperand &LHS = MI.getOperand(0);
1225  MachineOperand &RHS = MI.getOperand(1);
1226  // Try to find the register for constant Z; return
1227  // invalid register otherwise.
1228  auto searchConst = [&](int64_t C1) -> Register {
1229    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
1230    auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
1231      int64_t Imm;
1232      return isLoadImm(&I, Imm) && Imm == C1 &&
1233             I.getOperand(0).getReg().isVirtual();
1234    });
1235    if (DefC1 != E)
1236      return DefC1->getOperand(0).getReg();
1237
1238    return Register();
1239  };
1240
1241  bool Modify = false;
1242  int64_t C0;
1243  if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
1244    // Might be case 1.
1245    // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
1246    // to worry about unsigned overflow here)
1247    if (C0 < INT64_MAX)
1248      if (Register RegZ = searchConst(C0 + 1)) {
1249        reverseBranchCondition(Cond);
1250        Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
1251        Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1252        // We might extend the live range of Z, clear its kill flag to
1253        // account for this.
1254        MRI.clearKillFlags(RegZ);
1255        Modify = true;
1256      }
1257  } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
1258    // Might be case 2.
1259    // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
1260    // when C0 is zero.
1261    if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
1262      if (Register RegZ = searchConst(C0 - 1)) {
1263        reverseBranchCondition(Cond);
1264        Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1265        Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
1266        // We might extend the live range of Z, clear its kill flag to
1267        // account for this.
1268        MRI.clearKillFlags(RegZ);
1269        Modify = true;
1270      }
1271  }
1272
1273  if (!Modify)
1274    return false;
1275
1276  // Build the new branch and remove the old one.
1277  BuildMI(*MBB, MI, MI.getDebugLoc(),
1278          getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
1279      .add(Cond[1])
1280      .add(Cond[2])
1281      .addMBB(TBB);
1282  MI.eraseFromParent();
1283
1284  return true;
1285}
1286
1287MachineBasicBlock *
1288RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
1289  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1290  // The branch target is always the last operand.
1291  int NumOp = MI.getNumExplicitOperands();
1292  return MI.getOperand(NumOp - 1).getMBB();
1293}
1294
1295bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1296                                           int64_t BrOffset) const {
1297  unsigned XLen = STI.getXLen();
1298  // Ideally we could determine the supported branch offset from the
1299  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
1300  // PseudoBR.
1301  switch (BranchOp) {
1302  default:
1303    llvm_unreachable("Unexpected opcode!");
1304  case RISCV::BEQ:
1305  case RISCV::BNE:
1306  case RISCV::BLT:
1307  case RISCV::BGE:
1308  case RISCV::BLTU:
1309  case RISCV::BGEU:
1310    return isIntN(13, BrOffset);
1311  case RISCV::JAL:
1312  case RISCV::PseudoBR:
1313    return isIntN(21, BrOffset);
1314  case RISCV::PseudoJump:
1315    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
1316  }
1317}
1318
1319// If the operation has a predicated pseudo instruction, return the pseudo
1320// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1321// TODO: Support more operations.
1322unsigned getPredicatedOpcode(unsigned Opcode) {
1323  switch (Opcode) {
1324  case RISCV::ADD:   return RISCV::PseudoCCADD;   break;
1325  case RISCV::SUB:   return RISCV::PseudoCCSUB;   break;
1326  case RISCV::SLL:   return RISCV::PseudoCCSLL;   break;
1327  case RISCV::SRL:   return RISCV::PseudoCCSRL;   break;
1328  case RISCV::SRA:   return RISCV::PseudoCCSRA;   break;
1329  case RISCV::AND:   return RISCV::PseudoCCAND;   break;
1330  case RISCV::OR:    return RISCV::PseudoCCOR;    break;
1331  case RISCV::XOR:   return RISCV::PseudoCCXOR;   break;
1332
1333  case RISCV::ADDI:  return RISCV::PseudoCCADDI;  break;
1334  case RISCV::SLLI:  return RISCV::PseudoCCSLLI;  break;
1335  case RISCV::SRLI:  return RISCV::PseudoCCSRLI;  break;
1336  case RISCV::SRAI:  return RISCV::PseudoCCSRAI;  break;
1337  case RISCV::ANDI:  return RISCV::PseudoCCANDI;  break;
1338  case RISCV::ORI:   return RISCV::PseudoCCORI;   break;
1339  case RISCV::XORI:  return RISCV::PseudoCCXORI;  break;
1340
1341  case RISCV::ADDW:  return RISCV::PseudoCCADDW;  break;
1342  case RISCV::SUBW:  return RISCV::PseudoCCSUBW;  break;
1343  case RISCV::SLLW:  return RISCV::PseudoCCSLLW;  break;
1344  case RISCV::SRLW:  return RISCV::PseudoCCSRLW;  break;
1345  case RISCV::SRAW:  return RISCV::PseudoCCSRAW;  break;
1346
1347  case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
1348  case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
1349  case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
1350  case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
1351
1352  case RISCV::ANDN:  return RISCV::PseudoCCANDN;  break;
1353  case RISCV::ORN:   return RISCV::PseudoCCORN;   break;
1354  case RISCV::XNOR:  return RISCV::PseudoCCXNOR;  break;
1355  }
1356
1357  return RISCV::INSTRUCTION_LIST_END;
1358}
1359
1360/// Identify instructions that can be folded into a CCMOV instruction, and
1361/// return the defining instruction.
1362static MachineInstr *canFoldAsPredicatedOp(Register Reg,
1363                                           const MachineRegisterInfo &MRI,
1364                                           const TargetInstrInfo *TII) {
1365  if (!Reg.isVirtual())
1366    return nullptr;
1367  if (!MRI.hasOneNonDBGUse(Reg))
1368    return nullptr;
1369  MachineInstr *MI = MRI.getVRegDef(Reg);
1370  if (!MI)
1371    return nullptr;
1372  // Check if MI can be predicated and folded into the CCMOV.
1373  if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
1374    return nullptr;
1375  // Don't predicate li idiom.
1376  if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1377      MI->getOperand(1).getReg() == RISCV::X0)
1378    return nullptr;
1379  // Check if MI has any other defs or physreg uses.
1380  for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
1381    // Reject frame index operands, PEI can't handle the predicated pseudos.
1382    if (MO.isFI() || MO.isCPI() || MO.isJTI())
1383      return nullptr;
1384    if (!MO.isReg())
1385      continue;
1386    // MI can't have any tied operands, that would conflict with predication.
1387    if (MO.isTied())
1388      return nullptr;
1389    if (MO.isDef())
1390      return nullptr;
1391    // Allow constant physregs.
1392    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
1393      return nullptr;
1394  }
1395  bool DontMoveAcrossStores = true;
1396  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
1397    return nullptr;
1398  return MI;
1399}
1400
1401bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
1402                                   SmallVectorImpl<MachineOperand> &Cond,
1403                                   unsigned &TrueOp, unsigned &FalseOp,
1404                                   bool &Optimizable) const {
1405  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1406         "Unknown select instruction");
1407  // CCMOV operands:
1408  // 0: Def.
1409  // 1: LHS of compare.
1410  // 2: RHS of compare.
1411  // 3: Condition code.
1412  // 4: False use.
1413  // 5: True use.
1414  TrueOp = 5;
1415  FalseOp = 4;
1416  Cond.push_back(MI.getOperand(1));
1417  Cond.push_back(MI.getOperand(2));
1418  Cond.push_back(MI.getOperand(3));
1419  // We can only fold when we support short forward branch opt.
1420  Optimizable = STI.hasShortForwardBranchOpt();
1421  return false;
1422}
1423
1424MachineInstr *
1425RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
1426                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
1427                               bool PreferFalse) const {
1428  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1429         "Unknown select instruction");
1430  if (!STI.hasShortForwardBranchOpt())
1431    return nullptr;
1432
1433  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1434  MachineInstr *DefMI =
1435      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
1436  bool Invert = !DefMI;
1437  if (!DefMI)
1438    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
1439  if (!DefMI)
1440    return nullptr;
1441
1442  // Find new register class to use.
1443  MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
1444  Register DestReg = MI.getOperand(0).getReg();
1445  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
1446  if (!MRI.constrainRegClass(DestReg, PreviousClass))
1447    return nullptr;
1448
1449  unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
1450  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
1451
1452  // Create a new predicated version of DefMI.
1453  MachineInstrBuilder NewMI =
1454      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
1455
1456  // Copy the condition portion.
1457  NewMI.add(MI.getOperand(1));
1458  NewMI.add(MI.getOperand(2));
1459
1460  // Add condition code, inverting if necessary.
1461  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
1462  if (Invert)
1463    CC = RISCVCC::getOppositeBranchCondition(CC);
1464  NewMI.addImm(CC);
1465
1466  // Copy the false register.
1467  NewMI.add(FalseReg);
1468
1469  // Copy all the DefMI operands.
1470  const MCInstrDesc &DefDesc = DefMI->getDesc();
1471  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
1472    NewMI.add(DefMI->getOperand(i));
1473
1474  // Update SeenMIs set: register newly created MI and erase removed DefMI.
1475  SeenMIs.insert(NewMI);
1476  SeenMIs.erase(DefMI);
1477
1478  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
1479  // DefMI would be invalid when tranferred inside the loop.  Checking for a
1480  // loop is expensive, but at least remove kill flags if they are in different
1481  // BBs.
1482  if (DefMI->getParent() != MI.getParent())
1483    NewMI->clearKillInfo();
1484
1485  // The caller will erase MI, but not DefMI.
1486  DefMI->eraseFromParent();
1487  return NewMI;
1488}
1489
1490unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
1491  if (MI.isMetaInstruction())
1492    return 0;
1493
1494  unsigned Opcode = MI.getOpcode();
1495
1496  if (Opcode == TargetOpcode::INLINEASM ||
1497      Opcode == TargetOpcode::INLINEASM_BR) {
1498    const MachineFunction &MF = *MI.getParent()->getParent();
1499    const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
1500    return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
1501                              *TM.getMCAsmInfo());
1502  }
1503
1504  if (!MI.memoperands_empty()) {
1505    MachineMemOperand *MMO = *(MI.memoperands_begin());
1506    const MachineFunction &MF = *MI.getParent()->getParent();
1507    const auto &ST = MF.getSubtarget<RISCVSubtarget>();
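    // A non-temporal access is emitted as an ntl.all hint followed by the
    // load/store, so account for the hint in the returned size.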
1508    if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
1509      if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
1510        if (isCompressibleInst(MI, STI))
1511          return 4; // c.ntl.all + c.load/c.store
1512        return 6;   // c.ntl.all + load/store
1513      }
1514      return 8; // ntl.all + load/store
1515    }
1516  }
1517
1518  if (Opcode == TargetOpcode::BUNDLE)
1519    return getInstBundleLength(MI);
1520
1521  if (MI.getParent() && MI.getParent()->getParent()) {
1522    if (isCompressibleInst(MI, STI))
1523      return 2;
1524  }
1525
1526  switch (Opcode) {
1527  case TargetOpcode::STACKMAP:
1528    // The upper bound for a stackmap intrinsic is the full length of its shadow
1529    return StackMapOpers(&MI).getNumPatchBytes();
1530  case TargetOpcode::PATCHPOINT:
1531    // The size of the patchpoint intrinsic is the number of bytes requested
1532    return PatchPointOpers(&MI).getNumPatchBytes();
1533  case TargetOpcode::STATEPOINT:
1534    // The size of the statepoint intrinsic is the number of bytes requested
1535    return StatepointOpers(&MI).getNumPatchBytes();
1536  default:
1537    return get(Opcode).getSize();
1538  }
1539}
1540
1541unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
1542  unsigned Size = 0;
1543  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
1544  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
1545  while (++I != E && I->isInsideBundle()) {
1546    assert(!I->isBundle() && "No nested bundle!");
1547    Size += getInstSizeInBytes(*I);
1548  }
1549  return Size;
1550}
1551
1552bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1553  const unsigned Opcode = MI.getOpcode();
1554  switch (Opcode) {
1555  default:
1556    break;
1557  case RISCV::FSGNJ_D:
1558  case RISCV::FSGNJ_S:
1559  case RISCV::FSGNJ_H:
1560  case RISCV::FSGNJ_D_INX:
1561  case RISCV::FSGNJ_D_IN32X:
1562  case RISCV::FSGNJ_S_INX:
1563  case RISCV::FSGNJ_H_INX:
1564    // The canonical floating-point move is fsgnj rd, rs, rs.
1565    return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1566           MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
1567  case RISCV::ADDI:
1568  case RISCV::ORI:
1569  case RISCV::XORI:
1570    return (MI.getOperand(1).isReg() &&
1571            MI.getOperand(1).getReg() == RISCV::X0) ||
1572           (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
1573  }
1574  return MI.isAsCheapAsAMove();
1575}
1576
1577std::optional<DestSourcePair>
1578RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1579  if (MI.isMoveReg())
1580    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1581  switch (MI.getOpcode()) {
1582  default:
1583    break;
1584  case RISCV::ADDI:
1585    // Operand 1 can be a frameindex but callers expect registers
1586    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
1587        MI.getOperand(2).getImm() == 0)
1588      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1589    break;
1590  case RISCV::FSGNJ_D:
1591  case RISCV::FSGNJ_S:
1592  case RISCV::FSGNJ_H:
1593  case RISCV::FSGNJ_D_INX:
1594  case RISCV::FSGNJ_D_IN32X:
1595  case RISCV::FSGNJ_S_INX:
1596  case RISCV::FSGNJ_H_INX:
1597    // The canonical floating-point move is fsgnj rd, rs, rs.
1598    if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1599        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
1600      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1601    break;
1602  }
1603  return std::nullopt;
1604}
1605
1606MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
1607  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
    // The option was not specified. Use the Local strategy only for in-order
    // cores. When no scheduling model is available, fall back to the more
    // generic MinInstrCount strategy.
1611    const auto &SchedModel = STI.getSchedModel();
1612    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
1613               ? MachineTraceStrategy::TS_MinInstrCount
1614               : MachineTraceStrategy::TS_Local;
1615  }
1616  // The strategy was forced by the option.
1617  return ForceMachineCombinerStrategy;
1618}
1619
1620void RISCVInstrInfo::finalizeInsInstrs(
1621    MachineInstr &Root, MachineCombinerPattern &P,
1622    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
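  // Propagate the rounding mode operand (frm) of Root to every newly created
  // instruction, adding an implicit use of the FRM register when the rounding
  // mode is dynamic.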
1623  int16_t FrmOpIdx =
1624      RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
1625  if (FrmOpIdx < 0) {
1626    assert(all_of(InsInstrs,
1627                  [](MachineInstr *MI) {
1628                    return RISCV::getNamedOperandIdx(MI->getOpcode(),
1629                                                     RISCV::OpName::frm) < 0;
1630                  }) &&
1631           "New instructions require FRM whereas the old one does not have it");
1632    return;
1633  }
1634
1635  const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
1636  MachineFunction &MF = *Root.getMF();
1637
1638  for (auto *NewMI : InsInstrs) {
1639    assert(static_cast<unsigned>(RISCV::getNamedOperandIdx(
1640               NewMI->getOpcode(), RISCV::OpName::frm)) ==
1641               NewMI->getNumOperands() &&
1642           "Instruction has unexpected number of operands");
1643    MachineInstrBuilder MIB(MF, NewMI);
1644    MIB.add(FRM);
1645    if (FRM.getImm() == RISCVFPRndMode::DYN)
1646      MIB.addUse(RISCV::FRM, RegState::Implicit);
1647  }
1648}
1649
1650static bool isFADD(unsigned Opc) {
1651  switch (Opc) {
1652  default:
1653    return false;
1654  case RISCV::FADD_H:
1655  case RISCV::FADD_S:
1656  case RISCV::FADD_D:
1657    return true;
1658  }
1659}
1660
1661static bool isFSUB(unsigned Opc) {
1662  switch (Opc) {
1663  default:
1664    return false;
1665  case RISCV::FSUB_H:
1666  case RISCV::FSUB_S:
1667  case RISCV::FSUB_D:
1668    return true;
1669  }
1670}
1671
1672static bool isFMUL(unsigned Opc) {
1673  switch (Opc) {
1674  default:
1675    return false;
1676  case RISCV::FMUL_H:
1677  case RISCV::FMUL_S:
1678  case RISCV::FMUL_D:
1679    return true;
1680  }
1681}
1682
1683bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
1684                                            bool &Commuted) const {
1685  if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
1686    return false;
1687
1688  const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
1689  unsigned OperandIdx = Commuted ? 2 : 1;
1690  const MachineInstr &Sibling =
1691      *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
1692
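  // Reassociating FP operations is only safe when both instructions use the
  // same rounding mode (or neither has an frm operand).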
1693  int16_t InstFrmOpIdx =
1694      RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
1695  int16_t SiblingFrmOpIdx =
1696      RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
1697
1698  return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
1699         RISCV::hasEqualFRM(Inst, Sibling);
1700}
1701
1702bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
1703                                                 bool Invert) const {
1704  unsigned Opc = Inst.getOpcode();
1705  if (Invert) {
1706    auto InverseOpcode = getInverseOpcode(Opc);
1707    if (!InverseOpcode)
1708      return false;
1709    Opc = *InverseOpcode;
1710  }
1711
1712  if (isFADD(Opc) || isFMUL(Opc))
1713    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
1714           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
1715
1716  switch (Opc) {
1717  default:
1718    return false;
1719  case RISCV::ADD:
1720  case RISCV::ADDW:
1721  case RISCV::AND:
1722  case RISCV::OR:
1723  case RISCV::XOR:
  // From the RISC-V ISA spec, if both the high and low bits of the same
  // product are required, then the recommended code sequence is:
  //
  // MULH[[S]U] rdh, rs1, rs2
  // MUL        rdl, rs1, rs2
  // (source register specifiers must be in the same order and rdh cannot be
  //  the same as rs1 or rs2)
1731  //
1732  // Microarchitectures can then fuse these into a single multiply operation
1733  // instead of performing two separate multiplies.
1734  // MachineCombiner may reassociate MUL operands and lose the fusion
1735  // opportunity.
1736  case RISCV::MUL:
1737  case RISCV::MULW:
1738  case RISCV::MIN:
1739  case RISCV::MINU:
1740  case RISCV::MAX:
1741  case RISCV::MAXU:
1742  case RISCV::FMIN_H:
1743  case RISCV::FMIN_S:
1744  case RISCV::FMIN_D:
1745  case RISCV::FMAX_H:
1746  case RISCV::FMAX_S:
1747  case RISCV::FMAX_D:
1748    return true;
1749  }
1750
1751  return false;
1752}
1753
1754std::optional<unsigned>
1755RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
1756  switch (Opcode) {
1757  default:
1758    return std::nullopt;
1759  case RISCV::FADD_H:
1760    return RISCV::FSUB_H;
1761  case RISCV::FADD_S:
1762    return RISCV::FSUB_S;
1763  case RISCV::FADD_D:
1764    return RISCV::FSUB_D;
1765  case RISCV::FSUB_H:
1766    return RISCV::FADD_H;
1767  case RISCV::FSUB_S:
1768    return RISCV::FADD_S;
1769  case RISCV::FSUB_D:
1770    return RISCV::FADD_D;
1771  case RISCV::ADD:
1772    return RISCV::SUB;
1773  case RISCV::SUB:
1774    return RISCV::ADD;
1775  case RISCV::ADDW:
1776    return RISCV::SUBW;
1777  case RISCV::SUBW:
1778    return RISCV::ADDW;
1779  }
1780}
1781
1782static bool canCombineFPFusedMultiply(const MachineInstr &Root,
1783                                      const MachineOperand &MO,
1784                                      bool DoRegPressureReduce) {
1785  if (!MO.isReg() || !MO.getReg().isVirtual())
1786    return false;
1787  const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
1788  MachineInstr *MI = MRI.getVRegDef(MO.getReg());
1789  if (!MI || !isFMUL(MI->getOpcode()))
1790    return false;
1791
1792  if (!Root.getFlag(MachineInstr::MIFlag::FmContract) ||
1793      !MI->getFlag(MachineInstr::MIFlag::FmContract))
1794    return false;
1795
1796  // Try combining even if fmul has more than one use as it eliminates
1797  // dependency between fadd(fsub) and fmul. However, it can extend liveranges
1798  // for fmul operands, so reject the transformation in register pressure
1799  // reduction mode.
1800  if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1801    return false;
1802
1803  // Do not combine instructions from different basic blocks.
1804  if (Root.getParent() != MI->getParent())
1805    return false;
1806  return RISCV::hasEqualFRM(Root, *MI);
1807}
1808
1809static bool
1810getFPFusedMultiplyPatterns(MachineInstr &Root,
1811                           SmallVectorImpl<MachineCombinerPattern> &Patterns,
1812                           bool DoRegPressureReduce) {
1813  unsigned Opc = Root.getOpcode();
1814  bool IsFAdd = isFADD(Opc);
1815  if (!IsFAdd && !isFSUB(Opc))
1816    return false;
1817  bool Added = false;
1818  if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
1819                                DoRegPressureReduce)) {
1820    Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX
1821                              : MachineCombinerPattern::FMSUB);
1822    Added = true;
1823  }
1824  if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
1825                                DoRegPressureReduce)) {
1826    Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_XA
1827                              : MachineCombinerPattern::FNMSUB);
1828    Added = true;
1829  }
1830  return Added;
1831}
1832
1833static bool getFPPatterns(MachineInstr &Root,
1834                          SmallVectorImpl<MachineCombinerPattern> &Patterns,
1835                          bool DoRegPressureReduce) {
1836  return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
1837}
1838
1839bool RISCVInstrInfo::getMachineCombinerPatterns(
1840    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
1841    bool DoRegPressureReduce) const {
1842
1843  if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
1844    return true;
1845
1846  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
1847                                                     DoRegPressureReduce);
1848}
1849
1850static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc,
1851                                         MachineCombinerPattern Pattern) {
1852  switch (RootOpc) {
1853  default:
1854    llvm_unreachable("Unexpected opcode");
1855  case RISCV::FADD_H:
1856    return RISCV::FMADD_H;
1857  case RISCV::FADD_S:
1858    return RISCV::FMADD_S;
1859  case RISCV::FADD_D:
1860    return RISCV::FMADD_D;
1861  case RISCV::FSUB_H:
1862    return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
1863                                                    : RISCV::FNMSUB_H;
1864  case RISCV::FSUB_S:
1865    return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
1866                                                    : RISCV::FNMSUB_S;
1867  case RISCV::FSUB_D:
1868    return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
1869                                                    : RISCV::FNMSUB_D;
1870  }
1871}
1872
1873static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) {
1874  switch (Pattern) {
1875  default:
1876    llvm_unreachable("Unexpected pattern");
1877  case MachineCombinerPattern::FMADD_AX:
1878  case MachineCombinerPattern::FMSUB:
1879    return 2;
1880  case MachineCombinerPattern::FMADD_XA:
1881  case MachineCombinerPattern::FNMSUB:
1882    return 1;
1883  }
1884}
1885
1886static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
1887                                   MachineCombinerPattern Pattern,
1888                                   SmallVectorImpl<MachineInstr *> &InsInstrs,
1889                                   SmallVectorImpl<MachineInstr *> &DelInstrs) {
1890  MachineFunction *MF = Root.getMF();
1891  MachineRegisterInfo &MRI = MF->getRegInfo();
1892  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1893
1894  MachineOperand &Mul1 = Prev.getOperand(1);
1895  MachineOperand &Mul2 = Prev.getOperand(2);
1896  MachineOperand &Dst = Root.getOperand(0);
1897  MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern));
1898
1899  Register DstReg = Dst.getReg();
1900  unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
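  // Only MI flags common to both the fadd/fsub and the fmul are kept on the
  // fused instruction.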
1901  uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
1902  DebugLoc MergedLoc =
1903      DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
1904
1905  bool Mul1IsKill = Mul1.isKill();
1906  bool Mul2IsKill = Mul2.isKill();
1907  bool AddendIsKill = Addend.isKill();
1908
1909  // We need to clear kill flags since we may be extending the live range past
1910  // a kill. If the mul had kill flags, we can preserve those since we know
1911  // where the previous range stopped.
1912  MRI.clearKillFlags(Mul1.getReg());
1913  MRI.clearKillFlags(Mul2.getReg());
1914
1915  MachineInstrBuilder MIB =
1916      BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
1917          .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
1918          .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
1919          .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
1920          .setMIFlags(IntersectedFlags);
1921
1922  InsInstrs.push_back(MIB);
1923  if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
1924    DelInstrs.push_back(&Prev);
1925  DelInstrs.push_back(&Root);
1926}
1927
1928void RISCVInstrInfo::genAlternativeCodeSequence(
1929    MachineInstr &Root, MachineCombinerPattern Pattern,
1930    SmallVectorImpl<MachineInstr *> &InsInstrs,
1931    SmallVectorImpl<MachineInstr *> &DelInstrs,
1932    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
1933  MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
1934  switch (Pattern) {
1935  default:
1936    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
1937                                                DelInstrs, InstrIdxForVirtReg);
1938    return;
1939  case MachineCombinerPattern::FMADD_AX:
1940  case MachineCombinerPattern::FMSUB: {
1941    MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
1942    combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
1943    return;
1944  }
1945  case MachineCombinerPattern::FMADD_XA:
1946  case MachineCombinerPattern::FNMSUB: {
1947    MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
1948    combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
1949    return;
1950  }
1951  }
1952}
1953
1954bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
1955                                       StringRef &ErrInfo) const {
1956  MCInstrDesc const &Desc = MI.getDesc();
1957
1958  for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
1959    unsigned OpType = Operand.OperandType;
1960    if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
1961        OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
1962      const MachineOperand &MO = MI.getOperand(Index);
1963      if (MO.isImm()) {
1964        int64_t Imm = MO.getImm();
1965        bool Ok;
1966        switch (OpType) {
1967        default:
1968          llvm_unreachable("Unexpected operand type");
1969
1970          // clang-format off
1971#define CASE_OPERAND_UIMM(NUM)                                                 \
1972  case RISCVOp::OPERAND_UIMM##NUM:                                             \
1973    Ok = isUInt<NUM>(Imm);                                                     \
1974    break;
1975        CASE_OPERAND_UIMM(1)
1976        CASE_OPERAND_UIMM(2)
1977        CASE_OPERAND_UIMM(3)
1978        CASE_OPERAND_UIMM(4)
1979        CASE_OPERAND_UIMM(5)
1980        CASE_OPERAND_UIMM(6)
1981        CASE_OPERAND_UIMM(7)
1982        CASE_OPERAND_UIMM(8)
1983        CASE_OPERAND_UIMM(12)
1984        CASE_OPERAND_UIMM(20)
1985          // clang-format on
1986        case RISCVOp::OPERAND_UIMM2_LSB0:
1987          Ok = isShiftedUInt<1, 1>(Imm);
1988          break;
1989        case RISCVOp::OPERAND_UIMM7_LSB00:
1990          Ok = isShiftedUInt<5, 2>(Imm);
1991          break;
1992        case RISCVOp::OPERAND_UIMM8_LSB00:
1993          Ok = isShiftedUInt<6, 2>(Imm);
1994          break;
1995        case RISCVOp::OPERAND_UIMM8_LSB000:
1996          Ok = isShiftedUInt<5, 3>(Imm);
1997          break;
1998        case RISCVOp::OPERAND_UIMM8_GE32:
1999          Ok = isUInt<8>(Imm) && Imm >= 32;
2000          break;
2001        case RISCVOp::OPERAND_UIMM9_LSB000:
2002          Ok = isShiftedUInt<6, 3>(Imm);
2003          break;
2004        case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
2005          Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
2006          break;
2007        case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
2008          Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
2009          break;
2010        case RISCVOp::OPERAND_ZERO:
2011          Ok = Imm == 0;
2012          break;
2013        case RISCVOp::OPERAND_SIMM5:
2014          Ok = isInt<5>(Imm);
2015          break;
2016        case RISCVOp::OPERAND_SIMM5_PLUS1:
2017          Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;
2018          break;
2019        case RISCVOp::OPERAND_SIMM6:
2020          Ok = isInt<6>(Imm);
2021          break;
2022        case RISCVOp::OPERAND_SIMM6_NONZERO:
2023          Ok = Imm != 0 && isInt<6>(Imm);
2024          break;
2025        case RISCVOp::OPERAND_VTYPEI10:
2026          Ok = isUInt<10>(Imm);
2027          break;
2028        case RISCVOp::OPERAND_VTYPEI11:
2029          Ok = isUInt<11>(Imm);
2030          break;
2031        case RISCVOp::OPERAND_SIMM12:
2032          Ok = isInt<12>(Imm);
2033          break;
2034        case RISCVOp::OPERAND_SIMM12_LSB00000:
2035          Ok = isShiftedInt<7, 5>(Imm);
2036          break;
2037        case RISCVOp::OPERAND_UIMMLOG2XLEN:
2038          Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2039          break;
2040        case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
2041          Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2042          Ok = Ok && Imm != 0;
2043          break;
2044        case RISCVOp::OPERAND_CLUI_IMM:
2045          Ok = (isUInt<5>(Imm) && Imm != 0) ||
2046               (Imm >= 0xfffe0 && Imm <= 0xfffff);
2047          break;
2048        case RISCVOp::OPERAND_RVKRNUM:
2049          Ok = Imm >= 0 && Imm <= 10;
2050          break;
2051        case RISCVOp::OPERAND_RVKRNUM_0_7:
2052          Ok = Imm >= 0 && Imm <= 7;
2053          break;
2054        case RISCVOp::OPERAND_RVKRNUM_1_10:
2055          Ok = Imm >= 1 && Imm <= 10;
2056          break;
2057        case RISCVOp::OPERAND_RVKRNUM_2_14:
2058          Ok = Imm >= 2 && Imm <= 14;
2059          break;
2060        }
2061        if (!Ok) {
2062          ErrInfo = "Invalid immediate";
2063          return false;
2064        }
2065      }
2066    }
2067  }
2068
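  // For vector pseudos, check that the VL, SEW and policy operands are present
  // and consistent with each other.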
2069  const uint64_t TSFlags = Desc.TSFlags;
2070  if (RISCVII::hasVLOp(TSFlags)) {
2071    const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
    if (!Op.isImm() && !Op.isReg()) {
2073      ErrInfo = "Invalid operand type for VL operand";
2074      return false;
2075    }
2076    if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
      const MachineRegisterInfo &MRI =
          MI.getParent()->getParent()->getRegInfo();
2078      auto *RC = MRI.getRegClass(Op.getReg());
2079      if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
2080        ErrInfo = "Invalid register class for VL operand";
2081        return false;
2082      }
2083    }
2084    if (!RISCVII::hasSEWOp(TSFlags)) {
2085      ErrInfo = "VL operand w/o SEW operand?";
2086      return false;
2087    }
2088  }
2089  if (RISCVII::hasSEWOp(TSFlags)) {
2090    unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
2091    if (!MI.getOperand(OpIdx).isImm()) {
2092      ErrInfo = "SEW value expected to be an immediate";
2093      return false;
2094    }
2095    uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
2096    if (Log2SEW > 31) {
2097      ErrInfo = "Unexpected SEW value";
2098      return false;
2099    }
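    // A Log2SEW value of 0 (the convention used by mask instructions) is
    // treated as SEW=8.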
2100    unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2101    if (!RISCVVType::isValidSEW(SEW)) {
2102      ErrInfo = "Unexpected SEW value";
2103      return false;
2104    }
2105  }
2106  if (RISCVII::hasVecPolicyOp(TSFlags)) {
2107    unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
2108    if (!MI.getOperand(OpIdx).isImm()) {
2109      ErrInfo = "Policy operand expected to be an immediate";
2110      return false;
2111    }
2112    uint64_t Policy = MI.getOperand(OpIdx).getImm();
2113    if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
2114      ErrInfo = "Invalid Policy Value";
2115      return false;
2116    }
2117    if (!RISCVII::hasVLOp(TSFlags)) {
2118      ErrInfo = "policy operand w/o VL operand?";
2119      return false;
2120    }
2121
    // VecPolicy operands can only exist on instructions with passthru/merge
    // arguments. Note that not all instructions with a passthru have a vec
    // policy operand; some instructions have an implicit policy.
2125    unsigned UseOpIdx;
2126    if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
2127      ErrInfo = "policy operand w/o tied operand?";
2128      return false;
2129    }
2130  }
2131
2132  return true;
2133}
2134
2135bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
2136                                         const MachineInstr &AddrI,
2137                                         ExtAddrMode &AM) const {
2138  switch (MemI.getOpcode()) {
2139  default:
2140    return false;
2141  case RISCV::LB:
2142  case RISCV::LBU:
2143  case RISCV::LH:
2144  case RISCV::LHU:
2145  case RISCV::LW:
2146  case RISCV::LWU:
2147  case RISCV::LD:
2148  case RISCV::FLH:
2149  case RISCV::FLW:
2150  case RISCV::FLD:
2151  case RISCV::SB:
2152  case RISCV::SH:
2153  case RISCV::SW:
2154  case RISCV::SD:
2155  case RISCV::FSH:
2156  case RISCV::FSW:
2157  case RISCV::FSD:
2158    break;
2159  }
2160
2161  if (MemI.getOperand(0).getReg() == Reg)
2162    return false;
2163
2164  if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
2165      !AddrI.getOperand(2).isImm())
2166    return false;
2167
2168  int64_t OldOffset = MemI.getOperand(2).getImm();
2169  int64_t Disp = AddrI.getOperand(2).getImm();
2170  int64_t NewOffset = OldOffset + Disp;
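  // On RV32 the address computation wraps at 32 bits, so normalize the folded
  // offset before checking that it fits in a signed 12-bit immediate.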
2171  if (!STI.is64Bit())
2172    NewOffset = SignExtend64<32>(NewOffset);
2173
2174  if (!isInt<12>(NewOffset))
2175    return false;
2176
2177  AM.BaseReg = AddrI.getOperand(1).getReg();
2178  AM.ScaledReg = 0;
2179  AM.Scale = 0;
2180  AM.Displacement = NewOffset;
2181  AM.Form = ExtAddrMode::Formula::Basic;
2182  return true;
2183}
2184
2185MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
2186                                               const ExtAddrMode &AM) const {
2187
2188  const DebugLoc &DL = MemI.getDebugLoc();
2189  MachineBasicBlock &MBB = *MemI.getParent();
2190
2191  assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
2192         "Addressing mode not supported for folding");
2193
2194  return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
2195      .addReg(MemI.getOperand(0).getReg(),
2196              MemI.mayLoad() ? RegState::Define : 0)
2197      .addReg(AM.BaseReg)
2198      .addImm(AM.Displacement)
2199      .setMemRefs(MemI.memoperands())
2200      .setMIFlags(MemI.getFlags());
2201}
2202
2203bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
2204    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2205    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2206    const TargetRegisterInfo *TRI) const {
2207  if (!LdSt.mayLoadOrStore())
2208    return false;
2209
2210  // Conservatively, only handle scalar loads/stores for now.
2211  switch (LdSt.getOpcode()) {
2212  case RISCV::LB:
2213  case RISCV::LBU:
2214  case RISCV::SB:
2215  case RISCV::LH:
2216  case RISCV::LHU:
2217  case RISCV::FLH:
2218  case RISCV::SH:
2219  case RISCV::FSH:
2220  case RISCV::LW:
2221  case RISCV::LWU:
2222  case RISCV::FLW:
2223  case RISCV::SW:
2224  case RISCV::FSW:
2225  case RISCV::LD:
2226  case RISCV::FLD:
2227  case RISCV::SD:
2228  case RISCV::FSD:
2229    break;
2230  default:
2231    return false;
2232  }
2233  const MachineOperand *BaseOp;
2234  OffsetIsScalable = false;
2235  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2236    return false;
2237  BaseOps.push_back(BaseOp);
2238  return true;
2239}
2240
2241// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2242// helper?
2243static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
2244                                  ArrayRef<const MachineOperand *> BaseOps1,
2245                                  const MachineInstr &MI2,
2246                                  ArrayRef<const MachineOperand *> BaseOps2) {
2247  // Only examine the first "base" operand of each instruction, on the
2248  // assumption that it represents the real base address of the memory access.
2249  // Other operands are typically offsets or indices from this base address.
2250  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
2251    return true;
2252
2253  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
2254    return false;
2255
2256  auto MO1 = *MI1.memoperands_begin();
2257  auto MO2 = *MI2.memoperands_begin();
2258  if (MO1->getAddrSpace() != MO2->getAddrSpace())
2259    return false;
2260
2261  auto Base1 = MO1->getValue();
2262  auto Base2 = MO2->getValue();
2263  if (!Base1 || !Base2)
2264    return false;
2265  Base1 = getUnderlyingObject(Base1);
2266  Base2 = getUnderlyingObject(Base2);
2267
2268  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
2269    return false;
2270
2271  return Base1 == Base2;
2272}
2273
2274bool RISCVInstrInfo::shouldClusterMemOps(
2275    ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
2276    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2277    int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
2278    unsigned NumBytes) const {
2279  // If the mem ops (to be clustered) do not have the same base ptr, then they
2280  // should not be clustered
2281  if (!BaseOps1.empty() && !BaseOps2.empty()) {
2282    const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
2283    const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
2284    if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
2285      return false;
2286  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
2287    // If only one base op is empty, they do not have the same base ptr
2288    return false;
2289  }
2290
2291  unsigned CacheLineSize =
2292      BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
2293  // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
2294  CacheLineSize = CacheLineSize ? CacheLineSize : 64;
2295  // Cluster if the memory operations are on the same or a neighbouring cache
2296  // line, but limit the maximum ClusterSize to avoid creating too much
2297  // additional register pressure.
2298  return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;
2299}
2300
2301// Set BaseReg (the base register operand), Offset (the byte offset being
2302// accessed) and the access Width of the passed instruction that reads/writes
2303// memory. Returns false if the instruction does not read/write memory or the
2304// BaseReg/Offset/Width can't be determined. Is not guaranteed to always
2305// recognise base operands and offsets in all cases.
2306// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
2307// function) and set it as appropriate.
2308bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
2309    const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
2310    unsigned &Width, const TargetRegisterInfo *TRI) const {
2311  if (!LdSt.mayLoadOrStore())
2312    return false;
2313
2314  // Here we assume the standard RISC-V ISA, which uses a base+offset
2315  // addressing mode. You'll need to relax these conditions to support custom
2316  // load/store instructions.
2317  if (LdSt.getNumExplicitOperands() != 3)
2318    return false;
2319  if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2320      !LdSt.getOperand(2).isImm())
2321    return false;
2322
2323  if (!LdSt.hasOneMemOperand())
2324    return false;
2325
2326  Width = (*LdSt.memoperands_begin())->getSize();
2327  BaseReg = &LdSt.getOperand(1);
2328  Offset = LdSt.getOperand(2).getImm();
2329  return true;
2330}
2331
2332bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
2333    const MachineInstr &MIa, const MachineInstr &MIb) const {
2334  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
2335  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
2336
2337  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
2338      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2339    return false;
2340
2341  // Retrieve the base register, offset from the base register and width. Width
2342  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4).  If
2343  // base registers are identical, and the offset of a lower memory access +
2344  // the width doesn't overlap the offset of a higher memory access,
2345  // then the memory accesses are different.
2346  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2347  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
2348  int64_t OffsetA = 0, OffsetB = 0;
2349  unsigned int WidthA = 0, WidthB = 0;
2350  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
2351      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
2352    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
2353      int LowOffset = std::min(OffsetA, OffsetB);
2354      int HighOffset = std::max(OffsetA, OffsetB);
2355      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2356      if (LowOffset + LowWidth <= HighOffset)
2357        return true;
2358    }
2359  }
2360  return false;
2361}
2362
2363std::pair<unsigned, unsigned>
2364RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
2365  const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
2366  return std::make_pair(TF & Mask, TF & ~Mask);
2367}
2368
2369ArrayRef<std::pair<unsigned, const char *>>
2370RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2371  using namespace RISCVII;
2372  static const std::pair<unsigned, const char *> TargetFlags[] = {
2373      {MO_CALL, "riscv-call"},
2374      {MO_LO, "riscv-lo"},
2375      {MO_HI, "riscv-hi"},
2376      {MO_PCREL_LO, "riscv-pcrel-lo"},
2377      {MO_PCREL_HI, "riscv-pcrel-hi"},
2378      {MO_GOT_HI, "riscv-got-hi"},
2379      {MO_TPREL_LO, "riscv-tprel-lo"},
2380      {MO_TPREL_HI, "riscv-tprel-hi"},
2381      {MO_TPREL_ADD, "riscv-tprel-add"},
2382      {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
2383      {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
2384      {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
2385      {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
2386      {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
2387      {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
2388  return ArrayRef(TargetFlags);
}

bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
2391    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
2392  const Function &F = MF.getFunction();
2393
2394  // Can F be deduplicated by the linker? If it can, don't outline from it.
2395  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
2396    return false;
2397
2398  // Don't outline from functions with section markings; the program could
2399  // expect that all the code is in the named section.
2400  if (F.hasSection())
2401    return false;
2402
2403  // It's safe to outline from MF.
2404  return true;
2405}
2406
2407bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
2408                                            unsigned &Flags) const {
2409  // More accurate safety checking is done in getOutliningCandidateInfo.
2410  return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2411}
2412
2413// Enum values indicating how an outlined call should be constructed.
2414enum MachineOutlinerConstructionID {
2415  MachineOutlinerDefault
2416};
2417
2418bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2419    MachineFunction &MF) const {
2420  return MF.getFunction().hasMinSize();
2421}
2422
2423std::optional<outliner::OutlinedFunction>
2424RISCVInstrInfo::getOutliningCandidateInfo(
2425    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2426
  // First we need to filter out candidates where the X5 register (i.e. t0)
  // can't be used to set up the function call.
2429  auto CannotInsertCall = [](outliner::Candidate &C) {
2430    const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2431    return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2432  };
2433
2434  llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
2435
2436  // If the sequence doesn't have enough candidates left, then we're done.
2437  if (RepeatedSequenceLocs.size() < 2)
2438    return std::nullopt;
2439
2440  unsigned SequenceSize = 0;
2441
2442  for (auto &MI : RepeatedSequenceLocs[0])
2443    SequenceSize += getInstSizeInBytes(MI);
2444
2445  // call t0, function = 8 bytes.
2446  unsigned CallOverhead = 8;
2447  for (auto &C : RepeatedSequenceLocs)
2448    C.setCallInfo(MachineOutlinerDefault, CallOverhead);
2449
2450  // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2451  unsigned FrameOverhead = 4;
2452  if (RepeatedSequenceLocs[0]
2453          .getMF()
2454          ->getSubtarget<RISCVSubtarget>()
2455          .hasStdExtCOrZca())
2456    FrameOverhead = 2;
2457
2458  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2459                                    FrameOverhead, MachineOutlinerDefault);
2460}
2461
2462outliner::InstrType
2463RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
                                     unsigned Flags) const {
2465  MachineInstr &MI = *MBBI;
2466  MachineBasicBlock *MBB = MI.getParent();
2467  const TargetRegisterInfo *TRI =
2468      MBB->getParent()->getSubtarget().getRegisterInfo();
2469  const auto &F = MI.getMF()->getFunction();
2470
2471  // We can manually strip out CFI instructions later.
2472  if (MI.isCFIInstruction())
2473    // If current function has exception handling code, we can't outline &
2474    // strip these CFI instructions since it may break .eh_frame section
2475    // needed in unwinding.
2476    return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2477                                     : outliner::InstrType::Invisible;
2478
2479  // We need support for tail calls to outlined functions before return
2480  // statements can be allowed.
2481  if (MI.isReturn())
2482    return outliner::InstrType::Illegal;
2483
2484  // Don't allow modifying the X5 register which we use for return addresses for
2485  // these outlined functions.
2486  if (MI.modifiesRegister(RISCV::X5, TRI) ||
2487      MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2488    return outliner::InstrType::Illegal;
2489
2490  // Make sure the operands don't reference something unsafe.
2491  for (const auto &MO : MI.operands()) {
2492
    // pcrel-hi and pcrel-lo can't be put in separate sections, so filter that
    // out if at all possible.
2495    if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
2496        (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
2497         F.hasSection()))
2498      return outliner::InstrType::Illegal;
2499  }
2500
2501  return outliner::InstrType::Legal;
2502}
2503
2504void RISCVInstrInfo::buildOutlinedFrame(
2505    MachineBasicBlock &MBB, MachineFunction &MF,
2506    const outliner::OutlinedFunction &OF) const {
2507
2508  // Strip out any CFI instructions
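  // removeFromParent invalidates the iterators, so restart the scan after each
  // removal.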
2509  bool Changed = true;
2510  while (Changed) {
2511    Changed = false;
2512    auto I = MBB.begin();
2513    auto E = MBB.end();
2514    for (; I != E; ++I) {
2515      if (I->isCFIInstruction()) {
2516        I->removeFromParent();
2517        Changed = true;
2518        break;
2519      }
2520    }
2521  }
2522
2523  MBB.addLiveIn(RISCV::X5);
2524
2525  // Add in a return instruction to the end of the outlined frame.
  MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
                            .addReg(RISCV::X0, RegState::Define)
                            .addReg(RISCV::X5)
                            .addImm(0));
2530}
2531
2532MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
2533    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
2534    MachineFunction &MF, outliner::Candidate &C) const {
2535
2536  // Add in a call instruction to the outlined function at the given location.
2537  It = MBB.insert(It,
2538                  BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
2539                      .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
2540                                        RISCVII::MO_CALL));
2541  return It;
2542}
2543
2544std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
2545                                                         Register Reg) const {
2546  // TODO: Handle cases where Reg is a super- or sub-register of the
2547  // destination register.
2548  const MachineOperand &Op0 = MI.getOperand(0);
2549  if (!Op0.isReg() || Reg != Op0.getReg())
2550    return std::nullopt;
2551
2552  // Don't consider ADDIW as a candidate because the caller may not be aware
2553  // of its sign extension behaviour.
2554  if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
2555      MI.getOperand(2).isImm())
2556    return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
2557
2558  return std::nullopt;
2559}
2560
2561// MIR printer helper function to annotate Operands with a comment.
2562std::string RISCVInstrInfo::createMIROperandComment(
2563    const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
2564    const TargetRegisterInfo *TRI) const {
2565  // Print a generic comment for this operand if there is one.
2566  std::string GenericComment =
2567      TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
2568  if (!GenericComment.empty())
2569    return GenericComment;
2570
2571  // If not, we must have an immediate operand.
2572  if (!Op.isImm())
2573    return std::string();
2574
2575  std::string Comment;
2576  raw_string_ostream OS(Comment);
2577
2578  uint64_t TSFlags = MI.getDesc().TSFlags;
2579
2580  // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
2581  // operand of vector codegen pseudos.
2582  if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
2583       MI.getOpcode() == RISCV::PseudoVSETVLI ||
2584       MI.getOpcode() == RISCV::PseudoVSETIVLI ||
2585       MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
2586      OpIdx == 2) {
2587    unsigned Imm = MI.getOperand(OpIdx).getImm();
2588    RISCVVType::printVType(Imm, OS);
2589  } else if (RISCVII::hasSEWOp(TSFlags) &&
2590             OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) {
2591    unsigned Log2SEW = MI.getOperand(OpIdx).getImm();
2592    unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2593    assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
2594    OS << "e" << SEW;
2595  } else if (RISCVII::hasVecPolicyOp(TSFlags) &&
2596             OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
2597    unsigned Policy = MI.getOperand(OpIdx).getImm();
2598    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
2599           "Invalid Policy Value");
2600    OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
2601       << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
2602  }
2603
2604  OS.flush();
2605  return Comment;
2606}
2607
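// The macros below expand into case labels covering every LMUL variant (and,
// for the splat forms, every scalar FP register type) of a vector FMA pseudo.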
2608// clang-format off
2609#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL)                                \
2610  RISCV::PseudoV##OP##_##TYPE##_##LMUL
2611
2612#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE)                                    \
2613  CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1):                                       \
2614  case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2):                                  \
2615  case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4):                                  \
2616  case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8)
2617
2618#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE)                                   \
2619  CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2):                                      \
2620  case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE)
2621
2622#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)                                   \
2623  CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4):                                      \
2624  case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE)
2625
2626#define CASE_VFMA_OPCODE_LMULS(OP, TYPE)                                       \
2627  CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8):                                      \
2628  case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)
2629
2630#define CASE_VFMA_SPLATS(OP)                                                   \
2631  CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16):                                      \
2632  case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32):                                 \
2633  case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64)
2634// clang-format on
2635
2636bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2637                                           unsigned &SrcOpIdx1,
2638                                           unsigned &SrcOpIdx2) const {
2639  const MCInstrDesc &Desc = MI.getDesc();
2640  if (!Desc.isCommutable())
2641    return false;
2642
2643  switch (MI.getOpcode()) {
2644  case RISCV::TH_MVEQZ:
2645  case RISCV::TH_MVNEZ:
2646    // We can't commute operands if operand 2 (i.e., rs1 in
2647    // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
2648    // not valid as the in/out-operand 1).
2649    if (MI.getOperand(2).getReg() == RISCV::X0)
2650      return false;
2651    // Operands 1 and 2 are commutable, if we switch the opcode.
2652    return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
2653  case RISCV::TH_MULA:
2654  case RISCV::TH_MULAW:
2655  case RISCV::TH_MULAH:
2656  case RISCV::TH_MULS:
2657  case RISCV::TH_MULSW:
2658  case RISCV::TH_MULSH:
2659    // Operands 2 and 3 are commutable.
2660    return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
2661  case RISCV::PseudoCCMOVGPRNoX0:
2662  case RISCV::PseudoCCMOVGPR:
2663    // Operands 4 and 5 are commutable.
2664    return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
2665  case CASE_VFMA_SPLATS(FMADD):
2666  case CASE_VFMA_SPLATS(FMSUB):
2667  case CASE_VFMA_SPLATS(FMACC):
2668  case CASE_VFMA_SPLATS(FMSAC):
2669  case CASE_VFMA_SPLATS(FNMADD):
2670  case CASE_VFMA_SPLATS(FNMSUB):
2671  case CASE_VFMA_SPLATS(FNMACC):
2672  case CASE_VFMA_SPLATS(FNMSAC):
2673  case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
2674  case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
2675  case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
2676  case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
2677  case CASE_VFMA_OPCODE_LMULS(MADD, VX):
2678  case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
2679  case CASE_VFMA_OPCODE_LMULS(MACC, VX):
2680  case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
2681  case CASE_VFMA_OPCODE_LMULS(MACC, VV):
2682  case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
2683    // If the tail policy is undisturbed we can't commute.
2684    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2685    if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2686      return false;
2687
2688    // For these instructions we can only swap operand 1 and operand 3 by
2689    // changing the opcode.
2690    unsigned CommutableOpIdx1 = 1;
2691    unsigned CommutableOpIdx2 = 3;
2692    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2693                              CommutableOpIdx2))
2694      return false;
2695    return true;
2696  }
2697  case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2698  case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2699  case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2700  case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2701  case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2702  case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2703    // If the tail policy is undisturbed we can't commute.
2704    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2705    if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2706      return false;
2707
2708    // For these instructions we have more freedom. We can commute with the
2709    // other multiplicand or with the addend/subtrahend/minuend.
2710
2711    // Any fixed operand must be from source 1, 2 or 3.
2712    if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
2713      return false;
2714    if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
2715      return false;
2716
    // If both ops are fixed, one must be the tied source.
2718    if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2719        SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
2720      return false;
2721
2722    // Look for two different register operands assumed to be commutable
2723    // regardless of the FMA opcode. The FMA opcode is adjusted later if
2724    // needed.
2725    if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2726        SrcOpIdx2 == CommuteAnyOperandIndex) {
2727      // At least one of operands to be commuted is not specified and
2728      // this method is free to choose appropriate commutable operands.
2729      unsigned CommutableOpIdx1 = SrcOpIdx1;
2730      if (SrcOpIdx1 == SrcOpIdx2) {
2731        // Both of operands are not fixed. Set one of commutable
2732        // operands to the tied source.
2733        CommutableOpIdx1 = 1;
2734      } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
2735        // Only one of the operands is not fixed.
2736        CommutableOpIdx1 = SrcOpIdx2;
2737      }
2738
2739      // CommutableOpIdx1 is well defined now. Let's choose another commutable
2740      // operand and assign its index to CommutableOpIdx2.
2741      unsigned CommutableOpIdx2;
2742      if (CommutableOpIdx1 != 1) {
2743        // If we haven't already used the tied source, we must use it now.
2744        CommutableOpIdx2 = 1;
2745      } else {
2746        Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
2747
2748        // The commuted operands should have different registers.
2749        // Otherwise, the commute transformation does not change anything and
2750        // is useless. We use this as a hint to make our decision.
2751        if (Op1Reg != MI.getOperand(2).getReg())
2752          CommutableOpIdx2 = 2;
2753        else
2754          CommutableOpIdx2 = 3;
2755      }
2756
2757      // Assign the found pair of commutable indices to SrcOpIdx1 and
2758      // SrcOpIdx2 to return those values.
2759      if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2760                                CommutableOpIdx2))
2761        return false;
2762    }
2763
2764    return true;
2765  }
2766  }
2767
2768  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2769}
2770
2771#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL)               \
2772  case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL:                                \
2773    Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL;                             \
2774    break;
2775
2776#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)                   \
2777  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1)                       \
2778  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2)                       \
2779  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4)                       \
2780  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
2781
2782#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)                  \
2783  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2)                      \
2784  CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
2785
2786#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)                  \
2787  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4)                      \
2788  CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
2789
2790#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE)                      \
2791  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8)                      \
2792  CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
2793
2794#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP)                           \
2795  CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16)                      \
2796  CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32)                      \
2797  CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
2798
2799MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2800                                                     bool NewMI,
2801                                                     unsigned OpIdx1,
2802                                                     unsigned OpIdx2) const {
2803  auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
2804    if (NewMI)
2805      return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
2806    return MI;
2807  };
2808
2809  switch (MI.getOpcode()) {
2810  case RISCV::TH_MVEQZ:
2811  case RISCV::TH_MVNEZ: {
2812    auto &WorkingMI = cloneIfNew(MI);
2813    WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
2814                                                            : RISCV::TH_MVEQZ));
2815    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
2816                                                   OpIdx2);
2817  }
2818  case RISCV::PseudoCCMOVGPRNoX0:
2819  case RISCV::PseudoCCMOVGPR: {
2820    // CCMOV can be commuted by inverting the condition.
2821    auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
2822    CC = RISCVCC::getOppositeBranchCondition(CC);
2823    auto &WorkingMI = cloneIfNew(MI);
2824    WorkingMI.getOperand(3).setImm(CC);
2825    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
2826                                                   OpIdx1, OpIdx2);
2827  }
2828  case CASE_VFMA_SPLATS(FMACC):
2829  case CASE_VFMA_SPLATS(FMADD):
2830  case CASE_VFMA_SPLATS(FMSAC):
2831  case CASE_VFMA_SPLATS(FMSUB):
2832  case CASE_VFMA_SPLATS(FNMACC):
2833  case CASE_VFMA_SPLATS(FNMADD):
2834  case CASE_VFMA_SPLATS(FNMSAC):
2835  case CASE_VFMA_SPLATS(FNMSUB):
2836  case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
2837  case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
2838  case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
2839  case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
2840  case CASE_VFMA_OPCODE_LMULS(MADD, VX):
2841  case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
2842  case CASE_VFMA_OPCODE_LMULS(MACC, VX):
2843  case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
2844  case CASE_VFMA_OPCODE_LMULS(MACC, VV):
2845  case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
    // It only makes sense to toggle these between clobbering the
    // addend/subtrahend/minuend and clobbering one of the multiplicands.
2848    assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
2849    assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
2850    unsigned Opc;
2851    switch (MI.getOpcode()) {
2852      default:
2853        llvm_unreachable("Unexpected opcode");
2854      CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
2855      CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
2856      CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
2857      CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
2858      CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
2859      CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
2860      CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
2861      CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
2862      CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV)
2863      CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV)
2864      CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV)
2865      CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV)
2866      CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
2867      CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
2868      CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
2869      CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
2870      CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
2871      CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
2872    }
2873
2874    auto &WorkingMI = cloneIfNew(MI);
2875    WorkingMI.setDesc(get(Opc));
2876    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2877                                                   OpIdx1, OpIdx2);
2878  }
2879  case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2880  case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2881  case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2882  case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2883  case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2884  case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2885    assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
    // If one of the operands is the addend, we need to change the opcode.
    // Otherwise we're just swapping 2 of the multiplicands.
    if (OpIdx1 == 3 || OpIdx2 == 3) {
      unsigned Opc;
      switch (MI.getOpcode()) {
        default:
          llvm_unreachable("Unexpected opcode");
        CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV)
        CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV)
        CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV)
        CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV)
        CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
        CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
      }

      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }
    // Let the default code handle it.
    break;
  }
  }

  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}

#undef CASE_VFMA_CHANGE_OPCODE_SPLATS
#undef CASE_VFMA_CHANGE_OPCODE_LMULS
#undef CASE_VFMA_CHANGE_OPCODE_COMMON
#undef CASE_VFMA_SPLATS
#undef CASE_VFMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON

// clang-format off
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL)                                    \
  RISCV::PseudoV##OP##_##LMUL##_TIED

#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP)                                       \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF4):                                          \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF2):                                     \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M1):                                      \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M2):                                      \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_OPCODE_LMULS(OP)                                           \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF8):                                          \
  case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
// clang-format on

#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL)                             \
  case RISCV::PseudoV##OP##_##LMUL##_TIED:                                     \
    NewOpc = RISCV::PseudoV##OP##_##LMUL;                                      \
    break;

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)                                \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1)                                     \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2)                                     \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)

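// Sketch of what the macros above expand to: CASE_WIDEOP_OPCODE_LMULS(WADD_WV)
// produces the case labels for RISCV::PseudoVWADD_WV_MF8_TIED through
// RISCV::PseudoVWADD_WV_M4_TIED, and CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
// rewrites each _TIED pseudo to the corresponding untied (three-address)
// pseudo of the same LMUL.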
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
                                                    LiveVariables *LV,
                                                    LiveIntervals *LIS) const {
  MachineInstrBuilder MIB;
  switch (MI.getOpcode()) {
  default:
    return nullptr;
  case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
  case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 7 &&
           "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
    // If the tail policy is undisturbed, we can't convert.
    if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
         1) == 0)
      return nullptr;
    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
    }
    // clang-format on

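    // The untied pseudo built below carries a merge/passthru-style operand
    // that is filled in with an undef use of the destination register; this is
    // only sound because the tail-agnostic policy was checked above.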
    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5))
              .add(MI.getOperand(6));
    break;
  }
  case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
    // If the tail policy is undisturbed, we can't convert.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 6);
    if ((MI.getOperand(5).getImm() & 1) == 0)
      return nullptr;

    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
    }
    // clang-format on

    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5));
    break;
  }
  }
  MIB.copyImplicitOps(MI);

  if (LV) {
    unsigned NumOps = MI.getNumOperands();
    for (unsigned I = 1; I < NumOps; ++I) {
      MachineOperand &Op = MI.getOperand(I);
      if (Op.isReg() && Op.isKill())
        LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
    }
  }

  if (LIS) {
    SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);

    if (MI.getOperand(0).isEarlyClobber()) {
      // Use operand 1 was tied to the early-clobber def operand 0, so its live
      // interval could have ended at an early-clobber slot. Now that they are
      // no longer tied, we need to update it to the normal register slot.
      LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
      LiveRange::Segment *S = LI.getSegmentContaining(Idx);
      if (S->end == Idx.getRegSlot(true))
        S->end = Idx.getRegSlot();
    }
  }

  return MIB;
}

#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_OPCODE_COMMON

void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator II,
                                           const DebugLoc &DL, Register DestReg,
                                           int64_t Amount,
                                           MachineInstr::MIFlag Flag) const {
  assert(Amount > 0 && "There is no need to get a VLEN scaled value.");
  assert(Amount % 8 == 0 &&
         "Stack size must be a multiple of one vector register's size.");

  MachineRegisterInfo &MRI = MF.getRegInfo();
  int64_t NumOfVReg = Amount / 8;

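  // Seed DestReg with VLENB (the vector register size in bytes) and multiply
  // it by NumOfVReg below, so DestReg ends up holding Amount / 8 vector
  // registers' worth of bytes.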
  BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag);
  assert(isInt<32>(NumOfVReg) &&
         "Expect the number of vector registers to fit in 32 bits.");
  if (llvm::has_single_bit<uint32_t>(NumOfVReg)) {
    uint32_t ShiftAmount = Log2_32(NumOfVReg);
    if (ShiftAmount == 0)
      return;
    BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtZba() &&
             ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) ||
              (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) ||
              (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) {
    // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
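    // For example, NumOfVReg == 12 == 3 * 4 becomes SLLI DestReg by 2 (times
    // 4) followed by SH1ADD DestReg, DestReg, DestReg (times 3).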
    unsigned Opc;
    uint32_t ShiftAmount;
    if (NumOfVReg % 9 == 0) {
      Opc = RISCV::SH3ADD;
      ShiftAmount = Log2_64(NumOfVReg / 9);
    } else if (NumOfVReg % 5 == 0) {
      Opc = RISCV::SH2ADD;
      ShiftAmount = Log2_64(NumOfVReg / 5);
    } else if (NumOfVReg % 3 == 0) {
      Opc = RISCV::SH1ADD;
      ShiftAmount = Log2_64(NumOfVReg / 3);
    } else {
      llvm_unreachable("Unexpected number of vregs");
    }
    if (ShiftAmount)
      BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
          .addReg(DestReg, RegState::Kill)
          .addImm(ShiftAmount)
          .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(Opc), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(DestReg)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) {
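    // Multiply by 2^k + 1 via shift-and-add, e.g. without Zba, 9 * VLENB is
    // computed as (VLENB << 3) + VLENB.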
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) {
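    // Multiply by 2^k - 1 via shift-and-subtract, e.g. 7 * VLENB is computed
    // as (VLENB << 3) - VLENB.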
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(NumOfVReg + 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) {
    Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    movImm(MBB, II, DL, N, NumOfVReg, Flag);
    BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(N, RegState::Kill)
        .setMIFlag(Flag);
  } else {
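    // No multiply instruction available: decompose NumOfVReg into its set bits
    // and sum shifted copies of VLENB, e.g. 11 * VLENB (0b1011) is computed as
    // (VLENB << 3) + (VLENB << 1) + VLENB.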
    Register Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    BuildMI(MBB, II, DL, get(RISCV::ADDI), Acc)
        .addReg(RISCV::X0)
        .addImm(0)
        .setMIFlag(Flag);
    uint32_t PrevShiftAmount = 0;
    for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) {
      if (NumOfVReg & (1LL << ShiftAmount)) {
        if (ShiftAmount)
          BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
              .addReg(DestReg, RegState::Kill)
              .addImm(ShiftAmount - PrevShiftAmount)
              .setMIFlag(Flag);
        if (NumOfVReg >> (ShiftAmount + 1))
          BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
              .addReg(Acc, RegState::Kill)
              .addReg(DestReg)
              .setMIFlag(Flag);
        PrevShiftAmount = ShiftAmount;
      }
    }
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(Acc)
        .setMIFlag(Flag);
  }
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
       {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
  return ArrayRef(TargetFlags);
}

// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
bool RISCV::isSEXT_W(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
}

// Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
bool RISCV::isZEXT_W(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
}

// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
bool RISCV::isZEXT_B(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
}

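// Returns true for the whole-register vector load/store instructions
// (vl<nf>re<eew>.v and vs<nf>r.v), which are matched by opcode here.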
static bool isRVVWholeLoadStore(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case RISCV::VS1R_V:
  case RISCV::VS2R_V:
  case RISCV::VS4R_V:
  case RISCV::VS8R_V:
  case RISCV::VL1RE8_V:
  case RISCV::VL2RE8_V:
  case RISCV::VL4RE8_V:
  case RISCV::VL8RE8_V:
  case RISCV::VL1RE16_V:
  case RISCV::VL2RE16_V:
  case RISCV::VL4RE16_V:
  case RISCV::VL8RE16_V:
  case RISCV::VL1RE32_V:
  case RISCV::VL2RE32_V:
  case RISCV::VL4RE32_V:
  case RISCV::VL8RE32_V:
  case RISCV::VL1RE64_V:
  case RISCV::VL2RE64_V:
  case RISCV::VL4RE64_V:
  case RISCV::VL8RE64_V:
    return true;
  }
}

bool RISCV::isRVVSpill(const MachineInstr &MI) {
  // RVV lacks any support for immediate addressing for stack addresses, so be
  // conservative.
  unsigned Opcode = MI.getOpcode();
  if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
      !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
    return false;
  return true;
}

std::optional<std::pair<unsigned, unsigned>>
RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
  case RISCV::PseudoVSPILL2_M1:
  case RISCV::PseudoVRELOAD2_M1:
    return std::make_pair(2u, 1u);
  case RISCV::PseudoVSPILL2_M2:
  case RISCV::PseudoVRELOAD2_M2:
    return std::make_pair(2u, 2u);
  case RISCV::PseudoVSPILL2_M4:
  case RISCV::PseudoVRELOAD2_M4:
    return std::make_pair(2u, 4u);
  case RISCV::PseudoVSPILL3_M1:
  case RISCV::PseudoVRELOAD3_M1:
    return std::make_pair(3u, 1u);
  case RISCV::PseudoVSPILL3_M2:
  case RISCV::PseudoVRELOAD3_M2:
    return std::make_pair(3u, 2u);
  case RISCV::PseudoVSPILL4_M1:
  case RISCV::PseudoVRELOAD4_M1:
    return std::make_pair(4u, 1u);
  case RISCV::PseudoVSPILL4_M2:
  case RISCV::PseudoVRELOAD4_M2:
    return std::make_pair(4u, 2u);
  case RISCV::PseudoVSPILL5_M1:
  case RISCV::PseudoVRELOAD5_M1:
    return std::make_pair(5u, 1u);
  case RISCV::PseudoVSPILL6_M1:
  case RISCV::PseudoVRELOAD6_M1:
    return std::make_pair(6u, 1u);
  case RISCV::PseudoVSPILL7_M1:
  case RISCV::PseudoVRELOAD7_M1:
    return std::make_pair(7u, 1u);
  case RISCV::PseudoVSPILL8_M1:
  case RISCV::PseudoVRELOAD8_M1:
    return std::make_pair(8u, 1u);
  }
}

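// Heuristic: fault-only-first loads (vle<eew>ff.v and friends) are the vector
// instructions whose pseudos have two explicit defs (the data register and the
// updated VL), which is what is checked here.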
bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
  return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) &&
         !MI.isInlineAsm();
}

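// Returns true if both instructions carry a rounding-mode (frm) operand and
// the two immediates are identical.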
bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
  int16_t MI1FrmOpIdx =
      RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
  int16_t MI2FrmOpIdx =
      RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
  if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
    return false;
  MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
  MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
  return FrmOp1.getImm() == FrmOp2.getImm();
}

std::optional<unsigned>
RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
  // TODO: Handle Zvbb instructions
  switch (Opcode) {
  default:
    return std::nullopt;

  // 11.6. Vector Single-Width Shift Instructions
  case RISCV::VSLL_VX:
  case RISCV::VSRL_VX:
  case RISCV::VSRA_VX:
  // 12.4. Vector Single-Width Scaling Shift Instructions
  case RISCV::VSSRL_VX:
  case RISCV::VSSRA_VX:
    // Only the low lg2(SEW) bits of the shift-amount value are used.
    return Log2SEW;

  // 11.7 Vector Narrowing Integer Right Shift Instructions
  case RISCV::VNSRL_WX:
  case RISCV::VNSRA_WX:
  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
  case RISCV::VNCLIPU_WX:
  case RISCV::VNCLIP_WX:
    // Only the low lg2(2*SEW) bits of the shift-amount value are used.
    return Log2SEW + 1;

  // 11.1. Vector Single-Width Integer Add and Subtract
  case RISCV::VADD_VX:
  case RISCV::VSUB_VX:
  case RISCV::VRSUB_VX:
  // 11.2. Vector Widening Integer Add/Subtract
  case RISCV::VWADDU_VX:
  case RISCV::VWSUBU_VX:
  case RISCV::VWADD_VX:
  case RISCV::VWSUB_VX:
  case RISCV::VWADDU_WX:
  case RISCV::VWSUBU_WX:
  case RISCV::VWADD_WX:
  case RISCV::VWSUB_WX:
  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
  case RISCV::VADC_VXM:
  case RISCV::VADC_VIM:
  case RISCV::VMADC_VXM:
  case RISCV::VMADC_VIM:
  case RISCV::VMADC_VX:
  case RISCV::VSBC_VXM:
  case RISCV::VMSBC_VXM:
  case RISCV::VMSBC_VX:
  // 11.5 Vector Bitwise Logical Instructions
  case RISCV::VAND_VX:
  case RISCV::VOR_VX:
  case RISCV::VXOR_VX:
  // 11.8. Vector Integer Compare Instructions
  case RISCV::VMSEQ_VX:
  case RISCV::VMSNE_VX:
  case RISCV::VMSLTU_VX:
  case RISCV::VMSLT_VX:
  case RISCV::VMSLEU_VX:
  case RISCV::VMSLE_VX:
  case RISCV::VMSGTU_VX:
  case RISCV::VMSGT_VX:
  // 11.9. Vector Integer Min/Max Instructions
  case RISCV::VMINU_VX:
  case RISCV::VMIN_VX:
  case RISCV::VMAXU_VX:
  case RISCV::VMAX_VX:
  // 11.10. Vector Single-Width Integer Multiply Instructions
  case RISCV::VMUL_VX:
  case RISCV::VMULH_VX:
  case RISCV::VMULHU_VX:
  case RISCV::VMULHSU_VX:
  // 11.11. Vector Integer Divide Instructions
  case RISCV::VDIVU_VX:
  case RISCV::VDIV_VX:
  case RISCV::VREMU_VX:
  case RISCV::VREM_VX:
  // 11.12. Vector Widening Integer Multiply Instructions
  case RISCV::VWMUL_VX:
  case RISCV::VWMULU_VX:
  case RISCV::VWMULSU_VX:
  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
  case RISCV::VMACC_VX:
  case RISCV::VNMSAC_VX:
  case RISCV::VMADD_VX:
  case RISCV::VNMSUB_VX:
  // 11.14. Vector Widening Integer Multiply-Add Instructions
  case RISCV::VWMACCU_VX:
  case RISCV::VWMACC_VX:
  case RISCV::VWMACCSU_VX:
  case RISCV::VWMACCUS_VX:
  // 11.15. Vector Integer Merge Instructions
  case RISCV::VMERGE_VXM:
  // 11.16. Vector Integer Move Instructions
  case RISCV::VMV_V_X:
  // 12.1. Vector Single-Width Saturating Add and Subtract
  case RISCV::VSADDU_VX:
  case RISCV::VSADD_VX:
  case RISCV::VSSUBU_VX:
  case RISCV::VSSUB_VX:
  // 12.2. Vector Single-Width Averaging Add and Subtract
  case RISCV::VAADDU_VX:
  case RISCV::VAADD_VX:
  case RISCV::VASUBU_VX:
  case RISCV::VASUB_VX:
  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
  case RISCV::VSMUL_VX:
  // 16.1. Integer Scalar Move Instructions
  case RISCV::VMV_S_X:
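    // All of the instructions above read the scalar operand at full SEW width.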
    return 1U << Log2SEW;
  }
}

unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}
