//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//
1338363Swpaul
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
2238363Swpaul
2338363Swpaul#define DEBUG_TYPE "llvm-mca-instrbuilder"
2438363Swpaul
2538363Swpaulnamespace llvm {
2638363Swpaulnamespace mca {
2738363Swpaul
2838363Swpaulchar RecycledInstErr::ID = 0;
2938363Swpaul
3038363SwpaulInstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
3138363Swpaul                           const llvm::MCInstrInfo &mcii,
3245629Swpaul                           const llvm::MCRegisterInfo &mri,
3338363Swpaul                           const llvm::MCInstrAnalysis *mcia,
3438363Swpaul                           const mca::InstrumentManager &im)
3538363Swpaul    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
3638363Swpaul      FirstReturnInst(true) {
3738363Swpaul  const MCSchedModel &SM = STI.getSchedModel();
3838363Swpaul  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
3938363Swpaul  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
4038363Swpaul}
4138363Swpaul
4238363Swpaulstatic void initializeUsedResources(InstrDesc &ID,
4338363Swpaul                                    const MCSchedClassDesc &SCDesc,
4438363Swpaul                                    const MCSubtargetInfo &STI,
4538363Swpaul                                    ArrayRef<uint64_t> ProcResourceMasks) {
4638363Swpaul  const MCSchedModel &SM = STI.getSchedModel();
4738363Swpaul
4838363Swpaul  // Populate resources consumed.
4938363Swpaul  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
5038363Swpaul  SmallVector<ResourcePlusCycles, 4> Worklist;
5138363Swpaul
5238363Swpaul  // Track cycles contributed by resources that are in a "Super" relationship.
5338363Swpaul  // This is required if we want to correctly match the behavior of method
5438363Swpaul  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
5538363Swpaul  // of "consumed" processor resources and resource cycles, the logic in
5638363Swpaul  // ExpandProcResource() doesn't update the number of resource cycles
5738363Swpaul  // contributed by a "Super" resource to a group.
5838363Swpaul  // We need to take this into account when we find that a processor resource is
5938363Swpaul  // part of a group, and it is also used as the "Super" of other resources.
6038363Swpaul  // This map stores the number of cycles contributed by sub-resources that are
6138363Swpaul  // part of a "Super" resource. The key value is the "Super" resource mask ID.
6238363Swpaul  DenseMap<uint64_t, unsigned> SuperResources;
6338363Swpaul
6438363Swpaul  unsigned NumProcResources = SM.getNumProcResourceKinds();
6538363Swpaul  APInt Buffers(NumProcResources, 0);
6638363Swpaul
6738363Swpaul  bool AllInOrderResources = true;
6838363Swpaul  bool AnyDispatchHazards = false;
6938363Swpaul  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
7038363Swpaul    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
7138363Swpaul    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
7238363Swpaul    if (!PRE->Cycles) {
7338363Swpaul#ifndef NDEBUG
7438363Swpaul      WithColor::warning()
7538363Swpaul          << "Ignoring invalid write of zero cycles on processor resource "
7638363Swpaul          << PR.Name << "\n";
7738363Swpaul      WithColor::note() << "found in scheduling class " << SCDesc.Name
7838363Swpaul                        << " (write index #" << I << ")\n";
7938363Swpaul#endif
8038363Swpaul      continue;
8138363Swpaul    }
8238363Swpaul
8338363Swpaul    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
8438363Swpaul    if (PR.BufferSize < 0) {
8538363Swpaul      AllInOrderResources = false;
8638363Swpaul    } else {
8738363Swpaul      Buffers.setBit(getResourceStateIndex(Mask));
8838363Swpaul      AnyDispatchHazards |= (PR.BufferSize == 0);
8938363Swpaul      AllInOrderResources &= (PR.BufferSize <= 1);
9038363Swpaul    }
9138363Swpaul
9238363Swpaul    CycleSegment RCy(0, PRE->Cycles, false);
9338363Swpaul    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
9438363Swpaul    if (PR.SuperIdx) {
9538363Swpaul      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
9638363Swpaul      SuperResources[Super] += PRE->Cycles;
9738363Swpaul    }
9838363Swpaul  }
9938363Swpaul
10038363Swpaul  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
10138363Swpaul
10238363Swpaul  // Sort elements by mask popcount, so that we prioritize resource units over
10338363Swpaul  // resource groups, and smaller groups over larger groups.
10438363Swpaul  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
10538363Swpaul    unsigned popcntA = llvm::popcount(A.first);
10638363Swpaul    unsigned popcntB = llvm::popcount(B.first);
10738363Swpaul    if (popcntA < popcntB)
10838363Swpaul      return true;
10938363Swpaul    if (popcntA > popcntB)
11038363Swpaul      return false;
11138363Swpaul    return A.first < B.first;
11238363Swpaul  });
11338363Swpaul
11438363Swpaul  uint64_t UsedResourceUnits = 0;
11538363Swpaul  uint64_t UsedResourceGroups = 0;
11638363Swpaul  uint64_t UnitsFromResourceGroups = 0;
11738363Swpaul
11838363Swpaul  // Remove cycles contributed by smaller resources, and check if there
11938363Swpaul  // are partially overlapping resource groups.
12038363Swpaul  ID.HasPartiallyOverlappingGroups = false;
12138363Swpaul
12238363Swpaul  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
12338363Swpaul    ResourcePlusCycles &A = Worklist[I];
12438363Swpaul    if (!A.second.size()) {
12538363Swpaul      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
12638363Swpaul      UsedResourceGroups |= PowerOf2Floor(A.first);
12738363Swpaul      continue;
12838363Swpaul    }
12938363Swpaul
13038363Swpaul    ID.Resources.emplace_back(A);
13138363Swpaul    uint64_t NormalizedMask = A.first;
13238363Swpaul
13338363Swpaul    if (llvm::popcount(A.first) == 1) {
13438363Swpaul      UsedResourceUnits |= A.first;
13538363Swpaul    } else {
13638363Swpaul      // Remove the leading 1 from the resource group mask.
13738363Swpaul      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
13838363Swpaul      if (UnitsFromResourceGroups & NormalizedMask)
13938363Swpaul        ID.HasPartiallyOverlappingGroups = true;
14038363Swpaul
14138363Swpaul      UnitsFromResourceGroups |= NormalizedMask;
14238363Swpaul      UsedResourceGroups |= (A.first ^ NormalizedMask);
14338363Swpaul    }
14438363Swpaul
14538363Swpaul    for (unsigned J = I + 1; J < E; ++J) {
14638363Swpaul      ResourcePlusCycles &B = Worklist[J];
14738363Swpaul      if ((NormalizedMask & B.first) == NormalizedMask) {
14838363Swpaul        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
14938363Swpaul        if (llvm::popcount(B.first) > 1)
15038363Swpaul          B.second.NumUnits++;
15138363Swpaul      }
15238363Swpaul    }
15338363Swpaul  }
15438363Swpaul
15538363Swpaul  // A SchedWrite may specify a number of cycles in which a resource group
15638363Swpaul  // is reserved. For example (on target x86; cpu Haswell):
15738363Swpaul  //
15838363Swpaul  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
15938363Swpaul  //    let ResourceCycles = [2, 2, 3];
16038363Swpaul  //  }
16138363Swpaul  //
16238363Swpaul  // This means:
16338363Swpaul  // Resource units HWPort0 and HWPort1 are both used for 2cy.
16438363Swpaul  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
16538363Swpaul  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
16638363Swpaul  // will not be usable for 2 entire cycles from instruction issue.
16738363Swpaul  //
16838363Swpaul  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
16938363Swpaul  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
17038363Swpaul  // extra delay on top of the 2 cycles latency.
17138363Swpaul  // During those extra cycles, HWPort01 is not usable by other instructions.
17238363Swpaul  for (ResourcePlusCycles &RPC : ID.Resources) {
17338363Swpaul    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
17438526Swpaul      // Remove the leading 1 from the resource group mask.
17538526Swpaul      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
17638526Swpaul      uint64_t MaxResourceUnits = llvm::popcount(Mask);
17738526Swpaul      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
17838526Swpaul        RPC.second.setReserved();
17938526Swpaul        RPC.second.NumUnits = MaxResourceUnits;
18038526Swpaul      }
18138363Swpaul    }
18238363Swpaul  }
18338363Swpaul
18438363Swpaul  // Identify extra buffers that are consumed through super resources.
18538363Swpaul  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
18638363Swpaul    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
18738363Swpaul      const MCProcResourceDesc &PR = *SM.getProcResource(I);
18838363Swpaul      if (PR.BufferSize == -1)
18938363Swpaul        continue;
19038363Swpaul
19138363Swpaul      uint64_t Mask = ProcResourceMasks[I];
19238363Swpaul      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
19338363Swpaul        Buffers.setBit(getResourceStateIndex(Mask));
19438363Swpaul    }
19538363Swpaul  }
19638363Swpaul
19738363Swpaul  ID.UsedBuffers = Buffers.getZExtValue();
19838363Swpaul  ID.UsedProcResUnits = UsedResourceUnits;
19938363Swpaul  ID.UsedProcResGroups = UsedResourceGroups;
20038363Swpaul
20138363Swpaul  LLVM_DEBUG({
20238363Swpaul    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
20338363Swpaul      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
20438363Swpaul             << "Reserved=" << R.second.isReserved() << ", "
20538363Swpaul             << "#Units=" << R.second.NumUnits << ", "
20638363Swpaul             << "cy=" << R.second.size() << '\n';
20738363Swpaul    uint64_t BufferIDs = ID.UsedBuffers;
20838363Swpaul    while (BufferIDs) {
20938363Swpaul      uint64_t Current = BufferIDs & (-BufferIDs);
21038363Swpaul      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
21138363Swpaul      BufferIDs ^= Current;
21238363Swpaul    }
21338363Swpaul    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
21438363Swpaul    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
21538363Swpaul           << '\n';
21638363Swpaul    dbgs() << "\t\tHasPartiallyOverlappingGroups="
21738363Swpaul           << ID.HasPartiallyOverlappingGroups << '\n';
21838363Swpaul  });
21938363Swpaul}
22038363Swpaul
22138363Swpaulstatic void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
22238363Swpaul                              const MCSchedClassDesc &SCDesc,
22338363Swpaul                              const MCSubtargetInfo &STI) {
22438363Swpaul  if (MCDesc.isCall()) {
22538363Swpaul    // We cannot estimate how long this call will take.
22638363Swpaul    // Artificially set an arbitrarily high latency (100cy).
22738363Swpaul    ID.MaxLatency = 100U;
22838363Swpaul    return;
22938363Swpaul  }
23038363Swpaul
23138363Swpaul  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
23238363Swpaul  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
23338363Swpaul  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
23438363Swpaul}
23538363Swpaul
23638363Swpaulstatic Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
23738363Swpaul  // Count register definitions, and skip non register operands in the process.
23838363Swpaul  unsigned I, E;
23938363Swpaul  unsigned NumExplicitDefs = MCDesc.getNumDefs();
24038363Swpaul  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
24138363Swpaul    const MCOperand &Op = MCI.getOperand(I);
24238363Swpaul    if (Op.isReg())
24338363Swpaul      --NumExplicitDefs;
24438363Swpaul  }
24538363Swpaul
24638363Swpaul  if (NumExplicitDefs) {
24738363Swpaul    return make_error<InstructionError<MCInst>>(
24838363Swpaul        "Expected more register operand definitions.", MCI);
24938363Swpaul  }
25038363Swpaul
25138363Swpaul  if (MCDesc.hasOptionalDef()) {
25238363Swpaul    // Always assume that the optional definition is the last operand.
25338363Swpaul    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
25438363Swpaul    if (I == MCI.getNumOperands() || !Op.isReg()) {
25538363Swpaul      std::string Message =
25638363Swpaul          "expected a register operand for an optional definition. Instruction "
25738363Swpaul          "has not been correctly analyzed.";
25838363Swpaul      return make_error<InstructionError<MCInst>>(Message, MCI);
25938363Swpaul    }
26038363Swpaul  }
26138363Swpaul
26238363Swpaul  return ErrorSuccess();
26338363Swpaul}
26438363Swpaul
26538363Swpaulvoid InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
26638363Swpaul                                  unsigned SchedClassID) {
26738363Swpaul  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
26838363Swpaul  const MCSchedModel &SM = STI.getSchedModel();
26938363Swpaul  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
27038363Swpaul
27138363Swpaul  // Assumptions made by this algorithm:
27238363Swpaul  //  1. The number of explicit and implicit register definitions in a MCInst
27338363Swpaul  //     matches the number of explicit and implicit definitions according to
27438363Swpaul  //     the opcode descriptor (MCInstrDesc).
27538363Swpaul  //  2. Uses start at index #(MCDesc.getNumDefs()).
27638363Swpaul  //  3. There can only be a single optional register definition, an it is
27738363Swpaul  //     either the last operand of the sequence (excluding extra operands
27838363Swpaul  //     contributed by variadic opcodes) or one of the explicit register
27938363Swpaul  //     definitions. The latter occurs for some Thumb1 instructions.
28038363Swpaul  //
28138363Swpaul  // These assumptions work quite well for most out-of-order in-tree targets
28238363Swpaul  // like x86. This is mainly because the vast majority of instructions is
28338363Swpaul  // expanded to MCInst using a straightforward lowering logic that preserves
28438363Swpaul  // the ordering of the operands.
28538363Swpaul  //
28638363Swpaul  // About assumption 1.
28738363Swpaul  // The algorithm allows non-register operands between register operand
28838363Swpaul  // definitions. This helps to handle some special ARM instructions with
28938363Swpaul  // implicit operand increment (-mtriple=armv7):
29038363Swpaul  //
29138363Swpaul  // vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
29238363Swpaul  //                             @  <MCOperand Reg:59>
29338363Swpaul  //                             @  <MCOperand Imm:0>     (!!)
29438363Swpaul  //                             @  <MCOperand Reg:67>
29538363Swpaul  //                             @  <MCOperand Imm:0>
29638363Swpaul  //                             @  <MCOperand Imm:14>
29738363Swpaul  //                             @  <MCOperand Reg:0>>
29838363Swpaul  //
29938363Swpaul  // MCDesc reports:
30038363Swpaul  //  6 explicit operands.
30138363Swpaul  //  1 optional definition
30238363Swpaul  //  2 explicit definitions (!!)
30338363Swpaul  //
30438363Swpaul  // The presence of an 'Imm' operand between the two register definitions
30538363Swpaul  // breaks the assumption that "register definitions are always at the
30638363Swpaul  // beginning of the operand sequence".
30738363Swpaul  //
30838363Swpaul  // To workaround this issue, this algorithm ignores (i.e. skips) any
30938363Swpaul  // non-register operands between register definitions.  The optional
31038363Swpaul  // definition is still at index #(NumOperands-1).
31138363Swpaul  //
31238363Swpaul  // According to assumption 2. register reads start at #(NumExplicitDefs-1).
31338363Swpaul  // That means, register R1 from the example is both read and written.
31438363Swpaul  unsigned NumExplicitDefs = MCDesc.getNumDefs();
31538363Swpaul  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
31638363Swpaul  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
31738363Swpaul  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
31838363Swpaul  if (MCDesc.hasOptionalDef())
31938363Swpaul    TotalDefs++;
32038363Swpaul
32138363Swpaul  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
32238363Swpaul  ID.Writes.resize(TotalDefs + NumVariadicOps);
32338363Swpaul  // Iterate over the operands list, and skip non-register operands.
32438363Swpaul  // The first NumExplicitDefs register operands are expected to be register
32538363Swpaul  // definitions.
32638363Swpaul  unsigned CurrentDef = 0;
32738363Swpaul  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
32838363Swpaul  unsigned i = 0;
32938363Swpaul  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
33038363Swpaul    const MCOperand &Op = MCI.getOperand(i);
33138363Swpaul    if (!Op.isReg())
33238363Swpaul      continue;
33338363Swpaul
33438363Swpaul    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
33538363Swpaul      OptionalDefIdx = CurrentDef++;
33638363Swpaul      continue;
33738363Swpaul    }
33838363Swpaul
33938363Swpaul    WriteDescriptor &Write = ID.Writes[CurrentDef];
34038363Swpaul    Write.OpIndex = i;
34138363Swpaul    if (CurrentDef < NumWriteLatencyEntries) {
34238363Swpaul      const MCWriteLatencyEntry &WLE =
34338363Swpaul          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
34438363Swpaul      // Conservatively default to MaxLatency.
34538363Swpaul      Write.Latency =
34638363Swpaul          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
34738363Swpaul      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
34838363Swpaul    } else {
34938363Swpaul      // Assign a default latency for this write.
35038363Swpaul      Write.Latency = ID.MaxLatency;
35138363Swpaul      Write.SClassOrWriteResourceID = 0;
35238363Swpaul    }
35338363Swpaul    Write.IsOptionalDef = false;
35438363Swpaul    LLVM_DEBUG({
35538363Swpaul      dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
35638363Swpaul             << ", Latency=" << Write.Latency
35738363Swpaul             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
35838363Swpaul    });
35938363Swpaul    CurrentDef++;
36038363Swpaul  }
36138363Swpaul
36238363Swpaul  assert(CurrentDef == NumExplicitDefs &&
36338363Swpaul         "Expected more register operand definitions.");
36440588Swpaul  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
36538363Swpaul    unsigned Index = NumExplicitDefs + CurrentDef;
36638363Swpaul    WriteDescriptor &Write = ID.Writes[Index];
36738363Swpaul    Write.OpIndex = ~CurrentDef;
36838363Swpaul    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
36938363Swpaul    if (Index < NumWriteLatencyEntries) {
37038363Swpaul      const MCWriteLatencyEntry &WLE =
37138363Swpaul          *STI.getWriteLatencyEntry(&SCDesc, Index);
37238363Swpaul      // Conservatively default to MaxLatency.
37338363Swpaul      Write.Latency =
37438363Swpaul          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
37538363Swpaul      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
37638363Swpaul    } else {
37738363Swpaul      // Assign a default latency for this write.
37838363Swpaul      Write.Latency = ID.MaxLatency;
37938363Swpaul      Write.SClassOrWriteResourceID = 0;
38038363Swpaul    }
38138363Swpaul
38238363Swpaul    Write.IsOptionalDef = false;
38338363Swpaul    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
38438363Swpaul    LLVM_DEBUG({
38538363Swpaul      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
38638363Swpaul             << ", PhysReg=" << MRI.getName(Write.RegisterID)
38738363Swpaul             << ", Latency=" << Write.Latency
38838363Swpaul             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
38938363Swpaul    });
39038363Swpaul  }
39138363Swpaul
39238363Swpaul  if (MCDesc.hasOptionalDef()) {
39338363Swpaul    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
39438363Swpaul    Write.OpIndex = OptionalDefIdx;
39538363Swpaul    // Assign a default latency for this write.
39638363Swpaul    Write.Latency = ID.MaxLatency;
39738363Swpaul    Write.SClassOrWriteResourceID = 0;
39838363Swpaul    Write.IsOptionalDef = true;
39938363Swpaul    LLVM_DEBUG({
40038363Swpaul      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
40138363Swpaul             << ", Latency=" << Write.Latency
40238363Swpaul             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
40338363Swpaul    });
40438363Swpaul  }
40538363Swpaul
40638363Swpaul  if (!NumVariadicOps)
40738363Swpaul    return;
40838363Swpaul
40938363Swpaul  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
41038363Swpaul  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
41138363Swpaul  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
41238363Swpaul       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
41338363Swpaul    const MCOperand &Op = MCI.getOperand(OpIndex);
41438363Swpaul    if (!Op.isReg())
41538363Swpaul      continue;
41638363Swpaul
41738363Swpaul    WriteDescriptor &Write = ID.Writes[CurrentDef];
41838363Swpaul    Write.OpIndex = OpIndex;
41938363Swpaul    // Assign a default latency for this write.
42038363Swpaul    Write.Latency = ID.MaxLatency;
42138363Swpaul    Write.SClassOrWriteResourceID = 0;
42238363Swpaul    Write.IsOptionalDef = false;
42338363Swpaul    ++CurrentDef;
42438363Swpaul    LLVM_DEBUG({
42538363Swpaul      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
42638363Swpaul             << ", Latency=" << Write.Latency
42738363Swpaul             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
42838363Swpaul    });
42938363Swpaul  }
43038363Swpaul
43138363Swpaul  ID.Writes.resize(CurrentDef);
43238363Swpaul}
43338363Swpaul
43438363Swpaulvoid InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
43538363Swpaul                                 unsigned SchedClassID) {
43638363Swpaul  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
43738363Swpaul  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
43838363Swpaul  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
43938363Swpaul  // Remove the optional definition.
44038469Swpaul  if (MCDesc.hasOptionalDef())
44139647Swpaul    --NumExplicitUses;
44238363Swpaul  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
44338363Swpaul  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
44438363Swpaul  ID.Reads.resize(TotalUses);
44538363Swpaul  unsigned CurrentUse = 0;
44638363Swpaul  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
44738363Swpaul       ++I, ++OpIndex) {
44838363Swpaul    const MCOperand &Op = MCI.getOperand(OpIndex);
44938363Swpaul    if (!Op.isReg())
45038363Swpaul      continue;
45138363Swpaul
45238363Swpaul    ReadDescriptor &Read = ID.Reads[CurrentUse];
45338363Swpaul    Read.OpIndex = OpIndex;
45438363Swpaul    Read.UseIndex = I;
45538363Swpaul    Read.SchedClassID = SchedClassID;
45638363Swpaul    ++CurrentUse;
45738363Swpaul    LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
45838363Swpaul                      << ", UseIndex=" << Read.UseIndex << '\n');
45938363Swpaul  }
46038363Swpaul
46138363Swpaul  // For the purpose of ReadAdvance, implicit uses come directly after explicit
46238363Swpaul  // uses. The "UseIndex" must be updated according to that implicit layout.
46338363Swpaul  for (unsigned I = 0; I < NumImplicitUses; ++I) {
46438363Swpaul    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
46538363Swpaul    Read.OpIndex = ~I;
46638363Swpaul    Read.UseIndex = NumExplicitUses + I;
46738363Swpaul    Read.RegisterID = MCDesc.implicit_uses()[I];
46838363Swpaul    Read.SchedClassID = SchedClassID;
46938363Swpaul    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
47038363Swpaul                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
47138363Swpaul                      << MRI.getName(Read.RegisterID) << '\n');
47238363Swpaul  }
47338363Swpaul
47438363Swpaul  CurrentUse += NumImplicitUses;
47538363Swpaul
47638363Swpaul  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
47738363Swpaul  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
47838363Swpaul       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
47938363Swpaul    const MCOperand &Op = MCI.getOperand(OpIndex);
48038363Swpaul    if (!Op.isReg())
48138363Swpaul      continue;
48238363Swpaul
48338363Swpaul    ReadDescriptor &Read = ID.Reads[CurrentUse];
48438363Swpaul    Read.OpIndex = OpIndex;
48538363Swpaul    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
48638363Swpaul    Read.SchedClassID = SchedClassID;
48738363Swpaul    ++CurrentUse;
48838363Swpaul    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
48938363Swpaul                      << ", UseIndex=" << Read.UseIndex << '\n');
49038363Swpaul  }
49138363Swpaul
49238363Swpaul  ID.Reads.resize(CurrentUse);
49338363Swpaul}
49438363Swpaul
49538363SwpaulError InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
49638363Swpaul                                    const MCInst &MCI) const {
49738363Swpaul  if (ID.NumMicroOps != 0)
49838363Swpaul    return ErrorSuccess();
49938363Swpaul
50038363Swpaul  bool UsesBuffers = ID.UsedBuffers;
50138363Swpaul  bool UsesResources = !ID.Resources.empty();
50238363Swpaul  if (!UsesBuffers && !UsesResources)
50338363Swpaul    return ErrorSuccess();
50438363Swpaul
50538363Swpaul  // FIXME: see PR44797. We should revisit these checks and possibly move them
50638363Swpaul  // in CodeGenSchedule.cpp.
50738363Swpaul  StringRef Message = "found an inconsistent instruction that decodes to zero "
50838363Swpaul                      "opcodes and that consumes scheduler resources.";
50938363Swpaul  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
51038363Swpaul}
51138363Swpaul
51238363SwpaulExpected<const InstrDesc &>
51338363SwpaulInstrBuilder::createInstrDescImpl(const MCInst &MCI,
51438363Swpaul                                  const SmallVector<SharedInstrument> &IVec) {
51538363Swpaul  assert(STI.getSchedModel().hasInstrSchedModel() &&
51638363Swpaul         "Itineraries are not yet supported!");
51738363Swpaul
51838363Swpaul  // Obtain the instruction descriptor from the opcode.
51938363Swpaul  unsigned short Opcode = MCI.getOpcode();
52038363Swpaul  const MCInstrDesc &MCDesc = MCII.get(Opcode);
52138363Swpaul  const MCSchedModel &SM = STI.getSchedModel();
52238363Swpaul
52338363Swpaul  // Then obtain the scheduling class information from the instruction.
52438363Swpaul  // Allow InstrumentManager to override and use a different SchedClassID
52538363Swpaul  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
52638363Swpaul  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
52738363Swpaul
52838363Swpaul  // Try to solve variant scheduling classes.
52938363Swpaul  if (IsVariant) {
53038363Swpaul    unsigned CPUID = SM.getProcessorID();
53138363Swpaul    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
53238363Swpaul      SchedClassID =
53338363Swpaul          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
53438363Swpaul
53538363Swpaul    if (!SchedClassID) {
53638363Swpaul      return make_error<InstructionError<MCInst>>(
53738363Swpaul          "unable to resolve scheduling class for write variant.", MCI);
53838363Swpaul    }
53938363Swpaul  }
54038363Swpaul
54138363Swpaul  // Check if this instruction is supported. Otherwise, report an error.
54245062Swpaul  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
54345062Swpaul  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
54438363Swpaul    return make_error<InstructionError<MCInst>>(
54538363Swpaul        "found an unsupported instruction in the input assembly sequence.",
54638363Swpaul        MCI);
54738363Swpaul  }
54838363Swpaul
54938363Swpaul  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
55038363Swpaul  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
55138363Swpaul  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');
55238363Swpaul
55338363Swpaul  // Create a new empty descriptor.
55438363Swpaul  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
55538363Swpaul  ID->NumMicroOps = SCDesc.NumMicroOps;
55638363Swpaul  ID->SchedClassID = SchedClassID;
55738363Swpaul
55838363Swpaul  if (MCDesc.isCall() && FirstCallInst) {
55938363Swpaul    // We don't correctly model calls.
56038363Swpaul    WithColor::warning() << "found a call in the input assembly sequence.\n";
56138363Swpaul    WithColor::note() << "call instructions are not correctly modeled. "
56238363Swpaul                      << "Assume a latency of 100cy.\n";
56338363Swpaul    FirstCallInst = false;
56438363Swpaul  }
56538363Swpaul
56638363Swpaul  if (MCDesc.isReturn() && FirstReturnInst) {
56738363Swpaul    WithColor::warning() << "found a return instruction in the input"
56838363Swpaul                         << " assembly sequence.\n";
56938363Swpaul    WithColor::note() << "program counter updates are ignored.\n";
57038363Swpaul    FirstReturnInst = false;
57138363Swpaul  }
57238363Swpaul
57338363Swpaul  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
57438363Swpaul  computeMaxLatency(*ID, MCDesc, SCDesc, STI);
57538363Swpaul
57638363Swpaul  if (Error Err = verifyOperands(MCDesc, MCI))
57738363Swpaul    return std::move(Err);
57838363Swpaul
57938363Swpaul  populateWrites(*ID, MCI, SchedClassID);
58038363Swpaul  populateReads(*ID, MCI, SchedClassID);
58138363Swpaul
58238363Swpaul  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
58338363Swpaul  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
58438363Swpaul
58538363Swpaul  // Validation check on the instruction descriptor.
58638363Swpaul  if (Error Err = verifyInstrDesc(*ID, MCI))
58738363Swpaul    return std::move(Err);
58845062Swpaul
58938363Swpaul  // Now add the new descriptor.
59045062Swpaul  bool IsVariadic = MCDesc.isVariadic();
59138363Swpaul  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
59245062Swpaul    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
59338363Swpaul    Descriptors[DKey] = std::move(ID);
59445062Swpaul    return *Descriptors[DKey];
59545062Swpaul  }
59645062Swpaul
59745062Swpaul  auto VDKey = std::make_pair(&MCI, SchedClassID);
59845062Swpaul  VariantDescriptors[VDKey] = std::move(ID);
59945062Swpaul  return *VariantDescriptors[VDKey];
60038363Swpaul}
60138363Swpaul
60238363SwpaulExpected<const InstrDesc &>
60338363SwpaulInstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
60438363Swpaul                                   const SmallVector<SharedInstrument> &IVec) {
60538363Swpaul  // Cache lookup using SchedClassID from Instrumentation
60638363Swpaul  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
60738363Swpaul
60838363Swpaul  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
60938363Swpaul  if (Descriptors.find_as(DKey) != Descriptors.end())
61038363Swpaul    return *Descriptors[DKey];
61138363Swpaul
61238363Swpaul  unsigned CPUID = STI.getSchedModel().getProcessorID();
61338363Swpaul  SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
61438363Swpaul  auto VDKey = std::make_pair(&MCI, SchedClassID);
61538363Swpaul  if (VariantDescriptors.find(VDKey) != VariantDescriptors.end())
61638363Swpaul    return *VariantDescriptors[VDKey];
61738363Swpaul
61838363Swpaul  return createInstrDescImpl(MCI, IVec);
61938363Swpaul}
62038363Swpaul
62138363SwpaulSTATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc");
62238363Swpaul
62345601SwpaulExpected<std::unique_ptr<Instruction>>
62440097SwpaulInstrBuilder::createInstruction(const MCInst &MCI,
62538810Swpaul                                const SmallVector<SharedInstrument> &IVec) {
62645629Swpaul  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
62738363Swpaul  if (!DescOrErr)
62838363Swpaul    return DescOrErr.takeError();
62938363Swpaul  const InstrDesc &D = *DescOrErr;
63038363Swpaul  Instruction *NewIS = nullptr;
63138363Swpaul  std::unique_ptr<Instruction> CreatedIS;
63238363Swpaul  bool IsInstRecycled = false;
63338363Swpaul
63438363Swpaul  if (!D.IsRecyclable)
63538363Swpaul    ++NumVariantInst;
63638363Swpaul
63738363Swpaul  if (D.IsRecyclable && InstRecycleCB) {
63838363Swpaul    if (auto *I = InstRecycleCB(D)) {
63938363Swpaul      NewIS = I;
64038363Swpaul      NewIS->reset();
64138363Swpaul      IsInstRecycled = true;
64238363Swpaul    }
64338363Swpaul  }
64438363Swpaul  if (!IsInstRecycled) {
64538363Swpaul    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
64638363Swpaul    NewIS = CreatedIS.get();
64738363Swpaul  }
64838363Swpaul
64938363Swpaul  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
65038363Swpaul  const MCSchedClassDesc &SCDesc =
65138363Swpaul      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);
65238363Swpaul
65338363Swpaul  NewIS->setMayLoad(MCDesc.mayLoad());
65438363Swpaul  NewIS->setMayStore(MCDesc.mayStore());
65538363Swpaul  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
65638363Swpaul  NewIS->setBeginGroup(SCDesc.BeginGroup);
65738363Swpaul  NewIS->setEndGroup(SCDesc.EndGroup);
65838363Swpaul  NewIS->setRetireOOO(SCDesc.RetireOOO);
65938363Swpaul
66038363Swpaul  // Check if this is a dependency breaking instruction.
66138363Swpaul  APInt Mask;
66238363Swpaul
66338363Swpaul  bool IsZeroIdiom = false;
66438363Swpaul  bool IsDepBreaking = false;
66538363Swpaul  if (MCIA) {
66638363Swpaul    unsigned ProcID = STI.getSchedModel().getProcessorID();
66738363Swpaul    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
66838363Swpaul    IsDepBreaking =
66938363Swpaul        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
67038363Swpaul    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
67138363Swpaul      NewIS->setOptimizableMove();
67238363Swpaul  }
67338363Swpaul
67438363Swpaul  // Initialize Reads first.
67538363Swpaul  MCPhysReg RegID = 0;
67638363Swpaul  size_t Idx = 0U;
67738363Swpaul  for (const ReadDescriptor &RD : D.Reads) {
67838363Swpaul    if (!RD.isImplicitRead()) {
67938363Swpaul      // explicit read.
68038363Swpaul      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
68138363Swpaul      // Skip non-register operands.
68238363Swpaul      if (!Op.isReg())
68338363Swpaul        continue;
68438363Swpaul      RegID = Op.getReg();
68538363Swpaul    } else {
68638363Swpaul      // Implicit read.
68738363Swpaul      RegID = RD.RegisterID;
68838363Swpaul    }
68938363Swpaul
69038363Swpaul    // Skip invalid register operands.
69138363Swpaul    if (!RegID)
69238363Swpaul      continue;
69338363Swpaul
69438363Swpaul    // Okay, this is a register operand. Create a ReadState for it.
69538363Swpaul    ReadState *RS = nullptr;
69638363Swpaul    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
69738363Swpaul      NewIS->getUses()[Idx] = ReadState(RD, RegID);
69838363Swpaul      RS = &NewIS->getUses()[Idx++];
69938363Swpaul    } else {
70038363Swpaul      NewIS->getUses().emplace_back(RD, RegID);
70138363Swpaul      RS = &NewIS->getUses().back();
70238363Swpaul      ++Idx;
70338363Swpaul    }
70438363Swpaul
70538363Swpaul    if (IsDepBreaking) {
70638363Swpaul      // A mask of all zeroes means: explicit input operands are not
70738363Swpaul      // independent.
70838363Swpaul      if (Mask.isZero()) {
70938363Swpaul        if (!RD.isImplicitRead())
71038363Swpaul          RS->setIndependentFromDef();
71138363Swpaul      } else {
71238363Swpaul        // Check if this register operand is independent according to `Mask`.
71338363Swpaul        // Note that Mask may not have enough bits to describe all explicit and
71438363Swpaul        // implicit input operands. If this register operand doesn't have a
71538363Swpaul        // corresponding bit in Mask, then conservatively assume that it is
71638363Swpaul        // dependent.
71738363Swpaul        if (Mask.getBitWidth() > RD.UseIndex) {
71838363Swpaul          // Okay. This map describe register use `RD.UseIndex`.
71938363Swpaul          if (Mask[RD.UseIndex])
72038363Swpaul            RS->setIndependentFromDef();
72138363Swpaul        }
72238363Swpaul      }
72338363Swpaul    }
72438363Swpaul  }
72538363Swpaul  if (IsInstRecycled && Idx < NewIS->getUses().size())
72638363Swpaul    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);
72738363Swpaul
72838363Swpaul  // Early exit if there are no writes.
72938363Swpaul  if (D.Writes.empty()) {
73038363Swpaul    if (IsInstRecycled)
73138363Swpaul      return llvm::make_error<RecycledInstErr>(NewIS);
73238363Swpaul    else
73338363Swpaul      return std::move(CreatedIS);
73438363Swpaul  }
73538363Swpaul
73638363Swpaul  // Track register writes that implicitly clear the upper portion of the
73738363Swpaul  // underlying super-registers using an APInt.
73838363Swpaul  APInt WriteMask(D.Writes.size(), 0);
73938363Swpaul
74038363Swpaul  // Now query the MCInstrAnalysis object to obtain information about which
74138363Swpaul  // register writes implicitly clear the upper portion of a super-register.
74238363Swpaul  if (MCIA)
74338363Swpaul    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
74438363Swpaul
74538363Swpaul  // Initialize writes.
74638363Swpaul  unsigned WriteIndex = 0;
74738363Swpaul  Idx = 0U;
74838363Swpaul  for (const WriteDescriptor &WD : D.Writes) {
74938363Swpaul    RegID = WD.isImplicitWrite() ? WD.RegisterID
75038363Swpaul                                 : MCI.getOperand(WD.OpIndex).getReg();
75138363Swpaul    // Check if this is a optional definition that references NoReg.
75238363Swpaul    if (WD.IsOptionalDef && !RegID) {
75338363Swpaul      ++WriteIndex;
75438363Swpaul      continue;
75538363Swpaul    }
75638363Swpaul
75738363Swpaul    assert(RegID && "Expected a valid register ID!");
75838363Swpaul    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
75938363Swpaul      NewIS->getDefs()[Idx++] =
76038363Swpaul          WriteState(WD, RegID,
76138363Swpaul                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
76238363Swpaul                     /* WritesZero */ IsZeroIdiom);
76338363Swpaul    } else {
76438363Swpaul      NewIS->getDefs().emplace_back(WD, RegID,
76538363Swpaul                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
76638363Swpaul                                    /* WritesZero */ IsZeroIdiom);
76738363Swpaul      ++Idx;
76838363Swpaul    }
76938363Swpaul    ++WriteIndex;
77038363Swpaul  }
77138363Swpaul  if (IsInstRecycled && Idx < NewIS->getDefs().size())
77238363Swpaul    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);
77338363Swpaul
77438363Swpaul  if (IsInstRecycled)
77538363Swpaul    return llvm::make_error<RecycledInstErr>(NewIS);
77638363Swpaul  else
77738363Swpaul    return std::move(CreatedIS);
77838363Swpaul}
77938363Swpaul} // namespace mca
78038363Swpaul} // namespace llvm
78138363Swpaul