138363Swpaul//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// 238363Swpaul// 338363Swpaul// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 438363Swpaul// See https://llvm.org/LICENSE.txt for license information. 538363Swpaul// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 638363Swpaul// 738363Swpaul//===----------------------------------------------------------------------===// 838363Swpaul/// \file 938363Swpaul/// 1038363Swpaul/// This file implements the InstrBuilder interface. 1138363Swpaul/// 1238363Swpaul//===----------------------------------------------------------------------===// 1338363Swpaul 1438363Swpaul#include "llvm/MCA/InstrBuilder.h" 1538363Swpaul#include "llvm/ADT/APInt.h" 1638363Swpaul#include "llvm/ADT/DenseMap.h" 1738363Swpaul#include "llvm/ADT/Statistic.h" 1838363Swpaul#include "llvm/MC/MCInst.h" 1938363Swpaul#include "llvm/Support/Debug.h" 2038363Swpaul#include "llvm/Support/WithColor.h" 2138363Swpaul#include "llvm/Support/raw_ostream.h" 2238363Swpaul 2338363Swpaul#define DEBUG_TYPE "llvm-mca-instrbuilder" 2438363Swpaul 2538363Swpaulnamespace llvm { 2638363Swpaulnamespace mca { 2738363Swpaul 2838363Swpaulchar RecycledInstErr::ID = 0; 2938363Swpaul 3038363SwpaulInstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, 3138363Swpaul const llvm::MCInstrInfo &mcii, 3245629Swpaul const llvm::MCRegisterInfo &mri, 3338363Swpaul const llvm::MCInstrAnalysis *mcia, 3438363Swpaul const mca::InstrumentManager &im) 3538363Swpaul : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true), 3638363Swpaul FirstReturnInst(true) { 3738363Swpaul const MCSchedModel &SM = STI.getSchedModel(); 3838363Swpaul ProcResourceMasks.resize(SM.getNumProcResourceKinds()); 3938363Swpaul computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); 4038363Swpaul} 4138363Swpaul 4238363Swpaulstatic void initializeUsedResources(InstrDesc &ID, 4338363Swpaul const MCSchedClassDesc &SCDesc, 
4438363Swpaul const MCSubtargetInfo &STI, 4538363Swpaul ArrayRef<uint64_t> ProcResourceMasks) { 4638363Swpaul const MCSchedModel &SM = STI.getSchedModel(); 4738363Swpaul 4838363Swpaul // Populate resources consumed. 4938363Swpaul using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>; 5038363Swpaul SmallVector<ResourcePlusCycles, 4> Worklist; 5138363Swpaul 5238363Swpaul // Track cycles contributed by resources that are in a "Super" relationship. 5338363Swpaul // This is required if we want to correctly match the behavior of method 5438363Swpaul // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set 5538363Swpaul // of "consumed" processor resources and resource cycles, the logic in 5638363Swpaul // ExpandProcResource() doesn't update the number of resource cycles 5738363Swpaul // contributed by a "Super" resource to a group. 5838363Swpaul // We need to take this into account when we find that a processor resource is 5938363Swpaul // part of a group, and it is also used as the "Super" of other resources. 6038363Swpaul // This map stores the number of cycles contributed by sub-resources that are 6138363Swpaul // part of a "Super" resource. The key value is the "Super" resource mask ID. 
6238363Swpaul DenseMap<uint64_t, unsigned> SuperResources; 6338363Swpaul 6438363Swpaul unsigned NumProcResources = SM.getNumProcResourceKinds(); 6538363Swpaul APInt Buffers(NumProcResources, 0); 6638363Swpaul 6738363Swpaul bool AllInOrderResources = true; 6838363Swpaul bool AnyDispatchHazards = false; 6938363Swpaul for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { 7038363Swpaul const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; 7138363Swpaul const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); 7238363Swpaul if (!PRE->Cycles) { 7338363Swpaul#ifndef NDEBUG 7438363Swpaul WithColor::warning() 7538363Swpaul << "Ignoring invalid write of zero cycles on processor resource " 7638363Swpaul << PR.Name << "\n"; 7738363Swpaul WithColor::note() << "found in scheduling class " << SCDesc.Name 7838363Swpaul << " (write index #" << I << ")\n"; 7938363Swpaul#endif 8038363Swpaul continue; 8138363Swpaul } 8238363Swpaul 8338363Swpaul uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; 8438363Swpaul if (PR.BufferSize < 0) { 8538363Swpaul AllInOrderResources = false; 8638363Swpaul } else { 8738363Swpaul Buffers.setBit(getResourceStateIndex(Mask)); 8838363Swpaul AnyDispatchHazards |= (PR.BufferSize == 0); 8938363Swpaul AllInOrderResources &= (PR.BufferSize <= 1); 9038363Swpaul } 9138363Swpaul 9238363Swpaul CycleSegment RCy(0, PRE->Cycles, false); 9338363Swpaul Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); 9438363Swpaul if (PR.SuperIdx) { 9538363Swpaul uint64_t Super = ProcResourceMasks[PR.SuperIdx]; 9638363Swpaul SuperResources[Super] += PRE->Cycles; 9738363Swpaul } 9838363Swpaul } 9938363Swpaul 10038363Swpaul ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards; 10138363Swpaul 10238363Swpaul // Sort elements by mask popcount, so that we prioritize resource units over 10338363Swpaul // resource groups, and smaller groups over larger groups. 
10438363Swpaul sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { 10538363Swpaul unsigned popcntA = llvm::popcount(A.first); 10638363Swpaul unsigned popcntB = llvm::popcount(B.first); 10738363Swpaul if (popcntA < popcntB) 10838363Swpaul return true; 10938363Swpaul if (popcntA > popcntB) 11038363Swpaul return false; 11138363Swpaul return A.first < B.first; 11238363Swpaul }); 11338363Swpaul 11438363Swpaul uint64_t UsedResourceUnits = 0; 11538363Swpaul uint64_t UsedResourceGroups = 0; 11638363Swpaul uint64_t UnitsFromResourceGroups = 0; 11738363Swpaul 11838363Swpaul // Remove cycles contributed by smaller resources, and check if there 11938363Swpaul // are partially overlapping resource groups. 12038363Swpaul ID.HasPartiallyOverlappingGroups = false; 12138363Swpaul 12238363Swpaul for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { 12338363Swpaul ResourcePlusCycles &A = Worklist[I]; 12438363Swpaul if (!A.second.size()) { 12538363Swpaul assert(llvm::popcount(A.first) > 1 && "Expected a group!"); 12638363Swpaul UsedResourceGroups |= PowerOf2Floor(A.first); 12738363Swpaul continue; 12838363Swpaul } 12938363Swpaul 13038363Swpaul ID.Resources.emplace_back(A); 13138363Swpaul uint64_t NormalizedMask = A.first; 13238363Swpaul 13338363Swpaul if (llvm::popcount(A.first) == 1) { 13438363Swpaul UsedResourceUnits |= A.first; 13538363Swpaul } else { 13638363Swpaul // Remove the leading 1 from the resource group mask. 
13738363Swpaul NormalizedMask ^= PowerOf2Floor(NormalizedMask); 13838363Swpaul if (UnitsFromResourceGroups & NormalizedMask) 13938363Swpaul ID.HasPartiallyOverlappingGroups = true; 14038363Swpaul 14138363Swpaul UnitsFromResourceGroups |= NormalizedMask; 14238363Swpaul UsedResourceGroups |= (A.first ^ NormalizedMask); 14338363Swpaul } 14438363Swpaul 14538363Swpaul for (unsigned J = I + 1; J < E; ++J) { 14638363Swpaul ResourcePlusCycles &B = Worklist[J]; 14738363Swpaul if ((NormalizedMask & B.first) == NormalizedMask) { 14838363Swpaul B.second.CS.subtract(A.second.size() - SuperResources[A.first]); 14938363Swpaul if (llvm::popcount(B.first) > 1) 15038363Swpaul B.second.NumUnits++; 15138363Swpaul } 15238363Swpaul } 15338363Swpaul } 15438363Swpaul 15538363Swpaul // A SchedWrite may specify a number of cycles in which a resource group 15638363Swpaul // is reserved. For example (on target x86; cpu Haswell): 15738363Swpaul // 15838363Swpaul // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { 15938363Swpaul // let ResourceCycles = [2, 2, 3]; 16038363Swpaul // } 16138363Swpaul // 16238363Swpaul // This means: 16338363Swpaul // Resource units HWPort0 and HWPort1 are both used for 2cy. 16438363Swpaul // Resource group HWPort01 is the union of HWPort0 and HWPort1. 16538363Swpaul // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 16638363Swpaul // will not be usable for 2 entire cycles from instruction issue. 16738363Swpaul // 16838363Swpaul // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency 16938363Swpaul // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an 17038363Swpaul // extra delay on top of the 2 cycles latency. 17138363Swpaul // During those extra cycles, HWPort01 is not usable by other instructions. 17238363Swpaul for (ResourcePlusCycles &RPC : ID.Resources) { 17338363Swpaul if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) { 17438526Swpaul // Remove the leading 1 from the resource group mask. 
17538526Swpaul uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); 17638526Swpaul uint64_t MaxResourceUnits = llvm::popcount(Mask); 17738526Swpaul if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) { 17838526Swpaul RPC.second.setReserved(); 17938526Swpaul RPC.second.NumUnits = MaxResourceUnits; 18038526Swpaul } 18138363Swpaul } 18238363Swpaul } 18338363Swpaul 18438363Swpaul // Identify extra buffers that are consumed through super resources. 18538363Swpaul for (const std::pair<uint64_t, unsigned> &SR : SuperResources) { 18638363Swpaul for (unsigned I = 1, E = NumProcResources; I < E; ++I) { 18738363Swpaul const MCProcResourceDesc &PR = *SM.getProcResource(I); 18838363Swpaul if (PR.BufferSize == -1) 18938363Swpaul continue; 19038363Swpaul 19138363Swpaul uint64_t Mask = ProcResourceMasks[I]; 19238363Swpaul if (Mask != SR.first && ((Mask & SR.first) == SR.first)) 19338363Swpaul Buffers.setBit(getResourceStateIndex(Mask)); 19438363Swpaul } 19538363Swpaul } 19638363Swpaul 19738363Swpaul ID.UsedBuffers = Buffers.getZExtValue(); 19838363Swpaul ID.UsedProcResUnits = UsedResourceUnits; 19938363Swpaul ID.UsedProcResGroups = UsedResourceGroups; 20038363Swpaul 20138363Swpaul LLVM_DEBUG({ 20238363Swpaul for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources) 20338363Swpaul dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", " 20438363Swpaul << "Reserved=" << R.second.isReserved() << ", " 20538363Swpaul << "#Units=" << R.second.NumUnits << ", " 20638363Swpaul << "cy=" << R.second.size() << '\n'; 20738363Swpaul uint64_t BufferIDs = ID.UsedBuffers; 20838363Swpaul while (BufferIDs) { 20938363Swpaul uint64_t Current = BufferIDs & (-BufferIDs); 21038363Swpaul dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n'; 21138363Swpaul BufferIDs ^= Current; 21238363Swpaul } 21338363Swpaul dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; 21438363Swpaul dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) 
21538363Swpaul << '\n'; 21638363Swpaul dbgs() << "\t\tHasPartiallyOverlappingGroups=" 21738363Swpaul << ID.HasPartiallyOverlappingGroups << '\n'; 21838363Swpaul }); 21938363Swpaul} 22038363Swpaul 22138363Swpaulstatic void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, 22238363Swpaul const MCSchedClassDesc &SCDesc, 22338363Swpaul const MCSubtargetInfo &STI) { 22438363Swpaul if (MCDesc.isCall()) { 22538363Swpaul // We cannot estimate how long this call will take. 22638363Swpaul // Artificially set an arbitrarily high latency (100cy). 22738363Swpaul ID.MaxLatency = 100U; 22838363Swpaul return; 22938363Swpaul } 23038363Swpaul 23138363Swpaul int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); 23238363Swpaul // If latency is unknown, then conservatively assume a MaxLatency of 100cy. 23338363Swpaul ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); 23438363Swpaul} 23538363Swpaul 23638363Swpaulstatic Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { 23738363Swpaul // Count register definitions, and skip non register operands in the process. 23838363Swpaul unsigned I, E; 23938363Swpaul unsigned NumExplicitDefs = MCDesc.getNumDefs(); 24038363Swpaul for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { 24138363Swpaul const MCOperand &Op = MCI.getOperand(I); 24238363Swpaul if (Op.isReg()) 24338363Swpaul --NumExplicitDefs; 24438363Swpaul } 24538363Swpaul 24638363Swpaul if (NumExplicitDefs) { 24738363Swpaul return make_error<InstructionError<MCInst>>( 24838363Swpaul "Expected more register operand definitions.", MCI); 24938363Swpaul } 25038363Swpaul 25138363Swpaul if (MCDesc.hasOptionalDef()) { 25238363Swpaul // Always assume that the optional definition is the last operand. 
25338363Swpaul const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); 25438363Swpaul if (I == MCI.getNumOperands() || !Op.isReg()) { 25538363Swpaul std::string Message = 25638363Swpaul "expected a register operand for an optional definition. Instruction " 25738363Swpaul "has not been correctly analyzed."; 25838363Swpaul return make_error<InstructionError<MCInst>>(Message, MCI); 25938363Swpaul } 26038363Swpaul } 26138363Swpaul 26238363Swpaul return ErrorSuccess(); 26338363Swpaul} 26438363Swpaul 26538363Swpaulvoid InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, 26638363Swpaul unsigned SchedClassID) { 26738363Swpaul const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); 26838363Swpaul const MCSchedModel &SM = STI.getSchedModel(); 26938363Swpaul const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); 27038363Swpaul 27138363Swpaul // Assumptions made by this algorithm: 27238363Swpaul // 1. The number of explicit and implicit register definitions in a MCInst 27338363Swpaul // matches the number of explicit and implicit definitions according to 27438363Swpaul // the opcode descriptor (MCInstrDesc). 27538363Swpaul // 2. Uses start at index #(MCDesc.getNumDefs()). 27638363Swpaul // 3. There can only be a single optional register definition, an it is 27738363Swpaul // either the last operand of the sequence (excluding extra operands 27838363Swpaul // contributed by variadic opcodes) or one of the explicit register 27938363Swpaul // definitions. The latter occurs for some Thumb1 instructions. 28038363Swpaul // 28138363Swpaul // These assumptions work quite well for most out-of-order in-tree targets 28238363Swpaul // like x86. This is mainly because the vast majority of instructions is 28338363Swpaul // expanded to MCInst using a straightforward lowering logic that preserves 28438363Swpaul // the ordering of the operands. 28538363Swpaul // 28638363Swpaul // About assumption 1. 
28738363Swpaul // The algorithm allows non-register operands between register operand 28838363Swpaul // definitions. This helps to handle some special ARM instructions with 28938363Swpaul // implicit operand increment (-mtriple=armv7): 29038363Swpaul // 29138363Swpaul // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed 29238363Swpaul // @ <MCOperand Reg:59> 29338363Swpaul // @ <MCOperand Imm:0> (!!) 29438363Swpaul // @ <MCOperand Reg:67> 29538363Swpaul // @ <MCOperand Imm:0> 29638363Swpaul // @ <MCOperand Imm:14> 29738363Swpaul // @ <MCOperand Reg:0>> 29838363Swpaul // 29938363Swpaul // MCDesc reports: 30038363Swpaul // 6 explicit operands. 30138363Swpaul // 1 optional definition 30238363Swpaul // 2 explicit definitions (!!) 30338363Swpaul // 30438363Swpaul // The presence of an 'Imm' operand between the two register definitions 30538363Swpaul // breaks the assumption that "register definitions are always at the 30638363Swpaul // beginning of the operand sequence". 30738363Swpaul // 30838363Swpaul // To workaround this issue, this algorithm ignores (i.e. skips) any 30938363Swpaul // non-register operands between register definitions. The optional 31038363Swpaul // definition is still at index #(NumOperands-1). 31138363Swpaul // 31238363Swpaul // According to assumption 2. register reads start at #(NumExplicitDefs-1). 31338363Swpaul // That means, register R1 from the example is both read and written. 
31438363Swpaul unsigned NumExplicitDefs = MCDesc.getNumDefs(); 31538363Swpaul unsigned NumImplicitDefs = MCDesc.implicit_defs().size(); 31638363Swpaul unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; 31738363Swpaul unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; 31838363Swpaul if (MCDesc.hasOptionalDef()) 31938363Swpaul TotalDefs++; 32038363Swpaul 32138363Swpaul unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); 32238363Swpaul ID.Writes.resize(TotalDefs + NumVariadicOps); 32338363Swpaul // Iterate over the operands list, and skip non-register operands. 32438363Swpaul // The first NumExplicitDefs register operands are expected to be register 32538363Swpaul // definitions. 32638363Swpaul unsigned CurrentDef = 0; 32738363Swpaul unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1; 32838363Swpaul unsigned i = 0; 32938363Swpaul for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { 33038363Swpaul const MCOperand &Op = MCI.getOperand(i); 33138363Swpaul if (!Op.isReg()) 33238363Swpaul continue; 33338363Swpaul 33438363Swpaul if (MCDesc.operands()[CurrentDef].isOptionalDef()) { 33538363Swpaul OptionalDefIdx = CurrentDef++; 33638363Swpaul continue; 33738363Swpaul } 33838363Swpaul 33938363Swpaul WriteDescriptor &Write = ID.Writes[CurrentDef]; 34038363Swpaul Write.OpIndex = i; 34138363Swpaul if (CurrentDef < NumWriteLatencyEntries) { 34238363Swpaul const MCWriteLatencyEntry &WLE = 34338363Swpaul *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); 34438363Swpaul // Conservatively default to MaxLatency. 34538363Swpaul Write.Latency = 34638363Swpaul WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); 34738363Swpaul Write.SClassOrWriteResourceID = WLE.WriteResourceID; 34838363Swpaul } else { 34938363Swpaul // Assign a default latency for this write. 
35038363Swpaul Write.Latency = ID.MaxLatency; 35138363Swpaul Write.SClassOrWriteResourceID = 0; 35238363Swpaul } 35338363Swpaul Write.IsOptionalDef = false; 35438363Swpaul LLVM_DEBUG({ 35538363Swpaul dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex 35638363Swpaul << ", Latency=" << Write.Latency 35738363Swpaul << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 35838363Swpaul }); 35938363Swpaul CurrentDef++; 36038363Swpaul } 36138363Swpaul 36238363Swpaul assert(CurrentDef == NumExplicitDefs && 36338363Swpaul "Expected more register operand definitions."); 36440588Swpaul for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { 36538363Swpaul unsigned Index = NumExplicitDefs + CurrentDef; 36638363Swpaul WriteDescriptor &Write = ID.Writes[Index]; 36738363Swpaul Write.OpIndex = ~CurrentDef; 36838363Swpaul Write.RegisterID = MCDesc.implicit_defs()[CurrentDef]; 36938363Swpaul if (Index < NumWriteLatencyEntries) { 37038363Swpaul const MCWriteLatencyEntry &WLE = 37138363Swpaul *STI.getWriteLatencyEntry(&SCDesc, Index); 37238363Swpaul // Conservatively default to MaxLatency. 37338363Swpaul Write.Latency = 37438363Swpaul WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); 37538363Swpaul Write.SClassOrWriteResourceID = WLE.WriteResourceID; 37638363Swpaul } else { 37738363Swpaul // Assign a default latency for this write. 
37838363Swpaul Write.Latency = ID.MaxLatency; 37938363Swpaul Write.SClassOrWriteResourceID = 0; 38038363Swpaul } 38138363Swpaul 38238363Swpaul Write.IsOptionalDef = false; 38338363Swpaul assert(Write.RegisterID != 0 && "Expected a valid phys register!"); 38438363Swpaul LLVM_DEBUG({ 38538363Swpaul dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex 38638363Swpaul << ", PhysReg=" << MRI.getName(Write.RegisterID) 38738363Swpaul << ", Latency=" << Write.Latency 38838363Swpaul << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 38938363Swpaul }); 39038363Swpaul } 39138363Swpaul 39238363Swpaul if (MCDesc.hasOptionalDef()) { 39338363Swpaul WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; 39438363Swpaul Write.OpIndex = OptionalDefIdx; 39538363Swpaul // Assign a default latency for this write. 39638363Swpaul Write.Latency = ID.MaxLatency; 39738363Swpaul Write.SClassOrWriteResourceID = 0; 39838363Swpaul Write.IsOptionalDef = true; 39938363Swpaul LLVM_DEBUG({ 40038363Swpaul dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex 40138363Swpaul << ", Latency=" << Write.Latency 40238363Swpaul << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 40338363Swpaul }); 40438363Swpaul } 40538363Swpaul 40638363Swpaul if (!NumVariadicOps) 40738363Swpaul return; 40838363Swpaul 40938363Swpaul bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs(); 41038363Swpaul CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); 41138363Swpaul for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); 41238363Swpaul I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { 41338363Swpaul const MCOperand &Op = MCI.getOperand(OpIndex); 41438363Swpaul if (!Op.isReg()) 41538363Swpaul continue; 41638363Swpaul 41738363Swpaul WriteDescriptor &Write = ID.Writes[CurrentDef]; 41838363Swpaul Write.OpIndex = OpIndex; 41938363Swpaul // Assign a default latency for this write. 
42038363Swpaul Write.Latency = ID.MaxLatency; 42138363Swpaul Write.SClassOrWriteResourceID = 0; 42238363Swpaul Write.IsOptionalDef = false; 42338363Swpaul ++CurrentDef; 42438363Swpaul LLVM_DEBUG({ 42538363Swpaul dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex 42638363Swpaul << ", Latency=" << Write.Latency 42738363Swpaul << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 42838363Swpaul }); 42938363Swpaul } 43038363Swpaul 43138363Swpaul ID.Writes.resize(CurrentDef); 43238363Swpaul} 43338363Swpaul 43438363Swpaulvoid InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, 43538363Swpaul unsigned SchedClassID) { 43638363Swpaul const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); 43738363Swpaul unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); 43838363Swpaul unsigned NumImplicitUses = MCDesc.implicit_uses().size(); 43938363Swpaul // Remove the optional definition. 44038469Swpaul if (MCDesc.hasOptionalDef()) 44139647Swpaul --NumExplicitUses; 44238363Swpaul unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); 44338363Swpaul unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; 44438363Swpaul ID.Reads.resize(TotalUses); 44538363Swpaul unsigned CurrentUse = 0; 44638363Swpaul for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; 44738363Swpaul ++I, ++OpIndex) { 44838363Swpaul const MCOperand &Op = MCI.getOperand(OpIndex); 44938363Swpaul if (!Op.isReg()) 45038363Swpaul continue; 45138363Swpaul 45238363Swpaul ReadDescriptor &Read = ID.Reads[CurrentUse]; 45338363Swpaul Read.OpIndex = OpIndex; 45438363Swpaul Read.UseIndex = I; 45538363Swpaul Read.SchedClassID = SchedClassID; 45638363Swpaul ++CurrentUse; 45738363Swpaul LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex 45838363Swpaul << ", UseIndex=" << Read.UseIndex << '\n'); 45938363Swpaul } 46038363Swpaul 46138363Swpaul // For the purpose of ReadAdvance, implicit uses come directly after explicit 46238363Swpaul // 
uses. The "UseIndex" must be updated according to that implicit layout. 46338363Swpaul for (unsigned I = 0; I < NumImplicitUses; ++I) { 46438363Swpaul ReadDescriptor &Read = ID.Reads[CurrentUse + I]; 46538363Swpaul Read.OpIndex = ~I; 46638363Swpaul Read.UseIndex = NumExplicitUses + I; 46738363Swpaul Read.RegisterID = MCDesc.implicit_uses()[I]; 46838363Swpaul Read.SchedClassID = SchedClassID; 46938363Swpaul LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex 47038363Swpaul << ", UseIndex=" << Read.UseIndex << ", RegisterID=" 47138363Swpaul << MRI.getName(Read.RegisterID) << '\n'); 47238363Swpaul } 47338363Swpaul 47438363Swpaul CurrentUse += NumImplicitUses; 47538363Swpaul 47638363Swpaul bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs(); 47738363Swpaul for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); 47838363Swpaul I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { 47938363Swpaul const MCOperand &Op = MCI.getOperand(OpIndex); 48038363Swpaul if (!Op.isReg()) 48138363Swpaul continue; 48238363Swpaul 48338363Swpaul ReadDescriptor &Read = ID.Reads[CurrentUse]; 48438363Swpaul Read.OpIndex = OpIndex; 48538363Swpaul Read.UseIndex = NumExplicitUses + NumImplicitUses + I; 48638363Swpaul Read.SchedClassID = SchedClassID; 48738363Swpaul ++CurrentUse; 48838363Swpaul LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex 48938363Swpaul << ", UseIndex=" << Read.UseIndex << '\n'); 49038363Swpaul } 49138363Swpaul 49238363Swpaul ID.Reads.resize(CurrentUse); 49338363Swpaul} 49438363Swpaul 49538363SwpaulError InstrBuilder::verifyInstrDesc(const InstrDesc &ID, 49638363Swpaul const MCInst &MCI) const { 49738363Swpaul if (ID.NumMicroOps != 0) 49838363Swpaul return ErrorSuccess(); 49938363Swpaul 50038363Swpaul bool UsesBuffers = ID.UsedBuffers; 50138363Swpaul bool UsesResources = !ID.Resources.empty(); 50238363Swpaul if (!UsesBuffers && !UsesResources) 50338363Swpaul return ErrorSuccess(); 50438363Swpaul 50538363Swpaul // FIXME: see PR44797. 
We should revisit these checks and possibly move them 50638363Swpaul // in CodeGenSchedule.cpp. 50738363Swpaul StringRef Message = "found an inconsistent instruction that decodes to zero " 50838363Swpaul "opcodes and that consumes scheduler resources."; 50938363Swpaul return make_error<InstructionError<MCInst>>(std::string(Message), MCI); 51038363Swpaul} 51138363Swpaul 51238363SwpaulExpected<const InstrDesc &> 51338363SwpaulInstrBuilder::createInstrDescImpl(const MCInst &MCI, 51438363Swpaul const SmallVector<SharedInstrument> &IVec) { 51538363Swpaul assert(STI.getSchedModel().hasInstrSchedModel() && 51638363Swpaul "Itineraries are not yet supported!"); 51738363Swpaul 51838363Swpaul // Obtain the instruction descriptor from the opcode. 51938363Swpaul unsigned short Opcode = MCI.getOpcode(); 52038363Swpaul const MCInstrDesc &MCDesc = MCII.get(Opcode); 52138363Swpaul const MCSchedModel &SM = STI.getSchedModel(); 52238363Swpaul 52338363Swpaul // Then obtain the scheduling class information from the instruction. 52438363Swpaul // Allow InstrumentManager to override and use a different SchedClassID 52538363Swpaul unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec); 52638363Swpaul bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); 52738363Swpaul 52838363Swpaul // Try to solve variant scheduling classes. 52938363Swpaul if (IsVariant) { 53038363Swpaul unsigned CPUID = SM.getProcessorID(); 53138363Swpaul while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) 53238363Swpaul SchedClassID = 53338363Swpaul STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID); 53438363Swpaul 53538363Swpaul if (!SchedClassID) { 53638363Swpaul return make_error<InstructionError<MCInst>>( 53738363Swpaul "unable to resolve scheduling class for write variant.", MCI); 53838363Swpaul } 53938363Swpaul } 54038363Swpaul 54138363Swpaul // Check if this instruction is supported. Otherwise, report an error. 
54245062Swpaul const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); 54345062Swpaul if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { 54438363Swpaul return make_error<InstructionError<MCInst>>( 54538363Swpaul "found an unsupported instruction in the input assembly sequence.", 54638363Swpaul MCI); 54738363Swpaul } 54838363Swpaul 54938363Swpaul LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n'); 55038363Swpaul LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); 55138363Swpaul LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n'); 55238363Swpaul 55338363Swpaul // Create a new empty descriptor. 55438363Swpaul std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>(); 55538363Swpaul ID->NumMicroOps = SCDesc.NumMicroOps; 55638363Swpaul ID->SchedClassID = SchedClassID; 55738363Swpaul 55838363Swpaul if (MCDesc.isCall() && FirstCallInst) { 55938363Swpaul // We don't correctly model calls. 56038363Swpaul WithColor::warning() << "found a call in the input assembly sequence.\n"; 56138363Swpaul WithColor::note() << "call instructions are not correctly modeled. 
" 56238363Swpaul << "Assume a latency of 100cy.\n"; 56338363Swpaul FirstCallInst = false; 56438363Swpaul } 56538363Swpaul 56638363Swpaul if (MCDesc.isReturn() && FirstReturnInst) { 56738363Swpaul WithColor::warning() << "found a return instruction in the input" 56838363Swpaul << " assembly sequence.\n"; 56938363Swpaul WithColor::note() << "program counter updates are ignored.\n"; 57038363Swpaul FirstReturnInst = false; 57138363Swpaul } 57238363Swpaul 57338363Swpaul initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); 57438363Swpaul computeMaxLatency(*ID, MCDesc, SCDesc, STI); 57538363Swpaul 57638363Swpaul if (Error Err = verifyOperands(MCDesc, MCI)) 57738363Swpaul return std::move(Err); 57838363Swpaul 57938363Swpaul populateWrites(*ID, MCI, SchedClassID); 58038363Swpaul populateReads(*ID, MCI, SchedClassID); 58138363Swpaul 58238363Swpaul LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); 58338363Swpaul LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); 58438363Swpaul 58538363Swpaul // Validation check on the instruction descriptor. 58638363Swpaul if (Error Err = verifyInstrDesc(*ID, MCI)) 58738363Swpaul return std::move(Err); 58845062Swpaul 58938363Swpaul // Now add the new descriptor. 
59045062Swpaul bool IsVariadic = MCDesc.isVariadic(); 59138363Swpaul if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) { 59245062Swpaul auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID); 59338363Swpaul Descriptors[DKey] = std::move(ID); 59445062Swpaul return *Descriptors[DKey]; 59545062Swpaul } 59645062Swpaul 59745062Swpaul auto VDKey = std::make_pair(&MCI, SchedClassID); 59845062Swpaul VariantDescriptors[VDKey] = std::move(ID); 59945062Swpaul return *VariantDescriptors[VDKey]; 60038363Swpaul} 60138363Swpaul 60238363SwpaulExpected<const InstrDesc &> 60338363SwpaulInstrBuilder::getOrCreateInstrDesc(const MCInst &MCI, 60438363Swpaul const SmallVector<SharedInstrument> &IVec) { 60538363Swpaul // Cache lookup using SchedClassID from Instrumentation 60638363Swpaul unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec); 60738363Swpaul 60838363Swpaul auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID); 60938363Swpaul if (Descriptors.find_as(DKey) != Descriptors.end()) 61038363Swpaul return *Descriptors[DKey]; 61138363Swpaul 61238363Swpaul unsigned CPUID = STI.getSchedModel().getProcessorID(); 61338363Swpaul SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID); 61438363Swpaul auto VDKey = std::make_pair(&MCI, SchedClassID); 61538363Swpaul if (VariantDescriptors.find(VDKey) != VariantDescriptors.end()) 61638363Swpaul return *VariantDescriptors[VDKey]; 61738363Swpaul 61838363Swpaul return createInstrDescImpl(MCI, IVec); 61938363Swpaul} 62038363Swpaul 62138363SwpaulSTATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc"); 62238363Swpaul 62345601SwpaulExpected<std::unique_ptr<Instruction>> 62440097SwpaulInstrBuilder::createInstruction(const MCInst &MCI, 62538810Swpaul const SmallVector<SharedInstrument> &IVec) { 62645629Swpaul Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec); 62738363Swpaul if (!DescOrErr) 62838363Swpaul return DescOrErr.takeError(); 62938363Swpaul const InstrDesc &D = 
      *DescOrErr;
  // NewIS always points at the instruction being populated. It is either a
  // recycled Instruction handed back by the recycle callback (ownership stays
  // with the recycler), or a freshly allocated one owned by CreatedIS.
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  // Try to reuse a previously retired Instruction when the descriptor allows
  // recycling and a recycle callback was registered.
  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  // Copy the static instruction/scheduling-class properties onto the new
  // instance.
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  // `Mask` is filled in by the MCInstrAnalysis queries below; bit `i` (when
  // present) describes whether register use `i` is independent from defs.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    // Every zero idiom is also dependency breaking.
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  // `Idx` counts the ReadStates actually populated; for a recycled
  // instruction it indexes into the pre-existing Uses vector so stale
  // entries can be overwritten in place and trimmed afterwards.
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      // Reuse the existing slot of the recycled instruction.
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This map describe register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  // Drop any leftover ReadStates that the recycled instruction carried but
  // this MCInst did not repopulate.
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  // A recycled instruction is reported back through a RecycledInstErr payload
  // rather than through the unique_ptr (CreatedIS is empty in that case).
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  // `WriteIndex` stays in lockstep with D.Writes so it can index WriteMask;
  // `Idx` again tracks the populated slots for the recycled case.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    // Skipped writes still advance WriteIndex to keep WriteMask aligned.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      // Overwrite a stale WriteState slot of the recycled instruction.
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  // Trim leftover WriteStates from the recycled instruction.
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm