1249259Sdim//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim// 10249259Sdim/// \file 11249259Sdim/// 12249259Sdim/// The AMDGPUAsmPrinter is used to print both assembly string and also binary 13249259Sdim/// code. When passed an MCAsmStreamer it prints assembly and when passed 14249259Sdim/// an MCObjectStreamer it outputs binary code. 15249259Sdim// 16249259Sdim//===----------------------------------------------------------------------===// 17249259Sdim// 18249259Sdim 19249259Sdim 20249259Sdim#include "AMDGPUAsmPrinter.h" 21249259Sdim#include "AMDGPU.h" 22263508Sdim#include "R600Defines.h" 23263508Sdim#include "R600MachineFunctionInfo.h" 24263508Sdim#include "R600RegisterInfo.h" 25251662Sdim#include "SIDefines.h" 26249259Sdim#include "SIMachineFunctionInfo.h" 27249259Sdim#include "SIRegisterInfo.h" 28251662Sdim#include "llvm/MC/MCContext.h" 29251662Sdim#include "llvm/MC/MCSectionELF.h" 30249259Sdim#include "llvm/MC/MCStreamer.h" 31251662Sdim#include "llvm/Support/ELF.h" 32263508Sdim#include "llvm/Support/MathExtras.h" 33249259Sdim#include "llvm/Support/TargetRegistry.h" 34249259Sdim#include "llvm/Target/TargetLoweringObjectFile.h" 35249259Sdim 36249259Sdimusing namespace llvm; 37249259Sdim 38249259Sdim 39249259Sdimstatic AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, 40249259Sdim MCStreamer &Streamer) { 41249259Sdim return new AMDGPUAsmPrinter(tm, Streamer); 42249259Sdim} 43249259Sdim 44249259Sdimextern "C" void LLVMInitializeR600AsmPrinter() { 45249259Sdim TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); 46249259Sdim} 47249259Sdim 48263508SdimAMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) 49263508Sdim : AsmPrinter(TM, Streamer) 50263508Sdim{ 51263508Sdim DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode() && 52263508Sdim ! Streamer.hasRawTextSupport(); 53263508Sdim} 54263508Sdim 55249259Sdim/// We need to override this function so we can avoid 56249259Sdim/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle. 57249259Sdimbool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { 58249259Sdim SetupMachineFunction(MF); 59249259Sdim if (OutStreamer.hasRawTextSupport()) { 60249259Sdim OutStreamer.EmitRawText("@" + MF.getName() + ":"); 61249259Sdim } 62251662Sdim 63263508Sdim MCContext &Context = getObjFileLowering().getContext(); 64263508Sdim const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", 65251662Sdim ELF::SHT_PROGBITS, 0, 66251662Sdim SectionKind::getReadOnly()); 67251662Sdim OutStreamer.SwitchSection(ConfigSection); 68263508Sdim const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 69263508Sdim if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { 70251662Sdim EmitProgramInfoSI(MF); 71251662Sdim } else { 72251662Sdim EmitProgramInfoR600(MF); 73249259Sdim } 74263508Sdim 75263508Sdim DisasmLines.clear(); 76263508Sdim HexLines.clear(); 77263508Sdim DisasmLineMaxLen = 0; 78263508Sdim 79251662Sdim OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); 80249259Sdim EmitFunctionBody(); 81263508Sdim 82263508Sdim if (STM.dumpCode()) { 83263508Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 84263508Sdim MF.dump(); 85263508Sdim#endif 86263508Sdim 87263508Sdim if (DisasmEnabled) { 88263508Sdim OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm", 89263508Sdim ELF::SHT_NOTE, 0, 90263508Sdim SectionKind::getReadOnly())); 91263508Sdim 92263508Sdim for (size_t i = 0; i < DisasmLines.size(); ++i) { 93263508Sdim std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' '); 94263508Sdim Comment += " ; " + HexLines[i] + "\n"; 95263508Sdim 96263508Sdim OutStreamer.EmitBytes(StringRef(DisasmLines[i])); 97263508Sdim OutStreamer.EmitBytes(StringRef(Comment)); 98263508Sdim } 99263508Sdim } 100263508Sdim } 101263508Sdim 102249259Sdim return false; 103249259Sdim} 104249259Sdim 105251662Sdimvoid AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { 106251662Sdim unsigned MaxGPR = 0; 107251662Sdim bool killPixel = false; 108251662Sdim const R600RegisterInfo * RI = 109251662Sdim static_cast<const R600RegisterInfo*>(TM.getRegisterInfo()); 110251662Sdim R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 111251662Sdim const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 112251662Sdim 113251662Sdim for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 114251662Sdim BB != BB_E; ++BB) { 115251662Sdim MachineBasicBlock &MBB = *BB; 116251662Sdim for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 117251662Sdim I != E; ++I) { 118251662Sdim MachineInstr &MI = *I; 119251662Sdim if (MI.getOpcode() == AMDGPU::KILLGT) 120251662Sdim killPixel = true; 121251662Sdim unsigned numOperands = MI.getNumOperands(); 122251662Sdim for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 123251662Sdim MachineOperand & MO = MI.getOperand(op_idx); 124251662Sdim if (!MO.isReg()) 125251662Sdim continue; 126251662Sdim unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; 127251662Sdim 128251662Sdim // Register with value > 127 aren't GPR 129251662Sdim if (HWReg > 127) 130251662Sdim continue; 131251662Sdim MaxGPR = std::max(MaxGPR, HWReg); 132251662Sdim } 133251662Sdim } 134251662Sdim } 135251662Sdim 136251662Sdim unsigned RsrcReg; 137263508Sdim if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) { 138251662Sdim // Evergreen / Northern Islands 139251662Sdim switch (MFI->ShaderType) { 140251662Sdim default: // Fall through 141251662Sdim case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; 142251662Sdim case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; 143251662Sdim case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break; 144251662Sdim case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break; 145251662Sdim } 146251662Sdim } else { 147251662Sdim // R600 / R700 148251662Sdim switch (MFI->ShaderType) { 149251662Sdim default: // Fall through 150251662Sdim case ShaderType::GEOMETRY: // Fall through 151251662Sdim case ShaderType::COMPUTE: // Fall through 152251662Sdim case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; 153251662Sdim case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; 154251662Sdim } 155251662Sdim } 156251662Sdim 157251662Sdim OutStreamer.EmitIntValue(RsrcReg, 4); 158251662Sdim OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) | 159251662Sdim S_STACK_SIZE(MFI->StackSize), 4); 160251662Sdim OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); 161251662Sdim OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); 162263508Sdim 163263508Sdim if (MFI->ShaderType == ShaderType::COMPUTE) { 164263508Sdim OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); 165263508Sdim OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4); 166263508Sdim } 167251662Sdim} 168251662Sdim 169251662Sdimvoid AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { 170263508Sdim const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 171249259Sdim unsigned MaxSGPR = 0; 172249259Sdim unsigned MaxVGPR = 0; 173249259Sdim bool VCCUsed = false; 174249259Sdim const SIRegisterInfo * RI = 175249259Sdim static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); 176249259Sdim 177249259Sdim for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 178249259Sdim BB != BB_E; ++BB) { 179249259Sdim MachineBasicBlock &MBB = *BB; 180249259Sdim for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 181249259Sdim I != E; ++I) { 182249259Sdim MachineInstr &MI = *I; 183249259Sdim 184249259Sdim unsigned numOperands = MI.getNumOperands(); 185249259Sdim for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 186263508Sdim MachineOperand &MO = MI.getOperand(op_idx); 187249259Sdim unsigned maxUsed; 188249259Sdim unsigned width = 0; 189249259Sdim bool isSGPR = false; 190249259Sdim unsigned reg; 191249259Sdim unsigned hwReg; 192249259Sdim if (!MO.isReg()) { 193249259Sdim continue; 194249259Sdim } 195249259Sdim reg = MO.getReg(); 196249259Sdim if (reg == AMDGPU::VCC) { 197249259Sdim VCCUsed = true; 198249259Sdim continue; 199249259Sdim } 200263508Sdim 201249259Sdim switch (reg) { 202249259Sdim default: break; 203263508Sdim case AMDGPU::SCC: 204249259Sdim case AMDGPU::EXEC: 205249259Sdim case AMDGPU::M0: 206249259Sdim continue; 207249259Sdim } 208249259Sdim 209249259Sdim if (AMDGPU::SReg_32RegClass.contains(reg)) { 210249259Sdim isSGPR = true; 211249259Sdim width = 1; 212249259Sdim } else if (AMDGPU::VReg_32RegClass.contains(reg)) { 213249259Sdim isSGPR = false; 214249259Sdim width = 1; 215249259Sdim } else if (AMDGPU::SReg_64RegClass.contains(reg)) { 216249259Sdim isSGPR = true; 217249259Sdim width = 2; 218249259Sdim } else if (AMDGPU::VReg_64RegClass.contains(reg)) { 219249259Sdim isSGPR = false; 220249259Sdim width = 2; 221251662Sdim } else if (AMDGPU::VReg_96RegClass.contains(reg)) { 222251662Sdim isSGPR = false; 223251662Sdim width = 3; 224249259Sdim } else if (AMDGPU::SReg_128RegClass.contains(reg)) { 225249259Sdim isSGPR = true; 226249259Sdim width = 4; 227249259Sdim } else if (AMDGPU::VReg_128RegClass.contains(reg)) { 228249259Sdim isSGPR = false; 229249259Sdim width = 4; 230249259Sdim } else if (AMDGPU::SReg_256RegClass.contains(reg)) { 231249259Sdim isSGPR = true; 232249259Sdim width = 8; 233249259Sdim } else if (AMDGPU::VReg_256RegClass.contains(reg)) { 234249259Sdim isSGPR = false; 235249259Sdim width = 8; 236263508Sdim } else if (AMDGPU::SReg_512RegClass.contains(reg)) { 237263508Sdim isSGPR = true; 238263508Sdim width = 16; 239249259Sdim } else if (AMDGPU::VReg_512RegClass.contains(reg)) { 240249259Sdim isSGPR = false; 241249259Sdim width = 16; 242249259Sdim } else { 243249259Sdim assert(!"Unknown register class"); 244249259Sdim } 245249259Sdim hwReg = RI->getEncodingValue(reg) & 0xff; 246249259Sdim maxUsed = hwReg + width - 1; 247249259Sdim if (isSGPR) { 248249259Sdim MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; 249249259Sdim } else { 250249259Sdim MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR; 251249259Sdim } 252249259Sdim } 253249259Sdim } 254249259Sdim } 255249259Sdim if (VCCUsed) { 256249259Sdim MaxSGPR += 2; 257249259Sdim } 258249259Sdim SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); 259251662Sdim unsigned RsrcReg; 260251662Sdim switch (MFI->ShaderType) { 261251662Sdim default: // Fall through 262251662Sdim case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break; 263251662Sdim case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break; 264251662Sdim case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break; 265251662Sdim case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; 266251662Sdim } 267251662Sdim 268251662Sdim OutStreamer.EmitIntValue(RsrcReg, 4); 269251662Sdim OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4); 270263508Sdim 271263508Sdim unsigned LDSAlignShift; 272263508Sdim if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { 273263508Sdim // LDS is allocated in 64 dword blocks 274263508Sdim LDSAlignShift = 8; 275263508Sdim } else { 276263508Sdim // LDS is allocated in 128 dword blocks 277263508Sdim LDSAlignShift = 9; 278263508Sdim } 279263508Sdim unsigned LDSBlocks = 280263508Sdim RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift; 281263508Sdim 282263508Sdim if (MFI->ShaderType == ShaderType::COMPUTE) { 283263508Sdim OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); 284263508Sdim OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4); 285263508Sdim } 286251662Sdim if (MFI->ShaderType == ShaderType::PIXEL) { 287263508Sdim OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); 288263508Sdim OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4); 289251662Sdim OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); 290251662Sdim OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); 291251662Sdim } 292249259Sdim} 293