1249259Sdim//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer  -------------------===//
2249259Sdim//
3249259Sdim//                     The LLVM Compiler Infrastructure
4249259Sdim//
5249259Sdim// This file is distributed under the University of Illinois Open Source
6249259Sdim// License. See LICENSE.TXT for details.
7249259Sdim//
8249259Sdim//===----------------------------------------------------------------------===//
9249259Sdim//
10249259Sdim/// \file
11249259Sdim///
12249259Sdim/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13249259Sdim/// code.  When passed an MCAsmStreamer it prints assembly and when passed
14249259Sdim/// an MCObjectStreamer it outputs binary code.
15249259Sdim//
16249259Sdim//===----------------------------------------------------------------------===//
17249259Sdim//
18249259Sdim
19249259Sdim
20249259Sdim#include "AMDGPUAsmPrinter.h"
21249259Sdim#include "AMDGPU.h"
22263508Sdim#include "R600Defines.h"
23263508Sdim#include "R600MachineFunctionInfo.h"
24263508Sdim#include "R600RegisterInfo.h"
25251662Sdim#include "SIDefines.h"
26249259Sdim#include "SIMachineFunctionInfo.h"
27249259Sdim#include "SIRegisterInfo.h"
28251662Sdim#include "llvm/MC/MCContext.h"
29251662Sdim#include "llvm/MC/MCSectionELF.h"
30249259Sdim#include "llvm/MC/MCStreamer.h"
31251662Sdim#include "llvm/Support/ELF.h"
32263508Sdim#include "llvm/Support/MathExtras.h"
33249259Sdim#include "llvm/Support/TargetRegistry.h"
34249259Sdim#include "llvm/Target/TargetLoweringObjectFile.h"
35249259Sdim
36249259Sdimusing namespace llvm;
37249259Sdim
38249259Sdim
39249259Sdimstatic AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
40249259Sdim                                              MCStreamer &Streamer) {
41249259Sdim  return new AMDGPUAsmPrinter(tm, Streamer);
42249259Sdim}
43249259Sdim
44249259Sdimextern "C" void LLVMInitializeR600AsmPrinter() {
45249259Sdim  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
46249259Sdim}
47249259Sdim
48263508SdimAMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
49263508Sdim    : AsmPrinter(TM, Streamer)
50263508Sdim{
51263508Sdim  DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode() &&
52263508Sdim                  ! Streamer.hasRawTextSupport();
53263508Sdim}
54263508Sdim
55249259Sdim/// We need to override this function so we can avoid
56249259Sdim/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
57249259Sdimbool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
58249259Sdim  SetupMachineFunction(MF);
59249259Sdim  if (OutStreamer.hasRawTextSupport()) {
60249259Sdim    OutStreamer.EmitRawText("@" + MF.getName() + ":");
61249259Sdim  }
62251662Sdim
63263508Sdim  MCContext &Context = getObjFileLowering().getContext();
64263508Sdim  const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
65251662Sdim                                              ELF::SHT_PROGBITS, 0,
66251662Sdim                                              SectionKind::getReadOnly());
67251662Sdim  OutStreamer.SwitchSection(ConfigSection);
68263508Sdim  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
69263508Sdim  if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
70251662Sdim    EmitProgramInfoSI(MF);
71251662Sdim  } else {
72251662Sdim    EmitProgramInfoR600(MF);
73249259Sdim  }
74263508Sdim
75263508Sdim  DisasmLines.clear();
76263508Sdim  HexLines.clear();
77263508Sdim  DisasmLineMaxLen = 0;
78263508Sdim
79251662Sdim  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
80249259Sdim  EmitFunctionBody();
81263508Sdim
82263508Sdim  if (STM.dumpCode()) {
83263508Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
84263508Sdim    MF.dump();
85263508Sdim#endif
86263508Sdim
87263508Sdim    if (DisasmEnabled) {
88263508Sdim      OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
89263508Sdim                                                  ELF::SHT_NOTE, 0,
90263508Sdim                                                  SectionKind::getReadOnly()));
91263508Sdim
92263508Sdim      for (size_t i = 0; i < DisasmLines.size(); ++i) {
93263508Sdim        std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
94263508Sdim        Comment += " ; " + HexLines[i] + "\n";
95263508Sdim
96263508Sdim        OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
97263508Sdim        OutStreamer.EmitBytes(StringRef(Comment));
98263508Sdim      }
99263508Sdim    }
100263508Sdim  }
101263508Sdim
102249259Sdim  return false;
103249259Sdim}
104249259Sdim
105251662Sdimvoid AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
106251662Sdim  unsigned MaxGPR = 0;
107251662Sdim  bool killPixel = false;
108251662Sdim  const R600RegisterInfo * RI =
109251662Sdim                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
110251662Sdim  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
111251662Sdim  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
112251662Sdim
113251662Sdim  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
114251662Sdim                                                  BB != BB_E; ++BB) {
115251662Sdim    MachineBasicBlock &MBB = *BB;
116251662Sdim    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
117251662Sdim                                                    I != E; ++I) {
118251662Sdim      MachineInstr &MI = *I;
119251662Sdim      if (MI.getOpcode() == AMDGPU::KILLGT)
120251662Sdim        killPixel = true;
121251662Sdim      unsigned numOperands = MI.getNumOperands();
122251662Sdim      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
123251662Sdim        MachineOperand & MO = MI.getOperand(op_idx);
124251662Sdim        if (!MO.isReg())
125251662Sdim          continue;
126251662Sdim        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
127251662Sdim
128251662Sdim        // Register with value > 127 aren't GPR
129251662Sdim        if (HWReg > 127)
130251662Sdim          continue;
131251662Sdim        MaxGPR = std::max(MaxGPR, HWReg);
132251662Sdim      }
133251662Sdim    }
134251662Sdim  }
135251662Sdim
136251662Sdim  unsigned RsrcReg;
137263508Sdim  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
138251662Sdim    // Evergreen / Northern Islands
139251662Sdim    switch (MFI->ShaderType) {
140251662Sdim    default: // Fall through
141251662Sdim    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
142251662Sdim    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
143251662Sdim    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
144251662Sdim    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
145251662Sdim    }
146251662Sdim  } else {
147251662Sdim    // R600 / R700
148251662Sdim    switch (MFI->ShaderType) {
149251662Sdim    default: // Fall through
150251662Sdim    case ShaderType::GEOMETRY: // Fall through
151251662Sdim    case ShaderType::COMPUTE:  // Fall through
152251662Sdim    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
153251662Sdim    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
154251662Sdim    }
155251662Sdim  }
156251662Sdim
157251662Sdim  OutStreamer.EmitIntValue(RsrcReg, 4);
158251662Sdim  OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
159251662Sdim                           S_STACK_SIZE(MFI->StackSize), 4);
160251662Sdim  OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
161251662Sdim  OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
162263508Sdim
163263508Sdim  if (MFI->ShaderType == ShaderType::COMPUTE) {
164263508Sdim    OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
165263508Sdim    OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
166263508Sdim  }
167251662Sdim}
168251662Sdim
169251662Sdimvoid AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
170263508Sdim  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
171249259Sdim  unsigned MaxSGPR = 0;
172249259Sdim  unsigned MaxVGPR = 0;
173249259Sdim  bool VCCUsed = false;
174249259Sdim  const SIRegisterInfo * RI =
175249259Sdim                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
176249259Sdim
177249259Sdim  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
178249259Sdim                                                  BB != BB_E; ++BB) {
179249259Sdim    MachineBasicBlock &MBB = *BB;
180249259Sdim    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
181249259Sdim                                                    I != E; ++I) {
182249259Sdim      MachineInstr &MI = *I;
183249259Sdim
184249259Sdim      unsigned numOperands = MI.getNumOperands();
185249259Sdim      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
186263508Sdim        MachineOperand &MO = MI.getOperand(op_idx);
187249259Sdim        unsigned maxUsed;
188249259Sdim        unsigned width = 0;
189249259Sdim        bool isSGPR = false;
190249259Sdim        unsigned reg;
191249259Sdim        unsigned hwReg;
192249259Sdim        if (!MO.isReg()) {
193249259Sdim          continue;
194249259Sdim        }
195249259Sdim        reg = MO.getReg();
196249259Sdim        if (reg == AMDGPU::VCC) {
197249259Sdim          VCCUsed = true;
198249259Sdim          continue;
199249259Sdim        }
200263508Sdim
201249259Sdim        switch (reg) {
202249259Sdim        default: break;
203263508Sdim        case AMDGPU::SCC:
204249259Sdim        case AMDGPU::EXEC:
205249259Sdim        case AMDGPU::M0:
206249259Sdim          continue;
207249259Sdim        }
208249259Sdim
209249259Sdim        if (AMDGPU::SReg_32RegClass.contains(reg)) {
210249259Sdim          isSGPR = true;
211249259Sdim          width = 1;
212249259Sdim        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
213249259Sdim          isSGPR = false;
214249259Sdim          width = 1;
215249259Sdim        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
216249259Sdim          isSGPR = true;
217249259Sdim          width = 2;
218249259Sdim        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
219249259Sdim          isSGPR = false;
220249259Sdim          width = 2;
221251662Sdim        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
222251662Sdim          isSGPR = false;
223251662Sdim          width = 3;
224249259Sdim        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
225249259Sdim          isSGPR = true;
226249259Sdim          width = 4;
227249259Sdim        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
228249259Sdim          isSGPR = false;
229249259Sdim          width = 4;
230249259Sdim        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
231249259Sdim          isSGPR = true;
232249259Sdim          width = 8;
233249259Sdim        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
234249259Sdim          isSGPR = false;
235249259Sdim          width = 8;
236263508Sdim        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
237263508Sdim          isSGPR = true;
238263508Sdim          width = 16;
239249259Sdim        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
240249259Sdim          isSGPR = false;
241249259Sdim          width = 16;
242249259Sdim        } else {
243249259Sdim          assert(!"Unknown register class");
244249259Sdim        }
245249259Sdim        hwReg = RI->getEncodingValue(reg) & 0xff;
246249259Sdim        maxUsed = hwReg + width - 1;
247249259Sdim        if (isSGPR) {
248249259Sdim          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
249249259Sdim        } else {
250249259Sdim          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
251249259Sdim        }
252249259Sdim      }
253249259Sdim    }
254249259Sdim  }
255249259Sdim  if (VCCUsed) {
256249259Sdim    MaxSGPR += 2;
257249259Sdim  }
258249259Sdim  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
259251662Sdim  unsigned RsrcReg;
260251662Sdim  switch (MFI->ShaderType) {
261251662Sdim  default: // Fall through
262251662Sdim  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
263251662Sdim  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
264251662Sdim  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
265251662Sdim  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
266251662Sdim  }
267251662Sdim
268251662Sdim  OutStreamer.EmitIntValue(RsrcReg, 4);
269251662Sdim  OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
270263508Sdim
271263508Sdim  unsigned LDSAlignShift;
272263508Sdim  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
273263508Sdim    // LDS is allocated in 64 dword blocks
274263508Sdim    LDSAlignShift = 8;
275263508Sdim  } else {
276263508Sdim    // LDS is allocated in 128 dword blocks
277263508Sdim    LDSAlignShift = 9;
278263508Sdim  }
279263508Sdim  unsigned LDSBlocks =
280263508Sdim          RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
281263508Sdim
282263508Sdim  if (MFI->ShaderType == ShaderType::COMPUTE) {
283263508Sdim    OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
284263508Sdim    OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
285263508Sdim  }
286251662Sdim  if (MFI->ShaderType == ShaderType::PIXEL) {
287263508Sdim    OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
288263508Sdim    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
289251662Sdim    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
290251662Sdim    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
291251662Sdim  }
292249259Sdim}
293