AMDGPUAsmPrinter.cpp revision 263508
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// The AMDGPUAsmPrinter is used to print both assembly strings and binary
/// code.  When passed an MCAsmStreamer it prints assembly and when passed
/// an MCObjectStreamer it outputs binary code.
//
//===----------------------------------------------------------------------===//
//

#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

using namespace llvm;


static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
                                              MCStreamer &Streamer) {
  return new AMDGPUAsmPrinter(tm, Streamer);
}

extern "C" void LLVMInitializeR600AsmPrinter() {
  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
}

AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
    : AsmPrinter(TM, Streamer)
{
  DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode() &&
                  !Streamer.hasRawTextSupport();
}

/// We need to override this function so we can avoid
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
  SetupMachineFunction(MF);
  if (OutStreamer.hasRawTextSupport()) {
    OutStreamer.EmitRawText("@" + MF.getName() + ":");
  }

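  // Emit the program resource info for this function as register/value pairs
  // in the .AMDGPU.config section.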
  MCContext &Context = getObjFileLowering().getContext();
  const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
                                              ELF::SHT_PROGBITS, 0,
                                              SectionKind::getReadOnly());
  OutStreamer.SwitchSection(ConfigSection);
  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
  if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
    EmitProgramInfoSI(MF);
  } else {
    EmitProgramInfoR600(MF);
  }

  DisasmLines.clear();
  HexLines.clear();
  DisasmLineMaxLen = 0;

  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
  EmitFunctionBody();

  if (STM.dumpCode()) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    MF.dump();
#endif

    if (DisasmEnabled) {
      OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
                                                  ELF::SHT_NOTE, 0,
                                                  SectionKind::getReadOnly()));

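      // Emit each disassembled instruction padded to a common width, followed
      // by its hex encoding as a trailing comment.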
      for (size_t i = 0; i < DisasmLines.size(); ++i) {
        std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
        Comment += " ; " + HexLines[i] + "\n";

        OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
        OutStreamer.EmitBytes(StringRef(Comment));
      }
    }
  }

  return false;
}

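/// Emit the R600/Evergreen program resource values for \p MF: the number of
/// GPRs, the stack size, the kill-pixel flag and, for compute shaders, the
/// LDS allocation.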
void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
  unsigned MaxGPR = 0;
  bool killPixel = false;
  const R600RegisterInfo *RI =
                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();

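  // Scan every register operand for the highest hardware GPR index used and
  // check for KILLGT, which requires the kill-enable bit to be set.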
  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                    I != E; ++I) {
      MachineInstr &MI = *I;
      if (MI.getOpcode() == AMDGPU::KILLGT)
        killPixel = true;
      unsigned numOperands = MI.getNumOperands();
      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
        MachineOperand &MO = MI.getOperand(op_idx);
        if (!MO.isReg())
          continue;
        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;

        // Registers with an encoding value > 127 aren't GPRs.
        if (HWReg > 127)
          continue;
        MaxGPR = std::max(MaxGPR, HWReg);
      }
    }
  }

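  // Select the SQ_PGM_RESOURCES register that matches the shader type; the
  // register addresses differ between R600/R700 and Evergreen/Northern
  // Islands.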
  unsigned RsrcReg;
  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
    // Evergreen / Northern Islands
    switch (MFI->ShaderType) {
    default: // Fall through
    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
    }
  } else {
    // R600 / R700
    switch (MFI->ShaderType) {
    default: // Fall through
    case ShaderType::GEOMETRY: // Fall through
    case ShaderType::COMPUTE:  // Fall through
    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
    }
  }

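  // Emit the resource register, the shader control register and, for compute
  // shaders, the LDS allocation rounded up and converted from bytes to dwords.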
  OutStreamer.EmitIntValue(RsrcReg, 4);
  OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
                           S_STACK_SIZE(MFI->StackSize), 4);
  OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
  OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);

  if (MFI->ShaderType == ShaderType::COMPUTE) {
    OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
    OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
  }
}

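/// Emit the SI program resource values for \p MF: the maximum SGPR and VGPR
/// indices used, the LDS block count and, for pixel shaders, the SPI input
/// enable mask.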
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
  unsigned MaxSGPR = 0;
  unsigned MaxVGPR = 0;
  bool VCCUsed = false;
  const SIRegisterInfo *RI =
                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());

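  // Find the highest SGPR and VGPR used by scanning every register operand in
  // the function.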
  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                    I != E; ++I) {
      MachineInstr &MI = *I;

      unsigned numOperands = MI.getNumOperands();
      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
        MachineOperand &MO = MI.getOperand(op_idx);
        unsigned maxUsed;
        unsigned width = 0;
        bool isSGPR = false;
        unsigned reg;
        unsigned hwReg;
        if (!MO.isReg()) {
          continue;
        }
        reg = MO.getReg();
        if (reg == AMDGPU::VCC) {
          VCCUsed = true;
          continue;
        }

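        // SCC, EXEC and M0 are special registers that aren't tracked by the
        // SGPR/VGPR counts, so skip them.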
        switch (reg) {
        default: break;
        case AMDGPU::SCC:
        case AMDGPU::EXEC:
        case AMDGPU::M0:
          continue;
        }

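        // Map the register class to SGPR vs. VGPR and to the number of
        // consecutive 32-bit hardware registers the operand covers.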
        if (AMDGPU::SReg_32RegClass.contains(reg)) {
          isSGPR = true;
          width = 1;
        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
          isSGPR = false;
          width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
          isSGPR = true;
          width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
          isSGPR = false;
          width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
          isSGPR = false;
          width = 3;
        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
          isSGPR = true;
          width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
          isSGPR = false;
          width = 4;
        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
          isSGPR = true;
          width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
          isSGPR = false;
          width = 8;
        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
          isSGPR = true;
          width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
          isSGPR = false;
          width = 16;
        } else {
          assert(!"Unknown register class");
        }
        hwReg = RI->getEncodingValue(reg) & 0xff;
        maxUsed = hwReg + width - 1;
        if (isSGPR) {
          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
        } else {
          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
        }
      }
    }
  }
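  // VCC is a 64-bit register pair, so count two extra SGPRs when it is used.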
  if (VCCUsed) {
    MaxSGPR += 2;
  }
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  unsigned RsrcReg;
  switch (MFI->ShaderType) {
  default: // Fall through
  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
  }

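  // The VGPRS and SGPRS fields of RSRC1 are encoded in units of 4 VGPRs and
  // 8 SGPRs, hence the divisions below.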
  OutStreamer.EmitIntValue(RsrcReg, 4);
  OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);

  unsigned LDSAlignShift;
  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
    // LDS is allocated in 64 dword blocks.
    LDSAlignShift = 8;
  } else {
    // LDS is allocated in 128 dword blocks.
    LDSAlignShift = 9;
  }
  unsigned LDSBlocks =
          RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;

  if (MFI->ShaderType == ShaderType::COMPUTE) {
    OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
    OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
  }
  if (MFI->ShaderType == ShaderType::PIXEL) {
    OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
  }
}