//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file provides AMDGPU specific target streamer methods.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUTargetStreamer.h"
15#include "SIDefines.h"
16#include "Utils/AMDGPUBaseInfo.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/MC/MCContext.h"
19#include "llvm/MC/MCELFStreamer.h"
20#include "llvm/MC/MCObjectFileInfo.h"
21#include "llvm/MC/MCSectionELF.h"
22#include "llvm/Support/ELF.h"
23#include "llvm/Support/FormattedStream.h"
24
25using namespace llvm;
26
27AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
28    : MCTargetStreamer(S) { }
29
30//===----------------------------------------------------------------------===//
31// AMDGPUTargetAsmStreamer
32//===----------------------------------------------------------------------===//
33
34AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
35                                                 formatted_raw_ostream &OS)
36    : AMDGPUTargetStreamer(S), OS(OS) { }
37
38void
39AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
40                                                           uint32_t Minor) {
41  OS << "\t.hsa_code_object_version " <<
42        Twine(Major) << "," << Twine(Minor) << '\n';
43}
44
45void
46AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
47                                                       uint32_t Minor,
48                                                       uint32_t Stepping,
49                                                       StringRef VendorName,
50                                                       StringRef ArchName) {
51  OS << "\t.hsa_code_object_isa " <<
52        Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
53        ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
54
55}
56
57void
58AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
59  uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
60  bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
61      AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
62  bool EnableSGPRDispatchPtr = (Header.code_properties &
63      AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
64  bool EnableSGPRQueuePtr = (Header.code_properties &
65      AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
66  bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
67      AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
68  bool EnableSGPRDispatchID = (Header.code_properties &
69      AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
70  bool EnableSGPRFlatScratchInit = (Header.code_properties &
71      AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
72  bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
73      AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
74  bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
75      AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
76  bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
77      AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
78  bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
79      AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
80  bool EnableOrderedAppendGDS = (Header.code_properties &
81      AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
82  uint32_t PrivateElementSize = (Header.code_properties &
83      AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
84          AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
85  bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
86  bool IsDynamicCallstack = (Header.code_properties &
87      AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
88  bool IsDebugEnabled = (Header.code_properties &
89      AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
90  bool IsXNackEnabled = (Header.code_properties &
91      AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
92
93  OS << "\t.amd_kernel_code_t\n" <<
94    "\t\tkernel_code_version_major = " <<
95        Header.amd_kernel_code_version_major << '\n' <<
96    "\t\tkernel_code_version_minor = " <<
97        Header.amd_kernel_code_version_minor << '\n' <<
98    "\t\tmachine_kind = " <<
99        Header.amd_machine_kind << '\n' <<
100    "\t\tmachine_version_major = " <<
101        Header.amd_machine_version_major << '\n' <<
102    "\t\tmachine_version_minor = " <<
103        Header.amd_machine_version_minor << '\n' <<
104    "\t\tmachine_version_stepping = " <<
105        Header.amd_machine_version_stepping << '\n' <<
106    "\t\tkernel_code_entry_byte_offset = " <<
107        Header.kernel_code_entry_byte_offset << '\n' <<
108    "\t\tkernel_code_prefetch_byte_size = " <<
109        Header.kernel_code_prefetch_byte_size << '\n' <<
110    "\t\tmax_scratch_backing_memory_byte_size = " <<
111        Header.max_scratch_backing_memory_byte_size << '\n' <<
112    "\t\tcompute_pgm_rsrc1_vgprs = " <<
113        G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
114    "\t\tcompute_pgm_rsrc1_sgprs = " <<
115        G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
116    "\t\tcompute_pgm_rsrc1_priority = " <<
117        G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
118    "\t\tcompute_pgm_rsrc1_float_mode = " <<
119        G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
120    "\t\tcompute_pgm_rsrc1_priv = " <<
121        G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
122    "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
123        G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
124    "\t\tcompute_pgm_rsrc1_debug_mode = " <<
125        G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
126    "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
127        G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
128    "\t\tcompute_pgm_rsrc2_scratch_en = " <<
129        G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
130    "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
131        G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
132    "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
133        G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
134    "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
135        G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
136    "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
137        G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
138    "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
139        G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
140    "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
141        G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
142    "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
143        G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
144    "\t\tcompute_pgm_rsrc2_lds_size = " <<
145        G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
146    "\t\tcompute_pgm_rsrc2_excp_en = " <<
147        G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
148
149    "\t\tenable_sgpr_private_segment_buffer = " <<
150        EnableSGPRPrivateSegmentBuffer << '\n' <<
151    "\t\tenable_sgpr_dispatch_ptr = " <<
152        EnableSGPRDispatchPtr << '\n' <<
153    "\t\tenable_sgpr_queue_ptr = " <<
154        EnableSGPRQueuePtr << '\n' <<
155    "\t\tenable_sgpr_kernarg_segment_ptr = " <<
156        EnableSGPRKernargSegmentPtr << '\n' <<
157    "\t\tenable_sgpr_dispatch_id = " <<
158        EnableSGPRDispatchID << '\n' <<
159    "\t\tenable_sgpr_flat_scratch_init = " <<
160        EnableSGPRFlatScratchInit << '\n' <<
161    "\t\tenable_sgpr_private_segment_size = " <<
162        EnableSGPRPrivateSegmentSize << '\n' <<
163    "\t\tenable_sgpr_grid_workgroup_count_x = " <<
164        EnableSGPRGridWorkgroupCountX << '\n' <<
165    "\t\tenable_sgpr_grid_workgroup_count_y = " <<
166        EnableSGPRGridWorkgroupCountY << '\n' <<
167    "\t\tenable_sgpr_grid_workgroup_count_z = " <<
168        EnableSGPRGridWorkgroupCountZ << '\n' <<
169    "\t\tenable_ordered_append_gds = " <<
170        EnableOrderedAppendGDS << '\n' <<
171    "\t\tprivate_element_size = " <<
172        PrivateElementSize << '\n' <<
173    "\t\tis_ptr64 = " <<
174        IsPtr64 << '\n' <<
175    "\t\tis_dynamic_callstack = " <<
176        IsDynamicCallstack << '\n' <<
177    "\t\tis_debug_enabled = " <<
178        IsDebugEnabled << '\n' <<
179    "\t\tis_xnack_enabled = " <<
180        IsXNackEnabled << '\n' <<
181    "\t\tworkitem_private_segment_byte_size = " <<
182        Header.workitem_private_segment_byte_size << '\n' <<
183    "\t\tworkgroup_group_segment_byte_size = " <<
184        Header.workgroup_group_segment_byte_size << '\n' <<
185    "\t\tgds_segment_byte_size = " <<
186        Header.gds_segment_byte_size << '\n' <<
187    "\t\tkernarg_segment_byte_size = " <<
188        Header.kernarg_segment_byte_size << '\n' <<
189    "\t\tworkgroup_fbarrier_count = " <<
190        Header.workgroup_fbarrier_count << '\n' <<
191    "\t\twavefront_sgpr_count = " <<
192        Header.wavefront_sgpr_count << '\n' <<
193    "\t\tworkitem_vgpr_count = " <<
194        Header.workitem_vgpr_count << '\n' <<
195    "\t\treserved_vgpr_first = " <<
196        Header.reserved_vgpr_first << '\n' <<
197    "\t\treserved_vgpr_count = " <<
198        Header.reserved_vgpr_count << '\n' <<
199    "\t\treserved_sgpr_first = " <<
200        Header.reserved_sgpr_first << '\n' <<
201    "\t\treserved_sgpr_count = " <<
202        Header.reserved_sgpr_count << '\n' <<
203    "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
204        Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
205    "\t\tdebug_private_segment_buffer_sgpr = " <<
206        Header.debug_private_segment_buffer_sgpr << '\n' <<
207    "\t\tkernarg_segment_alignment = " <<
208        (uint32_t)Header.kernarg_segment_alignment << '\n' <<
209    "\t\tgroup_segment_alignment = " <<
210        (uint32_t)Header.group_segment_alignment << '\n' <<
211    "\t\tprivate_segment_alignment = " <<
212        (uint32_t)Header.private_segment_alignment << '\n' <<
213    "\t\twavefront_size = " <<
214        (uint32_t)Header.wavefront_size << '\n' <<
215    "\t\tcall_convention = " <<
216        Header.call_convention << '\n' <<
217    "\t\truntime_loader_kernel_symbol = " <<
218        Header.runtime_loader_kernel_symbol << '\n' <<
219    // TODO: control_directives
220    "\t.end_amd_kernel_code_t\n";
221
222}
223
224void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
225                                                   unsigned Type) {
226  switch (Type) {
227    default: llvm_unreachable("Invalid AMDGPU symbol type");
228    case ELF::STT_AMDGPU_HSA_KERNEL:
229      OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
230      break;
231  }
232}
233
234void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
235    StringRef GlobalName) {
236  OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
237}
238
239void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
240    StringRef GlobalName) {
241  OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
242}
243
244//===----------------------------------------------------------------------===//
245// AMDGPUTargetELFStreamer
246//===----------------------------------------------------------------------===//
247
248AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
249    : AMDGPUTargetStreamer(S), Streamer(S) { }
250
251MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
252  return static_cast<MCELFStreamer &>(Streamer);
253}
254
255void
256AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
257                                                           uint32_t Minor) {
258  MCStreamer &OS = getStreamer();
259  MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
260
261  unsigned NameSZ = 4;
262
263  OS.PushSection();
264  OS.SwitchSection(Note);
265  OS.EmitIntValue(NameSZ, 4);                            // namesz
266  OS.EmitIntValue(8, 4);                                 // descz
267  OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
268  OS.EmitBytes(StringRef("AMD", NameSZ));                // name
269  OS.EmitIntValue(Major, 4);                             // desc
270  OS.EmitIntValue(Minor, 4);
271  OS.EmitValueToAlignment(4);
272  OS.PopSection();
273}
274
275void
276AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
277                                                       uint32_t Minor,
278                                                       uint32_t Stepping,
279                                                       StringRef VendorName,
280                                                       StringRef ArchName) {
281  MCStreamer &OS = getStreamer();
282  MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
283
284  unsigned NameSZ = 4;
285  uint16_t VendorNameSize = VendorName.size() + 1;
286  uint16_t ArchNameSize = ArchName.size() + 1;
287  unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
288                    sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
289                    VendorNameSize + ArchNameSize;
290
291  OS.PushSection();
292  OS.SwitchSection(Note);
293  OS.EmitIntValue(NameSZ, 4);                            // namesz
294  OS.EmitIntValue(DescSZ, 4);                            // descsz
295  OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4);                 // type
296  OS.EmitBytes(StringRef("AMD", 4));                     // name
297  OS.EmitIntValue(VendorNameSize, 2);                    // desc
298  OS.EmitIntValue(ArchNameSize, 2);
299  OS.EmitIntValue(Major, 4);
300  OS.EmitIntValue(Minor, 4);
301  OS.EmitIntValue(Stepping, 4);
302  OS.EmitBytes(VendorName);
303  OS.EmitIntValue(0, 1); // NULL terminate VendorName
304  OS.EmitBytes(ArchName);
305  OS.EmitIntValue(0, 1); // NULL terminte ArchName
306  OS.EmitValueToAlignment(4);
307  OS.PopSection();
308}
309
310void
311AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
312
313  MCStreamer &OS = getStreamer();
314  OS.PushSection();
315  // The MCObjectFileInfo that is available to the assembler is a generic
316  // implementation and not AMDGPUHSATargetObjectFile, so we can't use
317  // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection.
318  OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
319  OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
320  OS.PopSection();
321}
322
323void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
324                                                   unsigned Type) {
325  MCSymbolELF *Symbol = cast<MCSymbolELF>(
326      getStreamer().getContext().getOrCreateSymbol(SymbolName));
327  Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
328}
329
330void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
331    StringRef GlobalName) {
332
333  MCSymbolELF *Symbol = cast<MCSymbolELF>(
334      getStreamer().getContext().getOrCreateSymbol(GlobalName));
335  Symbol->setType(ELF::STT_OBJECT);
336  Symbol->setBinding(ELF::STB_LOCAL);
337}
338
339void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
340    StringRef GlobalName) {
341
342  MCSymbolELF *Symbol = cast<MCSymbolELF>(
343      getStreamer().getContext().getOrCreateSymbol(GlobalName));
344  Symbol->setType(ELF::STT_OBJECT);
345  Symbol->setBinding(ELF::STB_GLOBAL);
346}
347