//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPUTargetStreamer.h"
14#include "AMDGPU.h"
15#include "SIDefines.h"
16#include "Utils/AMDGPUBaseInfo.h"
17#include "Utils/AMDKernelCodeTUtils.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
20#include "llvm/BinaryFormat/ELF.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/Metadata.h"
24#include "llvm/IR/Module.h"
25#include "llvm/MC/MCContext.h"
26#include "llvm/MC/MCELFStreamer.h"
27#include "llvm/MC/MCObjectFileInfo.h"
28#include "llvm/MC/MCSectionELF.h"
29#include "llvm/Support/FormattedStream.h"
30#include "llvm/Support/TargetParser.h"
31
32namespace llvm {
33#include "AMDGPUPTNote.h"
34}
35
36using namespace llvm;
37using namespace llvm::AMDGPU;
38using namespace llvm::AMDGPU::HSAMD;
39
40//===----------------------------------------------------------------------===//
41// AMDGPUTargetStreamer
42//===----------------------------------------------------------------------===//
43
44bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
45  HSAMD::Metadata HSAMetadata;
46  if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
47    return false;
48
49  return EmitHSAMetadata(HSAMetadata);
50}
51
52bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
53  msgpack::Document HSAMetadataDoc;
54  if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
55    return false;
56  return EmitHSAMetadata(HSAMetadataDoc, false);
57}
58
59StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
60  AMDGPU::GPUKind AK;
61
62  switch (ElfMach) {
63  default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
64  case ELF::EF_AMDGPU_MACH_R600_R600:      AK = GK_R600;    break;
65  case ELF::EF_AMDGPU_MACH_R600_R630:      AK = GK_R630;    break;
66  case ELF::EF_AMDGPU_MACH_R600_RS880:     AK = GK_RS880;   break;
67  case ELF::EF_AMDGPU_MACH_R600_RV670:     AK = GK_RV670;   break;
68  case ELF::EF_AMDGPU_MACH_R600_RV710:     AK = GK_RV710;   break;
69  case ELF::EF_AMDGPU_MACH_R600_RV730:     AK = GK_RV730;   break;
70  case ELF::EF_AMDGPU_MACH_R600_RV770:     AK = GK_RV770;   break;
71  case ELF::EF_AMDGPU_MACH_R600_CEDAR:     AK = GK_CEDAR;   break;
72  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:   AK = GK_CYPRESS; break;
73  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:   AK = GK_JUNIPER; break;
74  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:   AK = GK_REDWOOD; break;
75  case ELF::EF_AMDGPU_MACH_R600_SUMO:      AK = GK_SUMO;    break;
76  case ELF::EF_AMDGPU_MACH_R600_BARTS:     AK = GK_BARTS;   break;
77  case ELF::EF_AMDGPU_MACH_R600_CAICOS:    AK = GK_CAICOS;  break;
78  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:    AK = GK_CAYMAN;  break;
79  case ELF::EF_AMDGPU_MACH_R600_TURKS:     AK = GK_TURKS;   break;
80  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:  AK = GK_GFX600;  break;
81  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:  AK = GK_GFX601;  break;
82  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:  AK = GK_GFX700;  break;
83  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:  AK = GK_GFX701;  break;
84  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:  AK = GK_GFX702;  break;
85  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:  AK = GK_GFX703;  break;
86  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:  AK = GK_GFX704;  break;
87  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:  AK = GK_GFX801;  break;
88  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:  AK = GK_GFX802;  break;
89  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:  AK = GK_GFX803;  break;
90  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:  AK = GK_GFX810;  break;
91  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:  AK = GK_GFX900;  break;
92  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:  AK = GK_GFX902;  break;
93  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:  AK = GK_GFX904;  break;
94  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:  AK = GK_GFX906;  break;
95  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:  AK = GK_GFX908;  break;
96  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:  AK = GK_GFX909;  break;
97  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
98  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
99  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
100  case ELF::EF_AMDGPU_MACH_NONE:           AK = GK_NONE;    break;
101  }
102
103  StringRef GPUName = getArchNameAMDGCN(AK);
104  if (GPUName != "")
105    return GPUName;
106  return getArchNameR600(AK);
107}
108
109unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
110  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
111  if (AK == AMDGPU::GPUKind::GK_NONE)
112    AK = parseArchR600(GPU);
113
114  switch (AK) {
115  case GK_R600:    return ELF::EF_AMDGPU_MACH_R600_R600;
116  case GK_R630:    return ELF::EF_AMDGPU_MACH_R600_R630;
117  case GK_RS880:   return ELF::EF_AMDGPU_MACH_R600_RS880;
118  case GK_RV670:   return ELF::EF_AMDGPU_MACH_R600_RV670;
119  case GK_RV710:   return ELF::EF_AMDGPU_MACH_R600_RV710;
120  case GK_RV730:   return ELF::EF_AMDGPU_MACH_R600_RV730;
121  case GK_RV770:   return ELF::EF_AMDGPU_MACH_R600_RV770;
122  case GK_CEDAR:   return ELF::EF_AMDGPU_MACH_R600_CEDAR;
123  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
124  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
125  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
126  case GK_SUMO:    return ELF::EF_AMDGPU_MACH_R600_SUMO;
127  case GK_BARTS:   return ELF::EF_AMDGPU_MACH_R600_BARTS;
128  case GK_CAICOS:  return ELF::EF_AMDGPU_MACH_R600_CAICOS;
129  case GK_CAYMAN:  return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
130  case GK_TURKS:   return ELF::EF_AMDGPU_MACH_R600_TURKS;
131  case GK_GFX600:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
132  case GK_GFX601:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
133  case GK_GFX700:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
134  case GK_GFX701:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
135  case GK_GFX702:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
136  case GK_GFX703:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
137  case GK_GFX704:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
138  case GK_GFX801:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
139  case GK_GFX802:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
140  case GK_GFX803:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
141  case GK_GFX810:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
142  case GK_GFX900:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
143  case GK_GFX902:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
144  case GK_GFX904:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
145  case GK_GFX906:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
146  case GK_GFX908:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
147  case GK_GFX909:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
148  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
149  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
150  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
151  case GK_NONE:    return ELF::EF_AMDGPU_MACH_NONE;
152  }
153
154  llvm_unreachable("unknown GPU");
155}
156
157//===----------------------------------------------------------------------===//
158// AMDGPUTargetAsmStreamer
159//===----------------------------------------------------------------------===//
160
161AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
162                                                 formatted_raw_ostream &OS)
163    : AMDGPUTargetStreamer(S), OS(OS) { }
164
165// A hook for emitting stuff at the end.
166// We use it for emitting the accumulated PAL metadata as directives.
167void AMDGPUTargetAsmStreamer::finish() {
168  std::string S;
169  getPALMetadata()->toString(S);
170  OS << S;
171}
172
173void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
174  OS << "\t.amdgcn_target \"" << Target << "\"\n";
175}
176
177void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
178    uint32_t Major, uint32_t Minor) {
179  OS << "\t.hsa_code_object_version " <<
180        Twine(Major) << "," << Twine(Minor) << '\n';
181}
182
183void
184AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
185                                                       uint32_t Minor,
186                                                       uint32_t Stepping,
187                                                       StringRef VendorName,
188                                                       StringRef ArchName) {
189  OS << "\t.hsa_code_object_isa " <<
190        Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
191        ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
192
193}
194
195void
196AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
197  OS << "\t.amd_kernel_code_t\n";
198  dumpAmdKernelCode(&Header, OS, "\t\t");
199  OS << "\t.end_amd_kernel_code_t\n";
200}
201
202void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
203                                                   unsigned Type) {
204  switch (Type) {
205    default: llvm_unreachable("Invalid AMDGPU symbol type");
206    case ELF::STT_AMDGPU_HSA_KERNEL:
207      OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
208      break;
209  }
210}
211
212void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
213                                            unsigned Align) {
214  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align
215     << '\n';
216}
217
218bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
219  OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
220  return true;
221}
222
223bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
224    const AMDGPU::HSAMD::Metadata &HSAMetadata) {
225  std::string HSAMetadataString;
226  if (HSAMD::toString(HSAMetadata, HSAMetadataString))
227    return false;
228
229  OS << '\t' << AssemblerDirectiveBegin << '\n';
230  OS << HSAMetadataString << '\n';
231  OS << '\t' << AssemblerDirectiveEnd << '\n';
232  return true;
233}
234
235bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
236    msgpack::Document &HSAMetadataDoc, bool Strict) {
237  V3::MetadataVerifier Verifier(Strict);
238  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
239    return false;
240
241  std::string HSAMetadataString;
242  raw_string_ostream StrOS(HSAMetadataString);
243  HSAMetadataDoc.toYAML(StrOS);
244
245  OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
246  OS << StrOS.str() << '\n';
247  OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
248  return true;
249}
250
251bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
252  const uint32_t Encoded_s_code_end = 0xbf9f0000;
253  OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
254  OS << "\t.fill 48, 4, " << Encoded_s_code_end << '\n';
255  return true;
256}
257
258void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
259    const MCSubtargetInfo &STI, StringRef KernelName,
260    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
261    bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
262  IsaVersion IVersion = getIsaVersion(STI.getCPU());
263
264  OS << "\t.amdhsa_kernel " << KernelName << '\n';
265
266#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)   \
267  STREAM << "\t\t" << DIRECTIVE << " "                                         \
268         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
269
270  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
271     << '\n';
272  OS << "\t\t.amdhsa_private_segment_fixed_size "
273     << KD.private_segment_fixed_size << '\n';
274
275  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
276              kernel_code_properties,
277              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
278  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
279              kernel_code_properties,
280              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
281  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
282              kernel_code_properties,
283              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
284  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
285              kernel_code_properties,
286              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
287  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
288              kernel_code_properties,
289              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
290  PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
291              kernel_code_properties,
292              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
293  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
294              kernel_code_properties,
295              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
296  if (IVersion.Major >= 10)
297    PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
298                kernel_code_properties,
299                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
300  PRINT_FIELD(
301      OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
302      compute_pgm_rsrc2,
303      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
304  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
305              compute_pgm_rsrc2,
306              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
307  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
308              compute_pgm_rsrc2,
309              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
310  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
311              compute_pgm_rsrc2,
312              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
313  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
314              compute_pgm_rsrc2,
315              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
316  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
317              compute_pgm_rsrc2,
318              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
319
320  // These directives are required.
321  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
322  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
323
324  if (!ReserveVCC)
325    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
326  if (IVersion.Major >= 7 && !ReserveFlatScr)
327    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
328  if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
329    OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
330
331  PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
332              compute_pgm_rsrc1,
333              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
334  PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
335              compute_pgm_rsrc1,
336              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
337  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
338              compute_pgm_rsrc1,
339              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
340  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
341              compute_pgm_rsrc1,
342              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
343  PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
344              compute_pgm_rsrc1,
345              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
346  PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
347              compute_pgm_rsrc1,
348              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
349  if (IVersion.Major >= 9)
350    PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
351                compute_pgm_rsrc1,
352                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
353  if (IVersion.Major >= 10) {
354    PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
355                compute_pgm_rsrc1,
356                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
357    PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
358                compute_pgm_rsrc1,
359                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
360    PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
361                compute_pgm_rsrc1,
362                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
363  }
364  PRINT_FIELD(
365      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
366      compute_pgm_rsrc2,
367      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
368  PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
369              compute_pgm_rsrc2,
370              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
371  PRINT_FIELD(
372      OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
373      compute_pgm_rsrc2,
374      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
375  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
376              compute_pgm_rsrc2,
377              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
378  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
379              compute_pgm_rsrc2,
380              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
381  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
382              compute_pgm_rsrc2,
383              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
384  PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
385              compute_pgm_rsrc2,
386              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
387#undef PRINT_FIELD
388
389  OS << "\t.end_amdhsa_kernel\n";
390}
391
392//===----------------------------------------------------------------------===//
393// AMDGPUTargetELFStreamer
394//===----------------------------------------------------------------------===//
395
396AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
397    MCStreamer &S, const MCSubtargetInfo &STI)
398    : AMDGPUTargetStreamer(S), Streamer(S) {
399  MCAssembler &MCA = getStreamer().getAssembler();
400  unsigned EFlags = MCA.getELFHeaderEFlags();
401
402  EFlags &= ~ELF::EF_AMDGPU_MACH;
403  EFlags |= getElfMach(STI.getCPU());
404
405  EFlags &= ~ELF::EF_AMDGPU_XNACK;
406  if (AMDGPU::hasXNACK(STI))
407    EFlags |= ELF::EF_AMDGPU_XNACK;
408
409  EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
410  if (AMDGPU::hasSRAMECC(STI))
411    EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
412
413  MCA.setELFHeaderEFlags(EFlags);
414}
415
416MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
417  return static_cast<MCELFStreamer &>(Streamer);
418}
419
420// A hook for emitting stuff at the end.
421// We use it for emitting the accumulated PAL metadata as a .note record.
422void AMDGPUTargetELFStreamer::finish() {
423  std::string Blob;
424  const char *Vendor = getPALMetadata()->getVendor();
425  unsigned Type = getPALMetadata()->getType();
426  getPALMetadata()->toBlob(Type, Blob);
427  if (Blob.empty())
428    return;
429  EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
430           [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); });
431}
432
433void AMDGPUTargetELFStreamer::EmitNote(
434    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
435    function_ref<void(MCELFStreamer &)> EmitDesc) {
436  auto &S = getStreamer();
437  auto &Context = S.getContext();
438
439  auto NameSZ = Name.size() + 1;
440
441  S.PushSection();
442  S.SwitchSection(Context.getELFSection(
443    ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
444  S.EmitIntValue(NameSZ, 4);                                  // namesz
445  S.EmitValue(DescSZ, 4);                                     // descz
446  S.EmitIntValue(NoteType, 4);                                // type
447  S.EmitBytes(Name);                                          // name
448  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
449  EmitDesc(S);                                                // desc
450  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
451  S.PopSection();
452}
453
454void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
455
456void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
457    uint32_t Major, uint32_t Minor) {
458
459  EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
460           ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
461             OS.EmitIntValue(Major, 4);
462             OS.EmitIntValue(Minor, 4);
463           });
464}
465
466void
467AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
468                                                       uint32_t Minor,
469                                                       uint32_t Stepping,
470                                                       StringRef VendorName,
471                                                       StringRef ArchName) {
472  uint16_t VendorNameSize = VendorName.size() + 1;
473  uint16_t ArchNameSize = ArchName.size() + 1;
474
475  unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
476    sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
477    VendorNameSize + ArchNameSize;
478
479  EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
480           ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
481             OS.EmitIntValue(VendorNameSize, 2);
482             OS.EmitIntValue(ArchNameSize, 2);
483             OS.EmitIntValue(Major, 4);
484             OS.EmitIntValue(Minor, 4);
485             OS.EmitIntValue(Stepping, 4);
486             OS.EmitBytes(VendorName);
487             OS.EmitIntValue(0, 1); // NULL terminate VendorName
488             OS.EmitBytes(ArchName);
489             OS.EmitIntValue(0, 1); // NULL terminte ArchName
490           });
491}
492
493void
494AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
495
496  MCStreamer &OS = getStreamer();
497  OS.PushSection();
498  OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
499  OS.PopSection();
500}
501
502void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
503                                                   unsigned Type) {
504  MCSymbolELF *Symbol = cast<MCSymbolELF>(
505      getStreamer().getContext().getOrCreateSymbol(SymbolName));
506  Symbol->setType(Type);
507}
508
509void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
510                                            unsigned Align) {
511  assert(isPowerOf2_32(Align));
512
513  MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
514  SymbolELF->setType(ELF::STT_OBJECT);
515
516  if (!SymbolELF->isBindingSet()) {
517    SymbolELF->setBinding(ELF::STB_GLOBAL);
518    SymbolELF->setExternal(true);
519  }
520
521  if (SymbolELF->declareCommon(Size, Align, true)) {
522    report_fatal_error("Symbol: " + Symbol->getName() +
523                       " redeclared as different type");
524  }
525
526  SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
527  SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
528}
529
530bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
531  // Create two labels to mark the beginning and end of the desc field
532  // and a MCExpr to calculate the size of the desc field.
533  auto &Context = getContext();
534  auto *DescBegin = Context.createTempSymbol();
535  auto *DescEnd = Context.createTempSymbol();
536  auto *DescSZ = MCBinaryExpr::createSub(
537    MCSymbolRefExpr::create(DescEnd, Context),
538    MCSymbolRefExpr::create(DescBegin, Context), Context);
539
540  EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
541           [&](MCELFStreamer &OS) {
542             OS.EmitLabel(DescBegin);
543             OS.EmitBytes(IsaVersionString);
544             OS.EmitLabel(DescEnd);
545           });
546  return true;
547}
548
549bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
550                                              bool Strict) {
551  V3::MetadataVerifier Verifier(Strict);
552  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
553    return false;
554
555  std::string HSAMetadataString;
556  HSAMetadataDoc.writeToBlob(HSAMetadataString);
557
558  // Create two labels to mark the beginning and end of the desc field
559  // and a MCExpr to calculate the size of the desc field.
560  auto &Context = getContext();
561  auto *DescBegin = Context.createTempSymbol();
562  auto *DescEnd = Context.createTempSymbol();
563  auto *DescSZ = MCBinaryExpr::createSub(
564      MCSymbolRefExpr::create(DescEnd, Context),
565      MCSymbolRefExpr::create(DescBegin, Context), Context);
566
567  EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
568           [&](MCELFStreamer &OS) {
569             OS.EmitLabel(DescBegin);
570             OS.EmitBytes(HSAMetadataString);
571             OS.EmitLabel(DescEnd);
572           });
573  return true;
574}
575
576bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
577    const AMDGPU::HSAMD::Metadata &HSAMetadata) {
578  std::string HSAMetadataString;
579  if (HSAMD::toString(HSAMetadata, HSAMetadataString))
580    return false;
581
582  // Create two labels to mark the beginning and end of the desc field
583  // and a MCExpr to calculate the size of the desc field.
584  auto &Context = getContext();
585  auto *DescBegin = Context.createTempSymbol();
586  auto *DescEnd = Context.createTempSymbol();
587  auto *DescSZ = MCBinaryExpr::createSub(
588    MCSymbolRefExpr::create(DescEnd, Context),
589    MCSymbolRefExpr::create(DescBegin, Context), Context);
590
591  EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
592           [&](MCELFStreamer &OS) {
593             OS.EmitLabel(DescBegin);
594             OS.EmitBytes(HSAMetadataString);
595             OS.EmitLabel(DescEnd);
596           });
597  return true;
598}
599
600bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
601  const uint32_t Encoded_s_code_end = 0xbf9f0000;
602
603  MCStreamer &OS = getStreamer();
604  OS.PushSection();
605  OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
606  for (unsigned I = 0; I < 48; ++I)
607    OS.EmitIntValue(Encoded_s_code_end, 4);
608  OS.PopSection();
609  return true;
610}
611
612void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
613    const MCSubtargetInfo &STI, StringRef KernelName,
614    const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
615    uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
616    bool ReserveXNACK) {
617  auto &Streamer = getStreamer();
618  auto &Context = Streamer.getContext();
619
620  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
621      Context.getOrCreateSymbol(Twine(KernelName)));
622  MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
623      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
624
625  // Copy kernel descriptor symbol's binding, other and visibility from the
626  // kernel code symbol.
627  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
628  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
629  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
630  // Kernel descriptor symbol's type and size are fixed.
631  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
632  KernelDescriptorSymbol->setSize(
633      MCConstantExpr::create(sizeof(KernelDescriptor), Context));
634
635  // The visibility of the kernel code symbol must be protected or less to allow
636  // static relocations from the kernel descriptor to be used.
637  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
638    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
639
640  Streamer.EmitLabel(KernelDescriptorSymbol);
641  Streamer.EmitBytes(StringRef(
642      (const char*)&(KernelDescriptor),
643      offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
644  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
645  // expression being created is:
646  //   (start of kernel code) - (start of kernel descriptor)
647  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
648  Streamer.EmitValue(MCBinaryExpr::createSub(
649      MCSymbolRefExpr::create(
650          KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
651      MCSymbolRefExpr::create(
652          KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
653      Context),
654      sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
655  Streamer.EmitBytes(StringRef(
656      (const char*)&(KernelDescriptor) +
657          offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
658          sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
659      sizeof(KernelDescriptor) -
660          offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
661          sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
662}
663