1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUHSAMetadataStreamer.h"
16#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
18#include "MCTargetDesc/AMDGPUTargetStreamer.h"
19#include "SIMachineFunctionInfo.h"
20#include "SIProgramInfo.h"
21#include "Utils/AMDGPUBaseInfo.h"
22#include "llvm/ADT/StringSwitch.h"
23#include "llvm/IR/Constants.h"
24#include "llvm/IR/Module.h"
25#include "llvm/Support/raw_ostream.h"
26
27namespace llvm {
28
29static cl::opt<bool> DumpHSAMetadata(
30    "amdgpu-dump-hsa-metadata",
31    cl::desc("Dump AMDGPU HSA Metadata"));
32static cl::opt<bool> VerifyHSAMetadata(
33    "amdgpu-verify-hsa-metadata",
34    cl::desc("Verify AMDGPU HSA Metadata"));
35
36namespace AMDGPU {
37namespace HSAMD {
38
39//===----------------------------------------------------------------------===//
40// HSAMetadataStreamerV2
41//===----------------------------------------------------------------------===//
42void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44}
45
46void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47  errs() << "AMDGPU HSA Metadata Parser Test: ";
48
49  HSAMD::Metadata FromHSAMetadataString;
50  if (fromString(HSAMetadataString, FromHSAMetadataString)) {
51    errs() << "FAIL\n";
52    return;
53  }
54
55  std::string ToHSAMetadataString;
56  if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57    errs() << "FAIL\n";
58    return;
59  }
60
61  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62         << '\n';
63  if (HSAMetadataString != ToHSAMetadataString) {
64    errs() << "Original input: " << HSAMetadataString << '\n'
65           << "Produced output: " << ToHSAMetadataString << '\n';
66  }
67}
68
69AccessQualifier
70MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71  if (AccQual.empty())
72    return AccessQualifier::Unknown;
73
74  return StringSwitch<AccessQualifier>(AccQual)
75             .Case("read_only",  AccessQualifier::ReadOnly)
76             .Case("write_only", AccessQualifier::WriteOnly)
77             .Case("read_write", AccessQualifier::ReadWrite)
78             .Default(AccessQualifier::Default);
79}
80
81AddressSpaceQualifier
82MetadataStreamerV2::getAddressSpaceQualifier(
83    unsigned AddressSpace) const {
84  switch (AddressSpace) {
85  case AMDGPUAS::PRIVATE_ADDRESS:
86    return AddressSpaceQualifier::Private;
87  case AMDGPUAS::GLOBAL_ADDRESS:
88    return AddressSpaceQualifier::Global;
89  case AMDGPUAS::CONSTANT_ADDRESS:
90    return AddressSpaceQualifier::Constant;
91  case AMDGPUAS::LOCAL_ADDRESS:
92    return AddressSpaceQualifier::Local;
93  case AMDGPUAS::FLAT_ADDRESS:
94    return AddressSpaceQualifier::Generic;
95  case AMDGPUAS::REGION_ADDRESS:
96    return AddressSpaceQualifier::Region;
97  default:
98    return AddressSpaceQualifier::Unknown;
99  }
100}
101
102ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                           StringRef BaseTypeName) const {
104  if (TypeQual.find("pipe") != StringRef::npos)
105    return ValueKind::Pipe;
106
107  return StringSwitch<ValueKind>(BaseTypeName)
108             .Case("image1d_t", ValueKind::Image)
109             .Case("image1d_array_t", ValueKind::Image)
110             .Case("image1d_buffer_t", ValueKind::Image)
111             .Case("image2d_t", ValueKind::Image)
112             .Case("image2d_array_t", ValueKind::Image)
113             .Case("image2d_array_depth_t", ValueKind::Image)
114             .Case("image2d_array_msaa_t", ValueKind::Image)
115             .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116             .Case("image2d_depth_t", ValueKind::Image)
117             .Case("image2d_msaa_t", ValueKind::Image)
118             .Case("image2d_msaa_depth_t", ValueKind::Image)
119             .Case("image3d_t", ValueKind::Image)
120             .Case("sampler_t", ValueKind::Sampler)
121             .Case("queue_t", ValueKind::Queue)
122             .Default(isa<PointerType>(Ty) ?
123                          (Ty->getPointerAddressSpace() ==
124                           AMDGPUAS::LOCAL_ADDRESS ?
125                           ValueKind::DynamicSharedPointer :
126                           ValueKind::GlobalBuffer) :
127                      ValueKind::ByValue);
128}
129
130ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131  switch (Ty->getTypeID()) {
132  case Type::IntegerTyID: {
133    auto Signed = !TypeName.startswith("u");
134    switch (Ty->getIntegerBitWidth()) {
135    case 8:
136      return Signed ? ValueType::I8 : ValueType::U8;
137    case 16:
138      return Signed ? ValueType::I16 : ValueType::U16;
139    case 32:
140      return Signed ? ValueType::I32 : ValueType::U32;
141    case 64:
142      return Signed ? ValueType::I64 : ValueType::U64;
143    default:
144      return ValueType::Struct;
145    }
146  }
147  case Type::HalfTyID:
148    return ValueType::F16;
149  case Type::FloatTyID:
150    return ValueType::F32;
151  case Type::DoubleTyID:
152    return ValueType::F64;
153  case Type::PointerTyID:
154    return getValueType(Ty->getPointerElementType(), TypeName);
155  case Type::VectorTyID:
156    return getValueType(Ty->getVectorElementType(), TypeName);
157  default:
158    return ValueType::Struct;
159  }
160}
161
162std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163  switch (Ty->getTypeID()) {
164  case Type::IntegerTyID: {
165    if (!Signed)
166      return (Twine('u') + getTypeName(Ty, true)).str();
167
168    auto BitWidth = Ty->getIntegerBitWidth();
169    switch (BitWidth) {
170    case 8:
171      return "char";
172    case 16:
173      return "short";
174    case 32:
175      return "int";
176    case 64:
177      return "long";
178    default:
179      return (Twine('i') + Twine(BitWidth)).str();
180    }
181  }
182  case Type::HalfTyID:
183    return "half";
184  case Type::FloatTyID:
185    return "float";
186  case Type::DoubleTyID:
187    return "double";
188  case Type::VectorTyID: {
189    auto VecTy = cast<VectorType>(Ty);
190    auto ElTy = VecTy->getElementType();
191    auto NumElements = VecTy->getVectorNumElements();
192    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193  }
194  default:
195    return "unknown";
196  }
197}
198
199std::vector<uint32_t>
200MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201  std::vector<uint32_t> Dims;
202  if (Node->getNumOperands() != 3)
203    return Dims;
204
205  for (auto &Op : Node->operands())
206    Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207  return Dims;
208}
209
210Kernel::CodeProps::Metadata
211MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212                                    const SIProgramInfo &ProgramInfo) const {
213  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216  const Function &F = MF.getFunction();
217
218  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219         F.getCallingConv() == CallingConv::SPIR_KERNEL);
220
221  Align MaxKernArgAlign;
222  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223                                                               MaxKernArgAlign);
224  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226  HSACodeProps.mKernargSegmentAlign =
227      std::max(MaxKernArgAlign, Align(4)).value();
228  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
229  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
230  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
231  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
232  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
233  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
234  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
235  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
236
237  return HSACodeProps;
238}
239
240Kernel::DebugProps::Metadata
241MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
242                                     const SIProgramInfo &ProgramInfo) const {
243  return HSAMD::Kernel::DebugProps::Metadata();
244}
245
246void MetadataStreamerV2::emitVersion() {
247  auto &Version = HSAMetadata.mVersion;
248
249  Version.push_back(VersionMajor);
250  Version.push_back(VersionMinor);
251}
252
253void MetadataStreamerV2::emitPrintf(const Module &Mod) {
254  auto &Printf = HSAMetadata.mPrintf;
255
256  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
257  if (!Node)
258    return;
259
260  for (auto Op : Node->operands())
261    if (Op->getNumOperands())
262      Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
263}
264
265void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
266  auto &Kernel = HSAMetadata.mKernels.back();
267
268  // TODO: What about other languages?
269  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
270  if (!Node || !Node->getNumOperands())
271    return;
272  auto Op0 = Node->getOperand(0);
273  if (Op0->getNumOperands() <= 1)
274    return;
275
276  Kernel.mLanguage = "OpenCL C";
277  Kernel.mLanguageVersion.push_back(
278      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
279  Kernel.mLanguageVersion.push_back(
280      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
281}
282
283void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
284  auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
285
286  if (auto Node = Func.getMetadata("reqd_work_group_size"))
287    Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
288  if (auto Node = Func.getMetadata("work_group_size_hint"))
289    Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
290  if (auto Node = Func.getMetadata("vec_type_hint")) {
291    Attrs.mVecTypeHint = getTypeName(
292        cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
293        mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
294  }
295  if (Func.hasFnAttribute("runtime-handle")) {
296    Attrs.mRuntimeHandle =
297        Func.getFnAttribute("runtime-handle").getValueAsString().str();
298  }
299}
300
301void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
302  for (auto &Arg : Func.args())
303    emitKernelArg(Arg);
304
305  emitHiddenKernelArgs(Func);
306}
307
308void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
309  auto Func = Arg.getParent();
310  auto ArgNo = Arg.getArgNo();
311  const MDNode *Node;
312
313  StringRef Name;
314  Node = Func->getMetadata("kernel_arg_name");
315  if (Node && ArgNo < Node->getNumOperands())
316    Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
317  else if (Arg.hasName())
318    Name = Arg.getName();
319
320  StringRef TypeName;
321  Node = Func->getMetadata("kernel_arg_type");
322  if (Node && ArgNo < Node->getNumOperands())
323    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
324
325  StringRef BaseTypeName;
326  Node = Func->getMetadata("kernel_arg_base_type");
327  if (Node && ArgNo < Node->getNumOperands())
328    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
329
330  StringRef AccQual;
331  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
332      Arg.hasNoAliasAttr()) {
333    AccQual = "read_only";
334  } else {
335    Node = Func->getMetadata("kernel_arg_access_qual");
336    if (Node && ArgNo < Node->getNumOperands())
337      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
338  }
339
340  StringRef TypeQual;
341  Node = Func->getMetadata("kernel_arg_type_qual");
342  if (Node && ArgNo < Node->getNumOperands())
343    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
344
345  Type *Ty = Arg.getType();
346  const DataLayout &DL = Func->getParent()->getDataLayout();
347
348  unsigned PointeeAlign = 0;
349  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
350    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
351      PointeeAlign = Arg.getParamAlignment();
352      if (PointeeAlign == 0)
353        PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
354    }
355  }
356
357  emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
358                PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
359}
360
361void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
362                                       ValueKind ValueKind,
363                                       unsigned PointeeAlign, StringRef Name,
364                                       StringRef TypeName,
365                                       StringRef BaseTypeName,
366                                       StringRef AccQual, StringRef TypeQual) {
367  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
368  auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
369
370  Arg.mName = Name;
371  Arg.mTypeName = TypeName;
372  Arg.mSize = DL.getTypeAllocSize(Ty);
373  Arg.mAlign = DL.getABITypeAlignment(Ty);
374  Arg.mValueKind = ValueKind;
375  Arg.mValueType = getValueType(Ty, BaseTypeName);
376  Arg.mPointeeAlign = PointeeAlign;
377
378  if (auto PtrTy = dyn_cast<PointerType>(Ty))
379    Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
380
381  Arg.mAccQual = getAccessQualifier(AccQual);
382
383  // TODO: Emit Arg.mActualAccQual.
384
385  SmallVector<StringRef, 1> SplitTypeQuals;
386  TypeQual.split(SplitTypeQuals, " ", -1, false);
387  for (StringRef Key : SplitTypeQuals) {
388    auto P = StringSwitch<bool*>(Key)
389                 .Case("const",    &Arg.mIsConst)
390                 .Case("restrict", &Arg.mIsRestrict)
391                 .Case("volatile", &Arg.mIsVolatile)
392                 .Case("pipe",     &Arg.mIsPipe)
393                 .Default(nullptr);
394    if (P)
395      *P = true;
396  }
397}
398
399void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
400  int HiddenArgNumBytes =
401      getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
402
403  if (!HiddenArgNumBytes)
404    return;
405
406  auto &DL = Func.getParent()->getDataLayout();
407  auto Int64Ty = Type::getInt64Ty(Func.getContext());
408
409  if (HiddenArgNumBytes >= 8)
410    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
411  if (HiddenArgNumBytes >= 16)
412    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
413  if (HiddenArgNumBytes >= 24)
414    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
415
416  auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
417                                      AMDGPUAS::GLOBAL_ADDRESS);
418
419  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
420  // "none" argument.
421  if (HiddenArgNumBytes >= 32) {
422    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
423      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
424    else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
425      // The printf runtime binding pass should have ensured that hostcall and
426      // printf are not used in the same module.
427      assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
428      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
429    } else
430      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
431  }
432
433  // Emit "default queue" and "completion action" arguments if enqueue kernel is
434  // used, otherwise emit dummy "none" arguments.
435  if (HiddenArgNumBytes >= 48) {
436    if (Func.hasFnAttribute("calls-enqueue-kernel")) {
437      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
438      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
439    } else {
440      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
441      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
442    }
443  }
444
445  // Emit the pointer argument for multi-grid object.
446  if (HiddenArgNumBytes >= 56)
447    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
448}
449
450bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
451  return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
452}
453
454void MetadataStreamerV2::begin(const Module &Mod) {
455  emitVersion();
456  emitPrintf(Mod);
457}
458
459void MetadataStreamerV2::end() {
460  std::string HSAMetadataString;
461  if (toString(HSAMetadata, HSAMetadataString))
462    return;
463
464  if (DumpHSAMetadata)
465    dump(HSAMetadataString);
466  if (VerifyHSAMetadata)
467    verify(HSAMetadataString);
468}
469
470void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
471                                    const SIProgramInfo &ProgramInfo) {
472  auto &Func = MF.getFunction();
473  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
474    return;
475
476  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
477  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
478
479  HSAMetadata.mKernels.push_back(Kernel::Metadata());
480  auto &Kernel = HSAMetadata.mKernels.back();
481
482  Kernel.mName = Func.getName();
483  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
484  emitKernelLanguage(Func);
485  emitKernelAttrs(Func);
486  emitKernelArgs(Func);
487  HSAMetadata.mKernels.back().mCodeProps = CodeProps;
488  HSAMetadata.mKernels.back().mDebugProps = DebugProps;
489}
490
491//===----------------------------------------------------------------------===//
492// HSAMetadataStreamerV3
493//===----------------------------------------------------------------------===//
494
495void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
496  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
497}
498
499void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
500  errs() << "AMDGPU HSA Metadata Parser Test: ";
501
502  msgpack::Document FromHSAMetadataString;
503
504  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
505    errs() << "FAIL\n";
506    return;
507  }
508
509  std::string ToHSAMetadataString;
510  raw_string_ostream StrOS(ToHSAMetadataString);
511  FromHSAMetadataString.toYAML(StrOS);
512
513  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
514  if (HSAMetadataString != ToHSAMetadataString) {
515    errs() << "Original input: " << HSAMetadataString << '\n'
516           << "Produced output: " << StrOS.str() << '\n';
517  }
518}
519
520Optional<StringRef>
521MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
522  return StringSwitch<Optional<StringRef>>(AccQual)
523      .Case("read_only", StringRef("read_only"))
524      .Case("write_only", StringRef("write_only"))
525      .Case("read_write", StringRef("read_write"))
526      .Default(None);
527}
528
529Optional<StringRef>
530MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
531  switch (AddressSpace) {
532  case AMDGPUAS::PRIVATE_ADDRESS:
533    return StringRef("private");
534  case AMDGPUAS::GLOBAL_ADDRESS:
535    return StringRef("global");
536  case AMDGPUAS::CONSTANT_ADDRESS:
537    return StringRef("constant");
538  case AMDGPUAS::LOCAL_ADDRESS:
539    return StringRef("local");
540  case AMDGPUAS::FLAT_ADDRESS:
541    return StringRef("generic");
542  case AMDGPUAS::REGION_ADDRESS:
543    return StringRef("region");
544  default:
545    return None;
546  }
547}
548
549StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
550                                           StringRef BaseTypeName) const {
551  if (TypeQual.find("pipe") != StringRef::npos)
552    return "pipe";
553
554  return StringSwitch<StringRef>(BaseTypeName)
555      .Case("image1d_t", "image")
556      .Case("image1d_array_t", "image")
557      .Case("image1d_buffer_t", "image")
558      .Case("image2d_t", "image")
559      .Case("image2d_array_t", "image")
560      .Case("image2d_array_depth_t", "image")
561      .Case("image2d_array_msaa_t", "image")
562      .Case("image2d_array_msaa_depth_t", "image")
563      .Case("image2d_depth_t", "image")
564      .Case("image2d_msaa_t", "image")
565      .Case("image2d_msaa_depth_t", "image")
566      .Case("image3d_t", "image")
567      .Case("sampler_t", "sampler")
568      .Case("queue_t", "queue")
569      .Default(isa<PointerType>(Ty)
570                   ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
571                          ? "dynamic_shared_pointer"
572                          : "global_buffer")
573                   : "by_value");
574}
575
576StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
577  switch (Ty->getTypeID()) {
578  case Type::IntegerTyID: {
579    auto Signed = !TypeName.startswith("u");
580    switch (Ty->getIntegerBitWidth()) {
581    case 8:
582      return Signed ? "i8" : "u8";
583    case 16:
584      return Signed ? "i16" : "u16";
585    case 32:
586      return Signed ? "i32" : "u32";
587    case 64:
588      return Signed ? "i64" : "u64";
589    default:
590      return "struct";
591    }
592  }
593  case Type::HalfTyID:
594    return "f16";
595  case Type::FloatTyID:
596    return "f32";
597  case Type::DoubleTyID:
598    return "f64";
599  case Type::PointerTyID:
600    return getValueType(Ty->getPointerElementType(), TypeName);
601  case Type::VectorTyID:
602    return getValueType(Ty->getVectorElementType(), TypeName);
603  default:
604    return "struct";
605  }
606}
607
608std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
609  switch (Ty->getTypeID()) {
610  case Type::IntegerTyID: {
611    if (!Signed)
612      return (Twine('u') + getTypeName(Ty, true)).str();
613
614    auto BitWidth = Ty->getIntegerBitWidth();
615    switch (BitWidth) {
616    case 8:
617      return "char";
618    case 16:
619      return "short";
620    case 32:
621      return "int";
622    case 64:
623      return "long";
624    default:
625      return (Twine('i') + Twine(BitWidth)).str();
626    }
627  }
628  case Type::HalfTyID:
629    return "half";
630  case Type::FloatTyID:
631    return "float";
632  case Type::DoubleTyID:
633    return "double";
634  case Type::VectorTyID: {
635    auto VecTy = cast<VectorType>(Ty);
636    auto ElTy = VecTy->getElementType();
637    auto NumElements = VecTy->getVectorNumElements();
638    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
639  }
640  default:
641    return "unknown";
642  }
643}
644
645msgpack::ArrayDocNode
646MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
647  auto Dims = HSAMetadataDoc->getArrayNode();
648  if (Node->getNumOperands() != 3)
649    return Dims;
650
651  for (auto &Op : Node->operands())
652    Dims.push_back(Dims.getDocument()->getNode(
653        uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
654  return Dims;
655}
656
657void MetadataStreamerV3::emitVersion() {
658  auto Version = HSAMetadataDoc->getArrayNode();
659  Version.push_back(Version.getDocument()->getNode(VersionMajor));
660  Version.push_back(Version.getDocument()->getNode(VersionMinor));
661  getRootMetadata("amdhsa.version") = Version;
662}
663
664void MetadataStreamerV3::emitPrintf(const Module &Mod) {
665  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
666  if (!Node)
667    return;
668
669  auto Printf = HSAMetadataDoc->getArrayNode();
670  for (auto Op : Node->operands())
671    if (Op->getNumOperands())
672      Printf.push_back(Printf.getDocument()->getNode(
673          cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
674  getRootMetadata("amdhsa.printf") = Printf;
675}
676
677void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
678                                            msgpack::MapDocNode Kern) {
679  // TODO: What about other languages?
680  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
681  if (!Node || !Node->getNumOperands())
682    return;
683  auto Op0 = Node->getOperand(0);
684  if (Op0->getNumOperands() <= 1)
685    return;
686
687  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
688  auto LanguageVersion = Kern.getDocument()->getArrayNode();
689  LanguageVersion.push_back(Kern.getDocument()->getNode(
690      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
691  LanguageVersion.push_back(Kern.getDocument()->getNode(
692      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
693  Kern[".language_version"] = LanguageVersion;
694}
695
696void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
697                                         msgpack::MapDocNode Kern) {
698
699  if (auto Node = Func.getMetadata("reqd_work_group_size"))
700    Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
701  if (auto Node = Func.getMetadata("work_group_size_hint"))
702    Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
703  if (auto Node = Func.getMetadata("vec_type_hint")) {
704    Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
705        getTypeName(
706            cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
707            mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
708        /*Copy=*/true);
709  }
710  if (Func.hasFnAttribute("runtime-handle")) {
711    Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
712        Func.getFnAttribute("runtime-handle").getValueAsString().str(),
713        /*Copy=*/true);
714  }
715}
716
717void MetadataStreamerV3::emitKernelArgs(const Function &Func,
718                                        msgpack::MapDocNode Kern) {
719  unsigned Offset = 0;
720  auto Args = HSAMetadataDoc->getArrayNode();
721  for (auto &Arg : Func.args())
722    emitKernelArg(Arg, Offset, Args);
723
724  emitHiddenKernelArgs(Func, Offset, Args);
725
726  Kern[".args"] = Args;
727}
728
729void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
730                                       msgpack::ArrayDocNode Args) {
731  auto Func = Arg.getParent();
732  auto ArgNo = Arg.getArgNo();
733  const MDNode *Node;
734
735  StringRef Name;
736  Node = Func->getMetadata("kernel_arg_name");
737  if (Node && ArgNo < Node->getNumOperands())
738    Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
739  else if (Arg.hasName())
740    Name = Arg.getName();
741
742  StringRef TypeName;
743  Node = Func->getMetadata("kernel_arg_type");
744  if (Node && ArgNo < Node->getNumOperands())
745    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
746
747  StringRef BaseTypeName;
748  Node = Func->getMetadata("kernel_arg_base_type");
749  if (Node && ArgNo < Node->getNumOperands())
750    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
751
752  StringRef AccQual;
753  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
754      Arg.hasNoAliasAttr()) {
755    AccQual = "read_only";
756  } else {
757    Node = Func->getMetadata("kernel_arg_access_qual");
758    if (Node && ArgNo < Node->getNumOperands())
759      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
760  }
761
762  StringRef TypeQual;
763  Node = Func->getMetadata("kernel_arg_type_qual");
764  if (Node && ArgNo < Node->getNumOperands())
765    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
766
767  Type *Ty = Arg.getType();
768  const DataLayout &DL = Func->getParent()->getDataLayout();
769
770  unsigned PointeeAlign = 0;
771  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
772    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
773      PointeeAlign = Arg.getParamAlignment();
774      if (PointeeAlign == 0)
775        PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
776    }
777  }
778
779  emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
780                getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
781                Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
782                TypeQual);
783}
784
785void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
786                                       StringRef ValueKind, unsigned &Offset,
787                                       msgpack::ArrayDocNode Args,
788                                       unsigned PointeeAlign, StringRef Name,
789                                       StringRef TypeName,
790                                       StringRef BaseTypeName,
791                                       StringRef AccQual, StringRef TypeQual) {
792  auto Arg = Args.getDocument()->getMapNode();
793
794  if (!Name.empty())
795    Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
796  if (!TypeName.empty())
797    Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
798  auto Size = DL.getTypeAllocSize(Ty);
799  auto Align = DL.getABITypeAlignment(Ty);
800  Arg[".size"] = Arg.getDocument()->getNode(Size);
801  Offset = alignTo(Offset, Align);
802  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
803  Offset += Size;
804  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
805  Arg[".value_type"] =
806      Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
807  if (PointeeAlign)
808    Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
809
810  if (auto PtrTy = dyn_cast<PointerType>(Ty))
811    if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
812      Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
813
814  if (auto AQ = getAccessQualifier(AccQual))
815    Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
816
817  // TODO: Emit Arg[".actual_access"].
818
819  SmallVector<StringRef, 1> SplitTypeQuals;
820  TypeQual.split(SplitTypeQuals, " ", -1, false);
821  for (StringRef Key : SplitTypeQuals) {
822    if (Key == "const")
823      Arg[".is_const"] = Arg.getDocument()->getNode(true);
824    else if (Key == "restrict")
825      Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
826    else if (Key == "volatile")
827      Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
828    else if (Key == "pipe")
829      Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
830  }
831
832  Args.push_back(Arg);
833}
834
835void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
836                                              unsigned &Offset,
837                                              msgpack::ArrayDocNode Args) {
838  int HiddenArgNumBytes =
839      getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
840
841  if (!HiddenArgNumBytes)
842    return;
843
844  auto &DL = Func.getParent()->getDataLayout();
845  auto Int64Ty = Type::getInt64Ty(Func.getContext());
846
847  if (HiddenArgNumBytes >= 8)
848    emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
849  if (HiddenArgNumBytes >= 16)
850    emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
851  if (HiddenArgNumBytes >= 24)
852    emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
853
854  auto Int8PtrTy =
855      Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
856
857  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
858  // "none" argument.
859  if (HiddenArgNumBytes >= 32) {
860    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
861      emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
862    else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
863      // The printf runtime binding pass should have ensured that hostcall and
864      // printf are not used in the same module.
865      assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
866      emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
867    } else
868      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
869  }
870
871  // Emit "default queue" and "completion action" arguments if enqueue kernel is
872  // used, otherwise emit dummy "none" arguments.
873  if (HiddenArgNumBytes >= 48) {
874    if (Func.hasFnAttribute("calls-enqueue-kernel")) {
875      emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
876      emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
877    } else {
878      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
879      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
880    }
881  }
882
883  // Emit the pointer argument for multi-grid object.
884  if (HiddenArgNumBytes >= 56)
885    emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
886}
887
888msgpack::MapDocNode
889MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
890                                      const SIProgramInfo &ProgramInfo) const {
891  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
892  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
893  const Function &F = MF.getFunction();
894
895  auto Kern = HSAMetadataDoc->getMapNode();
896
897  Align MaxKernArgAlign;
898  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
899      STM.getKernArgSegmentSize(F, MaxKernArgAlign));
900  Kern[".group_segment_fixed_size"] =
901      Kern.getDocument()->getNode(ProgramInfo.LDSSize);
902  Kern[".private_segment_fixed_size"] =
903      Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
904  Kern[".kernarg_segment_align"] =
905      Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
906  Kern[".wavefront_size"] =
907      Kern.getDocument()->getNode(STM.getWavefrontSize());
908  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
909  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
910  Kern[".max_flat_workgroup_size"] =
911      Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
912  Kern[".sgpr_spill_count"] =
913      Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
914  Kern[".vgpr_spill_count"] =
915      Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
916
917  return Kern;
918}
919
920bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
921  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
922}
923
924void MetadataStreamerV3::begin(const Module &Mod) {
925  emitVersion();
926  emitPrintf(Mod);
927  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
928}
929
930void MetadataStreamerV3::end() {
931  std::string HSAMetadataString;
932  raw_string_ostream StrOS(HSAMetadataString);
933  HSAMetadataDoc->toYAML(StrOS);
934
935  if (DumpHSAMetadata)
936    dump(StrOS.str());
937  if (VerifyHSAMetadata)
938    verify(StrOS.str());
939}
940
941void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
942                                    const SIProgramInfo &ProgramInfo) {
943  auto &Func = MF.getFunction();
944  auto Kern = getHSAKernelProps(MF, ProgramInfo);
945
946  assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
947         Func.getCallingConv() == CallingConv::SPIR_KERNEL);
948
949  auto Kernels =
950      getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
951
952  {
953    Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
954    Kern[".symbol"] = Kern.getDocument()->getNode(
955        (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
956    emitKernelLanguage(Func, Kern);
957    emitKernelAttrs(Func, Kern);
958    emitKernelArgs(Func, Kern);
959  }
960
961  Kernels.push_back(Kern);
962}
963
964} // end namespace HSAMD
965} // end namespace AMDGPU
966} // end namespace llvm
967