1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUHSAMetadataStreamer.h"
16#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
18#include "MCTargetDesc/AMDGPUTargetStreamer.h"
19#include "SIMachineFunctionInfo.h"
20#include "SIProgramInfo.h"
21#include "Utils/AMDGPUBaseInfo.h"
22#include "llvm/ADT/StringSwitch.h"
23#include "llvm/IR/Constants.h"
24#include "llvm/IR/Module.h"
25#include "llvm/Support/raw_ostream.h"
26
27namespace llvm {
28
29static cl::opt<bool> DumpHSAMetadata(
30    "amdgpu-dump-hsa-metadata",
31    cl::desc("Dump AMDGPU HSA Metadata"));
32static cl::opt<bool> VerifyHSAMetadata(
33    "amdgpu-verify-hsa-metadata",
34    cl::desc("Verify AMDGPU HSA Metadata"));
35
36namespace AMDGPU {
37namespace HSAMD {
38
39//===----------------------------------------------------------------------===//
40// HSAMetadataStreamerV2
41//===----------------------------------------------------------------------===//
42void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44}
45
46void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47  errs() << "AMDGPU HSA Metadata Parser Test: ";
48
49  HSAMD::Metadata FromHSAMetadataString;
50  if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
51    errs() << "FAIL\n";
52    return;
53  }
54
55  std::string ToHSAMetadataString;
56  if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57    errs() << "FAIL\n";
58    return;
59  }
60
61  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62         << '\n';
63  if (HSAMetadataString != ToHSAMetadataString) {
64    errs() << "Original input: " << HSAMetadataString << '\n'
65           << "Produced output: " << ToHSAMetadataString << '\n';
66  }
67}
68
69AccessQualifier
70MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71  if (AccQual.empty())
72    return AccessQualifier::Unknown;
73
74  return StringSwitch<AccessQualifier>(AccQual)
75             .Case("read_only",  AccessQualifier::ReadOnly)
76             .Case("write_only", AccessQualifier::WriteOnly)
77             .Case("read_write", AccessQualifier::ReadWrite)
78             .Default(AccessQualifier::Default);
79}
80
81AddressSpaceQualifier
82MetadataStreamerV2::getAddressSpaceQualifier(
83    unsigned AddressSpace) const {
84  switch (AddressSpace) {
85  case AMDGPUAS::PRIVATE_ADDRESS:
86    return AddressSpaceQualifier::Private;
87  case AMDGPUAS::GLOBAL_ADDRESS:
88    return AddressSpaceQualifier::Global;
89  case AMDGPUAS::CONSTANT_ADDRESS:
90    return AddressSpaceQualifier::Constant;
91  case AMDGPUAS::LOCAL_ADDRESS:
92    return AddressSpaceQualifier::Local;
93  case AMDGPUAS::FLAT_ADDRESS:
94    return AddressSpaceQualifier::Generic;
95  case AMDGPUAS::REGION_ADDRESS:
96    return AddressSpaceQualifier::Region;
97  default:
98    return AddressSpaceQualifier::Unknown;
99  }
100}
101
102ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                           StringRef BaseTypeName) const {
104  if (TypeQual.find("pipe") != StringRef::npos)
105    return ValueKind::Pipe;
106
107  return StringSwitch<ValueKind>(BaseTypeName)
108             .Case("image1d_t", ValueKind::Image)
109             .Case("image1d_array_t", ValueKind::Image)
110             .Case("image1d_buffer_t", ValueKind::Image)
111             .Case("image2d_t", ValueKind::Image)
112             .Case("image2d_array_t", ValueKind::Image)
113             .Case("image2d_array_depth_t", ValueKind::Image)
114             .Case("image2d_array_msaa_t", ValueKind::Image)
115             .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116             .Case("image2d_depth_t", ValueKind::Image)
117             .Case("image2d_msaa_t", ValueKind::Image)
118             .Case("image2d_msaa_depth_t", ValueKind::Image)
119             .Case("image3d_t", ValueKind::Image)
120             .Case("sampler_t", ValueKind::Sampler)
121             .Case("queue_t", ValueKind::Queue)
122             .Default(isa<PointerType>(Ty) ?
123                          (Ty->getPointerAddressSpace() ==
124                           AMDGPUAS::LOCAL_ADDRESS ?
125                           ValueKind::DynamicSharedPointer :
126                           ValueKind::GlobalBuffer) :
127                      ValueKind::ByValue);
128}
129
130std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
131  switch (Ty->getTypeID()) {
132  case Type::IntegerTyID: {
133    if (!Signed)
134      return (Twine('u') + getTypeName(Ty, true)).str();
135
136    auto BitWidth = Ty->getIntegerBitWidth();
137    switch (BitWidth) {
138    case 8:
139      return "char";
140    case 16:
141      return "short";
142    case 32:
143      return "int";
144    case 64:
145      return "long";
146    default:
147      return (Twine('i') + Twine(BitWidth)).str();
148    }
149  }
150  case Type::HalfTyID:
151    return "half";
152  case Type::FloatTyID:
153    return "float";
154  case Type::DoubleTyID:
155    return "double";
156  case Type::FixedVectorTyID: {
157    auto VecTy = cast<FixedVectorType>(Ty);
158    auto ElTy = VecTy->getElementType();
159    auto NumElements = VecTy->getNumElements();
160    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
161  }
162  default:
163    return "unknown";
164  }
165}
166
167std::vector<uint32_t>
168MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
169  std::vector<uint32_t> Dims;
170  if (Node->getNumOperands() != 3)
171    return Dims;
172
173  for (auto &Op : Node->operands())
174    Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
175  return Dims;
176}
177
178Kernel::CodeProps::Metadata
179MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
180                                    const SIProgramInfo &ProgramInfo) const {
181  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
182  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
183  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
184  const Function &F = MF.getFunction();
185
186  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
187         F.getCallingConv() == CallingConv::SPIR_KERNEL);
188
189  Align MaxKernArgAlign;
190  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
191                                                               MaxKernArgAlign);
192  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
193  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
194  HSACodeProps.mKernargSegmentAlign =
195      std::max(MaxKernArgAlign, Align(4)).value();
196  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
197  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
198  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
199  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
200  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
201  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
202  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
203  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
204
205  return HSACodeProps;
206}
207
208Kernel::DebugProps::Metadata
209MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
210                                     const SIProgramInfo &ProgramInfo) const {
211  return HSAMD::Kernel::DebugProps::Metadata();
212}
213
214void MetadataStreamerV2::emitVersion() {
215  auto &Version = HSAMetadata.mVersion;
216
217  Version.push_back(VersionMajor);
218  Version.push_back(VersionMinor);
219}
220
221void MetadataStreamerV2::emitPrintf(const Module &Mod) {
222  auto &Printf = HSAMetadata.mPrintf;
223
224  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
225  if (!Node)
226    return;
227
228  for (auto Op : Node->operands())
229    if (Op->getNumOperands())
230      Printf.push_back(
231          std::string(cast<MDString>(Op->getOperand(0))->getString()));
232}
233
234void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
235  auto &Kernel = HSAMetadata.mKernels.back();
236
237  // TODO: What about other languages?
238  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
239  if (!Node || !Node->getNumOperands())
240    return;
241  auto Op0 = Node->getOperand(0);
242  if (Op0->getNumOperands() <= 1)
243    return;
244
245  Kernel.mLanguage = "OpenCL C";
246  Kernel.mLanguageVersion.push_back(
247      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
248  Kernel.mLanguageVersion.push_back(
249      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
250}
251
252void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
253  auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
254
255  if (auto Node = Func.getMetadata("reqd_work_group_size"))
256    Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
257  if (auto Node = Func.getMetadata("work_group_size_hint"))
258    Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
259  if (auto Node = Func.getMetadata("vec_type_hint")) {
260    Attrs.mVecTypeHint = getTypeName(
261        cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
262        mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
263  }
264  if (Func.hasFnAttribute("runtime-handle")) {
265    Attrs.mRuntimeHandle =
266        Func.getFnAttribute("runtime-handle").getValueAsString().str();
267  }
268}
269
270void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
271  for (auto &Arg : Func.args())
272    emitKernelArg(Arg);
273
274  emitHiddenKernelArgs(Func);
275}
276
277void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
278  auto Func = Arg.getParent();
279  auto ArgNo = Arg.getArgNo();
280  const MDNode *Node;
281
282  StringRef Name;
283  Node = Func->getMetadata("kernel_arg_name");
284  if (Node && ArgNo < Node->getNumOperands())
285    Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
286  else if (Arg.hasName())
287    Name = Arg.getName();
288
289  StringRef TypeName;
290  Node = Func->getMetadata("kernel_arg_type");
291  if (Node && ArgNo < Node->getNumOperands())
292    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
293
294  StringRef BaseTypeName;
295  Node = Func->getMetadata("kernel_arg_base_type");
296  if (Node && ArgNo < Node->getNumOperands())
297    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
298
299  StringRef AccQual;
300  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
301      Arg.hasNoAliasAttr()) {
302    AccQual = "read_only";
303  } else {
304    Node = Func->getMetadata("kernel_arg_access_qual");
305    if (Node && ArgNo < Node->getNumOperands())
306      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
307  }
308
309  StringRef TypeQual;
310  Node = Func->getMetadata("kernel_arg_type_qual");
311  if (Node && ArgNo < Node->getNumOperands())
312    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
313
314  Type *Ty = Arg.getType();
315  const DataLayout &DL = Func->getParent()->getDataLayout();
316
317  MaybeAlign PointeeAlign;
318  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
319    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
320      PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
321                                                   PtrTy->getElementType());
322    }
323  }
324
325  emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
326                PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
327}
328
329void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
330                                       ValueKind ValueKind,
331                                       MaybeAlign PointeeAlign, StringRef Name,
332                                       StringRef TypeName,
333                                       StringRef BaseTypeName,
334                                       StringRef AccQual, StringRef TypeQual) {
335  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
336  auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
337
338  Arg.mName = std::string(Name);
339  Arg.mTypeName = std::string(TypeName);
340  Arg.mSize = DL.getTypeAllocSize(Ty);
341  Arg.mAlign = DL.getABITypeAlign(Ty).value();
342  Arg.mValueKind = ValueKind;
343  Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
344
345  if (auto PtrTy = dyn_cast<PointerType>(Ty))
346    Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
347
348  Arg.mAccQual = getAccessQualifier(AccQual);
349
350  // TODO: Emit Arg.mActualAccQual.
351
352  SmallVector<StringRef, 1> SplitTypeQuals;
353  TypeQual.split(SplitTypeQuals, " ", -1, false);
354  for (StringRef Key : SplitTypeQuals) {
355    auto P = StringSwitch<bool*>(Key)
356                 .Case("const",    &Arg.mIsConst)
357                 .Case("restrict", &Arg.mIsRestrict)
358                 .Case("volatile", &Arg.mIsVolatile)
359                 .Case("pipe",     &Arg.mIsPipe)
360                 .Default(nullptr);
361    if (P)
362      *P = true;
363  }
364}
365
366void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
367  int HiddenArgNumBytes =
368      getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
369
370  if (!HiddenArgNumBytes)
371    return;
372
373  auto &DL = Func.getParent()->getDataLayout();
374  auto Int64Ty = Type::getInt64Ty(Func.getContext());
375
376  if (HiddenArgNumBytes >= 8)
377    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
378  if (HiddenArgNumBytes >= 16)
379    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
380  if (HiddenArgNumBytes >= 24)
381    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
382
383  auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
384                                      AMDGPUAS::GLOBAL_ADDRESS);
385
386  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
387  // "none" argument.
388  if (HiddenArgNumBytes >= 32) {
389    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
390      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
391    else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
392      // The printf runtime binding pass should have ensured that hostcall and
393      // printf are not used in the same module.
394      assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
395      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
396    } else
397      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
398  }
399
400  // Emit "default queue" and "completion action" arguments if enqueue kernel is
401  // used, otherwise emit dummy "none" arguments.
402  if (HiddenArgNumBytes >= 48) {
403    if (Func.hasFnAttribute("calls-enqueue-kernel")) {
404      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
405      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
406    } else {
407      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
408      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
409    }
410  }
411
412  // Emit the pointer argument for multi-grid object.
413  if (HiddenArgNumBytes >= 56)
414    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
415}
416
417bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
418  return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
419}
420
421void MetadataStreamerV2::begin(const Module &Mod) {
422  emitVersion();
423  emitPrintf(Mod);
424}
425
426void MetadataStreamerV2::end() {
427  std::string HSAMetadataString;
428  if (toString(HSAMetadata, HSAMetadataString))
429    return;
430
431  if (DumpHSAMetadata)
432    dump(HSAMetadataString);
433  if (VerifyHSAMetadata)
434    verify(HSAMetadataString);
435}
436
437void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
438                                    const SIProgramInfo &ProgramInfo) {
439  auto &Func = MF.getFunction();
440  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
441    return;
442
443  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
444  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
445
446  HSAMetadata.mKernels.push_back(Kernel::Metadata());
447  auto &Kernel = HSAMetadata.mKernels.back();
448
449  Kernel.mName = std::string(Func.getName());
450  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
451  emitKernelLanguage(Func);
452  emitKernelAttrs(Func);
453  emitKernelArgs(Func);
454  HSAMetadata.mKernels.back().mCodeProps = CodeProps;
455  HSAMetadata.mKernels.back().mDebugProps = DebugProps;
456}
457
458//===----------------------------------------------------------------------===//
459// HSAMetadataStreamerV3
460//===----------------------------------------------------------------------===//
461
462void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
463  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
464}
465
466void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
467  errs() << "AMDGPU HSA Metadata Parser Test: ";
468
469  msgpack::Document FromHSAMetadataString;
470
471  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
472    errs() << "FAIL\n";
473    return;
474  }
475
476  std::string ToHSAMetadataString;
477  raw_string_ostream StrOS(ToHSAMetadataString);
478  FromHSAMetadataString.toYAML(StrOS);
479
480  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
481  if (HSAMetadataString != ToHSAMetadataString) {
482    errs() << "Original input: " << HSAMetadataString << '\n'
483           << "Produced output: " << StrOS.str() << '\n';
484  }
485}
486
487Optional<StringRef>
488MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
489  return StringSwitch<Optional<StringRef>>(AccQual)
490      .Case("read_only", StringRef("read_only"))
491      .Case("write_only", StringRef("write_only"))
492      .Case("read_write", StringRef("read_write"))
493      .Default(None);
494}
495
496Optional<StringRef>
497MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
498  switch (AddressSpace) {
499  case AMDGPUAS::PRIVATE_ADDRESS:
500    return StringRef("private");
501  case AMDGPUAS::GLOBAL_ADDRESS:
502    return StringRef("global");
503  case AMDGPUAS::CONSTANT_ADDRESS:
504    return StringRef("constant");
505  case AMDGPUAS::LOCAL_ADDRESS:
506    return StringRef("local");
507  case AMDGPUAS::FLAT_ADDRESS:
508    return StringRef("generic");
509  case AMDGPUAS::REGION_ADDRESS:
510    return StringRef("region");
511  default:
512    return None;
513  }
514}
515
516StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
517                                           StringRef BaseTypeName) const {
518  if (TypeQual.find("pipe") != StringRef::npos)
519    return "pipe";
520
521  return StringSwitch<StringRef>(BaseTypeName)
522      .Case("image1d_t", "image")
523      .Case("image1d_array_t", "image")
524      .Case("image1d_buffer_t", "image")
525      .Case("image2d_t", "image")
526      .Case("image2d_array_t", "image")
527      .Case("image2d_array_depth_t", "image")
528      .Case("image2d_array_msaa_t", "image")
529      .Case("image2d_array_msaa_depth_t", "image")
530      .Case("image2d_depth_t", "image")
531      .Case("image2d_msaa_t", "image")
532      .Case("image2d_msaa_depth_t", "image")
533      .Case("image3d_t", "image")
534      .Case("sampler_t", "sampler")
535      .Case("queue_t", "queue")
536      .Default(isa<PointerType>(Ty)
537                   ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
538                          ? "dynamic_shared_pointer"
539                          : "global_buffer")
540                   : "by_value");
541}
542
543std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
544  switch (Ty->getTypeID()) {
545  case Type::IntegerTyID: {
546    if (!Signed)
547      return (Twine('u') + getTypeName(Ty, true)).str();
548
549    auto BitWidth = Ty->getIntegerBitWidth();
550    switch (BitWidth) {
551    case 8:
552      return "char";
553    case 16:
554      return "short";
555    case 32:
556      return "int";
557    case 64:
558      return "long";
559    default:
560      return (Twine('i') + Twine(BitWidth)).str();
561    }
562  }
563  case Type::HalfTyID:
564    return "half";
565  case Type::FloatTyID:
566    return "float";
567  case Type::DoubleTyID:
568    return "double";
569  case Type::FixedVectorTyID: {
570    auto VecTy = cast<FixedVectorType>(Ty);
571    auto ElTy = VecTy->getElementType();
572    auto NumElements = VecTy->getNumElements();
573    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
574  }
575  default:
576    return "unknown";
577  }
578}
579
580msgpack::ArrayDocNode
581MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
582  auto Dims = HSAMetadataDoc->getArrayNode();
583  if (Node->getNumOperands() != 3)
584    return Dims;
585
586  for (auto &Op : Node->operands())
587    Dims.push_back(Dims.getDocument()->getNode(
588        uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
589  return Dims;
590}
591
592void MetadataStreamerV3::emitVersion() {
593  auto Version = HSAMetadataDoc->getArrayNode();
594  Version.push_back(Version.getDocument()->getNode(VersionMajor));
595  Version.push_back(Version.getDocument()->getNode(VersionMinor));
596  getRootMetadata("amdhsa.version") = Version;
597}
598
599void MetadataStreamerV3::emitPrintf(const Module &Mod) {
600  auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
601  if (!Node)
602    return;
603
604  auto Printf = HSAMetadataDoc->getArrayNode();
605  for (auto Op : Node->operands())
606    if (Op->getNumOperands())
607      Printf.push_back(Printf.getDocument()->getNode(
608          cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
609  getRootMetadata("amdhsa.printf") = Printf;
610}
611
612void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
613                                            msgpack::MapDocNode Kern) {
614  // TODO: What about other languages?
615  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
616  if (!Node || !Node->getNumOperands())
617    return;
618  auto Op0 = Node->getOperand(0);
619  if (Op0->getNumOperands() <= 1)
620    return;
621
622  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
623  auto LanguageVersion = Kern.getDocument()->getArrayNode();
624  LanguageVersion.push_back(Kern.getDocument()->getNode(
625      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
626  LanguageVersion.push_back(Kern.getDocument()->getNode(
627      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
628  Kern[".language_version"] = LanguageVersion;
629}
630
631void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
632                                         msgpack::MapDocNode Kern) {
633
634  if (auto Node = Func.getMetadata("reqd_work_group_size"))
635    Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
636  if (auto Node = Func.getMetadata("work_group_size_hint"))
637    Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
638  if (auto Node = Func.getMetadata("vec_type_hint")) {
639    Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
640        getTypeName(
641            cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
642            mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
643        /*Copy=*/true);
644  }
645  if (Func.hasFnAttribute("runtime-handle")) {
646    Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
647        Func.getFnAttribute("runtime-handle").getValueAsString().str(),
648        /*Copy=*/true);
649  }
650}
651
652void MetadataStreamerV3::emitKernelArgs(const Function &Func,
653                                        msgpack::MapDocNode Kern) {
654  unsigned Offset = 0;
655  auto Args = HSAMetadataDoc->getArrayNode();
656  for (auto &Arg : Func.args())
657    emitKernelArg(Arg, Offset, Args);
658
659  emitHiddenKernelArgs(Func, Offset, Args);
660
661  Kern[".args"] = Args;
662}
663
664void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
665                                       msgpack::ArrayDocNode Args) {
666  auto Func = Arg.getParent();
667  auto ArgNo = Arg.getArgNo();
668  const MDNode *Node;
669
670  StringRef Name;
671  Node = Func->getMetadata("kernel_arg_name");
672  if (Node && ArgNo < Node->getNumOperands())
673    Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
674  else if (Arg.hasName())
675    Name = Arg.getName();
676
677  StringRef TypeName;
678  Node = Func->getMetadata("kernel_arg_type");
679  if (Node && ArgNo < Node->getNumOperands())
680    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
681
682  StringRef BaseTypeName;
683  Node = Func->getMetadata("kernel_arg_base_type");
684  if (Node && ArgNo < Node->getNumOperands())
685    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
686
687  StringRef AccQual;
688  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
689      Arg.hasNoAliasAttr()) {
690    AccQual = "read_only";
691  } else {
692    Node = Func->getMetadata("kernel_arg_access_qual");
693    if (Node && ArgNo < Node->getNumOperands())
694      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
695  }
696
697  StringRef TypeQual;
698  Node = Func->getMetadata("kernel_arg_type_qual");
699  if (Node && ArgNo < Node->getNumOperands())
700    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
701
702  Type *Ty = Arg.getType();
703  const DataLayout &DL = Func->getParent()->getDataLayout();
704
705  MaybeAlign PointeeAlign;
706  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
707    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
708      PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
709                                                   PtrTy->getElementType());
710    }
711  }
712
713  emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
714                getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
715                Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
716                TypeQual);
717}
718
719void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
720                                       StringRef ValueKind, unsigned &Offset,
721                                       msgpack::ArrayDocNode Args,
722                                       MaybeAlign PointeeAlign, StringRef Name,
723                                       StringRef TypeName,
724                                       StringRef BaseTypeName,
725                                       StringRef AccQual, StringRef TypeQual) {
726  auto Arg = Args.getDocument()->getMapNode();
727
728  if (!Name.empty())
729    Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
730  if (!TypeName.empty())
731    Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
732  auto Size = DL.getTypeAllocSize(Ty);
733  Align Alignment = DL.getABITypeAlign(Ty);
734  Arg[".size"] = Arg.getDocument()->getNode(Size);
735  Offset = alignTo(Offset, Alignment);
736  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
737  Offset += Size;
738  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
739  if (PointeeAlign)
740    Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
741
742  if (auto PtrTy = dyn_cast<PointerType>(Ty))
743    if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
744      Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
745
746  if (auto AQ = getAccessQualifier(AccQual))
747    Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
748
749  // TODO: Emit Arg[".actual_access"].
750
751  SmallVector<StringRef, 1> SplitTypeQuals;
752  TypeQual.split(SplitTypeQuals, " ", -1, false);
753  for (StringRef Key : SplitTypeQuals) {
754    if (Key == "const")
755      Arg[".is_const"] = Arg.getDocument()->getNode(true);
756    else if (Key == "restrict")
757      Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
758    else if (Key == "volatile")
759      Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
760    else if (Key == "pipe")
761      Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
762  }
763
764  Args.push_back(Arg);
765}
766
767void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
768                                              unsigned &Offset,
769                                              msgpack::ArrayDocNode Args) {
770  int HiddenArgNumBytes =
771      getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
772
773  if (!HiddenArgNumBytes)
774    return;
775
776  auto &DL = Func.getParent()->getDataLayout();
777  auto Int64Ty = Type::getInt64Ty(Func.getContext());
778
779  if (HiddenArgNumBytes >= 8)
780    emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
781  if (HiddenArgNumBytes >= 16)
782    emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
783  if (HiddenArgNumBytes >= 24)
784    emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
785
786  auto Int8PtrTy =
787      Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
788
789  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
790  // "none" argument.
791  if (HiddenArgNumBytes >= 32) {
792    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
793      emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
794    else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
795      // The printf runtime binding pass should have ensured that hostcall and
796      // printf are not used in the same module.
797      assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
798      emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
799    } else
800      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
801  }
802
803  // Emit "default queue" and "completion action" arguments if enqueue kernel is
804  // used, otherwise emit dummy "none" arguments.
805  if (HiddenArgNumBytes >= 48) {
806    if (Func.hasFnAttribute("calls-enqueue-kernel")) {
807      emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
808      emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
809    } else {
810      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
811      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
812    }
813  }
814
815  // Emit the pointer argument for multi-grid object.
816  if (HiddenArgNumBytes >= 56)
817    emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
818}
819
820msgpack::MapDocNode
821MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
822                                      const SIProgramInfo &ProgramInfo) const {
823  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
824  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
825  const Function &F = MF.getFunction();
826
827  auto Kern = HSAMetadataDoc->getMapNode();
828
829  Align MaxKernArgAlign;
830  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
831      STM.getKernArgSegmentSize(F, MaxKernArgAlign));
832  Kern[".group_segment_fixed_size"] =
833      Kern.getDocument()->getNode(ProgramInfo.LDSSize);
834  Kern[".private_segment_fixed_size"] =
835      Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
836  Kern[".kernarg_segment_align"] =
837      Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
838  Kern[".wavefront_size"] =
839      Kern.getDocument()->getNode(STM.getWavefrontSize());
840  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
841  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
842  Kern[".max_flat_workgroup_size"] =
843      Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
844  Kern[".sgpr_spill_count"] =
845      Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
846  Kern[".vgpr_spill_count"] =
847      Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
848
849  return Kern;
850}
851
852bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
853  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
854}
855
856void MetadataStreamerV3::begin(const Module &Mod) {
857  emitVersion();
858  emitPrintf(Mod);
859  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
860}
861
862void MetadataStreamerV3::end() {
863  std::string HSAMetadataString;
864  raw_string_ostream StrOS(HSAMetadataString);
865  HSAMetadataDoc->toYAML(StrOS);
866
867  if (DumpHSAMetadata)
868    dump(StrOS.str());
869  if (VerifyHSAMetadata)
870    verify(StrOS.str());
871}
872
873void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
874                                    const SIProgramInfo &ProgramInfo) {
875  auto &Func = MF.getFunction();
876  auto Kern = getHSAKernelProps(MF, ProgramInfo);
877
878  assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
879         Func.getCallingConv() == CallingConv::SPIR_KERNEL);
880
881  auto Kernels =
882      getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
883
884  {
885    Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
886    Kern[".symbol"] = Kern.getDocument()->getNode(
887        (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
888    emitKernelLanguage(Func, Kern);
889    emitKernelAttrs(Func, Kern);
890    emitKernelArgs(Func, Kern);
891  }
892
893  Kernels.push_back(Kern);
894}
895
896} // end namespace HSAMD
897} // end namespace AMDGPU
898} // end namespace llvm
899