1//===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HIPAMD.h"
10#include "AMDGPU.h"
11#include "CommonArgs.h"
12#include "HIPUtility.h"
13#include "clang/Basic/Cuda.h"
14#include "clang/Basic/TargetID.h"
15#include "clang/Driver/Compilation.h"
16#include "clang/Driver/Driver.h"
17#include "clang/Driver/DriverDiagnostic.h"
18#include "clang/Driver/InputInfo.h"
19#include "clang/Driver/Options.h"
20#include "clang/Driver/SanitizerArgs.h"
21#include "llvm/Support/Alignment.h"
22#include "llvm/Support/FileSystem.h"
23#include "llvm/Support/Path.h"
24#include "llvm/Support/TargetParser.h"
25
26using namespace clang::driver;
27using namespace clang::driver::toolchains;
28using namespace clang::driver::tools;
29using namespace clang;
30using namespace llvm::opt;
31
// Name of the host platform's null device: "nul" when building for Windows
// (_WIN32 or _WIN64 defined), "/dev/null" otherwise.
#if defined(_WIN32) || defined(_WIN64)
#define NULL_FILE "nul"
#else
#define NULL_FILE "/dev/null"
#endif
37
38static bool shouldSkipSanitizeOption(const ToolChain &TC,
39                                     const llvm::opt::ArgList &DriverArgs,
40                                     StringRef TargetID,
41                                     const llvm::opt::Arg *A) {
42  // For actions without targetID, do nothing.
43  if (TargetID.empty())
44    return false;
45  Option O = A->getOption();
46  if (!O.matches(options::OPT_fsanitize_EQ))
47    return false;
48
49  if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
50                          options::OPT_fno_gpu_sanitize, true))
51    return true;
52
53  auto &Diags = TC.getDriver().getDiags();
54
55  // For simplicity, we only allow -fsanitize=address
56  SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
57  if (K != SanitizerKind::Address)
58    return true;
59
60  llvm::StringMap<bool> FeatureMap;
61  auto OptionalGpuArch = parseTargetID(TC.getTriple(), TargetID, &FeatureMap);
62
63  assert(OptionalGpuArch && "Invalid Target ID");
64  (void)OptionalGpuArch;
65  auto Loc = FeatureMap.find("xnack");
66  if (Loc == FeatureMap.end() || !Loc->second) {
67    Diags.Report(
68        clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature)
69        << A->getAsString(DriverArgs) << TargetID << "xnack+";
70    return true;
71  }
72  return false;
73}
74
75void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C,
76                                         const JobAction &JA,
77                                         const InputInfoList &Inputs,
78                                         const InputInfo &Output,
79                                         const llvm::opt::ArgList &Args) const {
80  // Construct llvm-link command.
81  // The output from llvm-link is a bitcode file.
82  ArgStringList LlvmLinkArgs;
83
84  assert(!Inputs.empty() && "Must have at least one input.");
85
86  LlvmLinkArgs.append({"-o", Output.getFilename()});
87  for (auto Input : Inputs)
88    LlvmLinkArgs.push_back(Input.getFilename());
89
90  // Look for archive of bundled bitcode in arguments, and add temporary files
91  // for the extracted archive of bitcode to inputs.
92  auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
93  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn",
94                             TargetID,
95                             /*IsBitCodeSDL=*/true,
96                             /*PostClangLink=*/false);
97
98  const char *LlvmLink =
99    Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
100  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
101                                         LlvmLink, LlvmLinkArgs, Inputs,
102                                         Output));
103}
104
105void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
106                                         const InputInfoList &Inputs,
107                                         const InputInfo &Output,
108                                         const llvm::opt::ArgList &Args) const {
109  // Construct lld command.
110  // The output from ld.lld is an HSA code object file.
111  ArgStringList LldArgs{"-flavor",
112                        "gnu",
113                        "-m",
114                        "elf64_amdgpu",
115                        "--no-undefined",
116                        "-shared",
117                        "-plugin-opt=-amdgpu-internalize-symbols"};
118
119  auto &TC = getToolChain();
120  auto &D = TC.getDriver();
121  assert(!Inputs.empty() && "Must have at least one input.");
122  bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin;
123  addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO);
124
125  // Extract all the -m options
126  std::vector<llvm::StringRef> Features;
127  amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
128
129  // Add features to mattr such as cumode
130  std::string MAttrString = "-plugin-opt=-mattr=";
131  for (auto OneFeature : unifyTargetFeatures(Features)) {
132    MAttrString.append(Args.MakeArgString(OneFeature));
133    if (OneFeature != Features.back())
134      MAttrString.append(",");
135  }
136  if (!Features.empty())
137    LldArgs.push_back(Args.MakeArgString(MAttrString));
138
139  // ToDo: Remove this option after AMDGPU backend supports ISA-level linking.
140  // Since AMDGPU backend currently does not support ISA-level linking, all
141  // called functions need to be imported.
142  if (IsThinLTO)
143    LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all"));
144
145  for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
146    LldArgs.push_back(
147        Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
148  }
149
150  if (C.getDriver().isSaveTempsEnabled())
151    LldArgs.push_back("-save-temps");
152
153  addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
154
155  for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker))
156    LldArgs.push_back(Arg->getValue(1));
157
158  LldArgs.append({"-o", Output.getFilename()});
159  for (auto Input : Inputs)
160    LldArgs.push_back(Input.getFilename());
161
162  // Look for archive of bundled bitcode in arguments, and add temporary files
163  // for the extracted archive of bitcode to inputs.
164  auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
165  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LldArgs, "amdgcn",
166                             TargetID,
167                             /*IsBitCodeSDL=*/true,
168                             /*PostClangLink=*/false);
169
170  const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
171  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
172                                         Lld, LldArgs, Inputs, Output));
173}
174
175// For amdgcn the inputs of the linker job are device bitcode and output is
176// either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
177// llc, then lld steps.
178void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
179                                  const InputInfo &Output,
180                                  const InputInfoList &Inputs,
181                                  const ArgList &Args,
182                                  const char *LinkingOutput) const {
183  if (Inputs.size() > 0 &&
184      Inputs[0].getType() == types::TY_Image &&
185      JA.getType() == types::TY_Object)
186    return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs,
187                                                         Args, JA, *this);
188
189  if (JA.getType() == types::TY_HIP_FATBIN)
190    return HIP::constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs,
191                                          Args, *this);
192
193  if (JA.getType() == types::TY_LLVM_BC)
194    return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
195
196  return constructLldCommand(C, JA, Inputs, Output, Args);
197}
198
199HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple,
200                                 const ToolChain &HostTC, const ArgList &Args)
201    : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
202  // Lookup binaries into the driver directory, this is used to
203  // discover the clang-offload-bundler executable.
204  getProgramPaths().push_back(getDriver().Dir);
205
206  // Diagnose unsupported sanitizer options only once.
207  if (!Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
208                    true))
209    return;
210  for (auto *A : Args.filtered(options::OPT_fsanitize_EQ)) {
211    SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
212    if (K != SanitizerKind::Address)
213      D.getDiags().Report(clang::diag::warn_drv_unsupported_option_for_target)
214          << A->getAsString(Args) << getTriple().str();
215  }
216}
217
218void HIPAMDToolChain::addClangTargetOptions(
219    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
220    Action::OffloadKind DeviceOffloadingKind) const {
221  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
222
223  assert(DeviceOffloadingKind == Action::OFK_HIP &&
224         "Only HIP offloading kinds are supported for GPUs.");
225
226  CC1Args.push_back("-fcuda-is-device");
227
228  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
229                         options::OPT_fno_cuda_approx_transcendentals, false))
230    CC1Args.push_back("-fcuda-approx-transcendentals");
231
232  if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
233                          false))
234    CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
235
236  StringRef MaxThreadsPerBlock =
237      DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
238  if (!MaxThreadsPerBlock.empty()) {
239    std::string ArgStr =
240        (Twine("--gpu-max-threads-per-block=") + MaxThreadsPerBlock).str();
241    CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
242  }
243
244  CC1Args.push_back("-fcuda-allow-variadic-functions");
245
246  // Default to "hidden" visibility, as object level linking will not be
247  // supported for the foreseeable future.
248  if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
249                         options::OPT_fvisibility_ms_compat)) {
250    CC1Args.append({"-fvisibility=hidden"});
251    CC1Args.push_back("-fapply-global-visibility-to-externs");
252  }
253
254  for (auto BCFile : getDeviceLibs(DriverArgs)) {
255    CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
256                                               : "-mlink-bitcode-file");
257    CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
258  }
259}
260
261llvm::opt::DerivedArgList *
262HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
263                               StringRef BoundArch,
264                               Action::OffloadKind DeviceOffloadKind) const {
265  DerivedArgList *DAL =
266      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
267  if (!DAL)
268    DAL = new DerivedArgList(Args.getBaseArgs());
269
270  const OptTable &Opts = getDriver().getOpts();
271
272  for (Arg *A : Args) {
273    if (!shouldSkipArgument(A) &&
274        !shouldSkipSanitizeOption(*this, Args, BoundArch, A))
275      DAL->append(A);
276  }
277
278  if (!BoundArch.empty()) {
279    DAL->eraseArg(options::OPT_mcpu_EQ);
280    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
281    checkTargetID(*DAL);
282  }
283
284  return DAL;
285}
286
287Tool *HIPAMDToolChain::buildLinker() const {
288  assert(getTriple().getArch() == llvm::Triple::amdgcn);
289  return new tools::AMDGCN::Linker(*this);
290}
291
292void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
293  HostTC.addClangWarningOptions(CC1Args);
294}
295
296ToolChain::CXXStdlibType
297HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const {
298  return HostTC.GetCXXStdlibType(Args);
299}
300
301void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
302                                                ArgStringList &CC1Args) const {
303  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
304}
305
306void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs(
307    const ArgList &Args, ArgStringList &CC1Args) const {
308  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
309}
310
311void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
312                                          ArgStringList &CC1Args) const {
313  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
314}
315
316void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
317                                        ArgStringList &CC1Args) const {
318  RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
319}
320
321SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const {
322  // The HIPAMDToolChain only supports sanitizers in the sense that it allows
323  // sanitizer arguments on the command line if they are supported by the host
324  // toolchain. The HIPAMDToolChain will actually ignore any command line
325  // arguments for any of these "supported" sanitizers. That means that no
326  // sanitization of device code is actually supported at this time.
327  //
328  // This behavior is necessary because the host and device toolchains
329  // invocations often share the command line, so the device toolchain must
330  // tolerate flags meant only for the host toolchain.
331  return HostTC.getSupportedSanitizers();
332}
333
334VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
335                                                 const ArgList &Args) const {
336  return HostTC.computeMSVCVersion(D, Args);
337}
338
339llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
340HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
341  llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
342  if (DriverArgs.hasArg(options::OPT_nogpulib))
343    return {};
344  ArgStringList LibraryPaths;
345
346  // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
347  for (StringRef Path : RocmInstallation.getRocmDeviceLibPathArg())
348    LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
349
350  addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
351
352  // Maintain compatability with --hip-device-lib.
353  auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
354  if (!BCLibArgs.empty()) {
355    llvm::for_each(BCLibArgs, [&](StringRef BCName) {
356      StringRef FullName;
357      for (StringRef LibraryPath : LibraryPaths) {
358        SmallString<128> Path(LibraryPath);
359        llvm::sys::path::append(Path, BCName);
360        FullName = Path;
361        if (llvm::sys::fs::exists(FullName)) {
362          BCLibs.push_back(FullName);
363          return;
364        }
365      }
366      getDriver().Diag(diag::err_drv_no_such_file) << BCName;
367    });
368  } else {
369    if (!RocmInstallation.hasDeviceLibrary()) {
370      getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
371      return {};
372    }
373    StringRef GpuArch = getGPUArch(DriverArgs);
374    assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
375
376    // If --hip-device-lib is not set, add the default bitcode libraries.
377    if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
378                           options::OPT_fno_gpu_sanitize, true) &&
379        getSanitizerArgs(DriverArgs).needsAsanRt()) {
380      auto AsanRTL = RocmInstallation.getAsanRTLPath();
381      if (AsanRTL.empty()) {
382        unsigned DiagID = getDriver().getDiags().getCustomDiagID(
383            DiagnosticsEngine::Error,
384            "AMDGPU address sanitizer runtime library (asanrtl) is not found. "
385            "Please install ROCm device library which supports address "
386            "sanitizer");
387        getDriver().Diag(DiagID);
388        return {};
389      } else
390        BCLibs.emplace_back(AsanRTL, /*ShouldInternalize=*/false);
391    }
392
393    // Add the HIP specific bitcode library.
394    BCLibs.push_back(RocmInstallation.getHIPPath());
395
396    // Add common device libraries like ocml etc.
397    for (StringRef N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
398      BCLibs.emplace_back(N);
399
400    // Add instrument lib.
401    auto InstLib =
402        DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
403    if (InstLib.empty())
404      return BCLibs;
405    if (llvm::sys::fs::exists(InstLib))
406      BCLibs.push_back(InstLib);
407    else
408      getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
409  }
410
411  return BCLibs;
412}
413
414void HIPAMDToolChain::checkTargetID(
415    const llvm::opt::ArgList &DriverArgs) const {
416  auto PTID = getParsedTargetID(DriverArgs);
417  if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
418    getDriver().Diag(clang::diag::err_drv_bad_target_id)
419        << *PTID.OptionalTargetID;
420  }
421}
422