1311118Sdim/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
2311118Sdim *
3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim * See https://llvm.org/LICENSE.txt for license information.
5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6311118Sdim *
7311118Sdim *===-----------------------------------------------------------------------===
8311118Sdim */
9311118Sdim
10311118Sdim#ifndef __CUDA_BUILTIN_VARS_H
11311118Sdim#define __CUDA_BUILTIN_VARS_H
12311118Sdim
13311118Sdim// Forward declares from vector_types.h.
14311118Sdimstruct uint3;
15311118Sdimstruct dim3;
16311118Sdim
17311118Sdim// The file implements built-in CUDA variables using __declspec(property).
18311118Sdim// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
19311118Sdim// All read accesses of built-in variable fields get converted into calls to a
20311118Sdim// getter function which in turn calls the appropriate builtin to fetch the
21311118Sdim// value.
22311118Sdim//
23311118Sdim// Example:
24311118Sdim//    int x = threadIdx.x;
25311118Sdim// IR output:
26311118Sdim//  %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
27311118Sdim// PTX output:
28311118Sdim//  mov.u32     %r2, %tid.x;
29311118Sdim
30311118Sdim#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC)                                \
31311118Sdim  __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD;      \
32311118Sdim  static inline __attribute__((always_inline))                                 \
33311118Sdim      __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) {     \
34311118Sdim    return INTRINSIC;                                                          \
35311118Sdim  }
36311118Sdim
37311118Sdim#if __cplusplus >= 201103L
38311118Sdim#define __DELETE =delete
39311118Sdim#else
40311118Sdim#define __DELETE
41311118Sdim#endif
42311118Sdim
43341825Sdim// Make sure nobody can create instances of the special variable types.  nvcc
44311118Sdim// also disallows taking address of special variables, so we disable address-of
45311118Sdim// operator as well.
46311118Sdim#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName)                            \
47311118Sdim  __attribute__((device)) TypeName() __DELETE;                                 \
48311118Sdim  __attribute__((device)) TypeName(const TypeName &) __DELETE;                 \
49311118Sdim  __attribute__((device)) void operator=(const TypeName &) const __DELETE;     \
50311118Sdim  __attribute__((device)) TypeName *operator&() const __DELETE
51311118Sdim
52311118Sdimstruct __cuda_builtin_threadIdx_t {
53311118Sdim  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
54311118Sdim  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
55311118Sdim  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
56311118Sdim  // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
57311118Sdim  // uint3).  This function is defined after we pull in vector_types.h.
58311118Sdim  __attribute__((device)) operator uint3() const;
59311118Sdimprivate:
60311118Sdim  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
61311118Sdim};
62311118Sdim
63311118Sdimstruct __cuda_builtin_blockIdx_t {
64311118Sdim  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
65311118Sdim  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
66311118Sdim  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
67311118Sdim  // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
68311118Sdim  // uint3).  This function is defined after we pull in vector_types.h.
69311118Sdim  __attribute__((device)) operator uint3() const;
70311118Sdimprivate:
71311118Sdim  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
72311118Sdim};
73311118Sdim
74311118Sdimstruct __cuda_builtin_blockDim_t {
75311118Sdim  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
76311118Sdim  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
77311118Sdim  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
78311118Sdim  // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
79311118Sdim  // dim3).  This function is defined after we pull in vector_types.h.
80311118Sdim  __attribute__((device)) operator dim3() const;
81311118Sdimprivate:
82311118Sdim  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
83311118Sdim};
84311118Sdim
85311118Sdimstruct __cuda_builtin_gridDim_t {
86311118Sdim  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
87311118Sdim  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
88311118Sdim  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
89311118Sdim  // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
90311118Sdim  // dim3).  This function is defined after we pull in vector_types.h.
91311118Sdim  __attribute__((device)) operator dim3() const;
92311118Sdimprivate:
93311118Sdim  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
94311118Sdim};
95311118Sdim
96311118Sdim#define __CUDA_BUILTIN_VAR                                                     \
97311118Sdim  extern const __attribute__((device)) __attribute__((weak))
98311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
99311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
100311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
101311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;
102311118Sdim
103311118Sdim// warpSize should translate to read of %WARP_SZ but there's currently no
104311118Sdim// builtin to do so. According to PTX v4.2 docs 'to date, all target
105311118Sdim// architectures have a WARP_SZ value of 32'.
106311118Sdim__attribute__((device)) const int warpSize = 32;
107311118Sdim
108311118Sdim#undef __CUDA_DEVICE_BUILTIN
109311118Sdim#undef __CUDA_BUILTIN_VAR
110311118Sdim#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
111311118Sdim
112311118Sdim#endif /* __CUDA_BUILTIN_VARS_H */
113