1/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __CUDA_BUILTIN_VARS_H
11#define __CUDA_BUILTIN_VARS_H
12
13// Forward declares from vector_types.h.
14struct uint3;
15struct dim3;
16
17// The file implements built-in CUDA variables using __declspec(property).
18// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
19// All read accesses of built-in variable fields get converted into calls to a
20// getter function which in turn calls the appropriate builtin to fetch the
21// value.
22//
23// Example:
24//    int x = threadIdx.x;
25// IR output:
26//  %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
27// PTX output:
28//  mov.u32     %r2, %tid.x;
29
30#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC)                                \
31  __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD;      \
32  static inline __attribute__((always_inline))                                 \
33      __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) {     \
34    return INTRINSIC;                                                          \
35  }
36
37#if __cplusplus >= 201103L
38#define __DELETE =delete
39#else
40#define __DELETE
41#endif
42
43// Make sure nobody can create instances of the special variable types.  nvcc
44// also disallows taking address of special variables, so we disable address-of
45// operator as well.
46#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName)                            \
47  __attribute__((device)) TypeName() __DELETE;                                 \
48  __attribute__((device)) TypeName(const TypeName &) __DELETE;                 \
49  __attribute__((device)) void operator=(const TypeName &) const __DELETE;     \
50  __attribute__((device)) TypeName *operator&() const __DELETE
51
52struct __cuda_builtin_threadIdx_t {
53  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
54  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
55  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
56  // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
57  // uint3).  This function is defined after we pull in vector_types.h.
58  __attribute__((device)) operator uint3() const;
59private:
60  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
61};
62
63struct __cuda_builtin_blockIdx_t {
64  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
65  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
66  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
67  // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
68  // uint3).  This function is defined after we pull in vector_types.h.
69  __attribute__((device)) operator uint3() const;
70private:
71  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
72};
73
74struct __cuda_builtin_blockDim_t {
75  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
76  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
77  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
78  // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
79  // dim3).  This function is defined after we pull in vector_types.h.
80  __attribute__((device)) operator dim3() const;
81private:
82  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
83};
84
85struct __cuda_builtin_gridDim_t {
86  __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
87  __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
88  __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
89  // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
90  // dim3).  This function is defined after we pull in vector_types.h.
91  __attribute__((device)) operator dim3() const;
92private:
93  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
94};
95
96#define __CUDA_BUILTIN_VAR                                                     \
97  extern const __attribute__((device)) __attribute__((weak))
98__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
99__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
100__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
101__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;
102
103// warpSize should translate to read of %WARP_SZ but there's currently no
104// builtin to do so. According to PTX v4.2 docs 'to date, all target
105// architectures have a WARP_SZ value of 32'.
106__attribute__((device)) const int warpSize = 32;
107
108#undef __CUDA_DEVICE_BUILTIN
109#undef __CUDA_BUILTIN_VAR
110#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
111
112#endif /* __CUDA_BUILTIN_VARS_H */
113