1/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __CUDA_BUILTIN_VARS_H 11#define __CUDA_BUILTIN_VARS_H 12 13// Forward declares from vector_types.h. 14struct uint3; 15struct dim3; 16 17// The file implements built-in CUDA variables using __declspec(property). 18// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx 19// All read accesses of built-in variable fields get converted into calls to a 20// getter function which in turn calls the appropriate builtin to fetch the 21// value. 22// 23// Example: 24// int x = threadIdx.x; 25// IR output: 26// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 27// PTX output: 28// mov.u32 %r2, %tid.x; 29 30#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ 31 __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ 32 static inline __attribute__((always_inline)) \ 33 __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ 34 return INTRINSIC; \ 35 } 36 37#if __cplusplus >= 201103L 38#define __DELETE =delete 39#else 40#define __DELETE 41#endif 42 43// Make sure nobody can create instances of the special variable types. nvcc 44// also disallows taking address of special variables, so we disable address-of 45// operator as well. 46#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ 47 __attribute__((device)) TypeName() __DELETE; \ 48 __attribute__((device)) TypeName(const TypeName &) __DELETE; \ 49 __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ 50 __attribute__((device)) TypeName *operator&() const __DELETE 51 52struct __cuda_builtin_threadIdx_t { 53 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); 54 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); 55 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); 56 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 57 // uint3). This function is defined after we pull in vector_types.h. 58 __attribute__((device)) operator uint3() const; 59private: 60 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); 61}; 62 63struct __cuda_builtin_blockIdx_t { 64 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); 65 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); 66 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); 67 // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a 68 // uint3). This function is defined after we pull in vector_types.h. 69 __attribute__((device)) operator uint3() const; 70private: 71 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); 72}; 73 74struct __cuda_builtin_blockDim_t { 75 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); 76 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); 77 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); 78 // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a 79 // dim3). This function is defined after we pull in vector_types.h. 80 __attribute__((device)) operator dim3() const; 81private: 82 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); 83}; 84 85struct __cuda_builtin_gridDim_t { 86 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); 87 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); 88 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); 89 // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a 90 // dim3). This function is defined after we pull in vector_types.h. 91 __attribute__((device)) operator dim3() const; 92private: 93 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); 94}; 95 96#define __CUDA_BUILTIN_VAR \ 97 extern const __attribute__((device)) __attribute__((weak)) 98__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; 99__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; 100__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; 101__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; 102 103// warpSize should translate to read of %WARP_SZ but there's currently no 104// builtin to do so. According to PTX v4.2 docs 'to date, all target 105// architectures have a WARP_SZ value of 32'. 106__attribute__((device)) const int warpSize = 32; 107 108#undef __CUDA_DEVICE_BUILTIN 109#undef __CUDA_BUILTIN_VAR 110#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS 111 112#endif /* __CUDA_BUILTIN_VARS_H */ 113