1311118Sdim/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== 2311118Sdim * 3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim * See https://llvm.org/LICENSE.txt for license information. 5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6311118Sdim * 7311118Sdim *===-----------------------------------------------------------------------=== 8311118Sdim */ 9311118Sdim 10311118Sdim#ifndef __CUDA_BUILTIN_VARS_H 11311118Sdim#define __CUDA_BUILTIN_VARS_H 12311118Sdim 13311118Sdim// Forward declares from vector_types.h. 14311118Sdimstruct uint3; 15311118Sdimstruct dim3; 16311118Sdim 17311118Sdim// The file implements built-in CUDA variables using __declspec(property). 18311118Sdim// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx 19311118Sdim// All read accesses of built-in variable fields get converted into calls to a 20311118Sdim// getter function which in turn calls the appropriate builtin to fetch the 21311118Sdim// value. 22311118Sdim// 23311118Sdim// Example: 24311118Sdim// int x = threadIdx.x; 25311118Sdim// IR output: 26311118Sdim// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 27311118Sdim// PTX output: 28311118Sdim// mov.u32 %r2, %tid.x; 29311118Sdim 30311118Sdim#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ 31311118Sdim __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ 32311118Sdim static inline __attribute__((always_inline)) \ 33311118Sdim __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ 34311118Sdim return INTRINSIC; \ 35311118Sdim } 36311118Sdim 37311118Sdim#if __cplusplus >= 201103L 38311118Sdim#define __DELETE =delete 39311118Sdim#else 40311118Sdim#define __DELETE 41311118Sdim#endif 42311118Sdim 43341825Sdim// Make sure nobody can create instances of the special variable types. nvcc 44311118Sdim// also disallows taking address of special variables, so we disable address-of 45311118Sdim// operator as well. 46311118Sdim#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ 47311118Sdim __attribute__((device)) TypeName() __DELETE; \ 48311118Sdim __attribute__((device)) TypeName(const TypeName &) __DELETE; \ 49311118Sdim __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ 50311118Sdim __attribute__((device)) TypeName *operator&() const __DELETE 51311118Sdim 52311118Sdimstruct __cuda_builtin_threadIdx_t { 53311118Sdim __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); 54311118Sdim __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); 55311118Sdim __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); 56311118Sdim // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 57311118Sdim // uint3). This function is defined after we pull in vector_types.h. 58311118Sdim __attribute__((device)) operator uint3() const; 59311118Sdimprivate: 60311118Sdim __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); 61311118Sdim}; 62311118Sdim 63311118Sdimstruct __cuda_builtin_blockIdx_t { 64311118Sdim __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); 65311118Sdim __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); 66311118Sdim __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); 67311118Sdim // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a 68311118Sdim // uint3). This function is defined after we pull in vector_types.h. 69311118Sdim __attribute__((device)) operator uint3() const; 70311118Sdimprivate: 71311118Sdim __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); 72311118Sdim}; 73311118Sdim 74311118Sdimstruct __cuda_builtin_blockDim_t { 75311118Sdim __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); 76311118Sdim __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); 77311118Sdim __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); 78311118Sdim // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a 79311118Sdim // dim3). This function is defined after we pull in vector_types.h. 80311118Sdim __attribute__((device)) operator dim3() const; 81311118Sdimprivate: 82311118Sdim __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); 83311118Sdim}; 84311118Sdim 85311118Sdimstruct __cuda_builtin_gridDim_t { 86311118Sdim __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); 87311118Sdim __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); 88311118Sdim __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); 89311118Sdim // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a 90311118Sdim // dim3). This function is defined after we pull in vector_types.h. 91311118Sdim __attribute__((device)) operator dim3() const; 92311118Sdimprivate: 93311118Sdim __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); 94311118Sdim}; 95311118Sdim 96311118Sdim#define __CUDA_BUILTIN_VAR \ 97311118Sdim extern const __attribute__((device)) __attribute__((weak)) 98311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; 99311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; 100311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; 101311118Sdim__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; 102311118Sdim 103311118Sdim// warpSize should translate to read of %WARP_SZ but there's currently no 104311118Sdim// builtin to do so. According to PTX v4.2 docs 'to date, all target 105311118Sdim// architectures have a WARP_SZ value of 32'. 106311118Sdim__attribute__((device)) const int warpSize = 32; 107311118Sdim 108311118Sdim#undef __CUDA_DEVICE_BUILTIN 109311118Sdim#undef __CUDA_BUILTIN_VAR 110311118Sdim#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS 111311118Sdim 112311118Sdim#endif /* __CUDA_BUILTIN_VARS_H */ 113