1336815Sdim/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------=== 2336815Sdim * 3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim * See https://llvm.org/LICENSE.txt for license information. 5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6336815Sdim * 7336815Sdim *===-----------------------------------------------------------------------=== 8336815Sdim */ 9336815Sdim 10336815Sdim#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__ 11336815Sdim#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__ 12336815Sdim 13353358Sdim#ifndef _OPENMP 14336815Sdim#if CUDA_VERSION < 9000 15336815Sdim#error This file is intended to be used with CUDA-9+ only. 16336815Sdim#endif 17353358Sdim#endif 18336815Sdim 19336815Sdim// __DEVICE__ is a helper macro with common set of attributes for the wrappers 20336815Sdim// we implement in this file. We need static in order to avoid emitting unused 21336815Sdim// functions and __forceinline__ helps inlining these wrappers at -O1. 22336815Sdim#pragma push_macro("__DEVICE__") 23353358Sdim#ifdef _OPENMP 24353358Sdim#define __DEVICE__ static __attribute__((always_inline)) 25353358Sdim#else 26336815Sdim#define __DEVICE__ static __device__ __forceinline__ 27353358Sdim#endif 28336815Sdim 29336815Sdim// libdevice provides fast low precision and slow full-recision implementations 30336815Sdim// for some functions. Which one gets selected depends on 31336815Sdim// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if 32336815Sdim// -ffast-math or -fcuda-approx-transcendentals are in effect. 33336815Sdim#pragma push_macro("__FAST_OR_SLOW") 34336815Sdim#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) 35336815Sdim#define __FAST_OR_SLOW(fast, slow) fast 36336815Sdim#else 37336815Sdim#define __FAST_OR_SLOW(fast, slow) slow 38336815Sdim#endif 39336815Sdim 40353358Sdim// For C++ 17 we need to include noexcept attribute to be compatible 41353358Sdim// with the header-defined version. This may be removed once 42353358Sdim// variant is supported. 43353358Sdim#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L 44353358Sdim#define __NOEXCEPT noexcept 45353358Sdim#else 46353358Sdim#define __NOEXCEPT 47353358Sdim#endif 48353358Sdim 49336815Sdim__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); } 50336815Sdim__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); } 51336815Sdim__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); } 52336815Sdim__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); } 53336815Sdim__DEVICE__ unsigned long long __brevll(unsigned long long __a) { 54336815Sdim return __nv_brevll(__a); 55336815Sdim} 56353358Sdim#if defined(__cplusplus) 57336815Sdim__DEVICE__ void __brkpt() { asm volatile("brkpt;"); } 58336815Sdim__DEVICE__ void __brkpt(int __a) { __brkpt(); } 59353358Sdim#else 60353358Sdim__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); } 61353358Sdim__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); } 62353358Sdim#endif 63336815Sdim__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b, 64336815Sdim unsigned int __c) { 65336815Sdim return __nv_byte_perm(__a, __b, __c); 66336815Sdim} 67336815Sdim__DEVICE__ int __clz(int __a) { return __nv_clz(__a); } 68336815Sdim__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); } 69336815Sdim__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); } 70336815Sdim__DEVICE__ double __dAtomicAdd(double *__p, double __v) { 71336815Sdim return __nvvm_atom_add_gen_d(__p, __v); 72336815Sdim} 73336815Sdim__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) { 74336815Sdim return __nvvm_atom_cta_add_gen_d(__p, __v); 75336815Sdim} 76336815Sdim__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) { 77336815Sdim return __nvvm_atom_sys_add_gen_d(__p, __v); 78336815Sdim} 79336815Sdim__DEVICE__ double __dadd_rd(double __a, double __b) { 80336815Sdim return __nv_dadd_rd(__a, __b); 81336815Sdim} 82336815Sdim__DEVICE__ double __dadd_rn(double __a, double __b) { 83336815Sdim return __nv_dadd_rn(__a, __b); 84336815Sdim} 85336815Sdim__DEVICE__ double __dadd_ru(double __a, double __b) { 86336815Sdim return __nv_dadd_ru(__a, __b); 87336815Sdim} 88336815Sdim__DEVICE__ double __dadd_rz(double __a, double __b) { 89336815Sdim return __nv_dadd_rz(__a, __b); 90336815Sdim} 91336815Sdim__DEVICE__ double __ddiv_rd(double __a, double __b) { 92336815Sdim return __nv_ddiv_rd(__a, __b); 93336815Sdim} 94336815Sdim__DEVICE__ double __ddiv_rn(double __a, double __b) { 95336815Sdim return __nv_ddiv_rn(__a, __b); 96336815Sdim} 97336815Sdim__DEVICE__ double __ddiv_ru(double __a, double __b) { 98336815Sdim return __nv_ddiv_ru(__a, __b); 99336815Sdim} 100336815Sdim__DEVICE__ double __ddiv_rz(double __a, double __b) { 101336815Sdim return __nv_ddiv_rz(__a, __b); 102336815Sdim} 103336815Sdim__DEVICE__ double __dmul_rd(double __a, double __b) { 104336815Sdim return __nv_dmul_rd(__a, __b); 105336815Sdim} 106336815Sdim__DEVICE__ double __dmul_rn(double __a, double __b) { 107336815Sdim return __nv_dmul_rn(__a, __b); 108336815Sdim} 109336815Sdim__DEVICE__ double __dmul_ru(double __a, double __b) { 110336815Sdim return __nv_dmul_ru(__a, __b); 111336815Sdim} 112336815Sdim__DEVICE__ double __dmul_rz(double __a, double __b) { 113336815Sdim return __nv_dmul_rz(__a, __b); 114336815Sdim} 115336815Sdim__DEVICE__ float __double2float_rd(double __a) { 116336815Sdim return __nv_double2float_rd(__a); 117336815Sdim} 118336815Sdim__DEVICE__ float __double2float_rn(double __a) { 119336815Sdim return __nv_double2float_rn(__a); 120336815Sdim} 121336815Sdim__DEVICE__ float __double2float_ru(double __a) { 122336815Sdim return __nv_double2float_ru(__a); 123336815Sdim} 124336815Sdim__DEVICE__ float __double2float_rz(double __a) { 125336815Sdim return __nv_double2float_rz(__a); 126336815Sdim} 127336815Sdim__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); } 128336815Sdim__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); } 129336815Sdim__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); } 130336815Sdim__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); } 131336815Sdim__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); } 132336815Sdim__DEVICE__ long long __double2ll_rd(double __a) { 133336815Sdim return __nv_double2ll_rd(__a); 134336815Sdim} 135336815Sdim__DEVICE__ long long __double2ll_rn(double __a) { 136336815Sdim return __nv_double2ll_rn(__a); 137336815Sdim} 138336815Sdim__DEVICE__ long long __double2ll_ru(double __a) { 139336815Sdim return __nv_double2ll_ru(__a); 140336815Sdim} 141336815Sdim__DEVICE__ long long __double2ll_rz(double __a) { 142336815Sdim return __nv_double2ll_rz(__a); 143336815Sdim} 144336815Sdim__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); } 145336815Sdim__DEVICE__ unsigned int __double2uint_rd(double __a) { 146336815Sdim return __nv_double2uint_rd(__a); 147336815Sdim} 148336815Sdim__DEVICE__ unsigned int __double2uint_rn(double __a) { 149336815Sdim return __nv_double2uint_rn(__a); 150336815Sdim} 151336815Sdim__DEVICE__ unsigned int __double2uint_ru(double __a) { 152336815Sdim return __nv_double2uint_ru(__a); 153336815Sdim} 154336815Sdim__DEVICE__ unsigned int __double2uint_rz(double __a) { 155336815Sdim return __nv_double2uint_rz(__a); 156336815Sdim} 157336815Sdim__DEVICE__ unsigned long long __double2ull_rd(double __a) { 158336815Sdim return __nv_double2ull_rd(__a); 159336815Sdim} 160336815Sdim__DEVICE__ unsigned long long __double2ull_rn(double __a) { 161336815Sdim return __nv_double2ull_rn(__a); 162336815Sdim} 163336815Sdim__DEVICE__ unsigned long long __double2ull_ru(double __a) { 164336815Sdim return __nv_double2ull_ru(__a); 165336815Sdim} 166336815Sdim__DEVICE__ unsigned long long __double2ull_rz(double __a) { 167336815Sdim return __nv_double2ull_rz(__a); 168336815Sdim} 169336815Sdim__DEVICE__ long long __double_as_longlong(double __a) { 170336815Sdim return __nv_double_as_longlong(__a); 171336815Sdim} 172336815Sdim__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); } 173336815Sdim__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); } 174336815Sdim__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); } 175336815Sdim__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); } 176336815Sdim__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); } 177336815Sdim__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); } 178336815Sdim__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); } 179336815Sdim__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); } 180336815Sdim__DEVICE__ double __dsub_rd(double __a, double __b) { 181336815Sdim return __nv_dsub_rd(__a, __b); 182336815Sdim} 183336815Sdim__DEVICE__ double __dsub_rn(double __a, double __b) { 184336815Sdim return __nv_dsub_rn(__a, __b); 185336815Sdim} 186336815Sdim__DEVICE__ double __dsub_ru(double __a, double __b) { 187336815Sdim return __nv_dsub_ru(__a, __b); 188336815Sdim} 189336815Sdim__DEVICE__ double __dsub_rz(double __a, double __b) { 190336815Sdim return __nv_dsub_rz(__a, __b); 191336815Sdim} 192336815Sdim__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); } 193336815Sdim__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); } 194336815Sdim__DEVICE__ float __fAtomicAdd(float *__p, float __v) { 195336815Sdim return __nvvm_atom_add_gen_f(__p, __v); 196336815Sdim} 197336815Sdim__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) { 198336815Sdim return __nvvm_atom_cta_add_gen_f(__p, __v); 199336815Sdim} 200336815Sdim__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) { 201336815Sdim return __nvvm_atom_sys_add_gen_f(__p, __v); 202336815Sdim} 203336815Sdim__DEVICE__ float __fAtomicExch(float *__p, float __v) { 204336815Sdim return __nv_int_as_float( 205336815Sdim __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); 206336815Sdim} 207336815Sdim__DEVICE__ float __fAtomicExch_block(float *__p, float __v) { 208336815Sdim return __nv_int_as_float( 209336815Sdim __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); 210336815Sdim} 211336815Sdim__DEVICE__ float __fAtomicExch_system(float *__p, float __v) { 212336815Sdim return __nv_int_as_float( 213336815Sdim __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); 214336815Sdim} 215336815Sdim__DEVICE__ float __fadd_rd(float __a, float __b) { 216336815Sdim return __nv_fadd_rd(__a, __b); 217336815Sdim} 218336815Sdim__DEVICE__ float __fadd_rn(float __a, float __b) { 219336815Sdim return __nv_fadd_rn(__a, __b); 220336815Sdim} 221336815Sdim__DEVICE__ float __fadd_ru(float __a, float __b) { 222336815Sdim return __nv_fadd_ru(__a, __b); 223336815Sdim} 224336815Sdim__DEVICE__ float __fadd_rz(float __a, float __b) { 225336815Sdim return __nv_fadd_rz(__a, __b); 226336815Sdim} 227336815Sdim__DEVICE__ float __fdiv_rd(float __a, float __b) { 228336815Sdim return __nv_fdiv_rd(__a, __b); 229336815Sdim} 230336815Sdim__DEVICE__ float __fdiv_rn(float __a, float __b) { 231336815Sdim return __nv_fdiv_rn(__a, __b); 232336815Sdim} 233336815Sdim__DEVICE__ float __fdiv_ru(float __a, float __b) { 234336815Sdim return __nv_fdiv_ru(__a, __b); 235336815Sdim} 236336815Sdim__DEVICE__ float __fdiv_rz(float __a, float __b) { 237336815Sdim return __nv_fdiv_rz(__a, __b); 238336815Sdim} 239336815Sdim__DEVICE__ float __fdividef(float __a, float __b) { 240336815Sdim return __nv_fast_fdividef(__a, __b); 241336815Sdim} 242336815Sdim__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); } 243336815Sdim__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); } 244336815Sdim__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); } 245336815Sdim__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); } 246353358Sdim#ifdef _MSC_VER 247353358Sdim__DEVICE__ int __finitel(long double __a); 248353358Sdim#endif 249336815Sdim__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); } 250336815Sdim__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); } 251336815Sdim__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); } 252336815Sdim__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); } 253336815Sdim__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); } 254336815Sdim__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); } 255336815Sdim__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); } 256336815Sdim__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); } 257336815Sdim__DEVICE__ unsigned int __float2uint_rd(float __a) { 258336815Sdim return __nv_float2uint_rd(__a); 259336815Sdim} 260336815Sdim__DEVICE__ unsigned int __float2uint_rn(float __a) { 261336815Sdim return __nv_float2uint_rn(__a); 262336815Sdim} 263336815Sdim__DEVICE__ unsigned int __float2uint_ru(float __a) { 264336815Sdim return __nv_float2uint_ru(__a); 265336815Sdim} 266336815Sdim__DEVICE__ unsigned int __float2uint_rz(float __a) { 267336815Sdim return __nv_float2uint_rz(__a); 268336815Sdim} 269336815Sdim__DEVICE__ unsigned long long __float2ull_rd(float __a) { 270336815Sdim return __nv_float2ull_rd(__a); 271336815Sdim} 272336815Sdim__DEVICE__ unsigned long long __float2ull_rn(float __a) { 273336815Sdim return __nv_float2ull_rn(__a); 274336815Sdim} 275336815Sdim__DEVICE__ unsigned long long __float2ull_ru(float __a) { 276336815Sdim return __nv_float2ull_ru(__a); 277336815Sdim} 278336815Sdim__DEVICE__ unsigned long long __float2ull_rz(float __a) { 279336815Sdim return __nv_float2ull_rz(__a); 280336815Sdim} 281336815Sdim__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); } 282336815Sdim__DEVICE__ unsigned int __float_as_uint(float __a) { 283336815Sdim return __nv_float_as_uint(__a); 284336815Sdim} 285336815Sdim__DEVICE__ double __fma_rd(double __a, double __b, double __c) { 286336815Sdim return __nv_fma_rd(__a, __b, __c); 287336815Sdim} 288336815Sdim__DEVICE__ double __fma_rn(double __a, double __b, double __c) { 289336815Sdim return __nv_fma_rn(__a, __b, __c); 290336815Sdim} 291336815Sdim__DEVICE__ double __fma_ru(double __a, double __b, double __c) { 292336815Sdim return __nv_fma_ru(__a, __b, __c); 293336815Sdim} 294336815Sdim__DEVICE__ double __fma_rz(double __a, double __b, double __c) { 295336815Sdim return __nv_fma_rz(__a, __b, __c); 296336815Sdim} 297336815Sdim__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) { 298336815Sdim return __nv_fmaf_ieee_rd(__a, __b, __c); 299336815Sdim} 300336815Sdim__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) { 301336815Sdim return __nv_fmaf_ieee_rn(__a, __b, __c); 302336815Sdim} 303336815Sdim__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) { 304336815Sdim return __nv_fmaf_ieee_ru(__a, __b, __c); 305336815Sdim} 306336815Sdim__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) { 307336815Sdim return __nv_fmaf_ieee_rz(__a, __b, __c); 308336815Sdim} 309336815Sdim__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) { 310336815Sdim return __nv_fmaf_rd(__a, __b, __c); 311336815Sdim} 312336815Sdim__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) { 313336815Sdim return __nv_fmaf_rn(__a, __b, __c); 314336815Sdim} 315336815Sdim__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) { 316336815Sdim return __nv_fmaf_ru(__a, __b, __c); 317336815Sdim} 318336815Sdim__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) { 319336815Sdim return __nv_fmaf_rz(__a, __b, __c); 320336815Sdim} 321336815Sdim__DEVICE__ float __fmul_rd(float __a, float __b) { 322336815Sdim return __nv_fmul_rd(__a, __b); 323336815Sdim} 324336815Sdim__DEVICE__ float __fmul_rn(float __a, float __b) { 325336815Sdim return __nv_fmul_rn(__a, __b); 326336815Sdim} 327336815Sdim__DEVICE__ float __fmul_ru(float __a, float __b) { 328336815Sdim return __nv_fmul_ru(__a, __b); 329336815Sdim} 330336815Sdim__DEVICE__ float __fmul_rz(float __a, float __b) { 331336815Sdim return __nv_fmul_rz(__a, __b); 332336815Sdim} 333336815Sdim__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); } 334336815Sdim__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); } 335336815Sdim__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); } 336336815Sdim__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); } 337336815Sdim__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); } 338336815Sdim__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); } 339336815Sdim__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); } 340336815Sdim__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); } 341336815Sdim__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); } 342336815Sdim__DEVICE__ float __fsub_rd(float __a, float __b) { 343336815Sdim return __nv_fsub_rd(__a, __b); 344336815Sdim} 345336815Sdim__DEVICE__ float __fsub_rn(float __a, float __b) { 346336815Sdim return __nv_fsub_rn(__a, __b); 347336815Sdim} 348336815Sdim__DEVICE__ float __fsub_ru(float __a, float __b) { 349336815Sdim return __nv_fsub_ru(__a, __b); 350336815Sdim} 351336815Sdim__DEVICE__ float __fsub_rz(float __a, float __b) { 352336815Sdim return __nv_fsub_rz(__a, __b); 353336815Sdim} 354336815Sdim__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); } 355336815Sdim__DEVICE__ double __hiloint2double(int __a, int __b) { 356336815Sdim return __nv_hiloint2double(__a, __b); 357336815Sdim} 358336815Sdim__DEVICE__ int __iAtomicAdd(int *__p, int __v) { 359336815Sdim return __nvvm_atom_add_gen_i(__p, __v); 360336815Sdim} 361336815Sdim__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) { 362336815Sdim __nvvm_atom_cta_add_gen_i(__p, __v); 363336815Sdim} 364336815Sdim__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) { 365336815Sdim __nvvm_atom_sys_add_gen_i(__p, __v); 366336815Sdim} 367336815Sdim__DEVICE__ int __iAtomicAnd(int *__p, int __v) { 368336815Sdim return __nvvm_atom_and_gen_i(__p, __v); 369336815Sdim} 370336815Sdim__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) { 371336815Sdim return __nvvm_atom_cta_and_gen_i(__p, __v); 372336815Sdim} 373336815Sdim__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) { 374336815Sdim return __nvvm_atom_sys_and_gen_i(__p, __v); 375336815Sdim} 376336815Sdim__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) { 377336815Sdim return __nvvm_atom_cas_gen_i(__p, __cmp, __v); 378336815Sdim} 379336815Sdim__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) { 380336815Sdim return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v); 381336815Sdim} 382336815Sdim__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) { 383336815Sdim return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v); 384336815Sdim} 385336815Sdim__DEVICE__ int __iAtomicExch(int *__p, int __v) { 386336815Sdim return __nvvm_atom_xchg_gen_i(__p, __v); 387336815Sdim} 388336815Sdim__DEVICE__ int __iAtomicExch_block(int *__p, int __v) { 389336815Sdim return __nvvm_atom_cta_xchg_gen_i(__p, __v); 390336815Sdim} 391336815Sdim__DEVICE__ int __iAtomicExch_system(int *__p, int __v) { 392336815Sdim return __nvvm_atom_sys_xchg_gen_i(__p, __v); 393336815Sdim} 394336815Sdim__DEVICE__ int __iAtomicMax(int *__p, int __v) { 395336815Sdim return __nvvm_atom_max_gen_i(__p, __v); 396336815Sdim} 397336815Sdim__DEVICE__ int __iAtomicMax_block(int *__p, int __v) { 398336815Sdim return __nvvm_atom_cta_max_gen_i(__p, __v); 399336815Sdim} 400336815Sdim__DEVICE__ int __iAtomicMax_system(int *__p, int __v) { 401336815Sdim return __nvvm_atom_sys_max_gen_i(__p, __v); 402336815Sdim} 403336815Sdim__DEVICE__ int __iAtomicMin(int *__p, int __v) { 404336815Sdim return __nvvm_atom_min_gen_i(__p, __v); 405336815Sdim} 406336815Sdim__DEVICE__ int __iAtomicMin_block(int *__p, int __v) { 407336815Sdim return __nvvm_atom_cta_min_gen_i(__p, __v); 408336815Sdim} 409336815Sdim__DEVICE__ int __iAtomicMin_system(int *__p, int __v) { 410336815Sdim return __nvvm_atom_sys_min_gen_i(__p, __v); 411336815Sdim} 412336815Sdim__DEVICE__ int __iAtomicOr(int *__p, int __v) { 413336815Sdim return __nvvm_atom_or_gen_i(__p, __v); 414336815Sdim} 415336815Sdim__DEVICE__ int __iAtomicOr_block(int *__p, int __v) { 416336815Sdim return __nvvm_atom_cta_or_gen_i(__p, __v); 417336815Sdim} 418336815Sdim__DEVICE__ int __iAtomicOr_system(int *__p, int __v) { 419336815Sdim return __nvvm_atom_sys_or_gen_i(__p, __v); 420336815Sdim} 421336815Sdim__DEVICE__ int __iAtomicXor(int *__p, int __v) { 422336815Sdim return __nvvm_atom_xor_gen_i(__p, __v); 423336815Sdim} 424336815Sdim__DEVICE__ int __iAtomicXor_block(int *__p, int __v) { 425336815Sdim return __nvvm_atom_cta_xor_gen_i(__p, __v); 426336815Sdim} 427336815Sdim__DEVICE__ int __iAtomicXor_system(int *__p, int __v) { 428336815Sdim return __nvvm_atom_sys_xor_gen_i(__p, __v); 429336815Sdim} 430336815Sdim__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) { 431336815Sdim return __nvvm_atom_max_gen_ll(__p, __v); 432336815Sdim} 433336815Sdim__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) { 434336815Sdim return __nvvm_atom_cta_max_gen_ll(__p, __v); 435336815Sdim} 436336815Sdim__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) { 437336815Sdim return __nvvm_atom_sys_max_gen_ll(__p, __v); 438336815Sdim} 439336815Sdim__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) { 440336815Sdim return __nvvm_atom_min_gen_ll(__p, __v); 441336815Sdim} 442336815Sdim__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) { 443336815Sdim return __nvvm_atom_cta_min_gen_ll(__p, __v); 444336815Sdim} 445336815Sdim__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) { 446336815Sdim return __nvvm_atom_sys_min_gen_ll(__p, __v); 447336815Sdim} 448336815Sdim__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); } 449336815Sdim__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); } 450336815Sdim__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); } 451336815Sdim__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); } 452336815Sdim__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); } 453336815Sdim__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); } 454336815Sdim__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); } 455336815Sdim__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); } 456336815Sdim__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); } 457353358Sdim#ifdef _MSC_VER 458353358Sdim__DEVICE__ int __isinfl(long double __a); 459353358Sdim#endif 460336815Sdim__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); } 461336815Sdim__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); } 462353358Sdim#ifdef _MSC_VER 463353358Sdim__DEVICE__ int __isnanl(long double __a); 464353358Sdim#endif 465336815Sdim__DEVICE__ double __ll2double_rd(long long __a) { 466336815Sdim return __nv_ll2double_rd(__a); 467336815Sdim} 468336815Sdim__DEVICE__ double __ll2double_rn(long long __a) { 469336815Sdim return __nv_ll2double_rn(__a); 470336815Sdim} 471336815Sdim__DEVICE__ double __ll2double_ru(long long __a) { 472336815Sdim return __nv_ll2double_ru(__a); 473336815Sdim} 474336815Sdim__DEVICE__ double __ll2double_rz(long long __a) { 475336815Sdim return __nv_ll2double_rz(__a); 476336815Sdim} 477336815Sdim__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); } 478336815Sdim__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); } 479336815Sdim__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); } 480336815Sdim__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); } 481336815Sdim__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) { 482336815Sdim return __nvvm_atom_and_gen_ll(__p, __v); 483336815Sdim} 484336815Sdim__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) { 485336815Sdim return __nvvm_atom_cta_and_gen_ll(__p, __v); 486336815Sdim} 487336815Sdim__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) { 488336815Sdim return __nvvm_atom_sys_and_gen_ll(__p, __v); 489336815Sdim} 490336815Sdim__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) { 491336815Sdim return __nvvm_atom_or_gen_ll(__p, __v); 492336815Sdim} 493336815Sdim__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) { 494336815Sdim return __nvvm_atom_cta_or_gen_ll(__p, __v); 495336815Sdim} 496336815Sdim__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) { 497336815Sdim return __nvvm_atom_sys_or_gen_ll(__p, __v); 498336815Sdim} 499336815Sdim__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) { 500336815Sdim return __nvvm_atom_xor_gen_ll(__p, __v); 501336815Sdim} 502336815Sdim__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) { 503336815Sdim return __nvvm_atom_cta_xor_gen_ll(__p, __v); 504336815Sdim} 505336815Sdim__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) { 506336815Sdim return __nvvm_atom_sys_xor_gen_ll(__p, __v); 507336815Sdim} 508336815Sdim__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); } 509336815Sdim__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); } 510336815Sdim__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); } 511336815Sdim__DEVICE__ double __longlong_as_double(long long __a) { 512336815Sdim return __nv_longlong_as_double(__a); 513336815Sdim} 514336815Sdim__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); } 515336815Sdim__DEVICE__ long long __mul64hi(long long __a, long long __b) { 516336815Sdim return __nv_mul64hi(__a, __b); 517336815Sdim} 518336815Sdim__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); } 519336815Sdim__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); } 520336815Sdim__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); } 521336815Sdim__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); } 522336815Sdim__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); } 523336815Sdim__DEVICE__ int __popc(int __a) { return __nv_popc(__a); } 524336815Sdim__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); } 525336815Sdim__DEVICE__ float __powf(float __a, float __b) { 526336815Sdim return __nv_fast_powf(__a, __b); 527336815Sdim} 528336815Sdim 529336815Sdim// Parameter must have a known integer value. 530336815Sdim#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a)) 531336815Sdim__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); } 532336815Sdim__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) { 533336815Sdim return __nv_sad(__a, __b, __c); 534336815Sdim} 535336815Sdim__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); } 536336815Sdim__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); } 537336815Sdim__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); } 538353358Sdim__DEVICE__ void __sincosf(float __a, float *__s, float *__c) { 539353358Sdim return __nv_fast_sincosf(__a, __s, __c); 540336815Sdim} 541336815Sdim__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); } 542336815Sdim__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); } 543336815Sdim__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); } 544336815Sdim__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); } 545336815Sdim__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); } 546336815Sdim__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); } 547336815Sdim__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); }; 548336815Sdim__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); }; 549336815Sdim__DEVICE__ void __trap(void) { asm volatile("trap;"); } 550336815Sdim__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) { 551336815Sdim return __nvvm_atom_add_gen_i((int *)__p, __v); 552336815Sdim} 553336815Sdim__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p, 554336815Sdim unsigned int __v) { 555336815Sdim return __nvvm_atom_cta_add_gen_i((int *)__p, __v); 556336815Sdim} 557336815Sdim__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p, 558336815Sdim unsigned int __v) { 559336815Sdim return __nvvm_atom_sys_add_gen_i((int *)__p, __v); 560336815Sdim} 561336815Sdim__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) { 562336815Sdim return __nvvm_atom_and_gen_i((int *)__p, __v); 563336815Sdim} 564336815Sdim__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p, 565336815Sdim unsigned int __v) { 566336815Sdim return __nvvm_atom_cta_and_gen_i((int *)__p, __v); 567336815Sdim} 568336815Sdim__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p, 569336815Sdim unsigned int __v) { 570336815Sdim return __nvvm_atom_sys_and_gen_i((int *)__p, __v); 571336815Sdim} 572336815Sdim__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp, 573336815Sdim unsigned int __v) { 574336815Sdim return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v); 575336815Sdim} 576336815Sdim__DEVICE__ unsigned int 577336815Sdim__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) { 578336815Sdim return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v); 579336815Sdim} 580336815Sdim__DEVICE__ unsigned int 581336815Sdim__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) { 582336815Sdim return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v); 583336815Sdim} 584336815Sdim__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) { 585336815Sdim return __nvvm_atom_dec_gen_ui(__p, __v); 586336815Sdim} 587336815Sdim__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p, 588336815Sdim unsigned int __v) { 589336815Sdim return __nvvm_atom_cta_dec_gen_ui(__p, __v); 590336815Sdim} 591336815Sdim__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p, 592336815Sdim unsigned int __v) { 593336815Sdim return __nvvm_atom_sys_dec_gen_ui(__p, __v); 594336815Sdim} 595336815Sdim__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) { 596336815Sdim return __nvvm_atom_xchg_gen_i((int *)__p, __v); 597336815Sdim} 598336815Sdim__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p, 599336815Sdim unsigned int __v) { 600336815Sdim return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v); 601336815Sdim} 602336815Sdim__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p, 603336815Sdim unsigned int __v) { 604336815Sdim return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v); 605336815Sdim} 606336815Sdim__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) { 607336815Sdim return __nvvm_atom_inc_gen_ui(__p, __v); 608336815Sdim} 609336815Sdim__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p, 610336815Sdim unsigned int __v) { 611336815Sdim return __nvvm_atom_cta_inc_gen_ui(__p, __v); 612336815Sdim} 613336815Sdim__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p, 614336815Sdim unsigned int __v) { 615336815Sdim return __nvvm_atom_sys_inc_gen_ui(__p, __v); 616336815Sdim} 617336815Sdim__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) { 618336815Sdim return __nvvm_atom_max_gen_ui(__p, __v); 619336815Sdim} 620336815Sdim__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p, 621336815Sdim unsigned int __v) { 622336815Sdim return __nvvm_atom_cta_max_gen_ui(__p, __v); 623336815Sdim} 624336815Sdim__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p, 625336815Sdim unsigned int __v) { 626336815Sdim return __nvvm_atom_sys_max_gen_ui(__p, __v); 627336815Sdim} 628336815Sdim__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) { 629336815Sdim return __nvvm_atom_min_gen_ui(__p, __v); 630336815Sdim} 631336815Sdim__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p, 632336815Sdim unsigned int __v) { 633336815Sdim return __nvvm_atom_cta_min_gen_ui(__p, __v); 634336815Sdim} 635336815Sdim__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p, 636336815Sdim unsigned int __v) { 637336815Sdim return __nvvm_atom_sys_min_gen_ui(__p, __v); 638336815Sdim} 639336815Sdim__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) { 640336815Sdim return __nvvm_atom_or_gen_i((int *)__p, __v); 641336815Sdim} 642336815Sdim__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) { 643336815Sdim return __nvvm_atom_cta_or_gen_i((int *)__p, __v); 644336815Sdim} 645336815Sdim__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p, 646336815Sdim unsigned int __v) { 647336815Sdim return __nvvm_atom_sys_or_gen_i((int *)__p, __v); 648336815Sdim} 649336815Sdim__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) { 650336815Sdim return __nvvm_atom_xor_gen_i((int *)__p, __v); 651336815Sdim} 652336815Sdim__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p, 653336815Sdim unsigned int __v) { 654336815Sdim return __nvvm_atom_cta_xor_gen_i((int *)__p, __v); 655336815Sdim} 656336815Sdim__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p, 657336815Sdim unsigned int __v) { 658336815Sdim return __nvvm_atom_sys_xor_gen_i((int *)__p, __v); 659336815Sdim} 660336815Sdim__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) { 661336815Sdim return __nv_uhadd(__a, __b); 662336815Sdim} 663336815Sdim__DEVICE__ double __uint2double_rn(unsigned int __a) { 664336815Sdim return __nv_uint2double_rn(__a); 665336815Sdim} 666336815Sdim__DEVICE__ float __uint2float_rd(unsigned int __a) { 667336815Sdim return __nv_uint2float_rd(__a); 668336815Sdim} 669336815Sdim__DEVICE__ float __uint2float_rn(unsigned int __a) { 670336815Sdim return __nv_uint2float_rn(__a); 671336815Sdim} 672336815Sdim__DEVICE__ float __uint2float_ru(unsigned int __a) { 673336815Sdim return __nv_uint2float_ru(__a); 674336815Sdim} 675336815Sdim__DEVICE__ float __uint2float_rz(unsigned int __a) { 676336815Sdim return __nv_uint2float_rz(__a); 677336815Sdim} 678336815Sdim__DEVICE__ float __uint_as_float(unsigned int __a) { 679336815Sdim return __nv_uint_as_float(__a); 680336815Sdim} // 681336815Sdim__DEVICE__ double __ull2double_rd(unsigned long long __a) { 682336815Sdim return __nv_ull2double_rd(__a); 683336815Sdim} 684336815Sdim__DEVICE__ double __ull2double_rn(unsigned long long __a) { 685336815Sdim return __nv_ull2double_rn(__a); 686336815Sdim} 687336815Sdim__DEVICE__ double __ull2double_ru(unsigned long long __a) { 688336815Sdim return __nv_ull2double_ru(__a); 689336815Sdim} 690336815Sdim__DEVICE__ double __ull2double_rz(unsigned long long __a) { 691336815Sdim return __nv_ull2double_rz(__a); 692336815Sdim} 693336815Sdim__DEVICE__ float __ull2float_rd(unsigned long long __a) { 694336815Sdim return __nv_ull2float_rd(__a); 695336815Sdim} 696336815Sdim__DEVICE__ float __ull2float_rn(unsigned long long __a) { 697336815Sdim return __nv_ull2float_rn(__a); 698336815Sdim} 699336815Sdim__DEVICE__ float __ull2float_ru(unsigned long long __a) { 700336815Sdim return __nv_ull2float_ru(__a); 701336815Sdim} 702336815Sdim__DEVICE__ float __ull2float_rz(unsigned long long __a) { 703336815Sdim return __nv_ull2float_rz(__a); 704336815Sdim} 705336815Sdim__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p, 706336815Sdim unsigned long long __v) { 707336815Sdim return __nvvm_atom_add_gen_ll((long long *)__p, __v); 708336815Sdim} 709336815Sdim__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p, 710336815Sdim unsigned long long __v) { 711336815Sdim return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v); 712336815Sdim} 713336815Sdim__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p, 714336815Sdim unsigned long long __v) { 715336815Sdim return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v); 716336815Sdim} 717336815Sdim__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p, 718336815Sdim unsigned long long __v) { 719336815Sdim return __nvvm_atom_and_gen_ll((long long *)__p, __v); 720336815Sdim} 721336815Sdim__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p, 722336815Sdim unsigned long long __v) { 723336815Sdim return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v); 724336815Sdim} 725336815Sdim__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p, 726336815Sdim unsigned long long __v) { 727336815Sdim return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v); 728336815Sdim} 729336815Sdim__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p, 730336815Sdim unsigned long long __cmp, 731336815Sdim unsigned long long __v) { 732336815Sdim return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v); 733336815Sdim} 734336815Sdim__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p, 735336815Sdim unsigned long long __cmp, 736336815Sdim unsigned long long __v) { 737336815Sdim return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v); 738336815Sdim} 739336815Sdim__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p, 740336815Sdim unsigned long long __cmp, 741336815Sdim unsigned long long __v) { 742336815Sdim return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v); 743336815Sdim} 744336815Sdim__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p, 745336815Sdim unsigned long long __v) { 746336815Sdim return __nvvm_atom_xchg_gen_ll((long long *)__p, __v); 747336815Sdim} 748336815Sdim__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p, 749336815Sdim unsigned long long __v) { 750336815Sdim return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v); 751336815Sdim} 752336815Sdim__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p, 753336815Sdim unsigned long long __v) { 754336815Sdim return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v); 755336815Sdim} 756336815Sdim__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p, 757336815Sdim unsigned long long __v) { 758336815Sdim return __nvvm_atom_max_gen_ull(__p, __v); 759336815Sdim} 760336815Sdim__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p, 761336815Sdim unsigned long long __v) { 762336815Sdim return __nvvm_atom_cta_max_gen_ull(__p, __v); 763336815Sdim} 764336815Sdim__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p, 765336815Sdim unsigned long long __v) { 766336815Sdim return __nvvm_atom_sys_max_gen_ull(__p, __v); 767336815Sdim} 768336815Sdim__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p, 769336815Sdim unsigned long long __v) { 770336815Sdim return __nvvm_atom_min_gen_ull(__p, __v); 771336815Sdim} 772336815Sdim__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p, 773336815Sdim unsigned long long __v) { 774336815Sdim return __nvvm_atom_cta_min_gen_ull(__p, __v); 775336815Sdim} 776336815Sdim__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p, 777336815Sdim unsigned long long __v) { 778336815Sdim return __nvvm_atom_sys_min_gen_ull(__p, __v); 779336815Sdim} 780336815Sdim__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p, 781336815Sdim unsigned long long __v) { 782336815Sdim return __nvvm_atom_or_gen_ll((long long *)__p, __v); 783336815Sdim} 784336815Sdim__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p, 785336815Sdim unsigned long long __v) { 786336815Sdim return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v); 787336815Sdim} 788336815Sdim__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p, 789336815Sdim unsigned long long __v) { 790336815Sdim return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v); 791336815Sdim} 792336815Sdim__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p, 793336815Sdim unsigned long long __v) { 794336815Sdim return __nvvm_atom_xor_gen_ll((long long *)__p, __v); 795336815Sdim} 796336815Sdim__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p, 797336815Sdim unsigned long long __v) { 798336815Sdim return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v); 799336815Sdim} 800336815Sdim__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p, 801336815Sdim unsigned long long __v) { 802336815Sdim return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v); 803336815Sdim} 804336815Sdim__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) { 805336815Sdim return __nv_umul24(__a, __b); 806336815Sdim} 807336815Sdim__DEVICE__ unsigned long long __umul64hi(unsigned long long __a, 808336815Sdim unsigned long long __b) { 809336815Sdim return __nv_umul64hi(__a, __b); 810336815Sdim} 811336815Sdim__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) { 812336815Sdim return __nv_umulhi(__a, __b); 813336815Sdim} 814336815Sdim__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) { 815336815Sdim return __nv_urhadd(__a, __b); 816336815Sdim} 817336815Sdim__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b, 818336815Sdim unsigned int __c) { 819336815Sdim return __nv_usad(__a, __b, __c); 820336815Sdim} 821336815Sdim 822336815Sdim#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 823336815Sdim__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); } 824336815Sdim__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); } 825336815Sdim__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { 826336815Sdim return __nv_vabsdiffs2(__a, __b); 827336815Sdim} 828336815Sdim__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { 829336815Sdim return __nv_vabsdiffs4(__a, __b); 830336815Sdim} 831336815Sdim__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) { 832336815Sdim return __nv_vabsdiffu2(__a, __b); 833336815Sdim} 834336815Sdim__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { 835336815Sdim return __nv_vabsdiffu4(__a, __b); 836336815Sdim} 837336815Sdim__DEVICE__ unsigned int __vabsss2(unsigned int __a) { 838336815Sdim return __nv_vabsss2(__a); 839336815Sdim} 840336815Sdim__DEVICE__ unsigned int __vabsss4(unsigned int __a) { 841336815Sdim return __nv_vabsss4(__a); 842336815Sdim} 843336815Sdim__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) { 844336815Sdim return __nv_vadd2(__a, __b); 845336815Sdim} 846336815Sdim__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { 847336815Sdim return __nv_vadd4(__a, __b); 848336815Sdim} 849336815Sdim__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { 850336815Sdim return __nv_vaddss2(__a, __b); 851336815Sdim} 852336815Sdim__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { 853336815Sdim return __nv_vaddss4(__a, __b); 854336815Sdim} 855336815Sdim__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { 856336815Sdim return __nv_vaddus2(__a, __b); 857336815Sdim} 858336815Sdim__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { 859336815Sdim return __nv_vaddus4(__a, __b); 860336815Sdim} 861336815Sdim__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { 862336815Sdim return __nv_vavgs2(__a, __b); 863336815Sdim} 864336815Sdim__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { 865336815Sdim return __nv_vavgs4(__a, __b); 866336815Sdim} 867336815Sdim__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { 868336815Sdim return __nv_vavgu2(__a, __b); 869336815Sdim} 870336815Sdim__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { 871336815Sdim return __nv_vavgu4(__a, __b); 872336815Sdim} 873336815Sdim__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { 874336815Sdim return __nv_vcmpeq2(__a, __b); 875336815Sdim} 876336815Sdim__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { 877336815Sdim return __nv_vcmpeq4(__a, __b); 878336815Sdim} 879336815Sdim__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { 880336815Sdim return __nv_vcmpges2(__a, __b); 881336815Sdim} 882336815Sdim__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { 883336815Sdim return __nv_vcmpges4(__a, __b); 884336815Sdim} 885336815Sdim__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { 886336815Sdim return __nv_vcmpgeu2(__a, __b); 887336815Sdim} 888336815Sdim__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { 889336815Sdim return __nv_vcmpgeu4(__a, __b); 890336815Sdim} 891336815Sdim__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { 892336815Sdim return __nv_vcmpgts2(__a, __b); 893336815Sdim} 894336815Sdim__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { 895336815Sdim return __nv_vcmpgts4(__a, __b); 896336815Sdim} 897336815Sdim__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { 898336815Sdim return __nv_vcmpgtu2(__a, __b); 899336815Sdim} 900336815Sdim__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { 901336815Sdim return __nv_vcmpgtu4(__a, __b); 902336815Sdim} 903336815Sdim__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { 904336815Sdim return __nv_vcmples2(__a, __b); 905336815Sdim} 906336815Sdim__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { 907336815Sdim return __nv_vcmples4(__a, __b); 908336815Sdim} 909336815Sdim__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { 910336815Sdim return __nv_vcmpleu2(__a, __b); 911336815Sdim} 912336815Sdim__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { 913336815Sdim return __nv_vcmpleu4(__a, __b); 914336815Sdim} 915336815Sdim__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { 916336815Sdim return __nv_vcmplts2(__a, __b); 917336815Sdim} 918336815Sdim__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { 919336815Sdim return __nv_vcmplts4(__a, __b); 920336815Sdim} 921336815Sdim__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { 922336815Sdim return __nv_vcmpltu2(__a, __b); 923336815Sdim} 924336815Sdim__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { 925336815Sdim return __nv_vcmpltu4(__a, __b); 926336815Sdim} 927336815Sdim__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { 928336815Sdim return __nv_vcmpne2(__a, __b); 929336815Sdim} 930336815Sdim__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { 931336815Sdim return __nv_vcmpne4(__a, __b); 932336815Sdim} 933336815Sdim__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) { 934336815Sdim return __nv_vhaddu2(__a, __b); 935336815Sdim} 936336815Sdim__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) { 937336815Sdim return __nv_vhaddu4(__a, __b); 938336815Sdim} 939336815Sdim__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { 940336815Sdim return __nv_vmaxs2(__a, __b); 941336815Sdim} 942336815Sdim__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { 943336815Sdim return __nv_vmaxs4(__a, __b); 944336815Sdim} 945336815Sdim__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { 946336815Sdim return __nv_vmaxu2(__a, __b); 947336815Sdim} 948336815Sdim__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { 949336815Sdim return __nv_vmaxu4(__a, __b); 950336815Sdim} 951336815Sdim__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { 952336815Sdim return __nv_vmins2(__a, __b); 953336815Sdim} 954336815Sdim__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { 955336815Sdim return __nv_vmins4(__a, __b); 956336815Sdim} 957336815Sdim__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { 958336815Sdim return __nv_vminu2(__a, __b); 959336815Sdim} 960336815Sdim__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { 961336815Sdim return __nv_vminu4(__a, __b); 962336815Sdim} 963336815Sdim__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); } 964336815Sdim__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); } 965336815Sdim__DEVICE__ unsigned int __vnegss2(unsigned int __a) { 966336815Sdim return __nv_vnegss2(__a); 967336815Sdim} 968336815Sdim__DEVICE__ unsigned int __vnegss4(unsigned int __a) { 969336815Sdim return __nv_vnegss4(__a); 970336815Sdim} 971336815Sdim__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { 972336815Sdim return __nv_vsads2(__a, __b); 973336815Sdim} 974336815Sdim__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { 975336815Sdim return __nv_vsads4(__a, __b); 976336815Sdim} 977336815Sdim__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { 978336815Sdim return __nv_vsadu2(__a, __b); 979336815Sdim} 980336815Sdim__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { 981336815Sdim return __nv_vsadu4(__a, __b); 982336815Sdim} 983336815Sdim__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { 984336815Sdim return __nv_vseteq2(__a, __b); 985336815Sdim} 986336815Sdim__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { 987336815Sdim return __nv_vseteq4(__a, __b); 988336815Sdim} 989336815Sdim__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { 990336815Sdim return __nv_vsetges2(__a, __b); 991336815Sdim} 992336815Sdim__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { 993336815Sdim return __nv_vsetges4(__a, __b); 994336815Sdim} 995336815Sdim__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { 996336815Sdim return __nv_vsetgeu2(__a, __b); 997336815Sdim} 998336815Sdim__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { 999336815Sdim return __nv_vsetgeu4(__a, __b); 1000336815Sdim} 1001336815Sdim__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { 1002336815Sdim return __nv_vsetgts2(__a, __b); 1003336815Sdim} 1004336815Sdim__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { 1005336815Sdim return __nv_vsetgts4(__a, __b); 1006336815Sdim} 1007336815Sdim__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { 1008336815Sdim return __nv_vsetgtu2(__a, __b); 1009336815Sdim} 1010336815Sdim__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { 1011336815Sdim return __nv_vsetgtu4(__a, __b); 1012336815Sdim} 1013336815Sdim__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { 1014336815Sdim return __nv_vsetles2(__a, __b); 1015336815Sdim} 1016336815Sdim__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { 1017336815Sdim return __nv_vsetles4(__a, __b); 1018336815Sdim} 1019336815Sdim__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) { 1020336815Sdim return __nv_vsetleu2(__a, __b); 1021336815Sdim} 1022336815Sdim__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { 1023336815Sdim return __nv_vsetleu4(__a, __b); 1024336815Sdim} 1025336815Sdim__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { 1026336815Sdim return __nv_vsetlts2(__a, __b); 1027336815Sdim} 1028336815Sdim__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { 1029336815Sdim return __nv_vsetlts4(__a, __b); 1030336815Sdim} 1031336815Sdim__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { 1032336815Sdim return __nv_vsetltu2(__a, __b); 1033336815Sdim} 1034336815Sdim__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { 1035336815Sdim return __nv_vsetltu4(__a, __b); 1036336815Sdim} 1037336815Sdim__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { 1038336815Sdim return __nv_vsetne2(__a, __b); 1039336815Sdim} 1040336815Sdim__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) { 1041336815Sdim return __nv_vsetne4(__a, __b); 1042336815Sdim} 1043336815Sdim__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { 1044336815Sdim return __nv_vsub2(__a, __b); 1045336815Sdim} 1046336815Sdim__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { 1047336815Sdim return __nv_vsub4(__a, __b); 1048336815Sdim} 1049336815Sdim__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { 1050336815Sdim return __nv_vsubss2(__a, __b); 1051336815Sdim} 1052336815Sdim__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { 1053336815Sdim return __nv_vsubss4(__a, __b); 1054336815Sdim} 1055336815Sdim__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { 1056336815Sdim return __nv_vsubus2(__a, __b); 1057336815Sdim} 1058336815Sdim__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { 1059336815Sdim return __nv_vsubus4(__a, __b); 1060336815Sdim} 1061336815Sdim#else // CUDA_VERSION >= 9020 1062336815Sdim// CUDA no longer provides inline assembly (or bitcode) implementation of these 1063336815Sdim// functions, so we have to reimplment them. The implementation is naive and is 1064336815Sdim// not optimized for performance. 1065336815Sdim 1066336815Sdim// Helper function to convert N-bit boolean subfields into all-0 or all-1. 1067336815Sdim// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00 1068336815Sdim// __bool2mask(0x00010000,16) -> 0xffff0000 1069336815Sdim__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) { 1070336815Sdim return (__a << shift) - __a; 1071336815Sdim} 1072336815Sdim__DEVICE__ unsigned int __vabs2(unsigned int __a) { 1073336815Sdim unsigned int r; 1074336815Sdim asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" 1075336815Sdim : "=r"(r) 1076336815Sdim : "r"(__a), "r"(0), "r"(0)); 1077336815Sdim return r; 1078336815Sdim} 1079336815Sdim__DEVICE__ unsigned int __vabs4(unsigned int __a) { 1080336815Sdim unsigned int r; 1081336815Sdim asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" 1082336815Sdim : "=r"(r) 1083336815Sdim : "r"(__a), "r"(0), "r"(0)); 1084336815Sdim return r; 1085336815Sdim} 1086336815Sdim__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { 1087336815Sdim unsigned int r; 1088336815Sdim asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" 1089336815Sdim : "=r"(r) 1090336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1091336815Sdim return r; 1092336815Sdim} 1093336815Sdim 1094336815Sdim__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { 1095336815Sdim unsigned int r; 1096336815Sdim asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" 1097336815Sdim : "=r"(r) 1098336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1099336815Sdim return r; 1100336815Sdim} 1101336815Sdim__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) { 1102336815Sdim unsigned int r; 1103336815Sdim asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;" 1104336815Sdim : "=r"(r) 1105336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1106336815Sdim return r; 1107336815Sdim} 1108336815Sdim__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { 1109336815Sdim unsigned int r; 1110336815Sdim asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;" 1111336815Sdim : "=r"(r) 1112336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1113336815Sdim return r; 1114336815Sdim} 1115336815Sdim__DEVICE__ unsigned int __vabsss2(unsigned int __a) { 1116336815Sdim unsigned int r; 1117336815Sdim asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;" 1118336815Sdim : "=r"(r) 1119336815Sdim : "r"(__a), "r"(0), "r"(0)); 1120336815Sdim return r; 1121336815Sdim} 1122336815Sdim__DEVICE__ unsigned int __vabsss4(unsigned int __a) { 1123336815Sdim unsigned int r; 1124336815Sdim asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;" 1125336815Sdim : "=r"(r) 1126336815Sdim : "r"(__a), "r"(0), "r"(0)); 1127336815Sdim return r; 1128336815Sdim} 1129336815Sdim__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) { 1130336815Sdim unsigned int r; 1131336815Sdim asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1132336815Sdim return r; 1133336815Sdim} 1134336815Sdim__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { 1135336815Sdim unsigned int r; 1136336815Sdim asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1137336815Sdim return r; 1138336815Sdim} 1139336815Sdim__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { 1140336815Sdim unsigned int r; 1141336815Sdim asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;" 1142336815Sdim : "=r"(r) 1143336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1144336815Sdim return r; 1145336815Sdim} 1146336815Sdim__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { 1147336815Sdim unsigned int r; 1148336815Sdim asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;" 1149336815Sdim : "=r"(r) 1150336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1151336815Sdim return r; 1152336815Sdim} 1153336815Sdim__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { 1154336815Sdim unsigned int r; 1155336815Sdim asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;" 1156336815Sdim : "=r"(r) 1157336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1158336815Sdim return r; 1159336815Sdim} 1160336815Sdim__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { 1161336815Sdim unsigned int r; 1162336815Sdim asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;" 1163336815Sdim : "=r"(r) 1164336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1165336815Sdim return r; 1166336815Sdim} 1167336815Sdim__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { 1168336815Sdim unsigned int r; 1169336815Sdim asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1170336815Sdim return r; 1171336815Sdim} 1172336815Sdim__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { 1173336815Sdim unsigned int r; 1174336815Sdim asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1175336815Sdim return r; 1176336815Sdim} 1177336815Sdim__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { 1178336815Sdim unsigned int r; 1179336815Sdim asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1180336815Sdim return r; 1181336815Sdim} 1182336815Sdim__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { 1183336815Sdim unsigned int r; 1184336815Sdim asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1185336815Sdim return r; 1186336815Sdim} 1187336815Sdim__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { 1188336815Sdim unsigned int r; 1189336815Sdim asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1190336815Sdim return r; 1191336815Sdim} 1192336815Sdim__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { 1193336815Sdim return __bool2mask(__vseteq2(__a, __b), 16); 1194336815Sdim} 1195336815Sdim__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { 1196336815Sdim unsigned int r; 1197336815Sdim asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1198336815Sdim return r; 1199336815Sdim} 1200336815Sdim__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { 1201336815Sdim return __bool2mask(__vseteq4(__a, __b), 8); 1202336815Sdim} 1203336815Sdim__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { 1204336815Sdim unsigned int r; 1205336815Sdim asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1206336815Sdim return r; 1207336815Sdim} 1208336815Sdim__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { 1209336815Sdim return __bool2mask(__vsetges2(__a, __b), 16); 1210336815Sdim} 1211336815Sdim__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { 1212336815Sdim unsigned int r; 1213336815Sdim asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1214336815Sdim return r; 1215336815Sdim} 1216336815Sdim__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { 1217336815Sdim return __bool2mask(__vsetges4(__a, __b), 8); 1218336815Sdim} 1219336815Sdim__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { 1220336815Sdim unsigned int r; 1221336815Sdim asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1222336815Sdim return r; 1223336815Sdim} 1224336815Sdim__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { 1225336815Sdim return __bool2mask(__vsetgeu2(__a, __b), 16); 1226336815Sdim} 1227336815Sdim__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { 1228336815Sdim unsigned int r; 1229336815Sdim asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1230336815Sdim return r; 1231336815Sdim} 1232336815Sdim__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { 1233336815Sdim return __bool2mask(__vsetgeu4(__a, __b), 8); 1234336815Sdim} 1235336815Sdim__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { 1236336815Sdim unsigned int r; 1237336815Sdim asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1238336815Sdim return r; 1239336815Sdim} 1240336815Sdim__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { 1241336815Sdim return __bool2mask(__vsetgts2(__a, __b), 16); 1242336815Sdim} 1243336815Sdim__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { 1244336815Sdim unsigned int r; 1245336815Sdim asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1246336815Sdim return r; 1247336815Sdim} 1248336815Sdim__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { 1249336815Sdim return __bool2mask(__vsetgts4(__a, __b), 8); 1250336815Sdim} 1251336815Sdim__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { 1252336815Sdim unsigned int r; 1253336815Sdim asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1254336815Sdim return r; 1255336815Sdim} 1256336815Sdim__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { 1257336815Sdim return __bool2mask(__vsetgtu2(__a, __b), 16); 1258336815Sdim} 1259336815Sdim__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { 1260336815Sdim unsigned int r; 1261336815Sdim asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1262336815Sdim return r; 1263336815Sdim} 1264336815Sdim__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { 1265336815Sdim return __bool2mask(__vsetgtu4(__a, __b), 8); 1266336815Sdim} 1267336815Sdim__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { 1268336815Sdim unsigned int r; 1269336815Sdim asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1270336815Sdim return r; 1271336815Sdim} 1272336815Sdim__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { 1273336815Sdim return __bool2mask(__vsetles2(__a, __b), 16); 1274336815Sdim} 1275336815Sdim__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { 1276336815Sdim unsigned int r; 1277336815Sdim asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1278336815Sdim return r; 1279336815Sdim} 1280336815Sdim__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { 1281336815Sdim return __bool2mask(__vsetles4(__a, __b), 8); 1282336815Sdim} 1283336815Sdim__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) { 1284336815Sdim unsigned int r; 1285336815Sdim asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1286336815Sdim return r; 1287336815Sdim} 1288336815Sdim__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { 1289336815Sdim return __bool2mask(__vsetleu2(__a, __b), 16); 1290336815Sdim} 1291336815Sdim__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { 1292336815Sdim unsigned int r; 1293336815Sdim asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1294336815Sdim return r; 1295336815Sdim} 1296336815Sdim__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { 1297336815Sdim return __bool2mask(__vsetleu4(__a, __b), 8); 1298336815Sdim} 1299336815Sdim__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { 1300336815Sdim unsigned int r; 1301336815Sdim asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1302336815Sdim return r; 1303336815Sdim} 1304336815Sdim__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { 1305336815Sdim return __bool2mask(__vsetlts2(__a, __b), 16); 1306336815Sdim} 1307336815Sdim__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { 1308336815Sdim unsigned int r; 1309336815Sdim asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1310336815Sdim return r; 1311336815Sdim} 1312336815Sdim__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { 1313336815Sdim return __bool2mask(__vsetlts4(__a, __b), 8); 1314336815Sdim} 1315336815Sdim__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { 1316336815Sdim unsigned int r; 1317336815Sdim asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1318336815Sdim return r; 1319336815Sdim} 1320336815Sdim__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { 1321336815Sdim return __bool2mask(__vsetltu2(__a, __b), 16); 1322336815Sdim} 1323336815Sdim__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { 1324336815Sdim unsigned int r; 1325336815Sdim asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1326336815Sdim return r; 1327336815Sdim} 1328336815Sdim__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { 1329336815Sdim return __bool2mask(__vsetltu4(__a, __b), 8); 1330336815Sdim} 1331336815Sdim__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { 1332336815Sdim unsigned int r; 1333336815Sdim asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1334336815Sdim return r; 1335336815Sdim} 1336336815Sdim__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { 1337336815Sdim return __bool2mask(__vsetne2(__a, __b), 16); 1338336815Sdim} 1339336815Sdim__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) { 1340336815Sdim unsigned int r; 1341336815Sdim asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1342336815Sdim return r; 1343336815Sdim} 1344336815Sdim__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { 1345336815Sdim return __bool2mask(__vsetne4(__a, __b), 8); 1346336815Sdim} 1347336815Sdim 1348336815Sdim// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086 1349336815Sdim// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) => 1350336815Sdim// (a + b) / 2 = ((a ^ b) >> 1) + (a & b) 1351336815Sdim// To operate on multiple sub-elements we need to make sure to mask out bits 1352336815Sdim// that crossed over into adjacent elements during the shift. 1353336815Sdim__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) { 1354336815Sdim return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b); 1355336815Sdim} 1356336815Sdim__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) { 1357336815Sdim return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b); 1358336815Sdim} 1359336815Sdim 1360336815Sdim__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { 1361336815Sdim unsigned int r; 1362336815Sdim if ((__a & 0x8000) && (__b & 0x8000)) { 1363336815Sdim // Work around a bug in ptxas which produces invalid result if low element 1364336815Sdim // is negative. 1365336815Sdim unsigned mask = __vcmpgts2(__a, __b); 1366336815Sdim r = (__a & mask) | (__b & ~mask); 1367336815Sdim } else { 1368336815Sdim asm("vmax2.s32.s32.s32 %0,%1,%2,%3;" 1369336815Sdim : "=r"(r) 1370336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1371336815Sdim } 1372336815Sdim return r; 1373336815Sdim} 1374336815Sdim__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { 1375336815Sdim unsigned int r; 1376336815Sdim asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1377336815Sdim return r; 1378336815Sdim} 1379336815Sdim__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { 1380336815Sdim unsigned int r; 1381336815Sdim asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1382336815Sdim return r; 1383336815Sdim} 1384336815Sdim__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { 1385336815Sdim unsigned int r; 1386336815Sdim asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1387336815Sdim return r; 1388336815Sdim} 1389336815Sdim__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { 1390336815Sdim unsigned int r; 1391336815Sdim asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1392336815Sdim return r; 1393336815Sdim} 1394336815Sdim__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { 1395336815Sdim unsigned int r; 1396336815Sdim asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1397336815Sdim return r; 1398336815Sdim} 1399336815Sdim__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { 1400336815Sdim unsigned int r; 1401336815Sdim asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1402336815Sdim return r; 1403336815Sdim} 1404336815Sdim__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { 1405336815Sdim unsigned int r; 1406336815Sdim asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1407336815Sdim return r; 1408336815Sdim} 1409336815Sdim__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { 1410336815Sdim unsigned int r; 1411336815Sdim asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;" 1412336815Sdim : "=r"(r) 1413336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1414336815Sdim return r; 1415336815Sdim} 1416336815Sdim__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { 1417336815Sdim unsigned int r; 1418336815Sdim asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;" 1419336815Sdim : "=r"(r) 1420336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1421336815Sdim return r; 1422336815Sdim} 1423336815Sdim__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { 1424336815Sdim unsigned int r; 1425336815Sdim asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;" 1426336815Sdim : "=r"(r) 1427336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1428336815Sdim return r; 1429336815Sdim} 1430336815Sdim__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { 1431336815Sdim unsigned int r; 1432336815Sdim asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;" 1433336815Sdim : "=r"(r) 1434336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1435336815Sdim return r; 1436336815Sdim} 1437336815Sdim 1438336815Sdim__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { 1439336815Sdim unsigned int r; 1440336815Sdim asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1441336815Sdim return r; 1442336815Sdim} 1443336815Sdim__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); } 1444336815Sdim 1445336815Sdim__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { 1446336815Sdim unsigned int r; 1447336815Sdim asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); 1448336815Sdim return r; 1449336815Sdim} 1450336815Sdim__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); } 1451336815Sdim__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { 1452336815Sdim unsigned int r; 1453336815Sdim asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;" 1454336815Sdim : "=r"(r) 1455336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1456336815Sdim return r; 1457336815Sdim} 1458336815Sdim__DEVICE__ unsigned int __vnegss2(unsigned int __a) { 1459336815Sdim return __vsubss2(0, __a); 1460336815Sdim} 1461336815Sdim__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { 1462336815Sdim unsigned int r; 1463336815Sdim asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;" 1464336815Sdim : "=r"(r) 1465336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1466336815Sdim return r; 1467336815Sdim} 1468336815Sdim__DEVICE__ unsigned int __vnegss4(unsigned int __a) { 1469336815Sdim return __vsubss4(0, __a); 1470336815Sdim} 1471336815Sdim__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { 1472336815Sdim unsigned int r; 1473336815Sdim asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;" 1474336815Sdim : "=r"(r) 1475336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1476336815Sdim return r; 1477336815Sdim} 1478336815Sdim__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { 1479336815Sdim unsigned int r; 1480336815Sdim asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;" 1481336815Sdim : "=r"(r) 1482336815Sdim : "r"(__a), "r"(__b), "r"(0)); 1483336815Sdim return r; 1484336815Sdim} 1485336815Sdim#endif // CUDA_VERSION >= 9020 1486353358Sdim__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); } 1487353358Sdim__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); } 1488336815Sdim__DEVICE__ double acos(double __a) { return __nv_acos(__a); } 1489336815Sdim__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); } 1490336815Sdim__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); } 1491336815Sdim__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); } 1492336815Sdim__DEVICE__ double asin(double __a) { return __nv_asin(__a); } 1493336815Sdim__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); } 1494336815Sdim__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); } 1495336815Sdim__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); } 1496336815Sdim__DEVICE__ double atan(double __a) { return __nv_atan(__a); } 1497336815Sdim__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); } 1498336815Sdim__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); } 1499336815Sdim__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); } 1500336815Sdim__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); } 1501336815Sdim__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); } 1502336815Sdim__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); } 1503336815Sdim__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); } 1504336815Sdim__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); } 1505336815Sdim__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); } 1506353358Sdim#ifndef _OPENMP 1507336815Sdim__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); } 1508336815Sdim__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); } 1509353358Sdim#endif 1510336815Sdim__DEVICE__ double copysign(double __a, double __b) { 1511336815Sdim return __nv_copysign(__a, __b); 1512336815Sdim} 1513336815Sdim__DEVICE__ float copysignf(float __a, float __b) { 1514336815Sdim return __nv_copysignf(__a, __b); 1515336815Sdim} 1516336815Sdim__DEVICE__ double cos(double __a) { return __nv_cos(__a); } 1517336815Sdim__DEVICE__ float cosf(float __a) { 1518336815Sdim return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a); 1519336815Sdim} 1520336815Sdim__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); } 1521336815Sdim__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); } 1522336815Sdim__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); } 1523336815Sdim__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); } 1524336815Sdim__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); } 1525336815Sdim__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); } 1526336815Sdim__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); } 1527336815Sdim__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); } 1528336815Sdim__DEVICE__ double erf(double __a) { return __nv_erf(__a); } 1529336815Sdim__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); } 1530336815Sdim__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); } 1531336815Sdim__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); } 1532336815Sdim__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); } 1533336815Sdim__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); } 1534336815Sdim__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); } 1535336815Sdim__DEVICE__ float erff(float __a) { return __nv_erff(__a); } 1536336815Sdim__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); } 1537336815Sdim__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); } 1538336815Sdim__DEVICE__ double exp(double __a) { return __nv_exp(__a); } 1539336815Sdim__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); } 1540336815Sdim__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); } 1541336815Sdim__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); } 1542336815Sdim__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); } 1543336815Sdim__DEVICE__ float expf(float __a) { return __nv_expf(__a); } 1544336815Sdim__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); } 1545336815Sdim__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); } 1546336815Sdim__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); } 1547336815Sdim__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); } 1548336815Sdim__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); } 1549336815Sdim__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; } 1550336815Sdim__DEVICE__ float fdividef(float __a, float __b) { 1551336815Sdim#if __FAST_MATH__ && !__CUDA_PREC_DIV 1552336815Sdim return __nv_fast_fdividef(__a, __b); 1553336815Sdim#else 1554336815Sdim return __a / __b; 1555336815Sdim#endif 1556336815Sdim} 1557336815Sdim__DEVICE__ double floor(double __f) { return __nv_floor(__f); } 1558336815Sdim__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); } 1559336815Sdim__DEVICE__ double fma(double __a, double __b, double __c) { 1560336815Sdim return __nv_fma(__a, __b, __c); 1561336815Sdim} 1562336815Sdim__DEVICE__ float fmaf(float __a, float __b, float __c) { 1563336815Sdim return __nv_fmaf(__a, __b, __c); 1564336815Sdim} 1565336815Sdim__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); } 1566336815Sdim__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); } 1567336815Sdim__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); } 1568336815Sdim__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); } 1569336815Sdim__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); } 1570336815Sdim__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); } 1571336815Sdim__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); } 1572336815Sdim__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); } 1573336815Sdim__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); } 1574336815Sdim__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); } 1575336815Sdim__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); } 1576336815Sdim__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); } 1577336815Sdim__DEVICE__ double j0(double __a) { return __nv_j0(__a); } 1578336815Sdim__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); } 1579336815Sdim__DEVICE__ double j1(double __a) { return __nv_j1(__a); } 1580336815Sdim__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); } 1581336815Sdim__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); } 1582336815Sdim__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); } 1583353358Sdim#if defined(__LP64__) || defined(_WIN64) 1584353358Sdim__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); }; 1585336815Sdim#else 1586353358Sdim__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); }; 1587336815Sdim#endif 1588336815Sdim__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); } 1589336815Sdim__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); } 1590336815Sdim__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); } 1591336815Sdim__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); } 1592353358Sdim__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); } 1593336815Sdim__DEVICE__ long long llmax(long long __a, long long __b) { 1594336815Sdim return __nv_llmax(__a, __b); 1595336815Sdim} 1596336815Sdim__DEVICE__ long long llmin(long long __a, long long __b) { 1597336815Sdim return __nv_llmin(__a, __b); 1598336815Sdim} 1599336815Sdim__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); } 1600336815Sdim__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); } 1601336815Sdim__DEVICE__ long long llround(double __a) { return __nv_llround(__a); } 1602336815Sdim__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); } 1603336815Sdim__DEVICE__ double log(double __a) { return __nv_log(__a); } 1604336815Sdim__DEVICE__ double log10(double __a) { return __nv_log10(__a); } 1605336815Sdim__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); } 1606336815Sdim__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); } 1607336815Sdim__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); } 1608336815Sdim__DEVICE__ double log2(double __a) { return __nv_log2(__a); } 1609336815Sdim__DEVICE__ float log2f(float __a) { 1610336815Sdim return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a); 1611336815Sdim} 1612336815Sdim__DEVICE__ double logb(double __a) { return __nv_logb(__a); } 1613336815Sdim__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); } 1614336815Sdim__DEVICE__ float logf(float __a) { 1615336815Sdim return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a); 1616336815Sdim} 1617353358Sdim#if defined(__LP64__) || defined(_WIN64) 1618336815Sdim__DEVICE__ long lrint(double __a) { return llrint(__a); } 1619336815Sdim__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); } 1620336815Sdim__DEVICE__ long lround(double __a) { return llround(__a); } 1621336815Sdim__DEVICE__ long lroundf(float __a) { return llroundf(__a); } 1622336815Sdim#else 1623336815Sdim__DEVICE__ long lrint(double __a) { return (long)rint(__a); } 1624336815Sdim__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); } 1625336815Sdim__DEVICE__ long lround(double __a) { return round(__a); } 1626336815Sdim__DEVICE__ long lroundf(float __a) { return roundf(__a); } 1627336815Sdim#endif 1628336815Sdim__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); } 1629353358Sdim// These functions shouldn't be declared when including this header 1630353358Sdim// for math function resolution purposes. 1631353358Sdim#ifndef _OPENMP 1632336815Sdim__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) { 1633336815Sdim return __builtin_memcpy(__a, __b, __c); 1634336815Sdim} 1635336815Sdim__DEVICE__ void *memset(void *__a, int __b, size_t __c) { 1636336815Sdim return __builtin_memset(__a, __b, __c); 1637336815Sdim} 1638353358Sdim#endif 1639336815Sdim__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); } 1640336815Sdim__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); } 1641336815Sdim__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); } 1642336815Sdim__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); } 1643336815Sdim__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); } 1644336815Sdim__DEVICE__ double nextafter(double __a, double __b) { 1645336815Sdim return __nv_nextafter(__a, __b); 1646336815Sdim} 1647336815Sdim__DEVICE__ float nextafterf(float __a, float __b) { 1648336815Sdim return __nv_nextafterf(__a, __b); 1649336815Sdim} 1650336815Sdim__DEVICE__ double norm(int __dim, const double *__t) { 1651336815Sdim return __nv_norm(__dim, __t); 1652336815Sdim} 1653336815Sdim__DEVICE__ double norm3d(double __a, double __b, double __c) { 1654336815Sdim return __nv_norm3d(__a, __b, __c); 1655336815Sdim} 1656336815Sdim__DEVICE__ float norm3df(float __a, float __b, float __c) { 1657336815Sdim return __nv_norm3df(__a, __b, __c); 1658336815Sdim} 1659336815Sdim__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) { 1660336815Sdim return __nv_norm4d(__a, __b, __c, __d); 1661336815Sdim} 1662336815Sdim__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) { 1663336815Sdim return __nv_norm4df(__a, __b, __c, __d); 1664336815Sdim} 1665336815Sdim__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); } 1666336815Sdim__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); } 1667336815Sdim__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); } 1668336815Sdim__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); } 1669336815Sdim__DEVICE__ float normf(int __dim, const float *__t) { 1670336815Sdim return __nv_normf(__dim, __t); 1671336815Sdim} 1672336815Sdim__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); } 1673336815Sdim__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); } 1674336815Sdim__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); } 1675336815Sdim__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); } 1676336815Sdim__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); } 1677336815Sdim__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); } 1678336815Sdim__DEVICE__ double remainder(double __a, double __b) { 1679336815Sdim return __nv_remainder(__a, __b); 1680336815Sdim} 1681336815Sdim__DEVICE__ float remainderf(float __a, float __b) { 1682336815Sdim return __nv_remainderf(__a, __b); 1683336815Sdim} 1684336815Sdim__DEVICE__ double remquo(double __a, double __b, int *__c) { 1685336815Sdim return __nv_remquo(__a, __b, __c); 1686336815Sdim} 1687336815Sdim__DEVICE__ float remquof(float __a, float __b, int *__c) { 1688336815Sdim return __nv_remquof(__a, __b, __c); 1689336815Sdim} 1690336815Sdim__DEVICE__ double rhypot(double __a, double __b) { 1691336815Sdim return __nv_rhypot(__a, __b); 1692336815Sdim} 1693336815Sdim__DEVICE__ float rhypotf(float __a, float __b) { 1694336815Sdim return __nv_rhypotf(__a, __b); 1695336815Sdim} 1696336815Sdim__DEVICE__ double rint(double __a) { return __nv_rint(__a); } 1697336815Sdim__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); } 1698336815Sdim__DEVICE__ double rnorm(int __a, const double *__b) { 1699336815Sdim return __nv_rnorm(__a, __b); 1700336815Sdim} 1701336815Sdim__DEVICE__ double rnorm3d(double __a, double __b, double __c) { 1702336815Sdim return __nv_rnorm3d(__a, __b, __c); 1703336815Sdim} 1704336815Sdim__DEVICE__ float rnorm3df(float __a, float __b, float __c) { 1705336815Sdim return __nv_rnorm3df(__a, __b, __c); 1706336815Sdim} 1707336815Sdim__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) { 1708336815Sdim return __nv_rnorm4d(__a, __b, __c, __d); 1709336815Sdim} 1710336815Sdim__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) { 1711336815Sdim return __nv_rnorm4df(__a, __b, __c, __d); 1712336815Sdim} 1713336815Sdim__DEVICE__ float rnormf(int __dim, const float *__t) { 1714336815Sdim return __nv_rnormf(__dim, __t); 1715336815Sdim} 1716336815Sdim__DEVICE__ double round(double __a) { return __nv_round(__a); } 1717336815Sdim__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); } 1718336815Sdim__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); } 1719336815Sdim__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } 1720336815Sdim__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } 1721336815Sdim__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); } 1722353358Sdim// TODO: remove once variant is supported 1723353358Sdim#ifndef _OPENMP 1724336815Sdim__DEVICE__ double scalbln(double __a, long __b) { 1725336815Sdim if (__b > INT_MAX) 1726336815Sdim return __a > 0 ? HUGE_VAL : -HUGE_VAL; 1727336815Sdim if (__b < INT_MIN) 1728336815Sdim return __a > 0 ? 0.0 : -0.0; 1729336815Sdim return scalbn(__a, (int)__b); 1730336815Sdim} 1731336815Sdim__DEVICE__ float scalblnf(float __a, long __b) { 1732336815Sdim if (__b > INT_MAX) 1733336815Sdim return __a > 0 ? HUGE_VALF : -HUGE_VALF; 1734336815Sdim if (__b < INT_MIN) 1735336815Sdim return __a > 0 ? 0.f : -0.f; 1736336815Sdim return scalbnf(__a, (int)__b); 1737336815Sdim} 1738353358Sdim#endif 1739336815Sdim__DEVICE__ double sin(double __a) { return __nv_sin(__a); } 1740353358Sdim__DEVICE__ void sincos(double __a, double *__s, double *__c) { 1741353358Sdim return __nv_sincos(__a, __s, __c); 1742336815Sdim} 1743353358Sdim__DEVICE__ void sincosf(float __a, float *__s, float *__c) { 1744353358Sdim return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c); 1745336815Sdim} 1746353358Sdim__DEVICE__ void sincospi(double __a, double *__s, double *__c) { 1747353358Sdim return __nv_sincospi(__a, __s, __c); 1748336815Sdim} 1749353358Sdim__DEVICE__ void sincospif(float __a, float *__s, float *__c) { 1750353358Sdim return __nv_sincospif(__a, __s, __c); 1751336815Sdim} 1752336815Sdim__DEVICE__ float sinf(float __a) { 1753336815Sdim return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a); 1754336815Sdim} 1755336815Sdim__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); } 1756336815Sdim__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); } 1757336815Sdim__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); } 1758336815Sdim__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); } 1759336815Sdim__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); } 1760336815Sdim__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); } 1761336815Sdim__DEVICE__ double tan(double __a) { return __nv_tan(__a); } 1762336815Sdim__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); } 1763336815Sdim__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); } 1764336815Sdim__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); } 1765336815Sdim__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); } 1766336815Sdim__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); } 1767336815Sdim__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); } 1768336815Sdim__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); } 1769336815Sdim__DEVICE__ unsigned long long ullmax(unsigned long long __a, 1770336815Sdim unsigned long long __b) { 1771336815Sdim return __nv_ullmax(__a, __b); 1772336815Sdim} 1773336815Sdim__DEVICE__ unsigned long long ullmin(unsigned long long __a, 1774336815Sdim unsigned long long __b) { 1775336815Sdim return __nv_ullmin(__a, __b); 1776336815Sdim} 1777336815Sdim__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) { 1778336815Sdim return __nv_umax(__a, __b); 1779336815Sdim} 1780336815Sdim__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) { 1781336815Sdim return __nv_umin(__a, __b); 1782336815Sdim} 1783336815Sdim__DEVICE__ double y0(double __a) { return __nv_y0(__a); } 1784336815Sdim__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); } 1785336815Sdim__DEVICE__ double y1(double __a) { return __nv_y1(__a); } 1786336815Sdim__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); } 1787336815Sdim__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); } 1788336815Sdim__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } 1789336815Sdim 1790353358Sdim#undef __NOEXCEPT 1791336815Sdim#pragma pop_macro("__DEVICE__") 1792336815Sdim#pragma pop_macro("__FAST_OR_SLOW") 1793336815Sdim#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__ 1794