1274958Sdim/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------=== 2274958Sdim * 3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim * See https://llvm.org/LICENSE.txt for license information. 5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6274958Sdim * 7274958Sdim *===-----------------------------------------------------------------------=== 8274958Sdim */ 9274958Sdim 10274958Sdim#ifndef __ARM_ACLE_H 11274958Sdim#define __ARM_ACLE_H 12274958Sdim 13274958Sdim#ifndef __ARM_ACLE 14274958Sdim#error "ACLE intrinsics support not enabled." 15274958Sdim#endif 16274958Sdim 17274958Sdim#include <stdint.h> 18274958Sdim 19274958Sdim#if defined(__cplusplus) 20274958Sdimextern "C" { 21274958Sdim#endif 22274958Sdim 23274958Sdim/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */ 24274958Sdim/* 8.3 Memory barriers */ 25274958Sdim#if !defined(_MSC_VER) 26274958Sdim#define __dmb(i) __builtin_arm_dmb(i) 27274958Sdim#define __dsb(i) __builtin_arm_dsb(i) 28274958Sdim#define __isb(i) __builtin_arm_isb(i) 29274958Sdim#endif 30274958Sdim 31274958Sdim/* 8.4 Hints */ 32274958Sdim 33274958Sdim#if !defined(_MSC_VER) 34288943Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) { 35274958Sdim __builtin_arm_wfi(); 36274958Sdim} 37274958Sdim 38288943Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) { 39274958Sdim __builtin_arm_wfe(); 40274958Sdim} 41274958Sdim 42288943Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) { 43274958Sdim __builtin_arm_sev(); 44274958Sdim} 45274958Sdim 46288943Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) { 47274958Sdim __builtin_arm_sevl(); 48274958Sdim} 49274958Sdim 50288943Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) { 51274958Sdim __builtin_arm_yield(); 52274958Sdim} 53274958Sdim#endif 54274958Sdim 55280031Sdim#if __ARM_32BIT_STATE 56280031Sdim#define __dbg(t) __builtin_arm_dbg(t) 57280031Sdim#endif 58280031Sdim 59280031Sdim/* 8.5 Swap */ 60288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 61309124Sdim__swp(uint32_t __x, volatile uint32_t *__p) { 62280031Sdim uint32_t v; 63309124Sdim do 64309124Sdim v = __builtin_arm_ldrex(__p); 65309124Sdim while (__builtin_arm_strex(__x, __p)); 66280031Sdim return v; 67280031Sdim} 68280031Sdim 69280031Sdim/* 8.6 Memory prefetch intrinsics */ 70280031Sdim/* 8.6.1 Data prefetch */ 71280031Sdim#define __pld(addr) __pldx(0, 0, 0, addr) 72280031Sdim 73280031Sdim#if __ARM_32BIT_STATE 74280031Sdim#define __pldx(access_kind, cache_level, retention_policy, addr) \ 75280031Sdim __builtin_arm_prefetch(addr, access_kind, 1) 76280031Sdim#else 77280031Sdim#define __pldx(access_kind, cache_level, retention_policy, addr) \ 78280031Sdim __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) 79280031Sdim#endif 80280031Sdim 81280031Sdim/* 8.6.2 Instruction prefetch */ 82280031Sdim#define __pli(addr) __plix(0, 0, addr) 83280031Sdim 84280031Sdim#if __ARM_32BIT_STATE 85280031Sdim#define __plix(cache_level, retention_policy, addr) \ 86280031Sdim __builtin_arm_prefetch(addr, 0, 0) 87280031Sdim#else 88280031Sdim#define __plix(cache_level, retention_policy, addr) \ 89280031Sdim __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0) 90280031Sdim#endif 91280031Sdim 92274958Sdim/* 8.7 NOP */ 93360784Sdim#if !defined(_MSC_VER) || !defined(__aarch64__) 94288943Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) { 95274958Sdim __builtin_arm_nop(); 96274958Sdim} 97360784Sdim#endif 98274958Sdim 99274958Sdim/* 9 DATA-PROCESSING INTRINSICS */ 100274958Sdim/* 9.2 Miscellaneous data-processing intrinsics */ 101280031Sdim/* ROR */ 102288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 103309124Sdim__ror(uint32_t __x, uint32_t __y) { 104309124Sdim __y %= 32; 105309124Sdim if (__y == 0) 106309124Sdim return __x; 107309124Sdim return (__x >> __y) | (__x << (32 - __y)); 108280031Sdim} 109280031Sdim 110288943Sdimstatic __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) 111309124Sdim__rorll(uint64_t __x, uint32_t __y) { 112309124Sdim __y %= 64; 113309124Sdim if (__y == 0) 114309124Sdim return __x; 115309124Sdim return (__x >> __y) | (__x << (64 - __y)); 116280031Sdim} 117280031Sdim 118288943Sdimstatic __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 119309124Sdim__rorl(unsigned long __x, uint32_t __y) { 120280031Sdim#if __SIZEOF_LONG__ == 4 121309124Sdim return __ror(__x, __y); 122280031Sdim#else 123309124Sdim return __rorll(__x, __y); 124280031Sdim#endif 125280031Sdim} 126280031Sdim 127280031Sdim 128280031Sdim/* CLZ */ 129288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 130309124Sdim__clz(uint32_t __t) { 131309124Sdim return __builtin_clz(__t); 132274958Sdim} 133274958Sdim 134288943Sdimstatic __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 135309124Sdim__clzl(unsigned long __t) { 136309124Sdim return __builtin_clzl(__t); 137274958Sdim} 138274958Sdim 139288943Sdimstatic __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) 140309124Sdim__clzll(uint64_t __t) { 141309124Sdim return __builtin_clzll(__t); 142274958Sdim} 143274958Sdim 144360784Sdim/* CLS */ 145360784Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 146360784Sdim__cls(uint32_t __t) { 147360784Sdim return __builtin_arm_cls(__t); 148360784Sdim} 149360784Sdim 150360784Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 151360784Sdim__clsl(unsigned long __t) { 152360784Sdim#if __SIZEOF_LONG__ == 4 153360784Sdim return __builtin_arm_cls(__t); 154360784Sdim#else 155360784Sdim return __builtin_arm_cls64(__t); 156360784Sdim#endif 157360784Sdim} 158360784Sdim 159360784Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 160360784Sdim__clsll(uint64_t __t) { 161360784Sdim return __builtin_arm_cls64(__t); 162360784Sdim} 163360784Sdim 164280031Sdim/* REV */ 165288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 166309124Sdim__rev(uint32_t __t) { 167309124Sdim return __builtin_bswap32(__t); 168274958Sdim} 169274958Sdim 170288943Sdimstatic __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 171309124Sdim__revl(unsigned long __t) { 172274958Sdim#if __SIZEOF_LONG__ == 4 173309124Sdim return __builtin_bswap32(__t); 174274958Sdim#else 175309124Sdim return __builtin_bswap64(__t); 176274958Sdim#endif 177274958Sdim} 178274958Sdim 179288943Sdimstatic __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) 180309124Sdim__revll(uint64_t __t) { 181309124Sdim return __builtin_bswap64(__t); 182274958Sdim} 183274958Sdim 184280031Sdim/* REV16 */ 185288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 186309124Sdim__rev16(uint32_t __t) { 187309124Sdim return __ror(__rev(__t), 16); 188280031Sdim} 189280031Sdim 190296417Sdimstatic __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) 191309124Sdim__rev16ll(uint64_t __t) { 192309124Sdim return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t); 193296417Sdim} 194296417Sdim 195288943Sdimstatic __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 196309124Sdim__rev16l(unsigned long __t) { 197296417Sdim#if __SIZEOF_LONG__ == 4 198309124Sdim return __rev16(__t); 199296417Sdim#else 200309124Sdim return __rev16ll(__t); 201296417Sdim#endif 202280031Sdim} 203280031Sdim 204280031Sdim/* REVSH */ 205288943Sdimstatic __inline__ int16_t __attribute__((__always_inline__, __nodebug__)) 206309124Sdim__revsh(int16_t __t) { 207309124Sdim return __builtin_bswap16(__t); 208280031Sdim} 209280031Sdim 210280031Sdim/* RBIT */ 211288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 212309124Sdim__rbit(uint32_t __t) { 213309124Sdim return __builtin_arm_rbit(__t); 214280031Sdim} 215280031Sdim 216288943Sdimstatic __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) 217309124Sdim__rbitll(uint64_t __t) { 218280031Sdim#if __ARM_32BIT_STATE 219309124Sdim return (((uint64_t)__builtin_arm_rbit(__t)) << 32) | 220309124Sdim __builtin_arm_rbit(__t >> 32); 221280031Sdim#else 222309124Sdim return __builtin_arm_rbit64(__t); 223280031Sdim#endif 224280031Sdim} 225280031Sdim 226288943Sdimstatic __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 227309124Sdim__rbitl(unsigned long __t) { 228280031Sdim#if __SIZEOF_LONG__ == 4 229309124Sdim return __rbit(__t); 230280031Sdim#else 231309124Sdim return __rbitll(__t); 232280031Sdim#endif 233280031Sdim} 234280031Sdim 235274958Sdim/* 236321369Sdim * 9.3 16-bit multiplications 237321369Sdim */ 238321369Sdim#if __ARM_FEATURE_DSP 239321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) 240321369Sdim__smulbb(int32_t __a, int32_t __b) { 241321369Sdim return __builtin_arm_smulbb(__a, __b); 242321369Sdim} 243321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) 244321369Sdim__smulbt(int32_t __a, int32_t __b) { 245321369Sdim return __builtin_arm_smulbt(__a, __b); 246321369Sdim} 247321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) 248321369Sdim__smultb(int32_t __a, int32_t __b) { 249321369Sdim return __builtin_arm_smultb(__a, __b); 250321369Sdim} 251321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) 252321369Sdim__smultt(int32_t __a, int32_t __b) { 253321369Sdim return __builtin_arm_smultt(__a, __b); 254321369Sdim} 255321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) 256321369Sdim__smulwb(int32_t __a, int32_t __b) { 257321369Sdim return __builtin_arm_smulwb(__a, __b); 258321369Sdim} 259321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) 260321369Sdim__smulwt(int32_t __a, int32_t __b) { 261321369Sdim return __builtin_arm_smulwt(__a, __b); 262321369Sdim} 263321369Sdim#endif 264321369Sdim 265321369Sdim/* 266274958Sdim * 9.4 Saturating intrinsics 267274958Sdim * 268274958Sdim * FIXME: Change guard to their corrosponding __ARM_FEATURE flag when Q flag 269274958Sdim * intrinsics are implemented and the flag is enabled. 270274958Sdim */ 271274958Sdim/* 9.4.1 Width-specified saturation intrinsics */ 272321369Sdim#if __ARM_FEATURE_SAT 273274958Sdim#define __ssat(x, y) __builtin_arm_ssat(x, y) 274274958Sdim#define __usat(x, y) __builtin_arm_usat(x, y) 275274958Sdim#endif 276274958Sdim 277274958Sdim/* 9.4.2 Saturating addition and subtraction intrinsics */ 278321369Sdim#if __ARM_FEATURE_DSP 279288943Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 280309124Sdim__qadd(int32_t __t, int32_t __v) { 281309124Sdim return __builtin_arm_qadd(__t, __v); 282274958Sdim} 283274958Sdim 284288943Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 285309124Sdim__qsub(int32_t __t, int32_t __v) { 286309124Sdim return __builtin_arm_qsub(__t, __v); 287274958Sdim} 288274958Sdim 289288943Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 290309124Sdim__qdbl(int32_t __t) { 291309124Sdim return __builtin_arm_qadd(__t, __t); 292274958Sdim} 293274958Sdim#endif 294274958Sdim 295321369Sdim/* 9.4.3 Accumultating multiplications */ 296321369Sdim#if __ARM_FEATURE_DSP 297321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 298321369Sdim__smlabb(int32_t __a, int32_t __b, int32_t __c) { 299321369Sdim return __builtin_arm_smlabb(__a, __b, __c); 300321369Sdim} 301321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 302321369Sdim__smlabt(int32_t __a, int32_t __b, int32_t __c) { 303321369Sdim return __builtin_arm_smlabt(__a, __b, __c); 304321369Sdim} 305321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 306321369Sdim__smlatb(int32_t __a, int32_t __b, int32_t __c) { 307321369Sdim return __builtin_arm_smlatb(__a, __b, __c); 308321369Sdim} 309321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 310321369Sdim__smlatt(int32_t __a, int32_t __b, int32_t __c) { 311321369Sdim return __builtin_arm_smlatt(__a, __b, __c); 312321369Sdim} 313321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 314321369Sdim__smlawb(int32_t __a, int32_t __b, int32_t __c) { 315321369Sdim return __builtin_arm_smlawb(__a, __b, __c); 316321369Sdim} 317321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 318321369Sdim__smlawt(int32_t __a, int32_t __b, int32_t __c) { 319321369Sdim return __builtin_arm_smlawt(__a, __b, __c); 320321369Sdim} 321321369Sdim#endif 322321369Sdim 323321369Sdim 324321369Sdim/* 9.5.4 Parallel 16-bit saturation */ 325321369Sdim#if __ARM_FEATURE_SIMD32 326321369Sdim#define __ssat16(x, y) __builtin_arm_ssat16(x, y) 327321369Sdim#define __usat16(x, y) __builtin_arm_usat16(x, y) 328321369Sdim#endif 329321369Sdim 330321369Sdim/* 9.5.5 Packing and unpacking */ 331321369Sdim#if __ARM_FEATURE_SIMD32 332321369Sdimtypedef int32_t int8x4_t; 333321369Sdimtypedef int32_t int16x2_t; 334321369Sdimtypedef uint32_t uint8x4_t; 335321369Sdimtypedef uint32_t uint16x2_t; 336321369Sdim 337321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 338321369Sdim__sxtab16(int16x2_t __a, int8x4_t __b) { 339321369Sdim return __builtin_arm_sxtab16(__a, __b); 340321369Sdim} 341321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 342321369Sdim__sxtb16(int8x4_t __a) { 343321369Sdim return __builtin_arm_sxtb16(__a); 344321369Sdim} 345321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 346321369Sdim__uxtab16(int16x2_t __a, int8x4_t __b) { 347321369Sdim return __builtin_arm_uxtab16(__a, __b); 348321369Sdim} 349321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 350321369Sdim__uxtb16(int8x4_t __a) { 351321369Sdim return __builtin_arm_uxtb16(__a); 352321369Sdim} 353321369Sdim#endif 354321369Sdim 355321369Sdim/* 9.5.6 Parallel selection */ 356321369Sdim#if __ARM_FEATURE_SIMD32 357321369Sdimstatic __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) 358321369Sdim__sel(uint8x4_t __a, uint8x4_t __b) { 359321369Sdim return __builtin_arm_sel(__a, __b); 360321369Sdim} 361321369Sdim#endif 362321369Sdim 363321369Sdim/* 9.5.7 Parallel 8-bit addition and subtraction */ 364321369Sdim#if __ARM_FEATURE_SIMD32 365321369Sdimstatic __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) 366321369Sdim__qadd8(int8x4_t __a, int8x4_t __b) { 367321369Sdim return __builtin_arm_qadd8(__a, __b); 368321369Sdim} 369321369Sdimstatic __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) 370321369Sdim__qsub8(int8x4_t __a, int8x4_t __b) { 371321369Sdim return __builtin_arm_qsub8(__a, __b); 372321369Sdim} 373321369Sdimstatic __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) 374321369Sdim__sadd8(int8x4_t __a, int8x4_t __b) { 375321369Sdim return __builtin_arm_sadd8(__a, __b); 376321369Sdim} 377321369Sdimstatic __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) 378321369Sdim__shadd8(int8x4_t __a, int8x4_t __b) { 379321369Sdim return __builtin_arm_shadd8(__a, __b); 380321369Sdim} 381321369Sdimstatic __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) 382321369Sdim__shsub8(int8x4_t __a, int8x4_t __b) { 383321369Sdim return __builtin_arm_shsub8(__a, __b); 384321369Sdim} 385321369Sdimstatic __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) 386321369Sdim__ssub8(int8x4_t __a, int8x4_t __b) { 387321369Sdim return __builtin_arm_ssub8(__a, __b); 388321369Sdim} 389321369Sdimstatic __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) 390321369Sdim__uadd8(uint8x4_t __a, uint8x4_t __b) { 391321369Sdim return __builtin_arm_uadd8(__a, __b); 392321369Sdim} 393321369Sdimstatic __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) 394321369Sdim__uhadd8(uint8x4_t __a, uint8x4_t __b) { 395321369Sdim return __builtin_arm_uhadd8(__a, __b); 396321369Sdim} 397321369Sdimstatic __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) 398321369Sdim__uhsub8(uint8x4_t __a, uint8x4_t __b) { 399321369Sdim return __builtin_arm_uhsub8(__a, __b); 400321369Sdim} 401321369Sdimstatic __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) 402321369Sdim__uqadd8(uint8x4_t __a, uint8x4_t __b) { 403321369Sdim return __builtin_arm_uqadd8(__a, __b); 404321369Sdim} 405321369Sdimstatic __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) 406321369Sdim__uqsub8(uint8x4_t __a, uint8x4_t __b) { 407321369Sdim return __builtin_arm_uqsub8(__a, __b); 408321369Sdim} 409321369Sdimstatic __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) 410321369Sdim__usub8(uint8x4_t __a, uint8x4_t __b) { 411321369Sdim return __builtin_arm_usub8(__a, __b); 412321369Sdim} 413321369Sdim#endif 414321369Sdim 415321369Sdim/* 9.5.8 Sum of 8-bit absolute differences */ 416321369Sdim#if __ARM_FEATURE_SIMD32 417321369Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 418321369Sdim__usad8(uint8x4_t __a, uint8x4_t __b) { 419321369Sdim return __builtin_arm_usad8(__a, __b); 420321369Sdim} 421321369Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 422321369Sdim__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) { 423321369Sdim return __builtin_arm_usada8(__a, __b, __c); 424321369Sdim} 425321369Sdim#endif 426321369Sdim 427321369Sdim/* 9.5.9 Parallel 16-bit addition and subtraction */ 428321369Sdim#if __ARM_FEATURE_SIMD32 429321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 430321369Sdim__qadd16(int16x2_t __a, int16x2_t __b) { 431321369Sdim return __builtin_arm_qadd16(__a, __b); 432321369Sdim} 433321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 434321369Sdim__qasx(int16x2_t __a, int16x2_t __b) { 435321369Sdim return __builtin_arm_qasx(__a, __b); 436321369Sdim} 437321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 438321369Sdim__qsax(int16x2_t __a, int16x2_t __b) { 439321369Sdim return __builtin_arm_qsax(__a, __b); 440321369Sdim} 441321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 442321369Sdim__qsub16(int16x2_t __a, int16x2_t __b) { 443321369Sdim return __builtin_arm_qsub16(__a, __b); 444321369Sdim} 445321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 446321369Sdim__sadd16(int16x2_t __a, int16x2_t __b) { 447321369Sdim return __builtin_arm_sadd16(__a, __b); 448321369Sdim} 449321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 450321369Sdim__sasx(int16x2_t __a, int16x2_t __b) { 451321369Sdim return __builtin_arm_sasx(__a, __b); 452321369Sdim} 453321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 454321369Sdim__shadd16(int16x2_t __a, int16x2_t __b) { 455321369Sdim return __builtin_arm_shadd16(__a, __b); 456321369Sdim} 457321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 458321369Sdim__shasx(int16x2_t __a, int16x2_t __b) { 459321369Sdim return __builtin_arm_shasx(__a, __b); 460321369Sdim} 461321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 462321369Sdim__shsax(int16x2_t __a, int16x2_t __b) { 463321369Sdim return __builtin_arm_shsax(__a, __b); 464321369Sdim} 465321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 466321369Sdim__shsub16(int16x2_t __a, int16x2_t __b) { 467321369Sdim return __builtin_arm_shsub16(__a, __b); 468321369Sdim} 469321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 470321369Sdim__ssax(int16x2_t __a, int16x2_t __b) { 471321369Sdim return __builtin_arm_ssax(__a, __b); 472321369Sdim} 473321369Sdimstatic __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 474321369Sdim__ssub16(int16x2_t __a, int16x2_t __b) { 475321369Sdim return __builtin_arm_ssub16(__a, __b); 476321369Sdim} 477321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 478321369Sdim__uadd16(uint16x2_t __a, uint16x2_t __b) { 479321369Sdim return __builtin_arm_uadd16(__a, __b); 480321369Sdim} 481321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 482321369Sdim__uasx(uint16x2_t __a, uint16x2_t __b) { 483321369Sdim return __builtin_arm_uasx(__a, __b); 484321369Sdim} 485321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 486321369Sdim__uhadd16(uint16x2_t __a, uint16x2_t __b) { 487321369Sdim return __builtin_arm_uhadd16(__a, __b); 488321369Sdim} 489321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 490321369Sdim__uhasx(uint16x2_t __a, uint16x2_t __b) { 491321369Sdim return __builtin_arm_uhasx(__a, __b); 492321369Sdim} 493321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 494321369Sdim__uhsax(uint16x2_t __a, uint16x2_t __b) { 495321369Sdim return __builtin_arm_uhsax(__a, __b); 496321369Sdim} 497321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 498321369Sdim__uhsub16(uint16x2_t __a, uint16x2_t __b) { 499321369Sdim return __builtin_arm_uhsub16(__a, __b); 500321369Sdim} 501321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 502321369Sdim__uqadd16(uint16x2_t __a, uint16x2_t __b) { 503321369Sdim return __builtin_arm_uqadd16(__a, __b); 504321369Sdim} 505321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 506321369Sdim__uqasx(uint16x2_t __a, uint16x2_t __b) { 507321369Sdim return __builtin_arm_uqasx(__a, __b); 508321369Sdim} 509321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 510321369Sdim__uqsax(uint16x2_t __a, uint16x2_t __b) { 511321369Sdim return __builtin_arm_uqsax(__a, __b); 512321369Sdim} 513321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 514321369Sdim__uqsub16(uint16x2_t __a, uint16x2_t __b) { 515321369Sdim return __builtin_arm_uqsub16(__a, __b); 516321369Sdim} 517321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 518321369Sdim__usax(uint16x2_t __a, uint16x2_t __b) { 519321369Sdim return __builtin_arm_usax(__a, __b); 520321369Sdim} 521321369Sdimstatic __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) 522321369Sdim__usub16(uint16x2_t __a, uint16x2_t __b) { 523321369Sdim return __builtin_arm_usub16(__a, __b); 524321369Sdim} 525321369Sdim#endif 526321369Sdim 527321369Sdim/* 9.5.10 Parallel 16-bit multiplications */ 528321369Sdim#if __ARM_FEATURE_SIMD32 529321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 530321369Sdim__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) { 531321369Sdim return __builtin_arm_smlad(__a, __b, __c); 532321369Sdim} 533321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 534321369Sdim__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) { 535321369Sdim return __builtin_arm_smladx(__a, __b, __c); 536321369Sdim} 537321369Sdimstatic __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) 538321369Sdim__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) { 539321369Sdim return __builtin_arm_smlald(__a, __b, __c); 540321369Sdim} 541321369Sdimstatic __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) 542321369Sdim__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) { 543321369Sdim return __builtin_arm_smlaldx(__a, __b, __c); 544321369Sdim} 545321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 546321369Sdim__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) { 547321369Sdim return __builtin_arm_smlsd(__a, __b, __c); 548321369Sdim} 549321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 550321369Sdim__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) { 551321369Sdim return __builtin_arm_smlsdx(__a, __b, __c); 552321369Sdim} 553321369Sdimstatic __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) 554321369Sdim__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) { 555321369Sdim return __builtin_arm_smlsld(__a, __b, __c); 556321369Sdim} 557321369Sdimstatic __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) 558321369Sdim__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) { 559321369Sdim return __builtin_arm_smlsldx(__a, __b, __c); 560321369Sdim} 561321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 562321369Sdim__smuad(int16x2_t __a, int16x2_t __b) { 563321369Sdim return __builtin_arm_smuad(__a, __b); 564321369Sdim} 565321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 566321369Sdim__smuadx(int16x2_t __a, int16x2_t __b) { 567321369Sdim return __builtin_arm_smuadx(__a, __b); 568321369Sdim} 569321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 570321369Sdim__smusd(int16x2_t __a, int16x2_t __b) { 571321369Sdim return __builtin_arm_smusd(__a, __b); 572321369Sdim} 573321369Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 574321369Sdim__smusdx(int16x2_t __a, int16x2_t __b) { 575321369Sdim return __builtin_arm_smusdx(__a, __b); 576321369Sdim} 577321369Sdim#endif 578321369Sdim 579274958Sdim/* 9.7 CRC32 intrinsics */ 580274958Sdim#if __ARM_FEATURE_CRC32 581288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 582309124Sdim__crc32b(uint32_t __a, uint8_t __b) { 583309124Sdim return __builtin_arm_crc32b(__a, __b); 584274958Sdim} 585274958Sdim 586288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 587309124Sdim__crc32h(uint32_t __a, uint16_t __b) { 588309124Sdim return __builtin_arm_crc32h(__a, __b); 589274958Sdim} 590274958Sdim 591288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 592309124Sdim__crc32w(uint32_t __a, uint32_t __b) { 593309124Sdim return __builtin_arm_crc32w(__a, __b); 594274958Sdim} 595274958Sdim 596288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 597309124Sdim__crc32d(uint32_t __a, uint64_t __b) { 598309124Sdim return __builtin_arm_crc32d(__a, __b); 599274958Sdim} 600274958Sdim 601288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 602309124Sdim__crc32cb(uint32_t __a, uint8_t __b) { 603309124Sdim return __builtin_arm_crc32cb(__a, __b); 604274958Sdim} 605274958Sdim 606288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 607309124Sdim__crc32ch(uint32_t __a, uint16_t __b) { 608309124Sdim return __builtin_arm_crc32ch(__a, __b); 609274958Sdim} 610274958Sdim 611288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 612309124Sdim__crc32cw(uint32_t __a, uint32_t __b) { 613309124Sdim return __builtin_arm_crc32cw(__a, __b); 614274958Sdim} 615274958Sdim 616288943Sdimstatic __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 617309124Sdim__crc32cd(uint32_t __a, uint64_t __b) { 618309124Sdim return __builtin_arm_crc32cd(__a, __b); 619274958Sdim} 620274958Sdim#endif 621274958Sdim 622353358Sdim/* Armv8.3-A Javascript conversion intrinsic */ 623353358Sdim#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT) 624353358Sdimstatic __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) 625353358Sdim__jcvt(double __a) { 626353358Sdim return __builtin_arm_jcvt(__a); 627353358Sdim} 628353358Sdim#endif 629353358Sdim 630288943Sdim/* 10.1 Special register intrinsics */ 631288943Sdim#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) 632288943Sdim#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) 633288943Sdim#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg) 634360784Sdim#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg)) 635360784Sdim#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg)) 636288943Sdim#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v) 637288943Sdim#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v) 638288943Sdim#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v) 639360784Sdim#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v)) 640360784Sdim#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v)) 641288943Sdim 642360784Sdim/* Memory Tagging Extensions (MTE) Intrinsics */ 643353358Sdim#if __ARM_FEATURE_MEMORY_TAGGING 644353358Sdim#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask) 645353358Sdim#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset) 646353358Sdim#define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded) 647353358Sdim#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr) 648353358Sdim#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr) 649353358Sdim#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) 650353358Sdim#endif 651353358Sdim 652360784Sdim/* Transactional Memory Extension (TME) Intrinsics */ 653360784Sdim#if __ARM_FEATURE_TME 654360784Sdim 655360784Sdim#define _TMFAILURE_REASON 0x00007fffu 656360784Sdim#define _TMFAILURE_RTRY 0x00008000u 657360784Sdim#define _TMFAILURE_CNCL 0x00010000u 658360784Sdim#define _TMFAILURE_MEM 0x00020000u 659360784Sdim#define _TMFAILURE_IMP 0x00040000u 660360784Sdim#define _TMFAILURE_ERR 0x00080000u 661360784Sdim#define _TMFAILURE_SIZE 0x00100000u 662360784Sdim#define _TMFAILURE_NEST 0x00200000u 663360784Sdim#define _TMFAILURE_DBG 0x00400000u 664360784Sdim#define _TMFAILURE_INT 0x00800000u 665360784Sdim#define _TMFAILURE_TRIVIAL 0x01000000u 666360784Sdim 667360784Sdim#define __tstart() __builtin_arm_tstart() 668360784Sdim#define __tcommit() __builtin_arm_tcommit() 669360784Sdim#define __tcancel(__arg) __builtin_arm_tcancel(__arg) 670360784Sdim#define __ttest() __builtin_arm_ttest() 671360784Sdim 672360784Sdim#endif /* __ARM_FEATURE_TME */ 673360784Sdim 674274958Sdim#if defined(__cplusplus) 675274958Sdim} 676274958Sdim#endif 677274958Sdim 678274958Sdim#endif /* __ARM_ACLE_H */ 679