1274958Sdim/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
2274958Sdim *
3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim * See https://llvm.org/LICENSE.txt for license information.
5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6274958Sdim *
7274958Sdim *===-----------------------------------------------------------------------===
8274958Sdim */
9274958Sdim
10274958Sdim#ifndef __ARM_ACLE_H
11274958Sdim#define __ARM_ACLE_H
12274958Sdim
13274958Sdim#ifndef __ARM_ACLE
14274958Sdim#error "ACLE intrinsics support not enabled."
15274958Sdim#endif
16274958Sdim
17274958Sdim#include <stdint.h>
18274958Sdim
19274958Sdim#if defined(__cplusplus)
20274958Sdimextern "C" {
21274958Sdim#endif
22274958Sdim
23274958Sdim/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
24274958Sdim/* 8.3 Memory barriers */
#if !defined(_MSC_VER)
/*
 * Barrier intrinsics. Each macro hands its single argument, unchanged, to the
 * barrier builtin of the same name. None of them is defined when _MSC_VER is
 * defined.
 */
#define __dmb(__opt) __builtin_arm_dmb(__opt)
#define __dsb(__opt) __builtin_arm_dsb(__opt)
#define __isb(__opt) __builtin_arm_isb(__opt)
#endif
30274958Sdim
31274958Sdim/* 8.4 Hints */
32274958Sdim
#if !defined(_MSC_VER)
/*
 * Hint intrinsics. Every function in this group takes no arguments, returns
 * nothing, and its body is a single call to the like-named __builtin_arm_*
 * builtin. The whole group is skipped when _MSC_VER is defined.
 */

/* __wfi: calls __builtin_arm_wfi(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__wfi(void) {
  __builtin_arm_wfi();
}

/* __wfe: calls __builtin_arm_wfe(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__wfe(void) {
  __builtin_arm_wfe();
}

/* __sev: calls __builtin_arm_sev(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__sev(void) {
  __builtin_arm_sev();
}

/* __sevl: calls __builtin_arm_sevl(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__sevl(void) {
  __builtin_arm_sevl();
}

/* __yield: calls __builtin_arm_yield(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__yield(void) {
  __builtin_arm_yield();
}
#endif
54274958Sdim
/*
 * __dbg(t): forwards t to __builtin_arm_dbg. Only provided when
 * __ARM_32BIT_STATE is defined and non-zero; the explicit defined() test
 * keeps the guard clean under -Wundef when the macro is absent.
 */
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif
58280031Sdim
59280031Sdim/* 8.5 Swap */
/*
 * __swp: store __x to *__p and return the value previously loaded from *__p.
 *
 *   __x  value to store
 *   __p  location to exchange with
 *
 * The exchange is built from a __builtin_arm_ldrex / __builtin_arm_strex
 * pair. The pair is repeated for as long as __builtin_arm_strex returns a
 * non-zero status, so the value returned is the one read in the final
 * iteration.
 *
 * The temporary uses a reserved (double-underscore) name, like every other
 * identifier in this header, so that a user macro named `v` defined before
 * inclusion cannot break the function.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t __v;
  do {
    __v = __builtin_arm_ldrex(__p);
  } while (__builtin_arm_strex(__x, __p));
  return __v;
}
68280031Sdim
69280031Sdim/* 8.6 Memory prefetch intrinsics */
70280031Sdim/* 8.6.1 Data prefetch */
/*
 * __pld(addr) is __pldx with access kind, cache level and retention policy
 * all set to 0.
 *
 * __pldx expands to __builtin_arm_prefetch. When __ARM_32BIT_STATE is defined
 * and non-zero only addr and access_kind are passed on (cache_level and
 * retention_policy are dropped); otherwise all four are forwarded. Both forms
 * pass the constant 1 as the builtin's last argument.
 * NOTE(review): that trailing 1 presumably selects a data rather than an
 * instruction prefetch, since __plix passes 0 - confirm against the builtin.
 *
 * The defined() test keeps the guard clean under -Wundef when
 * __ARM_32BIT_STATE is not defined at all.
 */
#define __pld(addr) __pldx(0, 0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif
80280031Sdim
81280031Sdim/* 8.6.2 Instruction prefetch */
/*
 * __pli(addr) is __plix with cache level and retention policy both 0.
 *
 * __plix expands to __builtin_arm_prefetch. When __ARM_32BIT_STATE is defined
 * and non-zero the call is (addr, 0, 0) and both cache_level and
 * retention_policy are dropped; otherwise the call is
 * (addr, 0, cache_level, retention_policy, 0).
 *
 * The defined() test keeps the guard clean under -Wundef when
 * __ARM_32BIT_STATE is not defined at all.
 */
#define __pli(addr) __plix(0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif
91280031Sdim
92274958Sdim/* 8.7 NOP */
#if !defined(_MSC_VER) || !defined(__aarch64__)
/*
 * __nop: takes no arguments and returns nothing; the body is a single call
 * to __builtin_arm_nop(). Left out only when both _MSC_VER and __aarch64__
 * are defined.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__nop(void) {
  __builtin_arm_nop();
}
#endif
98274958Sdim
99274958Sdim/* 9 DATA-PROCESSING INTRINSICS */
100274958Sdim/* 9.2 Miscellaneous data-processing intrinsics */
101280031Sdim/* ROR */
/*
 * __ror: rotate the 32-bit value __x right by __y bit positions.
 *
 * The count is first reduced modulo 32, so any __y is accepted. A reduced
 * count of 0 returns __x untouched, which also means the left shift is never
 * performed with a shift amount of 32.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  const uint32_t __n = __y & 31u;
  return __n ? ((__x << (32u - __n)) | (__x >> __n)) : __x;
}
109280031Sdim
/*
 * __rorll: rotate the 64-bit value __x right by __y bit positions.
 *
 * The count is first reduced modulo 64. A reduced count of 0 returns __x
 * untouched, so the left shift is never performed with a shift amount of 64.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  const uint32_t __n = __y & 63u;
  return __n ? ((__x << (64u - __n)) | (__x >> __n)) : __x;
}
117280031Sdim
/*
 * __rorl: rotate an unsigned long right by __y bit positions.
 *
 * Pure dispatch on the size of long: __ror when __SIZEOF_LONG__ is 4,
 * __rorll for every other size.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ != 4
  return __rorll(__x, __y);
#else
  return __ror(__x, __y);
#endif
}
126280031Sdim
127280031Sdim
128280031Sdim/* CLZ */
/*
 * __clz: number of leading zero bits in the 32-bit value __t.
 *
 * __builtin_clz does not define its result for a zero operand in general, so
 * zero is handled here explicitly and yields the operand width, 32. Non-zero
 * inputs go to the builtin exactly as before.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __t ? (uint32_t)__builtin_clz(__t) : 32u;
}
133274958Sdim
/*
 * __clzl: number of leading zero bits in the unsigned long value __t.
 *
 * __builtin_clzl does not define its result for a zero operand in general,
 * so zero is handled here explicitly and yields the width of unsigned long
 * in bits (__SIZEOF_LONG__ * 8). Non-zero inputs go to the builtin.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  return __t ? (unsigned long)__builtin_clzl(__t)
             : (unsigned long)(__SIZEOF_LONG__ * 8);
}
138274958Sdim
/*
 * __clzll: number of leading zero bits in the 64-bit value __t.
 *
 * __builtin_clzll does not define its result for a zero operand in general,
 * so zero is handled here explicitly and yields the operand width, 64.
 * Non-zero inputs go to the builtin exactly as before.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __t ? (uint64_t)__builtin_clzll(__t) : 64u;
}
143274958Sdim
144360784Sdim/* CLS */
/*
 * __cls: passes the 32-bit value __t to __builtin_arm_cls and returns the
 * builtin's result as a uint32_t.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__cls(uint32_t __t) {
  const uint32_t __count = __builtin_arm_cls(__t);
  return __count;
}
149360784Sdim
/*
 * __clsl: unsigned long flavour of __cls.
 *
 * Uses __builtin_arm_cls when __SIZEOF_LONG__ is 4 and __builtin_arm_cls64
 * for every other size of long. The result is returned as a uint32_t in both
 * cases.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsl(unsigned long __t) {
#if __SIZEOF_LONG__ != 4
  return __builtin_arm_cls64(__t);
#else
  return __builtin_arm_cls(__t);
#endif
}
158360784Sdim
/*
 * __clsll: passes the 64-bit value __t to __builtin_arm_cls64 and returns the
 * builtin's result as a uint32_t.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsll(uint64_t __t) {
  const uint32_t __count = __builtin_arm_cls64(__t);
  return __count;
}
163360784Sdim
164280031Sdim/* REV */
/*
 * __rev: returns __t with the order of its four bytes reversed, as computed
 * by __builtin_bswap32.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  const uint32_t __swapped = __builtin_bswap32(__t);
  return __swapped;
}
169274958Sdim
/*
 * __revl: byte-reverses an unsigned long.
 *
 * Uses __builtin_bswap32 when __SIZEOF_LONG__ is 4 and __builtin_bswap64 for
 * every other size of long.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ != 4
  return __builtin_bswap64(__t);
#else
  return __builtin_bswap32(__t);
#endif
}
178274958Sdim
/*
 * __revll: returns __t with the order of its eight bytes reversed, as
 * computed by __builtin_bswap64.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  const uint64_t __swapped = __builtin_bswap64(__t);
  return __swapped;
}
183274958Sdim
184280031Sdim/* REV16 */
/*
 * __rev16: swaps the two bytes inside each 16-bit half of __t.
 *
 * Done in two steps: __rev reverses all four bytes, then __ror by 16 puts the
 * two halves back in their original positions, leaving only the bytes within
 * each half exchanged.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  const uint32_t __reversed = __rev(__t);
  return __ror(__reversed, 16);
}
189280031Sdim
/*
 * __rev16ll: 64-bit form of __rev16.
 *
 * Applies __rev16 separately to the upper and the lower 32 bits of __t (each
 * call receives its argument truncated to uint32_t) and reassembles the two
 * results in their original positions.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  const uint64_t __hi = __rev16(__t >> 32);
  const uint64_t __lo = __rev16(__t);
  return (__hi << 32) | __lo;
}
194296417Sdim
/*
 * __rev16l: unsigned long form of __rev16.
 *
 * Pure dispatch on the size of long: __rev16 when __SIZEOF_LONG__ is 4,
 * __rev16ll for every other size.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ != 4
  return __rev16ll(__t);
#else
  return __rev16(__t);
#endif
}
203280031Sdim
204280031Sdim/* REVSH */
/*
 * __revsh: swaps the two bytes of the 16-bit value __t with
 * __builtin_bswap16 and returns the result as an int16_t.
 *
 * The signed/unsigned conversions on the way in and out are the same ones the
 * compiler would apply implicitly; they are merely spelled out here.
 */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  const uint16_t __bits = (uint16_t)__t;
  return (int16_t)__builtin_bswap16(__bits);
}
209280031Sdim
210280031Sdim/* RBIT */
/*
 * __rbit: passes the 32-bit value __t to __builtin_arm_rbit and returns the
 * builtin's result.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  const uint32_t __reversed = __builtin_arm_rbit(__t);
  return __reversed;
}
215280031Sdim
/*
 * __rbitll: 64-bit form of __rbit.
 *
 * When __ARM_32BIT_STATE is defined and non-zero the result is assembled from
 * two __builtin_arm_rbit calls: the result for the low 32 bits of __t becomes
 * the high word of the return value, and the result for the high 32 bits
 * becomes the low word. Otherwise __builtin_arm_rbit64 is used directly.
 *
 * The defined() test keeps the conditional clean under -Wundef when
 * __ARM_32BIT_STATE is not defined at all.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}
225280031Sdim
/*
 * __rbitl: unsigned long form of __rbit.
 *
 * Pure dispatch on the size of long: __rbit when __SIZEOF_LONG__ is 4,
 * __rbitll for every other size.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ != 4
  return __rbitll(__t);
#else
  return __rbit(__t);
#endif
}
234280031Sdim
235274958Sdim/*
236321369Sdim * 9.3 16-bit multiplications
237321369Sdim */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
/*
 * 16-bit multiplication wrappers. Compiled only when __ARM_FEATURE_DSP is
 * defined and non-zero (the defined() test keeps the guard clean under
 * -Wundef).
 *
 * Every function takes two int32_t operands, passes them in the same order to
 * the like-named __builtin_arm_smul* builtin and returns the builtin's
 * int32_t result.
 */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif
264321369Sdim
265321369Sdim/*
266274958Sdim * 9.4 Saturating intrinsics
267274958Sdim *
 * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
269274958Sdim * intrinsics are implemented and the flag is enabled.
270274958Sdim */
271274958Sdim/* 9.4.1 Width-specified saturation intrinsics */
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
/*
 * __ssat / __usat: forward (x, y) unchanged to __builtin_arm_ssat and
 * __builtin_arm_usat respectively. Defined only when __ARM_FEATURE_SAT is
 * defined and non-zero (the defined() test keeps the guard clean under
 * -Wundef).
 */
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif
276274958Sdim
277274958Sdim/* 9.4.2 Saturating addition and subtraction intrinsics */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
/*
 * Saturating add/subtract wrappers. Compiled only when __ARM_FEATURE_DSP is
 * defined and non-zero (the defined() test keeps the guard clean under
 * -Wundef).
 */

/* __qadd: returns __builtin_arm_qadd(__t, __v). */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}

/* __qsub: returns __builtin_arm_qsub(__t, __v). */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}

/* __qdbl: adds __t to itself through __builtin_arm_qadd. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif
294274958Sdim
/* 9.4.3 Accumulating multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
/*
 * Multiply-accumulate wrappers. Compiled only when __ARM_FEATURE_DSP is
 * defined and non-zero (the defined() test keeps the guard clean under
 * -Wundef).
 *
 * Every function takes three int32_t operands (__a, __b, __c), passes them in
 * that order to the like-named __builtin_arm_smla* builtin and returns the
 * builtin's int32_t result.
 */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif
322321369Sdim
323321369Sdim
324321369Sdim/* 9.5.4 Parallel 16-bit saturation */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
/*
 * __ssat16 / __usat16: forward (x, y) unchanged to __builtin_arm_ssat16 and
 * __builtin_arm_usat16 respectively. Defined only when __ARM_FEATURE_SIMD32
 * is defined and non-zero (the defined() test keeps the guard clean under
 * -Wundef).
 */
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif
329321369Sdim
330321369Sdim/* 9.5.5 Packing and unpacking */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
/*
 * Packed-lane types used by the 32-bit SIMD intrinsics in this header. They
 * are plain aliases of the 32-bit integer types, so they add naming only and
 * no extra type checking.
 */
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

/*
 * Packing/unpacking wrappers. Each function forwards its operands, in order,
 * to the like-named __builtin_arm_* builtin and returns the builtin's result.
 * The whole section is compiled only when __ARM_FEATURE_SIMD32 is defined and
 * non-zero (the defined() test keeps the guard clean under -Wundef).
 */
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif
354321369Sdim
355321369Sdim/* 9.5.6 Parallel selection */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
/*
 * __sel: returns __builtin_arm_sel(__a, __b). Compiled only when
 * __ARM_FEATURE_SIMD32 is defined and non-zero (the defined() test keeps the
 * guard clean under -Wundef).
 */
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif
362321369Sdim
363321369Sdim/* 9.5.7 Parallel 8-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
/*
 * Parallel 8-bit add/subtract wrappers. Compiled only when
 * __ARM_FEATURE_SIMD32 is defined and non-zero (the defined() test keeps the
 * guard clean under -Wundef).
 *
 * Every function takes two packed operands, passes them in the same order to
 * the like-named __builtin_arm_* builtin and returns the builtin's result.
 */

/* Variants declared on int8x4_t. */
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}

/* Variants declared on uint8x4_t. */
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif
414321369Sdim
415321369Sdim/* 9.5.8 Sum of 8-bit absolute differences */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
/*
 * Sum-of-absolute-differences wrappers. Compiled only when
 * __ARM_FEATURE_SIMD32 is defined and non-zero (the defined() test keeps the
 * guard clean under -Wundef).
 */

/* __usad8: returns __builtin_arm_usad8(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}

/* __usada8: returns __builtin_arm_usada8(__a, __b, __c). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif
426321369Sdim
427321369Sdim/* 9.5.9 Parallel 16-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
/*
 * Parallel 16-bit add/subtract wrappers. Compiled only when
 * __ARM_FEATURE_SIMD32 is defined and non-zero (the defined() test keeps the
 * guard clean under -Wundef).
 *
 * Every function takes two packed operands, passes them in the same order to
 * the like-named __builtin_arm_* builtin and returns the builtin's result.
 */

/* Variants declared on int16x2_t. */
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}

/* Variants declared on uint16x2_t. */
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif
526321369Sdim
527321369Sdim/* 9.5.10 Parallel 16-bit multiplications */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
/*
 * Parallel 16-bit multiplication wrappers. Compiled only when
 * __ARM_FEATURE_SIMD32 is defined and non-zero (the defined() test keeps the
 * guard clean under -Wundef).
 *
 * Every function forwards its operands, in order, to the like-named
 * __builtin_arm_* builtin and returns the builtin's result.
 */

/* Three-operand forms: two int16x2_t inputs plus a third operand __c whose
 * type (int32_t or int64_t) matches the return type. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}

/* Two-operand forms: two int16x2_t inputs, int32_t result. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif
578321369Sdim
579274958Sdim/* 9.7 CRC32 intrinsics */
#if defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32
/*
 * CRC32 wrappers. Compiled only when __ARM_FEATURE_CRC32 is defined and
 * non-zero (the defined() test keeps the guard clean under -Wundef).
 *
 * Every function takes a uint32_t first operand __a and a second operand __b
 * of 8, 16, 32 or 64 bits, passes both in that order to the like-named
 * __builtin_arm_crc32* builtin and returns the builtin's uint32_t result.
 */

/* __crc32{b,h,w,d} family. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}

/* __crc32c{b,h,w,d} family. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}
#endif
621274958Sdim
/* Armv8.3-A JavaScript conversion intrinsic */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && \
    defined(__ARM_FEATURE_JCVT)
/*
 * __jcvt: passes the double __a to __builtin_arm_jcvt and returns the
 * builtin's int32_t result.
 *
 * Compiled only when __ARM_64BIT_STATE is defined and non-zero and
 * __ARM_FEATURE_JCVT is defined; the defined() test on the state macro keeps
 * the guard clean under -Wundef.
 */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__jcvt(double __a) {
  return __builtin_arm_jcvt(__a);
}
#endif
629353358Sdim
630288943Sdim/* 10.1 Special register intrinsics */
/*
 * Special register accessors.
 *
 * __arm_rsr, __arm_rsr64 and __arm_rsrp forward the register argument to the
 * builtin of the same name; __arm_wsr, __arm_wsr64 and __arm_wsrp do the same
 * with the register and the value to write.
 *
 * The floating-point forms are layered on the integer ones through
 * __builtin_bit_cast: __arm_rsrf / __arm_rsrf64 cast the value read by
 * __arm_rsr / __arm_rsr64 to float / double, and __arm_wsrf / __arm_wsrf64
 * cast the value to uint32_t / uint64_t before handing it to
 * __arm_wsr / __arm_wsr64.
 */
#define __arm_rsr(__reg) __builtin_arm_rsr(__reg)
#define __arm_rsr64(__reg) __builtin_arm_rsr64(__reg)
#define __arm_rsrp(__reg) __builtin_arm_rsrp(__reg)
#define __arm_rsrf(__reg) __builtin_bit_cast(float, __arm_rsr(__reg))
#define __arm_rsrf64(__reg) __builtin_bit_cast(double, __arm_rsr64(__reg))
#define __arm_wsr(__reg, __val) __builtin_arm_wsr(__reg, __val)
#define __arm_wsr64(__reg, __val) __builtin_arm_wsr64(__reg, __val)
#define __arm_wsrp(__reg, __val) __builtin_arm_wsrp(__reg, __val)
#define __arm_wsrf(__reg, __val) \
  __arm_wsr(__reg, __builtin_bit_cast(uint32_t, __val))
#define __arm_wsrf64(__reg, __val) \
  __arm_wsr64(__reg, __builtin_bit_cast(uint64_t, __val))
641288943Sdim
642360784Sdim/* Memory Tagging Extensions (MTE) Intrinsics */
#if defined(__ARM_FEATURE_MEMORY_TAGGING) && __ARM_FEATURE_MEMORY_TAGGING
/*
 * MTE intrinsics. Each macro forwards its arguments, unchanged and in order,
 * to one builtin:
 *
 *   __arm_mte_create_random_tag -> __builtin_arm_irg
 *   __arm_mte_increment_tag     -> __builtin_arm_addg
 *   __arm_mte_exclude_tag       -> __builtin_arm_gmi
 *   __arm_mte_get_tag           -> __builtin_arm_ldg
 *   __arm_mte_set_tag           -> __builtin_arm_stg
 *   __arm_mte_ptrdiff           -> __builtin_arm_subp
 *
 * Defined only when __ARM_FEATURE_MEMORY_TAGGING is defined and non-zero (the
 * defined() test keeps the guard clean under -Wundef).
 */
#define __arm_mte_create_random_tag(__ptr, __mask)  __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset)  __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded)  __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
#endif
651353358Sdim
652360784Sdim/* Transactional Memory Extension (TME) Intrinsics */
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
/*
 * TME intrinsics. Defined only when __ARM_FEATURE_TME is defined and non-zero
 * (the defined() test keeps the guard clean under -Wundef).
 */

/*
 * _TMFAILURE_* bit masks: _TMFAILURE_REASON covers the low 15 bits and each
 * remaining constant is a single distinct bit from bit 15 up to bit 24.
 * NOTE(review): presumably these decode the status value returned by
 * __tstart() - confirm against the TME documentation.
 */
#define _TMFAILURE_REASON  0x00007fffu
#define _TMFAILURE_RTRY    0x00008000u
#define _TMFAILURE_CNCL    0x00010000u
#define _TMFAILURE_MEM     0x00020000u
#define _TMFAILURE_IMP     0x00040000u
#define _TMFAILURE_ERR     0x00080000u
#define _TMFAILURE_SIZE    0x00100000u
#define _TMFAILURE_NEST    0x00200000u
#define _TMFAILURE_DBG     0x00400000u
#define _TMFAILURE_INT     0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

/* Each macro expands to a call of the like-named __builtin_arm_t* builtin;
 * __tcancel passes its argument through unchanged. */
#define __tstart()        __builtin_arm_tstart()
#define __tcommit()       __builtin_arm_tcommit()
#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
#define __ttest()         __builtin_arm_ttest()

#endif /* __ARM_FEATURE_TME */
673360784Sdim
674274958Sdim#if defined(__cplusplus)
675274958Sdim}
676274958Sdim#endif
677274958Sdim
678274958Sdim#endif /* __ARM_ACLE_H */
679