/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_ACLE_H
#define __ARM_ACLE_H

#ifndef __ARM_ACLE
#error "ACLE intrinsics support not enabled."
#endif

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 8.3 Memory barriers */
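/* The argument selects the barrier domain and type; e.g. __dmb(0xF) issues a
   full-system (SY) data memory barrier. */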
#if !defined(_MSC_VER)
#define __dmb(i) __builtin_arm_dmb(i)
#define __dsb(i) __builtin_arm_dsb(i)
#define __isb(i) __builtin_arm_isb(i)
#endif

/* 8.4 Hints */

#if !defined(_MSC_VER)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
  __builtin_arm_wfi();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
  __builtin_arm_wfe();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
  __builtin_arm_sev();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
  __builtin_arm_sevl();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
  __builtin_arm_yield();
}
#endif

#if __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif

/* 8.5 Swap */
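/* Atomically swaps *__p with __x using an exclusive load/store (LDREX/STREX)
   retry loop and returns the previous value of *__p. */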
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t v;
  do
    v = __builtin_arm_ldrex(__p);
  while (__builtin_arm_strex(__x, __p));
  return v;
}

/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)

#if __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif
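/* On AArch32 the data-prefetch instruction does not encode a target cache
   level or retention policy, so those arguments are accepted but ignored. */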

/* 8.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)

#if __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif

/* 8.7 NOP */
#if !defined(_MSC_VER) || !defined(__aarch64__)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
  __builtin_arm_nop();
}
#endif

/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* ROR */
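/* Rotate right by __y bits. The count is reduced modulo the operand width,
   and a count of zero returns the input unchanged to avoid shifting by the
   full width, which would be undefined behaviour in C. */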
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  __y %= 32;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (32 - __y));
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  __y %= 64;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (64 - __y));
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  return __ror(__x, __y);
#else
  return __rorll(__x, __y);
#endif
}

/* CLZ */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __builtin_clz(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  return __builtin_clzl(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __builtin_clzll(__t);
}

/* CLS */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__cls(uint32_t __t) {
  return __builtin_arm_cls(__t);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_arm_cls(__t);
#else
  return __builtin_arm_cls64(__t);
#endif
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsll(uint64_t __t) {
  return __builtin_arm_cls64(__t);
}

/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  return __builtin_bswap32(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_bswap32(__t);
#else
  return __builtin_bswap64(__t);
#endif
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  return __builtin_bswap64(__t);
}

/* REV16 */
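/* Reverses the bytes within each 16-bit halfword: a full 32-bit byte reverse
   followed by a rotate of 16 restores the halfword order while leaving the
   bytes inside each halfword swapped. */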
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  return __ror(__rev(__t), 16);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rev16(__t);
#else
  return __rev16ll(__t);
#endif
}

/* REVSH */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  return __builtin_bswap16(__t);
}

/* RBIT */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  return __builtin_arm_rbit(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if __ARM_32BIT_STATE
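  /* AArch32 has no 64-bit RBIT: bit-reverse each 32-bit half and swap the
     halves. */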
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rbit(__t);
#else
  return __rbitll(__t);
#endif
}

/*
 * 9.3 16-bit multiplications
 */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif

/*
 * 9.4 Saturating intrinsics
 *
 * FIXME: Change the guard to the corresponding __ARM_FEATURE flag when the
 * Q-flag intrinsics are implemented and the flag is enabled.
 */
/* 9.4.1 Width-specified saturation intrinsics */
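/* __ssat(x, n) saturates x to an n-bit signed range and __usat(x, n) to an
   n-bit unsigned range; the bit-width argument must be a compile-time
   constant. */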
#if __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif

/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif

/* 9.5.4 Parallel 16-bit saturation */
#if __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif

/* 9.5.5 Packing and unpacking */
#if __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;
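/* These SIMD32 types are plain 32-bit integers used as containers for four
   packed 8-bit lanes or two packed 16-bit lanes. */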

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif

/* 9.5.6 Parallel selection */
#if __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif

/* 9.5.7 Parallel 8-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif

/* 9.5.8 Sum of 8-bit absolute differences */
#if __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif

/* 9.5.9 Parallel 16-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif

/* 9.5.10 Parallel 16-bit multiplications */
#if __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif

/* 9.7 CRC32 intrinsics */
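/* __a is the CRC accumulated so far and __b is the new data. The crc32*
   intrinsics use the IEEE 802.3 polynomial and the crc32c* intrinsics the
   Castagnoli (CRC-32C) polynomial. */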
#if __ARM_FEATURE_CRC32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}
#endif

/* Armv8.3-A Javascript conversion intrinsic */
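/* Converts a double to a signed 32-bit integer with the rounding and overflow
   behaviour required by JavaScript (the FJCVTZS instruction). */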
#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__jcvt(double __a) {
  return __builtin_arm_jcvt(__a);
}
#endif

/* 10.1 Special register intrinsics */
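/* The sysreg argument must be a string literal naming the register, e.g.
   __arm_rsr64("CNTVCT_EL0") reads the virtual counter on AArch64. */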
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg))
#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg))
#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))

/* Memory Tagging Extensions (MTE) Intrinsics */
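/* These wrap the IRG, ADDG, GMI, LDG, STG and SUBP instructions and operate on
   pointers that carry an MTE allocation tag in their top byte. */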
#if __ARM_FEATURE_MEMORY_TAGGING
#define __arm_mte_create_random_tag(__ptr, __mask)  __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset)  __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded)  __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
#endif

/* Transactional Memory Extension (TME) Intrinsics */
#if __ARM_FEATURE_TME

#define _TMFAILURE_REASON  0x00007fffu
#define _TMFAILURE_RTRY    0x00008000u
#define _TMFAILURE_CNCL    0x00010000u
#define _TMFAILURE_MEM     0x00020000u
#define _TMFAILURE_IMP     0x00040000u
#define _TMFAILURE_ERR     0x00080000u
#define _TMFAILURE_SIZE    0x00100000u
#define _TMFAILURE_NEST    0x00200000u
#define _TMFAILURE_DBG     0x00400000u
#define _TMFAILURE_INT     0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

#define __tstart()        __builtin_arm_tstart()
#define __tcommit()       __builtin_arm_tcommit()
#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
#define __ttest()         __builtin_arm_ttest()
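
/* __tstart() returns 0 when a transaction starts successfully; otherwise it
   returns a failure status that can be decoded with the _TMFAILURE_* masks
   above, with _TMFAILURE_RTRY indicating the transaction may be retried. */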

#endif /* __ARM_FEATURE_TME */

#if defined(__cplusplus)
}
#endif

#endif /* __ARM_ACLE_H */