/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_ACLE_H
#define __ARM_ACLE_H

#ifndef __ARM_ACLE
#error "ACLE intrinsics support not enabled."
#endif

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 8.3 Memory barriers */
#if !defined(_MSC_VER)
#define __dmb(i) __builtin_arm_dmb(i)
#define __dsb(i) __builtin_arm_dsb(i)
#define __isb(i) __builtin_arm_isb(i)
#endif
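
/*
 * Illustrative note (not part of the ACLE text): the argument selects the
 * barrier domain and access type using the architectural encoding, where
 * 0xF ("SY") orders all accesses across the full system, e.g.:
 *
 *   __dmb(0xF);   // full-system data memory barrier
 *   __isb(0xF);   // instruction barrier, e.g. after writing code or sysregs
 */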

/* 8.4 Hints */

#if !defined(_MSC_VER)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
  __builtin_arm_wfi();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
  __builtin_arm_wfe();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
  __builtin_arm_sev();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
  __builtin_arm_sevl();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
  __builtin_arm_yield();
}
#endif

#if __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif

/* 8.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t v;
  do
    v = __builtin_arm_ldrex(__p);
  while (__builtin_arm_strex(__x, __p));
  return v;
}
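
/*
 * Usage sketch (illustrative, names are hypothetical): __swp atomically
 * stores the new value and returns the previous contents, so a trivial
 * spinlock acquire can be written as:
 *
 *   while (__swp(1, &lock_word) != 0)
 *     __yield();
 */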

/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)

#if __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif

/* 8.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)

#if __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif
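
/*
 * Usage sketch (illustrative): in __pldx, access_kind selects read (0) or
 * write (1), cache_level selects the target level (0 = L1), and
 * retention_policy selects normal (0) or streaming (1) retention, e.g.:
 *
 *   __pld(&table[i]);            // plain data prefetch for a read
 *   __pldx(1, 0, 0, &table[i]);  // prefetch into L1 ahead of a store
 */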

/* 8.7 NOP */
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
  __builtin_arm_nop();
}

/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  __y %= 32;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (32 - __y));
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  __y %= 64;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (64 - __y));
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  return __ror(__x, __y);
#else
  return __rorll(__x, __y);
#endif
}
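
/*
 * Illustrative example: rotation moves the bits shifted out at the bottom
 * back in at the top, so __ror(0x0000000Fu, 4) yields 0xF0000000u.
 */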


/* CLZ */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __builtin_clz(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  return __builtin_clzl(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __builtin_clzll(__t);
}

/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  return __builtin_bswap32(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_bswap32(__t);
#else
  return __builtin_bswap64(__t);
#endif
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  return __builtin_bswap64(__t);
}

/* REV16 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  return __ror(__rev(__t), 16);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rev16(__t);
#else
  return __rev16ll(__t);
#endif
}

/* REVSH */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  return __builtin_bswap16(__t);
}

/* RBIT */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  return __builtin_arm_rbit(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if __ARM_32BIT_STATE
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rbit(__t);
#else
  return __rbitll(__t);
#endif
}
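
/*
 * Illustrative example: __rbit reverses the bit order of the whole word,
 * so __rbit(0x00000001u) yields 0x80000000u.
 */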

/*
 * 9.3 16-bit multiplications
 */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif
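
/*
 * Illustrative note: the "b"/"t" suffixes select the bottom or top signed
 * halfword of each operand, e.g. __smulbb(0x00010002, 0x00030004) multiplies
 * 0x0002 by 0x0004 and yields 8. The __smulw* forms multiply the full first
 * operand by a halfword of the second and return the high 32 bits of the
 * 48-bit product.
 */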

/*
 * 9.4 Saturating intrinsics
 *
 * FIXME: Change the guard to the corresponding __ARM_FEATURE flag when Q flag
 * intrinsics are implemented and the flag is enabled.
 */
/* 9.4.1 Width-specified saturation intrinsics */
#if __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif
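
/*
 * Illustrative example: results saturate to the int32_t range instead of
 * wrapping, so __qadd(INT32_MAX, 1) yields INT32_MAX, and __qdbl doubles
 * its argument with the same saturation.
 */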

/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif


/* 9.5.4 Parallel 16-bit saturation */
#if __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif

/* 9.5.5 Packing and unpacking */
#if __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif

/* 9.5.6 Parallel selection */
#if __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif

/* 9.5.7 Parallel 8-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif
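
/*
 * Illustrative note: these operate on four independent 8-bit lanes packed in
 * a 32-bit word. The plain forms wrap per lane, the "q" forms saturate, and
 * the "h" forms halve the per-lane result, e.g.
 * __uqadd8(0xFF010203u, 0x01010101u) yields 0xFF020304u.
 */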

/* 9.5.8 Sum of 8-bit absolute differences */
#if __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif

/* 9.5.9 Parallel 16-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif

/* 9.5.10 Parallel 16-bit multiplications */
#if __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif
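
/*
 * Illustrative example: __smuad multiplies the two halfword pairs and sums
 * the products, so with __a = 0x00020003 and __b = 0x00040005 it yields
 * 2*4 + 3*5 = 23; the "x" variants exchange the halfwords of the second
 * operand first, and the "sd" variants subtract instead of add.
 */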

/* 9.7 CRC32 intrinsics */
#if __ARM_FEATURE_CRC32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}
#endif
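
/*
 * Usage sketch (illustrative, names are hypothetical): each intrinsic folds
 * one more unit of data into a running accumulator; whether the accumulator
 * starts as 0xFFFFFFFF and is inverted at the end depends on the protocol
 * that defines the CRC:
 *
 *   uint32_t crc = 0xFFFFFFFFu;
 *   for (size_t i = 0; i != len; ++i)
 *     crc = __crc32b(crc, buf[i]);
 *   crc = ~crc;
 *
 * The __crc32c* forms use the Castagnoli polynomial instead.
 */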

/* Armv8.3-A Javascript conversion intrinsic */
#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__jcvt(double __a) {
  return __builtin_arm_jcvt(__a);
}
#endif

/* 10.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
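
/*
 * Usage sketch (illustrative): the sysreg argument is a string constant
 * naming a special register; the register chosen here is only an example.
 *
 *   uint64_t __ttbr = __arm_rsr64("TTBR0_EL1");
 *   __arm_wsr64("TTBR0_EL1", __ttbr);
 */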

/* Memory Tagging Extensions (MTE) Intrinsics */
#if __ARM_FEATURE_MEMORY_TAGGING
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
#endif
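
/*
 * Usage sketch (illustrative, names are hypothetical): create a random tag
 * for a 16-byte granule, then store that allocation tag so later accesses
 * through the returned pointer will match:
 *
 *   void *__tagged = __arm_mte_create_random_tag(__granule, 0);
 *   __arm_mte_set_tag(__tagged);
 */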

/* Transactional Memory Extension (TME) Intrinsics */
#if __ARM_FEATURE_TME

#define _TMFAILURE_REASON 0x00007fffu
#define _TMFAILURE_RTRY 0x00008000u
#define _TMFAILURE_CNCL 0x00010000u
#define _TMFAILURE_MEM 0x00020000u
#define _TMFAILURE_IMP 0x00040000u
#define _TMFAILURE_ERR 0x00080000u
#define _TMFAILURE_SIZE 0x00100000u
#define _TMFAILURE_NEST 0x00200000u
#define _TMFAILURE_DBG 0x00400000u
#define _TMFAILURE_INT 0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

#define __tstart() __builtin_arm_tstart()
#define __tcommit() __builtin_arm_tcommit()
#define __tcancel(__arg) __builtin_arm_tcancel(__arg)
#define __ttest() __builtin_arm_ttest()

#endif /* __ARM_FEATURE_TME */
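
/*
 * Usage sketch (illustrative): __tstart returns 0 when the transaction has
 * started, otherwise a failure status built from the _TMFAILURE_* bits:
 *
 *   uint64_t __status = __tstart();
 *   if (__status == 0) {
 *     ...                           // transactional region
 *     __tcommit();
 *   } else if (__status & _TMFAILURE_RTRY) {
 *     ...                           // transient failure, worth retrying
 *   }
 */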

#if defined(__cplusplus)
}
#endif

#endif /* __ARM_ACLE_H */