clang 19.0.0git
immintrin.h
Go to the documentation of this file.
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <x86gprintrin.h>
18
19#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
20 defined(__MMX__)
21#include <mmintrin.h>
22#endif
23
24#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
25 defined(__SSE__)
26#include <xmmintrin.h>
27#endif
28
29#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
30 defined(__SSE2__)
31#include <emmintrin.h>
32#endif
33
34#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
35 defined(__SSE3__)
36#include <pmmintrin.h>
37#endif
38
39#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
40 defined(__SSSE3__)
41#include <tmmintrin.h>
42#endif
43
44#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
45 (defined(__SSE4_2__) || defined(__SSE4_1__))
46#include <smmintrin.h>
47#endif
48
49#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
50 (defined(__AES__) || defined(__PCLMUL__))
51#include <wmmintrin.h>
52#endif
53
54#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
55 defined(__CLFLUSHOPT__)
56#include <clflushoptintrin.h>
57#endif
58
59#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
60 defined(__CLWB__)
61#include <clwbintrin.h>
62#endif
63
64#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
65 defined(__AVX__)
66#include <avxintrin.h>
67#endif
68
69#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
70 defined(__AVX2__)
71#include <avx2intrin.h>
72#endif
73
74#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
75 defined(__F16C__)
76#include <f16cintrin.h>
77#endif
78
79/* No feature check desired due to internal checks */
80#include <bmiintrin.h>
81
82#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
83 defined(__BMI2__)
84#include <bmi2intrin.h>
85#endif
86
87#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
88 defined(__LZCNT__)
89#include <lzcntintrin.h>
90#endif
91
92#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
93 defined(__POPCNT__)
94#include <popcntintrin.h>
95#endif
96
97#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
98 defined(__FMA__)
99#include <fmaintrin.h>
100#endif
101
102#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
103 defined(__AVX512F__)
104#include <avx512fintrin.h>
105#endif
106
107#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
108 defined(__AVX512VL__)
109#include <avx512vlintrin.h>
110#endif
111
112#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
113 defined(__AVX512BW__)
114#include <avx512bwintrin.h>
115#endif
116
117#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
118 defined(__AVX512BITALG__)
119#include <avx512bitalgintrin.h>
120#endif
121
122#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
123 defined(__AVX512CD__)
124#include <avx512cdintrin.h>
125#endif
126
127#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
128 defined(__AVX512VPOPCNTDQ__)
129#include <avx512vpopcntdqintrin.h>
130#endif
131
132#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
133 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
134#include <avx512vpopcntdqvlintrin.h>
135#endif
136
137#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
138 defined(__AVX512VNNI__)
139#include <avx512vnniintrin.h>
140#endif
141
142#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
143 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
144#include <avx512vlvnniintrin.h>
145#endif
146
147#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
148 defined(__AVXVNNI__)
149#include <avxvnniintrin.h>
150#endif
151
152#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
153 defined(__AVX512DQ__)
154#include <avx512dqintrin.h>
155#endif
156
157#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
158 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
159#include <avx512vlbitalgintrin.h>
160#endif
161
162#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
163 (defined(__AVX512VL__) && defined(__AVX512BW__))
164#include <avx512vlbwintrin.h>
165#endif
166
167#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
168 (defined(__AVX512VL__) && defined(__AVX512CD__))
169#include <avx512vlcdintrin.h>
170#endif
171
172#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
173 (defined(__AVX512VL__) && defined(__AVX512DQ__))
174#include <avx512vldqintrin.h>
175#endif
176
177#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
178 defined(__AVX512ER__)
179#include <avx512erintrin.h>
180#endif
181
182#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
183 defined(__AVX512IFMA__)
184#include <avx512ifmaintrin.h>
185#endif
186
187#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
188 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
189#include <avx512ifmavlintrin.h>
190#endif
191
192#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
193 defined(__AVXIFMA__)
194#include <avxifmaintrin.h>
195#endif
196
197#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
198 defined(__AVX512VBMI__)
199#include <avx512vbmiintrin.h>
200#endif
201
202#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
203 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
204#include <avx512vbmivlintrin.h>
205#endif
206
207#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
208 defined(__AVX512VBMI2__)
209#include <avx512vbmi2intrin.h>
210#endif
211
212#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
213 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
214#include <avx512vlvbmi2intrin.h>
215#endif
216
217#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
218 defined(__AVX512PF__)
219#include <avx512pfintrin.h>
220#endif
221
222#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
223 defined(__AVX512FP16__)
224#include <avx512fp16intrin.h>
225#endif
226
227#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
228 (defined(__AVX512VL__) && defined(__AVX512FP16__))
229#include <avx512vlfp16intrin.h>
230#endif
231
232#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
233 defined(__AVX512BF16__)
234#include <avx512bf16intrin.h>
235#endif
236
237#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
238 (defined(__AVX512VL__) && defined(__AVX512BF16__))
239#include <avx512vlbf16intrin.h>
240#endif
241
242#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
243 defined(__PKU__)
244#include <pkuintrin.h>
245#endif
246
247#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
248 defined(__VPCLMULQDQ__)
249#include <vpclmulqdqintrin.h>
250#endif
251
252#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
253 defined(__VAES__)
254#include <vaesintrin.h>
255#endif
256
257#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
258 defined(__GFNI__)
259#include <gfniintrin.h>
260#endif
261
262#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
263 defined(__AVXVNNIINT8__)
264#include <avxvnniint8intrin.h>
265#endif
266
267#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
268 defined(__AVXNECONVERT__)
269#include <avxneconvertintrin.h>
270#endif
271
272#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
273 defined(__SHA512__)
274#include <sha512intrin.h>
275#endif
276
277#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
278 defined(__SM3__)
279#include <sm3intrin.h>
280#endif
281
282#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
283 defined(__SM4__)
284#include <sm4intrin.h>
285#endif
286
287#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
288 defined(__AVXVNNIINT16__)
289#include <avxvnniint16intrin.h>
290#endif
291
292#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
293 defined(__RDPID__)
/// Reads the IA32_TSC_AUX model-specific register (address 0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit value held in the MSR.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __tsc_aux = __builtin_ia32_rdpid();
  return __tsc_aux;
}
305#endif // __RDPID__
306
307#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
308 defined(__RDRND__)
/// Obtains a 16-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Address of the 16-bit location that receives the random value.
/// \returns 1 when a value was generated successfully, 0 otherwise.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  return (int)__builtin_ia32_rdrand16_step(__p);
}
323
/// Obtains a 32-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Address of the 32-bit location that receives the random value.
/// \returns 1 when a value was generated successfully, 0 otherwise.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  return (int)__builtin_ia32_rdrand32_step(__p);
}
338
/// Obtains a 64-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Address of the 64-bit location that receives the random value.
/// \returns 1 when a value was generated successfully, 0 otherwise.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // No 64-bit builtin is used on this path: build the result from two 32-bit
  // reads. Both reads are always issued; the combined value is stored only
  // when both report success, otherwise *__p is zeroed.
  unsigned int __lo, __hi;
  unsigned int __ok_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __ok_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (!__ok_lo || !__ok_hi) {
    *__p = 0;
    return 0;
  }
  *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
  return 1;
#endif
}
368#endif /* __RDRND__ */
369
370#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
371 defined(__FSGSBASE__)
372#ifdef __x86_64__
/// Fetches the low half of the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  return __builtin_ia32_rdfsbase32();
}
385
/// Fetches the full FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  return __builtin_ia32_rdfsbase64();
}
398
/// Fetches the low half of the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  return __builtin_ia32_rdgsbase32();
}
411
/// Fetches the full GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  return __builtin_ia32_rdgsbase64();
}
424
/// Updates the FS base register from a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
438
/// Updates the FS base register from a 64-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
452
/// Updates the GS base register from a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
466
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
480
481#endif
482#endif /* __FSGSBASE__ */
483
484#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
485 defined(__MOVBE__)
486
487/* The structs used below are to force the load/store to be unaligned. This
488 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
489 * tbaa metadata from being generated based on the struct and the type of the
490 * field inside of it.
491 */
492
/// Reads a 16-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const *__P) {
  /* Packed, may-alias wrapper so the read is treated as unaligned. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i16 *__src = (const struct __loadu_i16 *)__P;
  return (short)__builtin_bswap16(__src->__v);
}
509
/// Reverses the bytes of a 16-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void *__P, short __D) {
  /* Packed, may-alias wrapper so the write is treated as unaligned. */
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i16 *__dst = (struct __storeu_i16 *)__P;
  __dst->__v = __builtin_bswap16((unsigned short)__D);
}
527
/// Reads a 32-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const *__P) {
  /* Packed, may-alias wrapper so the read is treated as unaligned. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i32 *__src = (const struct __loadu_i32 *)__P;
  return (int)__builtin_bswap32(__src->__v);
}
544
/// Reverses the bytes of a 32-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void *__P, int __D) {
  /* Packed, may-alias wrapper so the write is treated as unaligned. */
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i32 *__dst = (struct __storeu_i32 *)__P;
  __dst->__v = __builtin_bswap32((unsigned int)__D);
}
562
563#ifdef __x86_64__
/// Reads a 64-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const *__P) {
  /* Packed, may-alias wrapper so the read is treated as unaligned. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i64 *__src = (const struct __loadu_i64 *)__P;
  return (long long)__builtin_bswap64(__src->__v);
}
580
/// Reverses the bytes of a 64-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void *__P, long long __D) {
  /* Packed, may-alias wrapper so the write is treated as unaligned. */
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i64 *__dst = (struct __storeu_i64 *)__P;
  __dst->__v = __builtin_bswap64((unsigned long long)__D);
}
598#endif
599#endif /* __MOVBE__ */
600
601#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
602 defined(__RTM__)
603#include <rtmintrin.h>
604#include <xtestintrin.h>
605#endif
606
607#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
608 defined(__SHA__)
609#include <shaintrin.h>
610#endif
611
612#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
613 defined(__FXSR__)
614#include <fxsrintrin.h>
615#endif
616
617/* No feature check desired due to internal MSC_VER checks */
618#include <xsaveintrin.h>
619
620#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
621 defined(__XSAVEOPT__)
622#include <xsaveoptintrin.h>
623#endif
624
625#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
626 defined(__XSAVEC__)
627#include <xsavecintrin.h>
628#endif
629
630#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
631 defined(__XSAVES__)
632#include <xsavesintrin.h>
633#endif
634
635#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
636 defined(__SHSTK__)
637#include <cetintrin.h>
638#endif
639
640/* Intrinsics inside adcintrin.h are available at all times. */
641#include <adcintrin.h>
642
643#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
644 defined(__ADX__)
645#include <adxintrin.h>
646#endif
647
648#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
649 defined(__RDSEED__)
650#include <rdseedintrin.h>
651#endif
652
653#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
654 defined(__WBNOINVD__)
655#include <wbnoinvdintrin.h>
656#endif
657
658#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
659 defined(__CLDEMOTE__)
660#include <cldemoteintrin.h>
661#endif
662
663#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
664 defined(__WAITPKG__)
665#include <waitpkgintrin.h>
666#endif
667
668#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
669 defined(__MOVDIRI__) || defined(__MOVDIR64B__)
670#include <movdirintrin.h>
671#endif
672
673#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
674 defined(__PCONFIG__)
675#include <pconfigintrin.h>
676#endif
677
678#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
679 defined(__SGX__)
680#include <sgxintrin.h>
681#endif
682
683#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
684 defined(__PTWRITE__)
685#include <ptwriteintrin.h>
686#endif
687
688#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
689 defined(__INVPCID__)
690#include <invpcidintrin.h>
691#endif
692#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
693 defined(__AMX_FP16__)
694#include <amxfp16intrin.h>
695#endif
696
697#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
698 defined(__KL__) || defined(__WIDEKL__)
699#include <keylockerintrin.h>
700#endif
701
702#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
703 defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__)
704#include <amxintrin.h>
705#endif
706
707#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
708 defined(__AMX_COMPLEX__)
709#include <amxcomplexintrin.h>
710#endif
711
712#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
713 defined(__AVX512VP2INTERSECT__)
714#include <avx512vp2intersectintrin.h>
715#endif
716
717#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
718 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
719#include <avx512vlvp2intersectintrin.h>
720#endif
721
722#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
723 defined(__ENQCMD__)
724#include <enqcmdintrin.h>
725#endif
726
727#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
728 defined(__SERIALIZE__)
729#include <serializeintrin.h>
730#endif
731
732#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
733 defined(__TSXLDTRK__)
734#include <tsxldtrkintrin.h>
735#endif
736
737#if defined(_MSC_VER) && __has_extension(gnu_asm)
738/* Define the default attributes for these intrinsics */
739#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
740#ifdef __cplusplus
741extern "C" {
742#endif
743/*----------------------------------------------------------------------------*\
744|* Interlocked Exchange HLE
745\*----------------------------------------------------------------------------*/
746#if defined(__i386__) || defined(__x86_64__)
/* Swaps _Value with *_Target using `lock xchg`, with a raw 0xf2 byte emitted
 * in front of the instruction (the "acquire" variant, going by the name).
 * The {a|b} groups in the template give the operand order for the two
 * assembler dialects. _Value is an in/out register operand, so the value
 * returned is whatever the exchange left in that register, i.e. the prior
 * contents of *_Target. The "memory" clobber keeps the compiler from moving
 * memory accesses across the asm. */
747static __inline__ long __DEFAULT_FN_ATTRS
748_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
749 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
750 : "+r" (_Value), "+m" (*_Target) :: "memory");
751 return _Value;
752}
/* Swaps _Value with *_Target using `lock xchg`, with a raw 0xf3 byte emitted
 * in front of the instruction (the "release" variant, going by the name).
 * _Value is an in/out register operand, so the value returned is whatever
 * the exchange left in that register, i.e. the prior contents of *_Target.
 * The "memory" clobber keeps the compiler from moving memory accesses across
 * the asm. */
753static __inline__ long __DEFAULT_FN_ATTRS
754_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
755 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
756 : "+r" (_Value), "+m" (*_Target) :: "memory");
757 return _Value;
758}
759#endif
760#if defined(__x86_64__)
/* 64-bit form of the acquire exchange: swaps _Value with *_Target using
 * `lock xchg` preceded by a raw 0xf2 byte. Returns the in/out register
 * operand after the swap, i.e. the prior contents of *_Target. */
761static __inline__ __int64 __DEFAULT_FN_ATTRS
762_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
763 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
764 : "+r" (_Value), "+m" (*_Target) :: "memory");
765 return _Value;
766}
/* 64-bit form of the release exchange: swaps _Value with *_Target using
 * `lock xchg` preceded by a raw 0xf3 byte. Returns the in/out register
 * operand after the swap, i.e. the prior contents of *_Target. */
767static __inline__ __int64 __DEFAULT_FN_ATTRS
768_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
769 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
770 : "+r" (_Value), "+m" (*_Target) :: "memory");
771 return _Value;
772}
773#endif
774/*----------------------------------------------------------------------------*\
775|* Interlocked Compare Exchange HLE
776\*----------------------------------------------------------------------------*/
777#if defined(__i386__) || defined(__x86_64__)
/* Compare-and-exchange on *_Destination using `lock cmpxchg`, with a raw
 * 0xf2 byte emitted in front of the instruction (the "acquire" variant,
 * going by the name). _Comparand is an in/out operand pinned to the
 * accumulator by the "a" constraint and _Exchange is a read-only register
 * input. Returns _Comparand as the instruction left it, which is the value
 * *_Destination held before the operation. */
778static __inline__ long __DEFAULT_FN_ATTRS
779_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
780 long _Exchange, long _Comparand) {
781 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
782 : "+a" (_Comparand), "+m" (*_Destination)
783 : "r" (_Exchange) : "memory");
784 return _Comparand;
785}
/* Compare-and-exchange on *_Destination using `lock cmpxchg`, with a raw
 * 0xf3 byte emitted in front of the instruction (the "release" variant,
 * going by the name). _Comparand is an in/out operand pinned to the
 * accumulator by the "a" constraint and _Exchange is a read-only register
 * input. Returns _Comparand as the instruction left it, which is the value
 * *_Destination held before the operation. */
786static __inline__ long __DEFAULT_FN_ATTRS
787_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
788 long _Exchange, long _Comparand) {
789 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
790 : "+a" (_Comparand), "+m" (*_Destination)
791 : "r" (_Exchange) : "memory");
792 return _Comparand;
793}
794#endif
795#if defined(__x86_64__)
/* 64-bit form of the acquire compare-and-exchange: `lock cmpxchg` on
 * *_Destination preceded by a raw 0xf2 byte, with _Comparand in the
 * accumulator and _Exchange in a register. Returns _Comparand as the
 * instruction left it, which is the value *_Destination held before the
 * operation. */
796static __inline__ __int64 __DEFAULT_FN_ATTRS
797_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
798 __int64 _Exchange, __int64 _Comparand) {
799 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
800 : "+a" (_Comparand), "+m" (*_Destination)
801 : "r" (_Exchange) : "memory");
802 return _Comparand;
803}
/* 64-bit form of the release compare-and-exchange: `lock cmpxchg` on
 * *_Destination preceded by a raw 0xf3 byte, with _Comparand in the
 * accumulator and _Exchange in a register. Returns _Comparand as the
 * instruction left it, which is the value *_Destination held before the
 * operation. */
804static __inline__ __int64 __DEFAULT_FN_ATTRS
805_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
806 __int64 _Exchange, __int64 _Comparand) {
807 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
808 : "+a" (_Comparand), "+m" (*_Destination)
809 : "r" (_Exchange) : "memory");
810 return _Comparand;
811}
812#endif
813#ifdef __cplusplus
814}
815#endif
816
817#undef __DEFAULT_FN_ATTRS
818
819#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
820
821#endif /* __IMMINTRIN_H */
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:80
static __inline__ void short __D
Definition: immintrin.h:521
struct __storeu_i16 *__P __v
Definition: immintrin.h:525
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25