clang 20.0.0git
immintrin.h
/*===---- immintrin.h - Intel intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif

#include <x86gprintrin.h>

#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif

/* No feature check desired due to internal checks */
#include <bmiintrin.h>

#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
#include <avxvnniintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
#include <avxifmaintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
#include <avx512fp16intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512FP16__))
#include <avx512vlfp16intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
#include <avxvnniint8intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
#include <avxneconvertintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
#include <sha512intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
#include <sm3intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
#include <sm4intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
#include <avxvnniint16intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
#endif // __RDPID__
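
/* Editor's note: an illustrative usage sketch, not part of the upstream
 * header. On Linux the kernel conventionally stores the current CPU number in
 * the low 12 bits of IA32_TSC_AUX, so _rdpid_u32() can serve as a cheap
 * "which core am I running on?" query. The helper name is hypothetical; the
 * target attribute lets it compile without -mrdpid, but running it still
 * requires a CPU with RDPID. */
static __inline__ unsigned int __attribute__((__target__("rdpid")))
example_current_cpu(void) {
  return _rdpid_u32() & 0xfffu; /* low 12 bits: CPU number (Linux convention) */
}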

#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
/// Returns a 16-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 16-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return (int)__builtin_ia32_rdrand16_step(__p);
}

/// Returns a 32-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 32-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return (int)__builtin_ia32_rdrand32_step(__p);
}

/// Returns a 64-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 64-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // We need to emulate the functionality of 64-bit rdrand with 2 32-bit
  // rdrand instructions.
  unsigned int __lo, __hi;
  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    return 1;
  } else {
    *__p = 0;
    return 0;
  }
#endif
}
#endif /* __RDRND__ */
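
/* Editor's note: an illustrative usage sketch, not part of the upstream
 * header. RDRAND can fail transiently (the step intrinsics return 0), so
 * callers normally retry a bounded number of times. The helper name is
 * hypothetical; the target attribute lets it compile without -mrdrnd, but a
 * CPU with RDRAND is still required at run time. */
static __inline__ int __attribute__((__target__("rdrnd")))
example_random_u64(unsigned long long *__out) {
  for (int __i = 0; __i < 16; ++__i) { /* bounded retry loop */
    if (_rdrand64_step(__out))
      return 1;                        /* success: *__out holds a random value */
  }
  return 0;                            /* hardware failed to deliver entropy */
}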

#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}

/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}

/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}

/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}

/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}

#endif
#endif /* __FSGSBASE__ */
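
/* Editor's note: an illustrative usage sketch, not part of the upstream
 * header. On x86-64 Linux the FS base normally points at the current thread's
 * TLS block, and RDFSBASE/RDGSBASE only work from user space once the kernel
 * has enabled CR4.FSGSBASE; otherwise they fault. The helper name is
 * hypothetical; the target attribute lets it compile without -mfsgsbase. */
#ifdef __x86_64__
static __inline__ unsigned long long __attribute__((__target__("fsgsbase")))
example_tls_base(void) {
  return _readfsbase_u64(); /* raises SIGILL if the OS has not enabled FSGSBASE */
}
#endif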

#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)

/* The structs used below are to force the load/store to be unaligned. This
 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
 * tbaa metadata from being generated based on the struct and the type of the
 * field inside of it.
 */

/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}

/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}

/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}

/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
}

#ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}

/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
}
#endif
#endif /* __MOVBE */
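
/* Editor's note: an illustrative usage sketch, not part of the upstream
 * header. _loadbe_i32 combines an unaligned load with a byte swap, which is
 * convenient for parsing big-endian (network byte order) wire formats. The
 * helper name is hypothetical; the target attribute lets it compile without
 * -mmovbe. */
static __inline__ unsigned int __attribute__((__target__("movbe")))
example_read_be32(const unsigned char *__buf, unsigned long __offset) {
  /* Unaligned, byte-swapped 32-bit load straight out of the byte buffer. */
  return (unsigned int)_loadbe_i32(__buf + __offset);
}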

#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
#endif

/* No feature check desired due to internal MSC_VER checks */
#include <xsaveintrin.h>

#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif

/* Intrinsics inside adcintrin.h are available at all times. */
#include <adcintrin.h>
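
/* Editor's note: an illustrative usage sketch, not part of the upstream
 * header. adcintrin.h supplies _addcarry_u32/_addcarry_u64 (and the subborrow
 * forms), which chain the carry flag across limbs; a 128-bit addition built
 * from two 64-bit limbs might look like the sketch below. The struct and
 * helper names are hypothetical. */
#ifdef __x86_64__
struct example_u128 { unsigned long long __lo, __hi; };
static __inline__ struct example_u128
example_add_u128(struct example_u128 __a, struct example_u128 __b) {
  struct example_u128 __r;
  unsigned char __c = _addcarry_u64(0, __a.__lo, __b.__lo, &__r.__lo); /* low limb */
  (void)_addcarry_u64(__c, __a.__hi, __b.__hi, &__r.__hi);             /* high limb consumes the carry */
  return __r;
}
#endif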

#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
#include <adxintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
    defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVRS__)
#include <movrsintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX10_2__) && defined(__MOVRS__))
#include <movrs_avx10_2intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX10_2_512__) && defined(__MOVRS__))
#include <movrs_avx10_2_512intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
    defined(__WIDEKL__)
#include <keylockerintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
    defined(__AMX_INT8__) || defined(__AMX_BF16__)
#include <amxintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
#include <amxfp16intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
#include <amxcomplexintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP8__)
#include <amxfp8intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TRANSPOSE__)
#include <amxtransposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_MOVRS__)
#include <amxmovrsintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AMX_MOVRS__) && defined(__AMX_TRANSPOSE__))
#include <amxmovrstransposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
#include <amxavx512intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TF32__)
#include <amxtf32intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AMX_TF32__) && defined(__AMX_TRANSPOSE__))
#include <amxtf32transposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AMX_BF16__) && defined(__AMX_TRANSPOSE__))
#include <amxbf16transposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AMX_FP16__) && defined(__AMX_TRANSPOSE__))
#include <amxfp16transposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AMX_COMPLEX__) && defined(__AMX_TRANSPOSE__))
#include <amxcomplextransposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
#include <avx512vlvp2intersectintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
#include <avx10_2bf16intrin.h>
#include <avx10_2convertintrin.h>
#include <avx10_2copyintrin.h>
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
#include <avx10_2satcvtdsintrin.h>
#include <avx10_2satcvtintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
#include <avx10_2_512bf16intrin.h>
#include <avx10_2_512convertintrin.h>
#include <avx10_2_512minmaxintrin.h>
#include <avx10_2_512niintrin.h>
#include <avx10_2_512satcvtdsintrin.h>
#include <avx10_2_512satcvtintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
    (defined(__AVX10_2_512__) && defined(__SM4__))
#include <sm4evexintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
#include <serializeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
#include <tsxldtrkintrin.h>
#endif

#if defined(_MSC_VER) && __has_extension(gnu_asm)
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
                                       long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
                                       long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#ifdef __cplusplus
}
#endif

#undef __DEFAULT_FN_ATTRS

#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
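
/* Editor's note: an illustrative usage sketch, not part of the upstream
 * header, and only meaningful in the MSVC-compatible mode guarded above. The
 * XACQUIRE/XRELEASE (0xF2/0xF3) prefixes are ignored on CPUs without Hardware
 * Lock Elision, so the code degrades to an ordinary locked exchange. The
 * helper names are hypothetical. */
#if defined(_MSC_VER) && __has_extension(gnu_asm)
static __inline__ void example_hle_lock(long volatile *__lock) {
  while (_InterlockedExchange_HLEAcquire(__lock, 1) != 0) {
    /* spin until the previous owner writes 0 back */
  }
}
static __inline__ void example_hle_unlock(long volatile *__lock) {
  _InterlockedExchange_HLERelease(__lock, 0); /* end the (possibly elided) critical section */
}
#endif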

#endif /* __IMMINTRIN_H */