clang 22.0.0git
immintrin.h
Go to the documentation of this file.
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <x86gprintrin.h>
18
19#include <mmintrin.h>
20
21#include <xmmintrin.h>
22
23#include <emmintrin.h>
24
25#include <pmmintrin.h>
26
27#include <tmmintrin.h>
28
29#include <smmintrin.h>
30
31#include <wmmintrin.h>
32
33#include <clflushoptintrin.h>
34
35#include <clwbintrin.h>
36
37#include <avxintrin.h>
38
39#include <avx2intrin.h>
40
41#include <f16cintrin.h>
42
43#include <bmiintrin.h>
44
45#include <bmi2intrin.h>
46
47#include <lzcntintrin.h>
48
49#include <popcntintrin.h>
50
51#include <fmaintrin.h>
52
53#include <avx512fintrin.h>
54
55#include <avx512vlintrin.h>
56
57#include <avx512bwintrin.h>
58
59#include <avx512bitalgintrin.h>
60
61#include <avx512cdintrin.h>
62
64
66
67#include <avx512vnniintrin.h>
68
69#include <avx512vlvnniintrin.h>
70
71#include <avxvnniintrin.h>
72
73#include <avx512dqintrin.h>
74
76
77#include <avx512vlbwintrin.h>
78
79#include <avx512vlcdintrin.h>
80
81#include <avx512vldqintrin.h>
82
83#include <avx512ifmaintrin.h>
84
85#include <avx512ifmavlintrin.h>
86
87#include <avxifmaintrin.h>
88
89#include <avx512vbmiintrin.h>
90
91#include <avx512vbmivlintrin.h>
92
93#include <avx512vbmi2intrin.h>
94
95#include <avx512vlvbmi2intrin.h>
96
97#include <avx512fp16intrin.h>
98
99#include <avx512vlfp16intrin.h>
100
101#include <avx512bf16intrin.h>
102
103#include <avx512vlbf16intrin.h>
104
105#include <pkuintrin.h>
106
107#include <vpclmulqdqintrin.h>
108
109#include <vaesintrin.h>
110
111#include <gfniintrin.h>
112
113#include <avxvnniint8intrin.h>
114
115#include <avxneconvertintrin.h>
116
117#include <sha512intrin.h>
118
119#include <sm3intrin.h>
120
121#include <sm4intrin.h>
122
123#include <avxvnniint16intrin.h>
124
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
136
/// Returns a 16-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 16-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return (int)__builtin_ia32_rdrand16_step(__p);
}
151
/// Returns a 32-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 32-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return (int)__builtin_ia32_rdrand32_step(__p);
}
166
/// Returns a 64-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// On 64-bit targets a single 64-bit builtin is used. On 32-bit targets the
/// value is assembled from two 32-bit RDRAND results; if either of the two
/// fails, zero is stored through \a __p and 0 is returned.
///
/// \param __p
///    A pointer to a 64-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // We need to emulate the functionality of 64-bit rdrand with 2 32-bit
  // rdrand instructions.
  unsigned int __lo, __hi;
  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
    // Both halves are valid: the second draw forms the upper 32 bits.
    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    return 1;
  } else {
    // At least one draw failed: report failure with a zeroed destination.
    *__p = 0;
    return 0;
  }
#endif
}
196
197#ifdef __x86_64__
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
210
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
223
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
236
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
249
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
263
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
277
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
291
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
305
306#endif
307
/* The structs used below force the loads and stores to be treated as
 * unaligned; this is accomplished with the __packed__ attribute. The
 * __may_alias__ attribute prevents TBAA metadata from being generated based
 * on the struct and the type of the field inside of it.
 */
313
/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load. The access goes through a packed
///    struct, so no particular alignment is assumed.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
330
/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. The access goes
///    through a packed struct, so no particular alignment is assumed.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
348
/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load. The access goes through a packed
///    struct, so no particular alignment is assumed.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
365
/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. The access goes
///    through a packed struct, so no particular alignment is assumed.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
}
383
384#ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load. The access goes through a packed
///    struct, so no particular alignment is assumed.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
401
/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. The access goes
///    through a packed struct, so no particular alignment is assumed.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
}
419#endif
420
421#include <rtmintrin.h>
422#include <xtestintrin.h>
423
424#include <shaintrin.h>
425
426#include <fxsrintrin.h>
427
428/* No feature check desired due to internal MSC_VER checks */
429#include <xsaveintrin.h>
430
431#include <xsaveoptintrin.h>
432
433#include <xsavecintrin.h>
434
435#include <xsavesintrin.h>
436
437#include <cetintrin.h>
438
439/* Intrinsics inside adcintrin.h are available at all times. */
440#include <adcintrin.h>
441
442#include <adxintrin.h>
443
444#include <rdseedintrin.h>
445
446#include <wbnoinvdintrin.h>
447
448#include <cldemoteintrin.h>
449
450#include <waitpkgintrin.h>
451
452#include <movdirintrin.h>
453
454#include <movrsintrin.h>
455
456#include <movrs_avx10_2intrin.h>
457
459
460#include <pconfigintrin.h>
461
462#include <sgxintrin.h>
463
464#include <ptwriteintrin.h>
465
466#include <invpcidintrin.h>
467
468#include <keylockerintrin.h>
469
470#include <amxintrin.h>
471
472#include <amxfp16intrin.h>
473
474#include <amxcomplexintrin.h>
475
476#include <amxfp8intrin.h>
477
478#include <amxtransposeintrin.h>
479
480#include <amxmovrsintrin.h>
481
483
484#include <amxavx512intrin.h>
485
486#include <amxtf32intrin.h>
487
489
491
493
495
497
499
500#include <avx10_2bf16intrin.h>
501#include <avx10_2convertintrin.h>
502#include <avx10_2copyintrin.h>
503#include <avx10_2minmaxintrin.h>
504#include <avx10_2niintrin.h>
506#include <avx10_2satcvtintrin.h>
507
511#include <avx10_2_512niintrin.h>
514
515#include <sm4evexintrin.h>
516
517#include <enqcmdintrin.h>
518
519#include <serializeintrin.h>
520
521#include <tsxldtrkintrin.h>
522
523#if defined(_MSC_VER) && __has_extension(gnu_asm)
524/* Define the default attributes for these intrinsics */
525#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
526#ifdef __cplusplus
527extern "C" {
528#endif
529/*----------------------------------------------------------------------------*\
530|* Interlocked Exchange HLE
531\*----------------------------------------------------------------------------*/
532#if defined(__i386__) || defined(__x86_64__)
533static __inline__ long __DEFAULT_FN_ATTRS
534_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
535 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
536 : "+r" (_Value), "+m" (*_Target) :: "memory");
537 return _Value;
538}
539static __inline__ long __DEFAULT_FN_ATTRS
540_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
541 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
542 : "+r" (_Value), "+m" (*_Target) :: "memory");
543 return _Value;
544}
545#endif
546#if defined(__x86_64__)
547static __inline__ __int64 __DEFAULT_FN_ATTRS
548_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
549 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
550 : "+r" (_Value), "+m" (*_Target) :: "memory");
551 return _Value;
552}
553static __inline__ __int64 __DEFAULT_FN_ATTRS
554_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
555 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
556 : "+r" (_Value), "+m" (*_Target) :: "memory");
557 return _Value;
558}
559#endif
560/*----------------------------------------------------------------------------*\
561|* Interlocked Compare Exchange HLE
562\*----------------------------------------------------------------------------*/
563#if defined(__i386__) || defined(__x86_64__)
564static __inline__ long __DEFAULT_FN_ATTRS
565_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
566 long _Exchange, long _Comparand) {
567 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
568 : "+a" (_Comparand), "+m" (*_Destination)
569 : "r" (_Exchange) : "memory");
570 return _Comparand;
571}
572static __inline__ long __DEFAULT_FN_ATTRS
573_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
574 long _Exchange, long _Comparand) {
575 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
576 : "+a" (_Comparand), "+m" (*_Destination)
577 : "r" (_Exchange) : "memory");
578 return _Comparand;
579}
580#endif
581#if defined(__x86_64__)
582static __inline__ __int64 __DEFAULT_FN_ATTRS
583_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
584 __int64 _Exchange, __int64 _Comparand) {
585 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
586 : "+a" (_Comparand), "+m" (*_Destination)
587 : "r" (_Exchange) : "memory");
588 return _Comparand;
589}
590static __inline__ __int64 __DEFAULT_FN_ATTRS
591_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
592 __int64 _Exchange, __int64 _Comparand) {
593 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
594 : "+a" (_Comparand), "+m" (*_Destination)
595 : "r" (_Exchange) : "memory");
596 return _Comparand;
597}
598#endif
599#ifdef __cplusplus
600}
601#endif
602
603#undef __DEFAULT_FN_ATTRS
604
605#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
606
607#endif /* __IMMINTRIN_H */
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
__asm__("swp %0, %1, [%2]" :"=r"(__v) :"r"(__x), "r"(__p) :"memory")
static __inline__ void short __D
Definition immintrin.h:342
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25