clang 23.0.0git
immintrin.h
Go to the documentation of this file.
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <x86gprintrin.h>
18
19#include <mmintrin.h>
20
21#include <xmmintrin.h>
22
23#include <emmintrin.h>
24
25#include <pmmintrin.h>
26
27#include <tmmintrin.h>
28
29#include <smmintrin.h>
30
31#include <wmmintrin.h>
32
33#include <clflushoptintrin.h>
34
35#include <clwbintrin.h>
36
37#include <avxintrin.h>
38
39#include <avx2intrin.h>
40
41#include <f16cintrin.h>
42
43#include <bmiintrin.h>
44
45#include <bmi2intrin.h>
46
47#include <lzcntintrin.h>
48
49#include <popcntintrin.h>
50
51#include <fmaintrin.h>
52
53#include <avx512fintrin.h>
54
55#include <avx512vlintrin.h>
56
57#include <avx512bwintrin.h>
58
59#include <avx512bitalgintrin.h>
60
61#include <avx512bmmintrin.h>
62
63#include <avx512bmmvlintrin.h>
64
65#include <avx512cdintrin.h>
66
68
70
71#include <avx512vnniintrin.h>
72
73#include <avx512vlvnniintrin.h>
74
75#include <avxvnniintrin.h>
76
77#include <avx512dqintrin.h>
78
80
81#include <avx512vlbwintrin.h>
82
83#include <avx512vlcdintrin.h>
84
85#include <avx512vldqintrin.h>
86
87#include <avx512ifmaintrin.h>
88
89#include <avx512ifmavlintrin.h>
90
91#include <avxifmaintrin.h>
92
93#include <avx512vbmiintrin.h>
94
95#include <avx512vbmivlintrin.h>
96
97#include <avx512vbmi2intrin.h>
98
99#include <avx512vlvbmi2intrin.h>
100
101#include <avx512fp16intrin.h>
102
103#include <avx512vlfp16intrin.h>
104
105#include <avx512bf16intrin.h>
106
107#include <avx512vlbf16intrin.h>
108
109#include <pkuintrin.h>
110
111#include <vpclmulqdqintrin.h>
112
113#include <vaesintrin.h>
114
115#include <gfniintrin.h>
116
117#include <avxvnniint8intrin.h>
118
119#include <avxneconvertintrin.h>
120
121#include <sha512intrin.h>
122
123#include <sm3intrin.h>
124
125#include <sm4intrin.h>
126
127#include <avxvnniint16intrin.h>
128
/// Fetches the contents of the IA32_TSC_AUX model-specific register
/// (MSR 0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The MSR contents as a 32-bit unsigned value.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
140
/// Requests a 16-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination: a pointer to the 16-bit location that receives the value.
/// \returns 1 when a value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  int __ok = (int)__builtin_ia32_rdrand16_step(__p);
  return __ok;
}
155
/// Requests a 32-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination: a pointer to the 32-bit location that receives the value.
/// \returns 1 when a value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  int __ok = (int)__builtin_ia32_rdrand32_step(__p);
  return __ok;
}
170
/// Requests a 64-bit random value from the hardware generator.
///
/// When not compiling for x86-64 the value is assembled from two 32-bit
/// draws; if either draw fails, 0 is stored through \a __p and 0 is returned.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Destination: a pointer to the 64-bit location that receives the value.
/// \returns 1 when a value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  int __ok = (int)__builtin_ia32_rdrand64_step(__p);
  return __ok;
#else
  // The 64-bit builtin is only used on x86-64, so emulate it with two 32-bit
  // draws. Both draws are always issued, low half first.
  unsigned int __lo, __hi;
  unsigned int __lo_ok = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __hi_ok = __builtin_ia32_rdrand32_step(&__hi);
  if (!(__lo_ok && __hi_ok)) {
    *__p = 0;
    return 0;
  }
  *__p = (unsigned long long)__lo | ((unsigned long long)__hi << 32);
  return 1;
#endif
}
200
201#ifdef __x86_64__
/// Retrieves the FS segment base register as a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
214
/// Retrieves the full FS segment base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
227
/// Retrieves the GS segment base register as a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
240
/// Retrieves the full GS segment base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
253
/// Sets the FS segment base register from a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  unsigned int __base = __V;
  __builtin_ia32_wrfsbase32(__base);
}
267
/// Sets the FS segment base register from a 64-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  unsigned long long __base = __V;
  __builtin_ia32_wrfsbase64(__base);
}
281
/// Sets the GS segment base register from a 32-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  unsigned int __base = __V;
  __builtin_ia32_wrgsbase32(__base);
}
295
/// Sets the GS segment base register from a 64-bit value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  unsigned long long __base = __V;
  __builtin_ia32_wrgsbase64(__base);
}
309
310#endif
311
312/* The structs used below force the loads and stores to be unaligned. This
313 * is accomplished with the __packed__ attribute. The __may_alias__ attribute
314 * prevents TBAA (type-based alias analysis) metadata from being generated
315 * based on the struct and the type of the field inside of it.
316 */
317
/// Reads a 16-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* Packed wrapper so the load carries no alignment assumption; may_alias
   * keeps type-based alias information off the access. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i16 *__src = (const struct __loadu_i16 *)__P;
  unsigned short __raw = __src->__v;
  return (short)__builtin_bswap16(__raw);
}
334
/// Reverses the bytes of a 16-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* Packed wrapper so the store carries no alignment assumption; may_alias
   * keeps type-based alias information off the access. */
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i16 *__dst = (struct __storeu_i16 *)__P;
  unsigned short __swapped = __builtin_bswap16((unsigned short)__D);
  __dst->__v = __swapped;
}
352
/// Reads a 32-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* Packed wrapper so the load carries no alignment assumption; may_alias
   * keeps type-based alias information off the access. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i32 *__src = (const struct __loadu_i32 *)__P;
  unsigned int __raw = __src->__v;
  return (int)__builtin_bswap32(__raw);
}
369
/// Reverses the bytes of a 32-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* Packed wrapper so the store carries no alignment assumption; may_alias
   * keeps type-based alias information off the access. */
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i32 *__dst = (struct __storeu_i32 *)__P;
  unsigned int __swapped = __builtin_bswap32((unsigned int)__D);
  __dst->__v = __swapped;
}
387
388#ifdef __x86_64__
/// Reads a 64-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* Packed wrapper so the load carries no alignment assumption; may_alias
   * keeps type-based alias information off the access. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i64 *__src = (const struct __loadu_i64 *)__P;
  unsigned long long __raw = __src->__v;
  return (long long)__builtin_bswap64(__raw);
}
405
/// Reverses the bytes of a 64-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* Packed wrapper so the store carries no alignment assumption; may_alias
   * keeps type-based alias information off the access. */
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i64 *__dst = (struct __storeu_i64 *)__P;
  unsigned long long __swapped = __builtin_bswap64((unsigned long long)__D);
  __dst->__v = __swapped;
}
423#endif
424
425#include <rtmintrin.h>
426#include <xtestintrin.h>
427
428#include <shaintrin.h>
429
430#include <fxsrintrin.h>
431
432/* No feature check desired due to internal MSC_VER checks */
433#include <xsaveintrin.h>
434
435#include <xsaveoptintrin.h>
436
437#include <xsavecintrin.h>
438
439#include <xsavesintrin.h>
440
441#include <cetintrin.h>
442
443/* Intrinsics inside adcintrin.h are available at all times. */
444#include <adcintrin.h>
445
446#include <adxintrin.h>
447
448#include <rdseedintrin.h>
449
450#include <wbnoinvdintrin.h>
451
452#include <cldemoteintrin.h>
453
454#include <waitpkgintrin.h>
455
456#include <movdirintrin.h>
457
458#include <movrsintrin.h>
459
460#include <movrs_avx10_2intrin.h>
461
463
464#include <pconfigintrin.h>
465
466#include <sgxintrin.h>
467
468#include <ptwriteintrin.h>
469
470#include <invpcidintrin.h>
471
472#include <keylockerintrin.h>
473
474#include <amxintrin.h>
475
476#include <amxfp16intrin.h>
477
478#include <amxcomplexintrin.h>
479
480#include <amxfp8intrin.h>
481
482#include <amxmovrsintrin.h>
483
484#include <amxavx512intrin.h>
485
486#include <amxtf32intrin.h>
487
489
491
492#include <avx10_2bf16intrin.h>
493#include <avx10_2convertintrin.h>
494#include <avx10_2copyintrin.h>
495#include <avx10_2minmaxintrin.h>
496#include <avx10_2niintrin.h>
498#include <avx10_2satcvtintrin.h>
499
503#include <avx10_2_512niintrin.h>
506
507#include <sm4evexintrin.h>
508
509#include <enqcmdintrin.h>
510
511#include <serializeintrin.h>
512
513#include <tsxldtrkintrin.h>
514
515#if defined(_MSC_VER) && __has_extension(gnu_asm)
516/* Define the default attributes for these intrinsics */
517#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
518#ifdef __cplusplus
519extern "C" {
520#endif
521/*----------------------------------------------------------------------------*\
522|* Interlocked Exchange HLE
523\*----------------------------------------------------------------------------*/
524#if defined(__i386__) || defined(__x86_64__)
/* Swaps _Value with *_Target using a locked XCHG and returns the value that
 * was in *_Target before the swap. The instruction is preceded by the raw
 * prefix byte 0xf2 (NOTE(review): this is the XACQUIRE hint encoding per the
 * Intel SDM - confirm). The {a|b} braces give the operand order for the two
 * assembler dialects. _Value is a read-write register operand, so it carries
 * the old memory contents once the asm completes; the "memory" clobber keeps
 * the compiler from moving memory accesses across the exchange. */
525static __inline__ long __DEFAULT_FN_ATTRS
526_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
527 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
528 : "+r" (_Value), "+m" (*_Target) :: "memory");
529 return _Value;
530}
/* Swaps _Value with *_Target using a locked XCHG and returns the value that
 * was in *_Target before the swap. The instruction is preceded by the raw
 * prefix byte 0xf3 (NOTE(review): this is the XRELEASE hint encoding per the
 * Intel SDM - confirm). The {a|b} braces give the operand order for the two
 * assembler dialects. _Value is a read-write register operand, so it carries
 * the old memory contents once the asm completes; the "memory" clobber keeps
 * the compiler from moving memory accesses across the exchange. */
531static __inline__ long __DEFAULT_FN_ATTRS
532_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
533 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
534 : "+r" (_Value), "+m" (*_Target) :: "memory");
535 return _Value;
536}
537#endif
538#if defined(__x86_64__)
/* 64-bit variant (only compiled when __x86_64__ is defined): swaps _Value
 * with *_Target using a locked XCHG and returns the value that was in
 * *_Target before the swap. The instruction is preceded by the raw prefix
 * byte 0xf2 (NOTE(review): this is the XACQUIRE hint encoding per the Intel
 * SDM - confirm). _Value is a read-write register operand, so it carries the
 * old memory contents once the asm completes. */
539static __inline__ __int64 __DEFAULT_FN_ATTRS
540_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
541 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
542 : "+r" (_Value), "+m" (*_Target) :: "memory");
543 return _Value;
544}
/* 64-bit variant (only compiled when __x86_64__ is defined): swaps _Value
 * with *_Target using a locked XCHG and returns the value that was in
 * *_Target before the swap. The instruction is preceded by the raw prefix
 * byte 0xf3 (NOTE(review): this is the XRELEASE hint encoding per the Intel
 * SDM - confirm). _Value is a read-write register operand, so it carries the
 * old memory contents once the asm completes. */
545static __inline__ __int64 __DEFAULT_FN_ATTRS
546_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
547 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
548 : "+r" (_Value), "+m" (*_Target) :: "memory");
549 return _Value;
550}
551#endif
552/*----------------------------------------------------------------------------*\
553|* Interlocked Compare Exchange HLE
554\*----------------------------------------------------------------------------*/
555#if defined(__i386__) || defined(__x86_64__)
/* Compare-and-swap on *_Destination using a locked CMPXCHG preceded by the
 * raw prefix byte 0xf2 (NOTE(review): this is the XACQUIRE hint encoding per
 * the Intel SDM - confirm). _Comparand is bound to the accumulator register
 * ("+a") as a read-write operand and _Exchange is a plain register input.
 * CMPXCHG stores _Exchange when the accumulator equals *_Destination and
 * otherwise loads *_Destination into the accumulator, so the returned
 * _Comparand is the value *_Destination held before the operation. */
556static __inline__ long __DEFAULT_FN_ATTRS
557_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
558 long _Exchange, long _Comparand) {
559 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
560 : "+a" (_Comparand), "+m" (*_Destination)
561 : "r" (_Exchange) : "memory");
562 return _Comparand;
563}
/* Compare-and-swap on *_Destination using a locked CMPXCHG preceded by the
 * raw prefix byte 0xf3 (NOTE(review): this is the XRELEASE hint encoding per
 * the Intel SDM - confirm). _Comparand is bound to the accumulator register
 * ("+a") as a read-write operand and _Exchange is a plain register input.
 * CMPXCHG stores _Exchange when the accumulator equals *_Destination and
 * otherwise loads *_Destination into the accumulator, so the returned
 * _Comparand is the value *_Destination held before the operation. */
564static __inline__ long __DEFAULT_FN_ATTRS
565_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
566 long _Exchange, long _Comparand) {
567 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
568 : "+a" (_Comparand), "+m" (*_Destination)
569 : "r" (_Exchange) : "memory");
570 return _Comparand;
571}
572#endif
573#if defined(__x86_64__)
/* 64-bit variant (only compiled when __x86_64__ is defined): compare-and-swap
 * on *_Destination using a locked CMPXCHG preceded by the raw prefix byte
 * 0xf2 (NOTE(review): this is the XACQUIRE hint encoding per the Intel SDM -
 * confirm). _Comparand lives in the accumulator ("+a") and is read-write, so
 * the returned value is what *_Destination held before the operation,
 * whether or not _Exchange was stored. */
574static __inline__ __int64 __DEFAULT_FN_ATTRS
575_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
576 __int64 _Exchange, __int64 _Comparand) {
577 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
578 : "+a" (_Comparand), "+m" (*_Destination)
579 : "r" (_Exchange) : "memory");
580 return _Comparand;
581}
/* 64-bit variant (only compiled when __x86_64__ is defined): compare-and-swap
 * on *_Destination using a locked CMPXCHG preceded by the raw prefix byte
 * 0xf3 (NOTE(review): this is the XRELEASE hint encoding per the Intel SDM -
 * confirm). _Comparand lives in the accumulator ("+a") and is read-write, so
 * the returned value is what *_Destination held before the operation,
 * whether or not _Exchange was stored. */
582static __inline__ __int64 __DEFAULT_FN_ATTRS
583_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
584 __int64 _Exchange, __int64 _Comparand) {
585 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
586 : "+a" (_Comparand), "+m" (*_Destination)
587 : "r" (_Exchange) : "memory");
588 return _Comparand;
589}
590#endif
591#ifdef __cplusplus
592}
593#endif
594
595#undef __DEFAULT_FN_ATTRS
596
597#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
598
599#endif /* __IMMINTRIN_H */
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
__asm__("swp %0, %1, [%2]" :"=r"(__v) :"r"(__x), "r"(__p) :"memory")
static __inline__ void short __D
Definition immintrin.h:346
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25