clang 19.0.0git
immintrin.h
Go to the documentation of this file.
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <x86gprintrin.h>
18
19#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
20#include <mmintrin.h>
21#endif
22
23#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
24#include <xmmintrin.h>
25#endif
26
27#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
28#include <emmintrin.h>
29#endif
30
31#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
32#include <pmmintrin.h>
33#endif
34
35#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
36#include <tmmintrin.h>
37#endif
38
39#if !defined(__SCE__) || __has_feature(modules) || \
40 (defined(__SSE4_2__) || defined(__SSE4_1__))
41#include <smmintrin.h>
42#endif
43
44#if !defined(__SCE__) || __has_feature(modules) || \
45 (defined(__AES__) || defined(__PCLMUL__))
46#include <wmmintrin.h>
47#endif
48
49#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
50#include <clflushoptintrin.h>
51#endif
52
53#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
54#include <clwbintrin.h>
55#endif
56
57#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
58#include <avxintrin.h>
59#endif
60
61#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
62#include <avx2intrin.h>
63#endif
64
65#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
66#include <f16cintrin.h>
67#endif
68
69/* No feature check desired due to internal checks */
70#include <bmiintrin.h>
71
72#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
73#include <bmi2intrin.h>
74#endif
75
76#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
77#include <lzcntintrin.h>
78#endif
79
80#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
81#include <popcntintrin.h>
82#endif
83
84#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
85#include <fmaintrin.h>
86#endif
87
88#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
89#include <avx512fintrin.h>
90#endif
91
92#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
93#include <avx512vlintrin.h>
94#endif
95
96#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
97#include <avx512bwintrin.h>
98#endif
99
100#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
101#include <avx512bitalgintrin.h>
102#endif
103
104#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
105#include <avx512cdintrin.h>
106#endif
107
108#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
110#endif
111
112#if !defined(__SCE__) || __has_feature(modules) || \
113 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
115#endif
116
117#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
118#include <avx512vnniintrin.h>
119#endif
120
121#if !defined(__SCE__) || __has_feature(modules) || \
122 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
123#include <avx512vlvnniintrin.h>
124#endif
125
126#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
127#include <avxvnniintrin.h>
128#endif
129
130#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
131#include <avx512dqintrin.h>
132#endif
133
134#if !defined(__SCE__) || __has_feature(modules) || \
135 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
136#include <avx512vlbitalgintrin.h>
137#endif
138
139#if !defined(__SCE__) || __has_feature(modules) || \
140 (defined(__AVX512VL__) && defined(__AVX512BW__))
141#include <avx512vlbwintrin.h>
142#endif
143
144#if !defined(__SCE__) || __has_feature(modules) || \
145 (defined(__AVX512VL__) && defined(__AVX512CD__))
146#include <avx512vlcdintrin.h>
147#endif
148
149#if !defined(__SCE__) || __has_feature(modules) || \
150 (defined(__AVX512VL__) && defined(__AVX512DQ__))
151#include <avx512vldqintrin.h>
152#endif
153
154#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512ER__)
155#include <avx512erintrin.h>
156#endif
157
158#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
159#include <avx512ifmaintrin.h>
160#endif
161
162#if !defined(__SCE__) || __has_feature(modules) || \
163 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
164#include <avx512ifmavlintrin.h>
165#endif
166
167#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
168#include <avxifmaintrin.h>
169#endif
170
171#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
172#include <avx512vbmiintrin.h>
173#endif
174
175#if !defined(__SCE__) || __has_feature(modules) || \
176 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
177#include <avx512vbmivlintrin.h>
178#endif
179
180#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
181#include <avx512vbmi2intrin.h>
182#endif
183
184#if !defined(__SCE__) || __has_feature(modules) || \
185 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
186#include <avx512vlvbmi2intrin.h>
187#endif
188
189#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512PF__)
190#include <avx512pfintrin.h>
191#endif
192
193#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
194#include <avx512fp16intrin.h>
195#endif
196
197#if !defined(__SCE__) || __has_feature(modules) || \
198 (defined(__AVX512VL__) && defined(__AVX512FP16__))
199#include <avx512vlfp16intrin.h>
200#endif
201
202#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
203#include <avx512bf16intrin.h>
204#endif
205
206#if !defined(__SCE__) || __has_feature(modules) || \
207 (defined(__AVX512VL__) && defined(__AVX512BF16__))
208#include <avx512vlbf16intrin.h>
209#endif
210
211#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
212#include <pkuintrin.h>
213#endif
214
215#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
216#include <vpclmulqdqintrin.h>
217#endif
218
219#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
220#include <vaesintrin.h>
221#endif
222
223#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
224#include <gfniintrin.h>
225#endif
226
227#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
228#include <avxvnniint8intrin.h>
229#endif
230
231#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
232#include <avxneconvertintrin.h>
233#endif
234
235#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
236#include <sha512intrin.h>
237#endif
238
239#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
240#include <sm3intrin.h>
241#endif
242
243#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
244#include <sm4intrin.h>
245#endif
246
247#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
248#include <avxvnniint16intrin.h>
249#endif
250
251#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
263#endif // __RDPID__
264
265#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
/// Returns a 16-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 16-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return (int)__builtin_ia32_rdrand16_step(__p);
}
280
/// Returns a 32-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 32-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return (int)__builtin_ia32_rdrand32_step(__p);
}
295
/// Returns a 64-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// On 32-bit targets the value is assembled from two 32-bit RDRAND results;
/// the call succeeds only if both succeed, and on failure zero is stored to
/// \a __p.
///
/// \param __p
///    A pointer to a 64-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // We need to emulate the functionality of 64-bit rdrand with 2 32-bit
  // rdrand instructions.
  unsigned int __lo, __hi;
  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
    // First draw supplies the low half, second draw the high half.
    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    return 1;
  } else {
    // Either draw failed: report failure and store zero.
    *__p = 0;
    return 0;
  }
#endif
}
325#endif /* __RDRND__ */
326
327#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
328#ifdef __x86_64__
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
341
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
354
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
367
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
380
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
394
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
408
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
422
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
436
437#endif
438#endif /* __FSGSBASE__ */
439
440#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
441
442/* The structs used below are to force the load/store to be unaligned. This
443 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
444 * tbaa metadata from being generated based on the struct and the type of the
445 * field inside of it.
446 */
447
/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load. No alignment is required.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* Packed + may_alias wrapper: forces an unaligned, alias-safe load. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
464
/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. No alignment is
///    required.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* Packed + may_alias wrapper: forces an unaligned, alias-safe store. */
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
482
/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load. No alignment is required.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* Packed + may_alias wrapper: forces an unaligned, alias-safe load. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
499
/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. No alignment is
///    required.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* Packed + may_alias wrapper: forces an unaligned, alias-safe store. */
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
}
517
518#ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load. No alignment is required.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* Packed + may_alias wrapper: forces an unaligned, alias-safe load. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
535
/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. No alignment is
///    required.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* Packed + may_alias wrapper: forces an unaligned, alias-safe store. */
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
}
553#endif
#endif /* __MOVBE__ */
555
556#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
557#include <rtmintrin.h>
558#include <xtestintrin.h>
559#endif
560
561#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
562#include <shaintrin.h>
563#endif
564
565#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
566#include <fxsrintrin.h>
567#endif
568
569/* No feature check desired due to internal MSC_VER checks */
570#include <xsaveintrin.h>
571
572#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
573#include <xsaveoptintrin.h>
574#endif
575
576#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
577#include <xsavecintrin.h>
578#endif
579
580#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
581#include <xsavesintrin.h>
582#endif
583
584#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
585#include <cetintrin.h>
586#endif
587
588/* Intrinsics inside adcintrin.h are available at all times. */
589#include <adcintrin.h>
590
591#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
592#include <adxintrin.h>
593#endif
594
595#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
596#include <rdseedintrin.h>
597#endif
598
599#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
600#include <wbnoinvdintrin.h>
601#endif
602
603#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
604#include <cldemoteintrin.h>
605#endif
606
607#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
608#include <waitpkgintrin.h>
609#endif
610
611#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
612 defined(__MOVDIR64B__)
613#include <movdirintrin.h>
614#endif
615
616#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
617#include <pconfigintrin.h>
618#endif
619
620#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
621#include <sgxintrin.h>
622#endif
623
624#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
625#include <ptwriteintrin.h>
626#endif
627
628#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
629#include <invpcidintrin.h>
630#endif
631#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
632#include <amxfp16intrin.h>
633#endif
634
635#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
636 defined(__WIDEKL__)
637#include <keylockerintrin.h>
638#endif
639
640#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
641 defined(__AMX_INT8__) || defined(__AMX_BF16__)
642#include <amxintrin.h>
643#endif
644
645#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
646#include <amxcomplexintrin.h>
647#endif
648
649#if !defined(__SCE__) || __has_feature(modules) || \
650 defined(__AVX512VP2INTERSECT__)
652#endif
653
654#if !defined(__SCE__) || __has_feature(modules) || \
655 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
657#endif
658
659#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
660#include <enqcmdintrin.h>
661#endif
662
663#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
664#include <serializeintrin.h>
665#endif
666
667#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
668#include <tsxldtrkintrin.h>
669#endif
670
671#if defined(_MSC_VER) && __has_extension(gnu_asm)
672/* Define the default attributes for these intrinsics */
673#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
674#ifdef __cplusplus
675extern "C" {
676#endif
677/*----------------------------------------------------------------------------*\
678|* Interlocked Exchange HLE
679\*----------------------------------------------------------------------------*/
680#if defined(__i386__) || defined(__x86_64__)
681static __inline__ long __DEFAULT_FN_ATTRS
682_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
683 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
684 : "+r" (_Value), "+m" (*_Target) :: "memory");
685 return _Value;
686}
687static __inline__ long __DEFAULT_FN_ATTRS
688_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
689 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
690 : "+r" (_Value), "+m" (*_Target) :: "memory");
691 return _Value;
692}
693#endif
694#if defined(__x86_64__)
695static __inline__ __int64 __DEFAULT_FN_ATTRS
696_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
697 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
698 : "+r" (_Value), "+m" (*_Target) :: "memory");
699 return _Value;
700}
701static __inline__ __int64 __DEFAULT_FN_ATTRS
702_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
703 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
704 : "+r" (_Value), "+m" (*_Target) :: "memory");
705 return _Value;
706}
707#endif
708/*----------------------------------------------------------------------------*\
709|* Interlocked Compare Exchange HLE
710\*----------------------------------------------------------------------------*/
711#if defined(__i386__) || defined(__x86_64__)
712static __inline__ long __DEFAULT_FN_ATTRS
713_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
714 long _Exchange, long _Comparand) {
715 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
716 : "+a" (_Comparand), "+m" (*_Destination)
717 : "r" (_Exchange) : "memory");
718 return _Comparand;
719}
720static __inline__ long __DEFAULT_FN_ATTRS
721_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
722 long _Exchange, long _Comparand) {
723 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
724 : "+a" (_Comparand), "+m" (*_Destination)
725 : "r" (_Exchange) : "memory");
726 return _Comparand;
727}
728#endif
729#if defined(__x86_64__)
730static __inline__ __int64 __DEFAULT_FN_ATTRS
731_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
732 __int64 _Exchange, __int64 _Comparand) {
733 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
734 : "+a" (_Comparand), "+m" (*_Destination)
735 : "r" (_Exchange) : "memory");
736 return _Comparand;
737}
738static __inline__ __int64 __DEFAULT_FN_ATTRS
739_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
740 __int64 _Exchange, __int64 _Comparand) {
741 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
742 : "+a" (_Comparand), "+m" (*_Destination)
743 : "r" (_Exchange) : "memory");
744 return _Comparand;
745}
746#endif
747#ifdef __cplusplus
748}
749#endif
750
751#undef __DEFAULT_FN_ATTRS
752
753#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
754
755#endif /* __IMMINTRIN_H */
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:80
static __inline__ void short __D
Definition: immintrin.h:476
struct __storeu_i16 *__P __v
Definition: immintrin.h:480
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25