clang 20.0.0git
immintrin.h
Go to the documentation of this file.
1/*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#define __IMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <x86gprintrin.h>
18
19#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
20#include <mmintrin.h>
21#endif
22
23#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
24#include <xmmintrin.h>
25#endif
26
27#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
28#include <emmintrin.h>
29#endif
30
31#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
32#include <pmmintrin.h>
33#endif
34
35#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
36#include <tmmintrin.h>
37#endif
38
39#if !defined(__SCE__) || __has_feature(modules) || \
40 (defined(__SSE4_2__) || defined(__SSE4_1__))
41#include <smmintrin.h>
42#endif
43
44#if !defined(__SCE__) || __has_feature(modules) || \
45 (defined(__AES__) || defined(__PCLMUL__))
46#include <wmmintrin.h>
47#endif
48
49#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
50#include <clflushoptintrin.h>
51#endif
52
53#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
54#include <clwbintrin.h>
55#endif
56
57#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
58#include <avxintrin.h>
59#endif
60
61#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
62#include <avx2intrin.h>
63#endif
64
65#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
66#include <f16cintrin.h>
67#endif
68
69/* No feature check desired due to internal checks */
70#include <bmiintrin.h>
71
72#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
73#include <bmi2intrin.h>
74#endif
75
76#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
77#include <lzcntintrin.h>
78#endif
79
80#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
81#include <popcntintrin.h>
82#endif
83
84#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
85#include <fmaintrin.h>
86#endif
87
88#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
89#include <avx512fintrin.h>
90#endif
91
92#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
93#include <avx512vlintrin.h>
94#endif
95
96#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
97#include <avx512bwintrin.h>
98#endif
99
100#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
101#include <avx512bitalgintrin.h>
102#endif
103
104#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
105#include <avx512cdintrin.h>
106#endif
107
108#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
110#endif
111
112#if !defined(__SCE__) || __has_feature(modules) || \
113 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
115#endif
116
117#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
118#include <avx512vnniintrin.h>
119#endif
120
121#if !defined(__SCE__) || __has_feature(modules) || \
122 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
123#include <avx512vlvnniintrin.h>
124#endif
125
126#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
127#include <avxvnniintrin.h>
128#endif
129
130#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
131#include <avx512dqintrin.h>
132#endif
133
134#if !defined(__SCE__) || __has_feature(modules) || \
135 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
136#include <avx512vlbitalgintrin.h>
137#endif
138
139#if !defined(__SCE__) || __has_feature(modules) || \
140 (defined(__AVX512VL__) && defined(__AVX512BW__))
141#include <avx512vlbwintrin.h>
142#endif
143
144#if !defined(__SCE__) || __has_feature(modules) || \
145 (defined(__AVX512VL__) && defined(__AVX512CD__))
146#include <avx512vlcdintrin.h>
147#endif
148
149#if !defined(__SCE__) || __has_feature(modules) || \
150 (defined(__AVX512VL__) && defined(__AVX512DQ__))
151#include <avx512vldqintrin.h>
152#endif
153
154#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
155#include <avx512ifmaintrin.h>
156#endif
157
158#if !defined(__SCE__) || __has_feature(modules) || \
159 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
160#include <avx512ifmavlintrin.h>
161#endif
162
163#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
164#include <avxifmaintrin.h>
165#endif
166
167#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
168#include <avx512vbmiintrin.h>
169#endif
170
171#if !defined(__SCE__) || __has_feature(modules) || \
172 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
173#include <avx512vbmivlintrin.h>
174#endif
175
176#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
177#include <avx512vbmi2intrin.h>
178#endif
179
180#if !defined(__SCE__) || __has_feature(modules) || \
181 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
182#include <avx512vlvbmi2intrin.h>
183#endif
184
185#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
186#include <avx512fp16intrin.h>
187#endif
188
189#if !defined(__SCE__) || __has_feature(modules) || \
190 (defined(__AVX512VL__) && defined(__AVX512FP16__))
191#include <avx512vlfp16intrin.h>
192#endif
193
194#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
195#include <avx512bf16intrin.h>
196#endif
197
198#if !defined(__SCE__) || __has_feature(modules) || \
199 (defined(__AVX512VL__) && defined(__AVX512BF16__))
200#include <avx512vlbf16intrin.h>
201#endif
202
203#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
204#include <pkuintrin.h>
205#endif
206
207#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
208#include <vpclmulqdqintrin.h>
209#endif
210
211#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
212#include <vaesintrin.h>
213#endif
214
215#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
216#include <gfniintrin.h>
217#endif
218
219#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
220#include <avxvnniint8intrin.h>
221#endif
222
223#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
224#include <avxneconvertintrin.h>
225#endif
226
227#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
228#include <sha512intrin.h>
229#endif
230
231#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
232#include <sm3intrin.h>
233#endif
234
235#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
236#include <sm4intrin.h>
237#endif
238
239#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
240#include <avxvnniint16intrin.h>
241#endif
242
243#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
/// Reads the IA32_TSC_AUX model-specific register (MSR 0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit value held in the MSR.
static __inline__ unsigned int
    __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
255#endif // __RDPID__
256
257#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
/// Requests a 16-bit random number from the hardware generator and stores
/// it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to the 16-bit memory location that receives the random value.
/// \returns 1 if a value was successfully generated, 0 otherwise.
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  int __ok = (int)__builtin_ia32_rdrand16_step(__p);
  return __ok;
}
272
/// Requests a 32-bit random number from the hardware generator and stores
/// it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to the 32-bit memory location that receives the random value.
/// \returns 1 if a value was successfully generated, 0 otherwise.
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  int __ok = (int)__builtin_ia32_rdrand32_step(__p);
  return __ok;
}
287
/// Requests a 64-bit random number from the hardware generator and stores
/// it through \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to the 64-bit memory location that receives the random value.
/// \returns 1 if a value was successfully generated, 0 otherwise.
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
#ifndef __x86_64__
  // Without the 64-bit form of rdrand, build the result from two 32-bit
  // draws. Both draws are always issued; the result is only reported as
  // valid when both of them succeeded.
  unsigned int __low, __high;
  unsigned int __ok_low = __builtin_ia32_rdrand32_step(&__low);
  unsigned int __ok_high = __builtin_ia32_rdrand32_step(&__high);
  if (!(__ok_low && __ok_high)) {
    *__p = 0;
    return 0;
  }
  *__p = (unsigned long long)__low | ((unsigned long long)__high << 32);
  return 1;
#else
  return (int)__builtin_ia32_rdrand64_step(__p);
#endif
}
317#endif /* __RDRND__ */
318
319#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
320#ifdef __x86_64__
/// Reads the low half of the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
333
/// Reads the full FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
346
/// Reads the low half of the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
359
/// Reads the full GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
372
/// Writes a 32-bit value to the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
386
/// Writes a 64-bit value to the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
400
/// Writes a 32-bit value to the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
414
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
428
429#endif
430#endif /* __FSGSBASE__ */
431
432#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
433
434/* The structs used below are to force the load/store to be unaligned. This
435 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
436 * tbaa metadata from being generated based on the struct and the type of the
437 * field inside of it.
438 */
439
/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* Packed wrapper: the access carries no alignment assumption, and
   * __may_alias__ keeps it from being typed for alias analysis. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i16 *__src = (const struct __loadu_i16 *)__P;
  return (short)__builtin_bswap16(__src->__v);
}
456
/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* Packed wrapper: the access carries no alignment assumption, and
   * __may_alias__ keeps it from being typed for alias analysis. */
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i16 *__dst = (struct __storeu_i16 *)__P;
  __dst->__v = __builtin_bswap16((unsigned short)__D);
}
474
/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* Packed wrapper: the access carries no alignment assumption, and
   * __may_alias__ keeps it from being typed for alias analysis. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i32 *__src = (const struct __loadu_i32 *)__P;
  return (int)__builtin_bswap32(__src->__v);
}
491
/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* Packed wrapper: the access carries no alignment assumption, and
   * __may_alias__ keeps it from being typed for alias analysis. */
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i32 *__dst = (struct __storeu_i32 *)__P;
  __dst->__v = __builtin_bswap32((unsigned int)__D);
}
509
510#ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* Packed wrapper: the access carries no alignment assumption, and
   * __may_alias__ keeps it from being typed for alias analysis. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i64 *__src = (const struct __loadu_i64 *)__P;
  return (long long)__builtin_bswap64(__src->__v);
}
527
/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* Packed wrapper: the access carries no alignment assumption, and
   * __may_alias__ keeps it from being typed for alias analysis. */
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i64 *__dst = (struct __storeu_i64 *)__P;
  __dst->__v = __builtin_bswap64((unsigned long long)__D);
}
545#endif
546#endif /* __MOVBE__ */
547
548#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
549#include <rtmintrin.h>
550#include <xtestintrin.h>
551#endif
552
553#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
554#include <shaintrin.h>
555#endif
556
557#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
558#include <fxsrintrin.h>
559#endif
560
561/* No feature check desired due to internal MSC_VER checks */
562#include <xsaveintrin.h>
563
564#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
565#include <xsaveoptintrin.h>
566#endif
567
568#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
569#include <xsavecintrin.h>
570#endif
571
572#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
573#include <xsavesintrin.h>
574#endif
575
576#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
577#include <cetintrin.h>
578#endif
579
580/* Intrinsics inside adcintrin.h are available at all times. */
581#include <adcintrin.h>
582
583#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
584#include <adxintrin.h>
585#endif
586
587#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
588#include <rdseedintrin.h>
589#endif
590
591#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
592#include <wbnoinvdintrin.h>
593#endif
594
595#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
596#include <cldemoteintrin.h>
597#endif
598
599#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
600#include <waitpkgintrin.h>
601#endif
602
603#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
604 defined(__MOVDIR64B__)
605#include <movdirintrin.h>
606#endif
607
608#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
609#include <pconfigintrin.h>
610#endif
611
612#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
613#include <sgxintrin.h>
614#endif
615
616#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
617#include <ptwriteintrin.h>
618#endif
619
620#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
621#include <invpcidintrin.h>
622#endif
623#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
624#include <amxfp16intrin.h>
625#endif
626
627#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
628 defined(__WIDEKL__)
629#include <keylockerintrin.h>
630#endif
631
632#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
633 defined(__AMX_INT8__) || defined(__AMX_BF16__)
634#include <amxintrin.h>
635#endif
636
637#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
638#include <amxcomplexintrin.h>
639#endif
640
641#if !defined(__SCE__) || __has_feature(modules) || \
642 defined(__AVX512VP2INTERSECT__)
644#endif
645
646#if !defined(__SCE__) || __has_feature(modules) || \
647 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
649#endif
650
651#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
652#include <avx10_2minmaxintrin.h>
653#include <avx10_2niintrin.h>
654#include <avx10_2satcvtintrin.h>
655#endif
656
657#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
659#include <avx10_2_512niintrin.h>
661#endif
662
663#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
664#include <enqcmdintrin.h>
665#endif
666
667#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
668#include <serializeintrin.h>
669#endif
670
671#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
672#include <tsxldtrkintrin.h>
673#endif
674
675#if defined(_MSC_VER) && __has_extension(gnu_asm)
676/* Define the default attributes for these intrinsics */
677#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
678#ifdef __cplusplus
679extern "C" {
680#endif
681/*----------------------------------------------------------------------------*\
682|* Interlocked Exchange HLE
683\*----------------------------------------------------------------------------*/
684#if defined(__i386__) || defined(__x86_64__)
685static __inline__ long __DEFAULT_FN_ATTRS
686_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
687 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
688 : "+r" (_Value), "+m" (*_Target) :: "memory");
689 return _Value;
690}
691static __inline__ long __DEFAULT_FN_ATTRS
692_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
693 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
694 : "+r" (_Value), "+m" (*_Target) :: "memory");
695 return _Value;
696}
697#endif
698#if defined(__x86_64__)
699static __inline__ __int64 __DEFAULT_FN_ATTRS
700_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
701 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
702 : "+r" (_Value), "+m" (*_Target) :: "memory");
703 return _Value;
704}
705static __inline__ __int64 __DEFAULT_FN_ATTRS
706_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
707 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
708 : "+r" (_Value), "+m" (*_Target) :: "memory");
709 return _Value;
710}
711#endif
712/*----------------------------------------------------------------------------*\
713|* Interlocked Compare Exchange HLE
714\*----------------------------------------------------------------------------*/
715#if defined(__i386__) || defined(__x86_64__)
716static __inline__ long __DEFAULT_FN_ATTRS
717_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
718 long _Exchange, long _Comparand) {
719 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
720 : "+a" (_Comparand), "+m" (*_Destination)
721 : "r" (_Exchange) : "memory");
722 return _Comparand;
723}
724static __inline__ long __DEFAULT_FN_ATTRS
725_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
726 long _Exchange, long _Comparand) {
727 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
728 : "+a" (_Comparand), "+m" (*_Destination)
729 : "r" (_Exchange) : "memory");
730 return _Comparand;
731}
732#endif
733#if defined(__x86_64__)
734static __inline__ __int64 __DEFAULT_FN_ATTRS
735_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
736 __int64 _Exchange, __int64 _Comparand) {
737 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
738 : "+a" (_Comparand), "+m" (*_Destination)
739 : "r" (_Exchange) : "memory");
740 return _Comparand;
741}
742static __inline__ __int64 __DEFAULT_FN_ATTRS
743_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
744 __int64 _Exchange, __int64 _Comparand) {
745 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
746 : "+a" (_Comparand), "+m" (*_Destination)
747 : "r" (_Exchange) : "memory");
748 return _Comparand;
749}
750#endif
751#ifdef __cplusplus
752}
753#endif
754
755#undef __DEFAULT_FN_ATTRS
756
757#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
758
759#endif /* __IMMINTRIN_H */
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:88
static __inline__ void short __D
Definition: immintrin.h:468
struct __storeu_i16 *__P __v
Definition: immintrin.h:472
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25