clang 20.0.0git
avx512vldqintrin.h
1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLDQINTRIN_H
15#define __AVX512VLDQINTRIN_H
16
17/* Define the default attributes for the functions in this file. */
18#define __DEFAULT_FN_ATTRS128 \
19 __attribute__((__always_inline__, __nodebug__, \
20 __target__("avx512vl,avx512dq,no-evex512"), \
21 __min_vector_width__(128)))
22#define __DEFAULT_FN_ATTRS256 \
23 __attribute__((__always_inline__, __nodebug__, \
24 __target__("avx512vl,avx512dq,no-evex512"), \
25 __min_vector_width__(256)))
26
27static __inline__ __m256i __DEFAULT_FN_ATTRS256
28_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
29 return (__m256i) ((__v4du) __A * (__v4du) __B);
30}
31
32static __inline__ __m256i __DEFAULT_FN_ATTRS256
33_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
34 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
35 (__v4di)_mm256_mullo_epi64(__A, __B),
36 (__v4di)__W);
37}
38
39static __inline__ __m256i __DEFAULT_FN_ATTRS256
40_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
41 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
42 (__v4di)_mm256_mullo_epi64(__A, __B),
43 (__v4di)_mm256_setzero_si256());
44}
45
46static __inline__ __m128i __DEFAULT_FN_ATTRS128
47_mm_mullo_epi64 (__m128i __A, __m128i __B) {
48 return (__m128i) ((__v2du) __A * (__v2du) __B);
49}
50
51static __inline__ __m128i __DEFAULT_FN_ATTRS128
52_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
53 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
54 (__v2di)_mm_mullo_epi64(__A, __B),
55 (__v2di)__W);
56}
57
58static __inline__ __m128i __DEFAULT_FN_ATTRS128
59_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
60 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
61 (__v2di)_mm_mullo_epi64(__A, __B),
62 (__v2di)_mm_setzero_si128());
63}
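
/* Editorial usage sketch (not part of the upstream header): merge- and
 * zero-masked 64-bit multiplies. Assumes <immintrin.h> is included and the
 * translation unit is compiled with -mavx512vl -mavx512dq; the values below
 * are illustrative only.
 *
 *   __m256i a   = _mm256_set1_epi64x(3);
 *   __m256i b   = _mm256_set1_epi64x(7);
 *   __m256i src = _mm256_set1_epi64x(-1);
 *   __m256i r0  = _mm256_mask_mullo_epi64(src, 0x5, a, b);  // lanes 0,2 = 21; lanes 1,3 keep -1
 *   __m256i r1  = _mm256_maskz_mullo_epi64(0x5, a, b);      // lanes 0,2 = 21; lanes 1,3 = 0
 */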
64
65static __inline__ __m256d __DEFAULT_FN_ATTRS256
66_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
67 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
68 (__v4df)_mm256_andnot_pd(__A, __B),
69 (__v4df)__W);
70}
71
72static __inline__ __m256d __DEFAULT_FN_ATTRS256
73_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
74 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
75 (__v4df)_mm256_andnot_pd(__A, __B),
76 (__v4df)_mm256_setzero_pd());
77}
78
79static __inline__ __m128d __DEFAULT_FN_ATTRS128
80_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
81 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
82 (__v2df)_mm_andnot_pd(__A, __B),
83 (__v2df)__W);
84}
85
86static __inline__ __m128d __DEFAULT_FN_ATTRS128
87_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
88 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
89 (__v2df)_mm_andnot_pd(__A, __B),
90 (__v2df)_mm_setzero_pd());
91}
92
93static __inline__ __m256 __DEFAULT_FN_ATTRS256
94_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
95 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
96 (__v8sf)_mm256_andnot_ps(__A, __B),
97 (__v8sf)__W);
98}
99
100static __inline__ __m256 __DEFAULT_FN_ATTRS256
101_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
102 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
103 (__v8sf)_mm256_andnot_ps(__A, __B),
104 (__v8sf)_mm256_setzero_ps());
105}
106
107static __inline__ __m128 __DEFAULT_FN_ATTRS128
108_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
109 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
110 (__v4sf)_mm_andnot_ps(__A, __B),
111 (__v4sf)__W);
112}
113
114static __inline__ __m128 __DEFAULT_FN_ATTRS128
115_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
116 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
117 (__v4sf)_mm_andnot_ps(__A, __B),
118 (__v4sf)_mm_setzero_ps());
119}
120
121static __inline__ __m256d __DEFAULT_FN_ATTRS256
122_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
123 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
124 (__v4df)_mm256_and_pd(__A, __B),
125 (__v4df)__W);
126}
127
128static __inline__ __m256d __DEFAULT_FN_ATTRS256
129_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
130 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
131 (__v4df)_mm256_and_pd(__A, __B),
132 (__v4df)_mm256_setzero_pd());
133}
134
135static __inline__ __m128d __DEFAULT_FN_ATTRS128
136_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
137 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
138 (__v2df)_mm_and_pd(__A, __B),
139 (__v2df)__W);
140}
141
142static __inline__ __m128d __DEFAULT_FN_ATTRS128
143_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
144 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
145 (__v2df)_mm_and_pd(__A, __B),
146 (__v2df)_mm_setzero_pd());
147}
148
149static __inline__ __m256 __DEFAULT_FN_ATTRS256
150_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
151 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
152 (__v8sf)_mm256_and_ps(__A, __B),
153 (__v8sf)__W);
154}
155
156static __inline__ __m256 __DEFAULT_FN_ATTRS256
157_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
158 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
159 (__v8sf)_mm256_and_ps(__A, __B),
160 (__v8sf)_mm256_setzero_ps());
161}
162
163static __inline__ __m128 __DEFAULT_FN_ATTRS128
164_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
165 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
166 (__v4sf)_mm_and_ps(__A, __B),
167 (__v4sf)__W);
168}
169
170static __inline__ __m128 __DEFAULT_FN_ATTRS128
171_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
172 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
173 (__v4sf)_mm_and_ps(__A, __B),
174 (__v4sf)_mm_setzero_ps());
175}
176
177static __inline__ __m256d __DEFAULT_FN_ATTRS256
178_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
179 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
180 (__v4df)_mm256_xor_pd(__A, __B),
181 (__v4df)__W);
182}
183
184static __inline__ __m256d __DEFAULT_FN_ATTRS256
185_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
186 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
187 (__v4df)_mm256_xor_pd(__A, __B),
188 (__v4df)_mm256_setzero_pd());
189}
190
191static __inline__ __m128d __DEFAULT_FN_ATTRS128
192_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
193 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
194 (__v2df)_mm_xor_pd(__A, __B),
195 (__v2df)__W);
196}
197
198static __inline__ __m128d __DEFAULT_FN_ATTRS128
199_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
200 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
201 (__v2df)_mm_xor_pd(__A, __B),
202 (__v2df)_mm_setzero_pd());
203}
204
205static __inline__ __m256 __DEFAULT_FN_ATTRS256
206_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
207 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
208 (__v8sf)_mm256_xor_ps(__A, __B),
209 (__v8sf)__W);
210}
211
212static __inline__ __m256 __DEFAULT_FN_ATTRS256
213_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
214 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
215 (__v8sf)_mm256_xor_ps(__A, __B),
216 (__v8sf)_mm256_setzero_ps());
217}
218
219static __inline__ __m128 __DEFAULT_FN_ATTRS128
220_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
221 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
222 (__v4sf)_mm_xor_ps(__A, __B),
223 (__v4sf)__W);
224}
225
226static __inline__ __m128 __DEFAULT_FN_ATTRS128
227_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
228 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
229 (__v4sf)_mm_xor_ps(__A, __B),
230 (__v4sf)_mm_setzero_ps());
231}
232
233static __inline__ __m256d __DEFAULT_FN_ATTRS256
234_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
235 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
236 (__v4df)_mm256_or_pd(__A, __B),
237 (__v4df)__W);
238}
239
240static __inline__ __m256d __DEFAULT_FN_ATTRS256
241_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
242 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
243 (__v4df)_mm256_or_pd(__A, __B),
244 (__v4df)_mm256_setzero_pd());
245}
246
247static __inline__ __m128d __DEFAULT_FN_ATTRS128
248_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
249 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
250 (__v2df)_mm_or_pd(__A, __B),
251 (__v2df)__W);
252}
253
254static __inline__ __m128d __DEFAULT_FN_ATTRS128
255_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
256 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
257 (__v2df)_mm_or_pd(__A, __B),
258 (__v2df)_mm_setzero_pd());
259}
260
261static __inline__ __m256 __DEFAULT_FN_ATTRS256
262_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
263 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
264 (__v8sf)_mm256_or_ps(__A, __B),
265 (__v8sf)__W);
266}
267
268static __inline__ __m256 __DEFAULT_FN_ATTRS256
269_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
270 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
271 (__v8sf)_mm256_or_ps(__A, __B),
272 (__v8sf)_mm256_setzero_ps());
273}
274
275static __inline__ __m128 __DEFAULT_FN_ATTRS128
276_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
277 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
278 (__v4sf)_mm_or_ps(__A, __B),
279 (__v4sf)__W);
280}
281
282static __inline__ __m128 __DEFAULT_FN_ATTRS128
283_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
284 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
285 (__v4sf)_mm_or_ps(__A, __B),
286 (__v4sf)_mm_setzero_ps());
287}
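
/* Editorial usage sketch: the masked logical forms pick, per lane, either the
 * bitwise result or the pass-through operand (mask_) / zero (maskz_). One
 * common use is a masked absolute value via andnot with the sign bit; the
 * values and mask below are illustrative only.
 *
 *   __m256d x    = _mm256_set_pd(-4.0, 3.0, -2.0, 1.0);
 *   __m256d sign = _mm256_set1_pd(-0.0);                    // only the sign bit set
 *   __m256d r    = _mm256_mask_andnot_pd(x, 0x0A, sign, x); // lanes 1,3 -> |x|; lanes 0,2 keep x
 */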
288
289static __inline__ __m128i __DEFAULT_FN_ATTRS128
290_mm_cvtpd_epi64 (__m128d __A) {
291 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
292 (__v2di) _mm_setzero_si128(),
293 (__mmask8) -1);
294}
295
296static __inline__ __m128i __DEFAULT_FN_ATTRS128
297_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
298 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
299 (__v2di) __W,
300 (__mmask8) __U);
301}
302
303static __inline__ __m128i __DEFAULT_FN_ATTRS128
304_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
305 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
306 (__v2di) _mm_setzero_si128(),
307 (__mmask8) __U);
308}
309
310static __inline__ __m256i __DEFAULT_FN_ATTRS256
311_mm256_cvtpd_epi64 (__m256d __A) {
312 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
313 (__v4di) _mm256_setzero_si256(),
314 (__mmask8) -1);
315}
316
317static __inline__ __m256i __DEFAULT_FN_ATTRS256
318_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
319 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
320 (__v4di) __W,
321 (__mmask8) __U);
322}
323
324static __inline__ __m256i __DEFAULT_FN_ATTRS256
325_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
326 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
327 (__v4di) _mm256_setzero_si256(),
328 (__mmask8) __U);
329}
330
331static __inline__ __m128i __DEFAULT_FN_ATTRS128
332_mm_cvtpd_epu64 (__m128d __A) {
333 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
334 (__v2di) _mm_setzero_si128(),
335 (__mmask8) -1);
336}
337
338static __inline__ __m128i __DEFAULT_FN_ATTRS128
339_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
340 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
341 (__v2di) __W,
342 (__mmask8) __U);
343}
344
345static __inline__ __m128i __DEFAULT_FN_ATTRS128
346_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
347 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
348 (__v2di) _mm_setzero_si128(),
349 (__mmask8) __U);
350}
351
352static __inline__ __m256i __DEFAULT_FN_ATTRS256
353_mm256_cvtpd_epu64 (__m256d __A) {
354 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
355 (__v4di) _mm256_setzero_si256(),
356 (__mmask8) -1);
357}
358
359static __inline__ __m256i __DEFAULT_FN_ATTRS256
360_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
361 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
362 (__v4di) __W,
363 (__mmask8) __U);
364}
365
366static __inline__ __m256i __DEFAULT_FN_ATTRS256
367_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
368 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
369 (__v4di) _mm256_setzero_si256(),
370 (__mmask8) __U);
371}
372
373static __inline__ __m128i __DEFAULT_FN_ATTRS128
374_mm_cvtps_epi64 (__m128 __A) {
375 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
376 (__v2di) _mm_setzero_si128(),
377 (__mmask8) -1);
378}
379
380static __inline__ __m128i __DEFAULT_FN_ATTRS128
381_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
382 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
383 (__v2di) __W,
384 (__mmask8) __U);
385}
386
387static __inline__ __m128i __DEFAULT_FN_ATTRS128
388_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
389 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
390 (__v2di) _mm_setzero_si128(),
391 (__mmask8) __U);
392}
393
394static __inline__ __m256i __DEFAULT_FN_ATTRS256
395_mm256_cvtps_epi64 (__m128 __A) {
396 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
397 (__v4di) _mm256_setzero_si256(),
398 (__mmask8) -1);
399}
400
401static __inline__ __m256i __DEFAULT_FN_ATTRS256
402_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
403 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
404 (__v4di) __W,
405 (__mmask8) __U);
406}
407
408static __inline__ __m256i __DEFAULT_FN_ATTRS256
409_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
410 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
411 (__v4di) _mm256_setzero_si256(),
412 (__mmask8) __U);
413}
414
415static __inline__ __m128i __DEFAULT_FN_ATTRS128
416_mm_cvtps_epu64 (__m128 __A) {
417 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
418 (__v2di) _mm_setzero_si128(),
419 (__mmask8) -1);
420}
421
422static __inline__ __m128i __DEFAULT_FN_ATTRS128
423_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
424 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
425 (__v2di) __W,
426 (__mmask8) __U);
427}
428
429static __inline__ __m128i __DEFAULT_FN_ATTRS128
430_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
431 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
432 (__v2di) _mm_setzero_si128(),
433 (__mmask8) __U);
434}
435
436static __inline__ __m256i __DEFAULT_FN_ATTRS256
437_mm256_cvtps_epu64 (__m128 __A) {
438 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
439 (__v4di) _mm256_setzero_si256(),
440 (__mmask8) -1);
441}
442
443static __inline__ __m256i __DEFAULT_FN_ATTRS256
444_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
445 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
446 (__v4di) __W,
447 (__mmask8) __U);
448}
449
450static __inline__ __m256i __DEFAULT_FN_ATTRS256
451_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
452 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
453 (__v4di) _mm256_setzero_si256(),
454 (__mmask8) __U);
455}
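
/* Editorial usage sketch: the cvt{pd,ps}_{epi64,epu64} family converts to
 * signed/unsigned 64-bit integers using the current MXCSR rounding mode; the
 * cvtt* variants defined further below truncate toward zero instead. The
 * 128-bit *_cvtps_* forms convert only the two low floats of the source.
 * Values are illustrative only.
 *
 *   __m256d d = _mm256_set_pd(2.5, -1.5, 1.25, 0.75);
 *   __m256i q = _mm256_cvtpd_epi64(d);            // rounds per MXCSR (default: nearest-even)
 *   __m256i z = _mm256_maskz_cvtpd_epi64(0x3, d); // only the two low lanes are converted
 */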
456
457static __inline__ __m128d __DEFAULT_FN_ATTRS128
458_mm_cvtepi64_pd (__m128i __A) {
459 return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
460}
461
462static __inline__ __m128d __DEFAULT_FN_ATTRS128
463_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
464 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
465 (__v2df)_mm_cvtepi64_pd(__A),
466 (__v2df)__W);
467}
468
469static __inline__ __m128d __DEFAULT_FN_ATTRS128
470_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
471 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
472 (__v2df)_mm_cvtepi64_pd(__A),
473 (__v2df)_mm_setzero_pd());
474}
475
476static __inline__ __m256d __DEFAULT_FN_ATTRS256
477_mm256_cvtepi64_pd (__m256i __A) {
478 return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
479}
480
481static __inline__ __m256d __DEFAULT_FN_ATTRS256
482_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
483 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
484 (__v4df)_mm256_cvtepi64_pd(__A),
485 (__v4df)__W);
486}
487
488static __inline__ __m256d __DEFAULT_FN_ATTRS256
489_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
490 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
491 (__v4df)_mm256_cvtepi64_pd(__A),
492 (__v4df)_mm256_setzero_pd());
493}
494
495static __inline__ __m128 __DEFAULT_FN_ATTRS128
496_mm_cvtepi64_ps (__m128i __A) {
497 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
498 (__v4sf) _mm_setzero_ps(),
499 (__mmask8) -1);
500}
501
502static __inline__ __m128 __DEFAULT_FN_ATTRS128
503_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
504 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
505 (__v4sf) __W,
506 (__mmask8) __U);
507}
508
509static __inline__ __m128 __DEFAULT_FN_ATTRS128
510_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
511 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
512 (__v4sf) _mm_setzero_ps(),
513 (__mmask8) __U);
514}
515
516static __inline__ __m128 __DEFAULT_FN_ATTRS256
517_mm256_cvtepi64_ps (__m256i __A) {
518 return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
519}
520
521static __inline__ __m128 __DEFAULT_FN_ATTRS256
522_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
523 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
524 (__v4sf)_mm256_cvtepi64_ps(__A),
525 (__v4sf)__W);
526}
527
528static __inline__ __m128 __DEFAULT_FN_ATTRS256
529_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
530 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
531 (__v4sf)_mm256_cvtepi64_ps(__A),
532 (__v4sf)_mm_setzero_ps());
533}
534
535static __inline__ __m128i __DEFAULT_FN_ATTRS128
536_mm_cvttpd_epi64 (__m128d __A) {
537 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
538 (__v2di) _mm_setzero_si128(),
539 (__mmask8) -1);
540}
541
542static __inline__ __m128i __DEFAULT_FN_ATTRS128
543_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
544 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
545 (__v2di) __W,
546 (__mmask8) __U);
547}
548
549static __inline__ __m128i __DEFAULT_FN_ATTRS128
550_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
551 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
552 (__v2di) _mm_setzero_si128(),
553 (__mmask8) __U);
554}
555
556static __inline__ __m256i __DEFAULT_FN_ATTRS256
557_mm256_cvttpd_epi64 (__m256d __A) {
558 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
559 (__v4di) _mm256_setzero_si256(),
560 (__mmask8) -1);
561}
562
563static __inline__ __m256i __DEFAULT_FN_ATTRS256
564_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
565 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
566 (__v4di) __W,
567 (__mmask8) __U);
568}
569
570static __inline__ __m256i __DEFAULT_FN_ATTRS256
571_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
572 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
573 (__v4di) _mm256_setzero_si256(),
574 (__mmask8) __U);
575}
576
577static __inline__ __m128i __DEFAULT_FN_ATTRS128
578_mm_cvttpd_epu64 (__m128d __A) {
579 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
580 (__v2di) _mm_setzero_si128(),
581 (__mmask8) -1);
582}
583
584static __inline__ __m128i __DEFAULT_FN_ATTRS128
585_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
586 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
587 (__v2di) __W,
588 (__mmask8) __U);
589}
590
591static __inline__ __m128i __DEFAULT_FN_ATTRS128
592_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
593 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
594 (__v2di) _mm_setzero_si128(),
595 (__mmask8) __U);
596}
597
598static __inline__ __m256i __DEFAULT_FN_ATTRS256
599_mm256_cvttpd_epu64 (__m256d __A) {
600 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
601 (__v4di) _mm256_setzero_si256(),
602 (__mmask8) -1);
603}
604
605static __inline__ __m256i __DEFAULT_FN_ATTRS256
606_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
607 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
608 (__v4di) __W,
609 (__mmask8) __U);
610}
611
612static __inline__ __m256i __DEFAULT_FN_ATTRS256
613_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
614 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
615 (__v4di) _mm256_setzero_si256(),
616 (__mmask8) __U);
617}
618
619static __inline__ __m128i __DEFAULT_FN_ATTRS128
620_mm_cvttps_epi64 (__m128 __A) {
621 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
622 (__v2di) _mm_setzero_si128(),
623 (__mmask8) -1);
624}
625
626static __inline__ __m128i __DEFAULT_FN_ATTRS128
627_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
628 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
629 (__v2di) __W,
630 (__mmask8) __U);
631}
632
633static __inline__ __m128i __DEFAULT_FN_ATTRS128
634_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
635 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
636 (__v2di) _mm_setzero_si128(),
637 (__mmask8) __U);
638}
639
640static __inline__ __m256i __DEFAULT_FN_ATTRS256
641_mm256_cvttps_epi64 (__m128 __A) {
642 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
643 (__v4di) _mm256_setzero_si256(),
644 (__mmask8) -1);
645}
646
647static __inline__ __m256i __DEFAULT_FN_ATTRS256
648_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
649 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
650 (__v4di) __W,
651 (__mmask8) __U);
652}
653
654static __inline__ __m256i __DEFAULT_FN_ATTRS256
655_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
656 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
657 (__v4di) _mm256_setzero_si256(),
658 (__mmask8) __U);
659}
660
661static __inline__ __m128i __DEFAULT_FN_ATTRS128
662_mm_cvttps_epu64 (__m128 __A) {
663 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
664 (__v2di) _mm_setzero_si128(),
665 (__mmask8) -1);
666}
667
668static __inline__ __m128i __DEFAULT_FN_ATTRS128
669_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
670 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
671 (__v2di) __W,
672 (__mmask8) __U);
673}
674
675static __inline__ __m128i __DEFAULT_FN_ATTRS128
676_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
677 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
678 (__v2di) _mm_setzero_si128(),
679 (__mmask8) __U);
680}
681
682static __inline__ __m256i __DEFAULT_FN_ATTRS256
683_mm256_cvttps_epu64 (__m128 __A) {
684 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
685 (__v4di) _mm256_setzero_si256(),
686 (__mmask8) -1);
687}
688
689static __inline__ __m256i __DEFAULT_FN_ATTRS256
690_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
691 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
692 (__v4di) __W,
693 (__mmask8) __U);
694}
695
696static __inline__ __m256i __DEFAULT_FN_ATTRS256
697_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
698 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
699 (__v4di) _mm256_setzero_si256(),
700 (__mmask8) __U);
701}
702
703static __inline__ __m128d __DEFAULT_FN_ATTRS128
704_mm_cvtepu64_pd (__m128i __A) {
705 return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
706}
707
708static __inline__ __m128d __DEFAULT_FN_ATTRS128
709_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
710 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
711 (__v2df)_mm_cvtepu64_pd(__A),
712 (__v2df)__W);
713}
714
715static __inline__ __m128d __DEFAULT_FN_ATTRS128
716_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
717 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
718 (__v2df)_mm_cvtepu64_pd(__A),
719 (__v2df)_mm_setzero_pd());
720}
721
722static __inline__ __m256d __DEFAULT_FN_ATTRS256
723_mm256_cvtepu64_pd (__m256i __A) {
724 return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
725}
726
727static __inline__ __m256d __DEFAULT_FN_ATTRS256
728_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
729 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
730 (__v4df)_mm256_cvtepu64_pd(__A),
731 (__v4df)__W);
732}
733
734static __inline__ __m256d __DEFAULT_FN_ATTRS256
735_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
736 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
737 (__v4df)_mm256_cvtepu64_pd(__A),
738 (__v4df)_mm256_setzero_pd());
739}
740
741static __inline__ __m128 __DEFAULT_FN_ATTRS128
742_mm_cvtepu64_ps (__m128i __A) {
743 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
744 (__v4sf) _mm_setzero_ps(),
745 (__mmask8) -1);
746}
747
748static __inline__ __m128 __DEFAULT_FN_ATTRS128
749_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
750 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
751 (__v4sf) __W,
752 (__mmask8) __U);
753}
754
755static __inline__ __m128 __DEFAULT_FN_ATTRS128
756_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
757 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
758 (__v4sf) _mm_setzero_ps(),
759 (__mmask8) __U);
760}
761
762static __inline__ __m128 __DEFAULT_FN_ATTRS256
763_mm256_cvtepu64_ps (__m256i __A) {
764 return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
765}
766
767static __inline__ __m128 __DEFAULT_FN_ATTRS256
768_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
769 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
770 (__v4sf)_mm256_cvtepu64_ps(__A),
771 (__v4sf)__W);
772}
773
774static __inline__ __m128 __DEFAULT_FN_ATTRS256
775_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
776 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
777 (__v4sf)_mm256_cvtepu64_ps(__A),
778 (__v4sf)_mm_setzero_ps());
779}
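
/* Editorial usage sketch: signed/unsigned 64-bit elements convert to double
 * lane-for-lane, while conversion to float narrows a 256-bit source into a
 * 128-bit result (and a 128-bit source fills only the two low floats).
 * Values are illustrative only.
 *
 *   __m256i q  = _mm256_set_epi64x(4, 3, 2, 1);    // elements {1, 2, 3, 4}
 *   __m256d pd = _mm256_cvtepi64_pd(q);            // {1.0, 2.0, 3.0, 4.0}
 *   __m128  ps = _mm256_cvtepi64_ps(q);            // four floats in a 128-bit vector
 *   __m256d ud = _mm256_cvtepu64_pd(q);            // same elements, treated as unsigned
 */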
780
781#define _mm_range_pd(A, B, C) \
782 ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
783 (__v2df)(__m128d)(B), (int)(C), \
784 (__v2df)_mm_setzero_pd(), \
785 (__mmask8)-1))
786
787#define _mm_mask_range_pd(W, U, A, B, C) \
788 ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
789 (__v2df)(__m128d)(B), (int)(C), \
790 (__v2df)(__m128d)(W), \
791 (__mmask8)(U)))
792
793#define _mm_maskz_range_pd(U, A, B, C) \
794 ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
795 (__v2df)(__m128d)(B), (int)(C), \
796 (__v2df)_mm_setzero_pd(), \
797 (__mmask8)(U)))
798
799#define _mm256_range_pd(A, B, C) \
800 ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
801 (__v4df)(__m256d)(B), (int)(C), \
802 (__v4df)_mm256_setzero_pd(), \
803 (__mmask8)-1))
804
805#define _mm256_mask_range_pd(W, U, A, B, C) \
806 ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
807 (__v4df)(__m256d)(B), (int)(C), \
808 (__v4df)(__m256d)(W), \
809 (__mmask8)(U)))
810
811#define _mm256_maskz_range_pd(U, A, B, C) \
812 ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
813 (__v4df)(__m256d)(B), (int)(C), \
814 (__v4df)_mm256_setzero_pd(), \
815 (__mmask8)(U)))
816
817#define _mm_range_ps(A, B, C) \
818 ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
819 (__v4sf)(__m128)(B), (int)(C), \
820 (__v4sf)_mm_setzero_ps(), \
821 (__mmask8)-1))
822
823#define _mm_mask_range_ps(W, U, A, B, C) \
824 ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
825 (__v4sf)(__m128)(B), (int)(C), \
826 (__v4sf)(__m128)(W), (__mmask8)(U)))
827
828#define _mm_maskz_range_ps(U, A, B, C) \
829 ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
830 (__v4sf)(__m128)(B), (int)(C), \
831 (__v4sf)_mm_setzero_ps(), \
832 (__mmask8)(U)))
833
834#define _mm256_range_ps(A, B, C) \
835 ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
836 (__v8sf)(__m256)(B), (int)(C), \
837 (__v8sf)_mm256_setzero_ps(), \
838 (__mmask8)-1))
839
840#define _mm256_mask_range_ps(W, U, A, B, C) \
841 ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
842 (__v8sf)(__m256)(B), (int)(C), \
843 (__v8sf)(__m256)(W), (__mmask8)(U)))
844
845#define _mm256_maskz_range_ps(U, A, B, C) \
846 ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
847 (__v8sf)(__m256)(B), (int)(C), \
848 (__v8sf)_mm256_setzero_ps(), \
849 (__mmask8)(U)))
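
/* Editorial usage sketch: the range macros wrap VRANGEPD/VRANGEPS. The low two
 * immediate bits select min, max, absolute-min or absolute-max and the next
 * two bits select the sign control; see Intel's VRANGE documentation for the
 * exact encoding. The immediate 0x01 below is intended as "max, sign from the
 * first source" and, like the inputs, is illustrative only.
 *
 *   __m128d a = _mm_set_pd(-3.0, 2.0);
 *   __m128d b = _mm_set_pd(-1.0, 5.0);
 *   __m128d r = _mm_range_pd(a, b, 0x01);
 */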
850
851#define _mm_reduce_pd(A, B) \
852 ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
853 (__v2df)_mm_setzero_pd(), \
854 (__mmask8)-1))
855
856#define _mm_mask_reduce_pd(W, U, A, B) \
857 ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
858 (__v2df)(__m128d)(W), \
859 (__mmask8)(U)))
860
861#define _mm_maskz_reduce_pd(U, A, B) \
862 ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
863 (__v2df)_mm_setzero_pd(), \
864 (__mmask8)(U)))
865
866#define _mm256_reduce_pd(A, B) \
867 ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
868 (__v4df)_mm256_setzero_pd(), \
869 (__mmask8)-1))
870
871#define _mm256_mask_reduce_pd(W, U, A, B) \
872 ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
873 (__v4df)(__m256d)(W), \
874 (__mmask8)(U)))
875
876#define _mm256_maskz_reduce_pd(U, A, B) \
877 ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
878 (__v4df)_mm256_setzero_pd(), \
879 (__mmask8)(U)))
880
881#define _mm_reduce_ps(A, B) \
882 ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
883 (__v4sf)_mm_setzero_ps(), \
884 (__mmask8)-1))
885
886#define _mm_mask_reduce_ps(W, U, A, B) \
887 ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
888 (__v4sf)(__m128)(W), \
889 (__mmask8)(U)))
890
891#define _mm_maskz_reduce_ps(U, A, B) \
892 ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
893 (__v4sf)_mm_setzero_ps(), \
894 (__mmask8)(U)))
895
896#define _mm256_reduce_ps(A, B) \
897 ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
898 (__v8sf)_mm256_setzero_ps(), \
899 (__mmask8)-1))
900
901#define _mm256_mask_reduce_ps(W, U, A, B) \
902 ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
903 (__v8sf)(__m256)(W), \
904 (__mmask8)(U)))
905
906#define _mm256_maskz_reduce_ps(U, A, B) \
907 ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
908 (__v8sf)_mm256_setzero_ps(), \
909 (__mmask8)(U)))
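
/* Editorial usage sketch: the reduce macros wrap VREDUCEPD/VREDUCEPS, which
 * return the remainder left after rounding each element to a fixed-point grid.
 * The immediate packs the number of fraction bits to keep (upper nibble) and a
 * rounding-mode selector (low bits); see Intel's VREDUCE documentation for the
 * exact layout. With an immediate of 0 the result is roughly the fractional
 * part remaining after rounding to the nearest integer. Illustrative only.
 *
 *   __m128d a    = _mm_set_pd(1.25, -2.75);
 *   __m128d frac = _mm_reduce_pd(a, 0);
 */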
910
911static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
912_mm_movepi32_mask (__m128i __A)
913{
914 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
915}
916
917static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
918_mm256_movepi32_mask (__m256i __A)
919{
920 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
921}
922
923static __inline__ __m128i __DEFAULT_FN_ATTRS128
924_mm_movm_epi32 (__mmask8 __A)
925{
926 return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
927}
928
929static __inline__ __m256i __DEFAULT_FN_ATTRS256
930_mm256_movm_epi32 (__mmask8 __A)
931{
932 return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
933}
934
935static __inline__ __m128i __DEFAULT_FN_ATTRS128
936_mm_movm_epi64 (__mmask8 __A)
937{
938 return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
939}
940
941static __inline__ __m256i __DEFAULT_FN_ATTRS256
942_mm256_movm_epi64 (__mmask8 __A)
943{
944 return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
945}
946
947static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
948_mm_movepi64_mask (__m128i __A)
949{
950 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
951}
952
953static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
954_mm256_movepi64_mask (__m256i __A)
955{
956 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
957}
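
/* Editorial usage sketch: movepi*_mask packs the sign (most significant) bit
 * of each element into a compact __mmask8, and movm_epi* expands a mask back
 * into elements of all-ones or all-zeros. Values are illustrative only.
 *
 *   __m128i v  = _mm_set_epi32(-1, 2, -3, 4);   // elements {4, -3, 2, -1}
 *   __mmask8 m = _mm_movepi32_mask(v);          // 0b1010: elements 1 and 3 are negative
 *   __m128i  e = _mm_movm_epi32(m);             // per-element {0, -1, 0, -1}
 */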
958
959static __inline__ __m256 __DEFAULT_FN_ATTRS256
960_mm256_broadcast_f32x2 (__m128 __A)
961{
962 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
963 0, 1, 0, 1, 0, 1, 0, 1);
964}
965
966static __inline__ __m256 __DEFAULT_FN_ATTRS256
967_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
968{
969 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
970 (__v8sf)_mm256_broadcast_f32x2(__A),
971 (__v8sf)__O);
972}
973
974static __inline__ __m256 __DEFAULT_FN_ATTRS256
975_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
976{
977 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
978 (__v8sf)_mm256_broadcast_f32x2(__A),
979 (__v8sf)_mm256_setzero_ps());
980}
981
982static __inline__ __m256d __DEFAULT_FN_ATTRS256
983_mm256_broadcast_f64x2(__m128d __A)
984{
985 return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
986 0, 1, 0, 1);
987}
988
989static __inline__ __m256d __DEFAULT_FN_ATTRS256
990_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
991{
992 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
993 (__v4df)_mm256_broadcast_f64x2(__A),
994 (__v4df)__O);
995}
996
997static __inline__ __m256d __DEFAULT_FN_ATTRS256
998_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
999{
1000 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1001 (__v4df)_mm256_broadcast_f64x2(__A),
1002 (__v4df)_mm256_setzero_pd());
1003}
1004
1005static __inline__ __m128i __DEFAULT_FN_ATTRS128
1006_mm_broadcast_i32x2 (__m128i __A)
1007{
1008 return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1009 0, 1, 0, 1);
1010}
1011
1012static __inline__ __m128i __DEFAULT_FN_ATTRS128
1013_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1014{
1015 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1016 (__v4si)_mm_broadcast_i32x2(__A),
1017 (__v4si)__O);
1018}
1019
1020static __inline__ __m128i __DEFAULT_FN_ATTRS128
1021_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1022{
1023 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1024 (__v4si)_mm_broadcast_i32x2(__A),
1025 (__v4si)_mm_setzero_si128());
1026}
1027
1028static __inline__ __m256i __DEFAULT_FN_ATTRS256
1029_mm256_broadcast_i32x2 (__m128i __A)
1030{
1031 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1032 0, 1, 0, 1, 0, 1, 0, 1);
1033}
1034
1035static __inline__ __m256i __DEFAULT_FN_ATTRS256
1036_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1037{
1038 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1039 (__v8si)_mm256_broadcast_i32x2(__A),
1040 (__v8si)__O);
1041}
1042
1043static __inline__ __m256i __DEFAULT_FN_ATTRS256
1044_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1045{
1046 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1047 (__v8si)_mm256_broadcast_i32x2(__A),
1048 (__v8si)_mm256_setzero_si256());
1049}
1050
1051static __inline__ __m256i __DEFAULT_FN_ATTRS256
1052_mm256_broadcast_i64x2(__m128i __A)
1053{
1054 return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1055 0, 1, 0, 1);
1056}
1057
1058static __inline__ __m256i __DEFAULT_FN_ATTRS256
1059_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
1060{
1061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1062 (__v4di)_mm256_broadcast_i64x2(__A),
1063 (__v4di)__O);
1064}
1065
1066static __inline__ __m256i __DEFAULT_FN_ATTRS256
1067_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1068{
1069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1070 (__v4di)_mm256_broadcast_i64x2(__A),
1071 (__v4di)_mm256_setzero_si256());
1072}
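
/* Editorial usage sketch: broadcast_i32x2 repeats the two lowest 32-bit
 * elements of the source across the destination, while broadcast_{i,f}64x2
 * repeats the low 128-bit pair. Values are illustrative only.
 *
 *   __m128i src = _mm_set_epi32(9, 8, 7, 6);               // elements {6, 7, 8, 9}
 *   __m256i b   = _mm256_broadcast_i32x2(src);             // {6, 7, 6, 7, 6, 7, 6, 7}
 *   __m256i bz  = _mm256_maskz_broadcast_i32x2(0x0F, src); // upper four elements zeroed
 */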
1073
1074#define _mm256_extractf64x2_pd(A, imm) \
1075 ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
1076 (int)(imm), \
1077 (__v2df)_mm_undefined_pd(), \
1078 (__mmask8)-1))
1079
1080#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
1081 ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
1082 (int)(imm), \
1083 (__v2df)(__m128d)(W), \
1084 (__mmask8)(U)))
1085
1086#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
1087 ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
1088 (int)(imm), \
1089 (__v2df)_mm_setzero_pd(), \
1090 (__mmask8)(U)))
1091
1092#define _mm256_extracti64x2_epi64(A, imm) \
1093 ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
1094 (int)(imm), \
1095 (__v2di)_mm_undefined_si128(), \
1096 (__mmask8)-1))
1097
1098#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
1099 ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
1100 (int)(imm), \
1101 (__v2di)(__m128i)(W), \
1102 (__mmask8)(U)))
1103
1104#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
1105 ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
1106 (int)(imm), \
1107 (__v2di)_mm_setzero_si128(), \
1108 (__mmask8)(U)))
1109
1110#define _mm256_insertf64x2(A, B, imm) \
1111 ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
1112 (__v2df)(__m128d)(B), (int)(imm)))
1113
1114#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
1115 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
1116 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
1117 (__v4df)(__m256d)(W)))
1118
1119#define _mm256_maskz_insertf64x2(U, A, B, imm) \
1120 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
1121 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
1122 (__v4df)_mm256_setzero_pd()))
1123
1124#define _mm256_inserti64x2(A, B, imm) \
1125 ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
1126 (__v2di)(__m128i)(B), (int)(imm)))
1127
1128#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
1129 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
1130 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
1131 (__v4di)(__m256i)(W)))
1132
1133#define _mm256_maskz_inserti64x2(U, A, B, imm) \
1134 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
1135 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
1136 (__v4di)_mm256_setzero_si256()))
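
/* Editorial usage sketch: the extract/insert macros move one 128-bit lane out
 * of or into a 256-bit vector; the immediate selects lane 0 (low) or 1 (high)
 * and must be a compile-time constant. Values are illustrative only.
 *
 *   __m256i v    = _mm256_set_epi64x(4, 3, 2, 1);    // elements {1, 2, 3, 4}
 *   __m128i high = _mm256_extracti64x2_epi64(v, 1);  // {3, 4}
 *   __m256i w    = _mm256_inserti64x2(v, high, 0);   // {3, 4, 3, 4}
 */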
1137
1138#define _mm_mask_fpclass_pd_mask(U, A, imm) \
1139 ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
1140 (__mmask8)(U)))
1141
1142#define _mm_fpclass_pd_mask(A, imm) \
1143 ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
1144 (__mmask8)-1))
1145
1146#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
1147 ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
1148 (__mmask8)(U)))
1149
1150#define _mm256_fpclass_pd_mask(A, imm) \
1151 ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
1152 (__mmask8)-1))
1153
1154#define _mm_mask_fpclass_ps_mask(U, A, imm) \
1155 ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
1156 (__mmask8)(U)))
1157
1158#define _mm_fpclass_ps_mask(A, imm) \
1159 ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
1160 (__mmask8)-1))
1161
1162#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
1163 ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
1164 (__mmask8)(U)))
1165
1166#define _mm256_fpclass_ps_mask(A, imm) \
1167 ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
1168 (__mmask8)-1))
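
/* Editorial usage sketch: fpclass tests each element against the categories
 * selected by the immediate. The category bits are commonly documented as
 * 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +Inf, 0x10 -Inf, 0x20 denormal,
 * 0x40 finite negative and 0x80 SNaN; OR them to test several categories at
 * once. The constant below ("NaN or infinity") is illustrative only.
 *
 *   __m128d x  = _mm_set_pd(INFINITY, 0.0);   // INFINITY needs <math.h>
 *   __mmask8 m = _mm_fpclass_pd_mask(x, 0x01 | 0x08 | 0x10 | 0x80); // bit 1 set: element 1 is +Inf
 */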
1169
1170#undef __DEFAULT_FN_ATTRS128
1171#undef __DEFAULT_FN_ATTRS256
1172
1173#endif