X86.cpp (clang 22.0.0git)
1//===---------- X86.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "clang/Basic/TargetBuiltins.h"
15#include "llvm/IR/InlineAsm.h"
16#include "llvm/IR/IntrinsicsX86.h"
17#include "llvm/TargetParser/X86TargetParser.h"
18
19using namespace clang;
20using namespace CodeGen;
21using namespace llvm;
22
23static std::optional<CodeGenFunction::MSVCIntrin>
24translateX86ToMsvcIntrin(unsigned BuiltinID) {
25 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
26 switch (BuiltinID) {
27 default:
28 return std::nullopt;
29 case clang::X86::BI_BitScanForward:
30 case clang::X86::BI_BitScanForward64:
31 return MSVCIntrin::_BitScanForward;
32 case clang::X86::BI_BitScanReverse:
33 case clang::X86::BI_BitScanReverse64:
34 return MSVCIntrin::_BitScanReverse;
35 case clang::X86::BI_InterlockedAnd64:
36 return MSVCIntrin::_InterlockedAnd;
37 case clang::X86::BI_InterlockedCompareExchange128:
38 return MSVCIntrin::_InterlockedCompareExchange128;
39 case clang::X86::BI_InterlockedExchange64:
40 return MSVCIntrin::_InterlockedExchange;
41 case clang::X86::BI_InterlockedExchangeAdd64:
42 return MSVCIntrin::_InterlockedExchangeAdd;
43 case clang::X86::BI_InterlockedExchangeSub64:
44 return MSVCIntrin::_InterlockedExchangeSub;
45 case clang::X86::BI_InterlockedOr64:
46 return MSVCIntrin::_InterlockedOr;
47 case clang::X86::BI_InterlockedXor64:
48 return MSVCIntrin::_InterlockedXor;
49 case clang::X86::BI_InterlockedDecrement64:
50 return MSVCIntrin::_InterlockedDecrement;
51 case clang::X86::BI_InterlockedIncrement64:
52 return MSVCIntrin::_InterlockedIncrement;
53 }
54 llvm_unreachable("must return from switch");
55}
56
57// Convert the mask from an integer type to a vector of i1.
58static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
59 unsigned NumElts) {
60
61 auto *MaskTy = llvm::FixedVectorType::get(
62 CGF.Builder.getInt1Ty(),
63 cast<IntegerType>(Mask->getType())->getBitWidth());
64 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
65
66 // If we have fewer than 8 elements, then the starting mask was an i8 and
67 // we need to extract down to the right number of elements.
68 if (NumElts < 8) {
69 int Indices[4];
70 for (unsigned i = 0; i != NumElts; ++i)
71 Indices[i] = i;
72 MaskVec = CGF.Builder.CreateShuffleVector(
73 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
74 }
75 return MaskVec;
76}
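// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// User-level view of the helper above: an AVX-512VL masked store of a 4-element
// vector takes an 8-bit __mmask8; getMaskVecValue bitcasts it to <8 x i1> and
// shuffles out lanes 0..3 before the masked store is emitted.
#include <immintrin.h>
static inline void editor_example_masked_store(double *P, __mmask8 K, __m256d V) {
  _mm256_mask_storeu_pd(P, K, V); // only bits 0..3 of K are consumed
}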
77
78/// Emit rounding for the value \p X according to bits 0 and 1 of
79/// \p RoundingControl.
80static Value *emitX86RoundImmediate(CodeGenFunction &CGF, Value *X,
81 unsigned RoundingControl) {
82 unsigned RoundingMask = 0b11;
83 unsigned RoundingMode = RoundingControl & RoundingMask;
84
85 Intrinsic::ID ID = Intrinsic::not_intrinsic;
86 LLVMContext &Ctx = CGF.CGM.getLLVMContext();
87 if (CGF.Builder.getIsFPConstrained()) {
88
89 Value *ExceptMode =
90 MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore"));
91
92 switch (RoundingMode) {
93 case 0b00:
94 ID = Intrinsic::experimental_constrained_roundeven;
95 break;
96 case 0b01:
97 ID = Intrinsic::experimental_constrained_floor;
98 break;
99 case 0b10:
100 ID = Intrinsic::experimental_constrained_ceil;
101 break;
102 case 0b11:
103 ID = Intrinsic::experimental_constrained_trunc;
104 break;
105 default:
106 llvm_unreachable("Invalid rounding mode");
107 }
108
109 Function *F = CGF.CGM.getIntrinsic(ID, X->getType());
110 return CGF.Builder.CreateCall(F, {X, ExceptMode});
111 }
112
113 switch (RoundingMode) {
114 case 0b00:
115 ID = Intrinsic::roundeven;
116 break;
117 case 0b01:
118 ID = Intrinsic::floor;
119 break;
120 case 0b10:
121 ID = Intrinsic::ceil;
122 break;
123 case 0b11:
124 ID = Intrinsic::trunc;
125 break;
126 default:
127 llvm_unreachable("Invalid rounding mode");
128 }
129
130 Function *F = CGF.CGM.getIntrinsic(ID, X->getType());
131 return CGF.Builder.CreateCall(F, {X});
132}
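// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// Bits 0..1 of the rounding control select the generic intrinsic: 00 roundeven,
// 01 floor, 10 ceil, 11 trunc. Assuming the SSE4.1 round builtins reach this
// helper when _MM_FROUND_NO_EXC is set and MXCSR rounding is not requested, the
// call below lowers to llvm.floor.v4f32.
#include <immintrin.h>
static inline __m128 editor_example_round_down(__m128 X) {
  return _mm_round_ps(X, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
}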
133
134static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
135 Align Alignment) {
136 Value *Ptr = Ops[0];
137
138 Value *MaskVec = getMaskVecValue(
139 CGF, Ops[2],
140 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
141
142 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
143}
144
145static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
146 Align Alignment) {
147 llvm::Type *Ty = Ops[1]->getType();
148 Value *Ptr = Ops[0];
149
150 Value *MaskVec = getMaskVecValue(
151 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
152
153 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
154}
155
156static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
157 ArrayRef<Value *> Ops) {
158 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
159 Value *Ptr = Ops[0];
160
161 Value *MaskVec = getMaskVecValue(
162 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
163
164 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
165 ResultTy);
166 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
167}
168
169static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
170 ArrayRef<Value *> Ops,
171 bool IsCompress) {
172 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
173
174 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
175
176 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
177 : Intrinsic::x86_avx512_mask_expand;
178 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
179 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
180}
181
182static Value *EmitX86CompressStore(CodeGenFunction &CGF,
183 ArrayRef<Value *> Ops) {
184 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
185 Value *Ptr = Ops[0];
186
187 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
188
189 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
190 ResultTy);
191 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
192}
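// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// The compress-store builtins handled above map onto the generic
// llvm.masked.compressstore intrinsic; from the user's side:
#include <immintrin.h>
static inline void editor_example_compress_store(double *P, __mmask8 K, __m512d V) {
  _mm512_mask_compressstoreu_pd(P, K, V);
}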
193
194static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
195 ArrayRef<Value *> Ops,
196 bool InvertLHS = false) {
197 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
198 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
199 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
200
201 if (InvertLHS)
202 LHS = CGF.Builder.CreateNot(LHS);
203
204 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
205 Ops[0]->getType());
206}
207
208static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
209 Value *Amt, bool IsRight) {
210 llvm::Type *Ty = Op0->getType();
211
212 // Amount may be scalar immediate, in which case create a splat vector.
213 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
214 // we only care about the lowest log2 bits anyway.
215 if (Amt->getType() != Ty) {
216 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
217 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
218 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
219 }
220
221 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
222 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
223 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
224}
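// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// A rotate is a funnel shift whose two data operands are the same value, so
// (assuming the AVX-512 rotate builtins are routed through this helper) the call
// below becomes llvm.fshl.v8i32 with a splatted shift amount of 5.
#include <immintrin.h>
static inline __m256i editor_example_rotl(__m256i V) {
  return _mm256_rol_epi32(V, 5);
}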
225
226static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
227 bool IsSigned) {
228 Value *Op0 = Ops[0];
229 Value *Op1 = Ops[1];
230 llvm::Type *Ty = Op0->getType();
231 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
232
233 CmpInst::Predicate Pred;
234 switch (Imm) {
235 case 0x0:
236 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
237 break;
238 case 0x1:
239 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
240 break;
241 case 0x2:
242 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
243 break;
244 case 0x3:
245 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
246 break;
247 case 0x4:
248 Pred = ICmpInst::ICMP_EQ;
249 break;
250 case 0x5:
251 Pred = ICmpInst::ICMP_NE;
252 break;
253 case 0x6:
254 return llvm::Constant::getNullValue(Ty); // FALSE
255 case 0x7:
256 return llvm::Constant::getAllOnesValue(Ty); // TRUE
257 default:
258 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
259 }
260
261 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
262 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
263 return Res;
264}
265
266static Value *EmitX86Select(CodeGenFunction &CGF,
267 Value *Mask, Value *Op0, Value *Op1) {
268
269 // If the mask is all ones just return first argument.
270 if (const auto *C = dyn_cast<Constant>(Mask))
271 if (C->isAllOnesValue())
272 return Op0;
273
274 Mask = getMaskVecValue(
275 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
276
277 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
278}
279
280static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
281 Value *Mask, Value *Op0, Value *Op1) {
282 // If the mask is all ones just return first argument.
283 if (const auto *C = dyn_cast<Constant>(Mask))
284 if (C->isAllOnesValue())
285 return Op0;
286
287 auto *MaskTy = llvm::FixedVectorType::get(
288 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
289 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
290 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
291 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
292}
293
294static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
295 unsigned NumElts, Value *MaskIn) {
296 if (MaskIn) {
297 const auto *C = dyn_cast<Constant>(MaskIn);
298 if (!C || !C->isAllOnesValue())
299 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
300 }
301
302 if (NumElts < 8) {
303 int Indices[8];
304 for (unsigned i = 0; i != NumElts; ++i)
305 Indices[i] = i;
306 for (unsigned i = NumElts; i != 8; ++i)
307 Indices[i] = i % NumElts + NumElts;
308 Cmp = CGF.Builder.CreateShuffleVector(
309 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
310 }
311
312 return CGF.Builder.CreateBitCast(Cmp,
313 IntegerType::get(CGF.getLLVMContext(),
314 std::max(NumElts, 8U)));
315}
316
317static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
318 bool Signed, ArrayRef<Value *> Ops) {
319 assert((Ops.size() == 2 || Ops.size() == 4) &&
320 "Unexpected number of arguments");
321 unsigned NumElts =
322 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
323 Value *Cmp;
324
325 if (CC == 3) {
326 Cmp = Constant::getNullValue(
327 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
328 } else if (CC == 7) {
329 Cmp = Constant::getAllOnesValue(
330 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
331 } else {
332 ICmpInst::Predicate Pred;
333 switch (CC) {
334 default: llvm_unreachable("Unknown condition code");
335 case 0: Pred = ICmpInst::ICMP_EQ; break;
336 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
337 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
338 case 4: Pred = ICmpInst::ICMP_NE; break;
339 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
340 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
341 }
342 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
343 }
344
345 Value *MaskIn = nullptr;
346 if (Ops.size() == 4)
347 MaskIn = Ops[3];
348
349 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
350}
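// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// The CC values above follow the AVX-512 integer compare predicates (0 eq, 1 lt,
// 2 le, 4 ne, 5 ge, 6 gt; 3 and 7 are constant false/true). A signed less-than
// compare that produces a 16-bit mask, for example:
#include <immintrin.h>
static inline __mmask16 editor_example_cmplt(__m512i A, __m512i B) {
  return _mm512_cmplt_epi32_mask(A, B); // CC == 1, Signed == true
}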
351
352static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
353 Value *Zero = Constant::getNullValue(In->getType());
354 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
355}
356
357static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
358 ArrayRef<Value *> Ops, bool IsSigned) {
359 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
360 llvm::Type *Ty = Ops[1]->getType();
361
362 Value *Res;
363 if (Rnd != 4) {
364 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
365 : Intrinsic::x86_avx512_uitofp_round;
366 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
367 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
368 } else {
369 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
370 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
371 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
372 }
373
374 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
375}
376
377// Lowers X86 FMA intrinsics to IR.
378static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
379 ArrayRef<Value *> Ops, unsigned BuiltinID,
380 bool IsAddSub) {
381
382 bool Subtract = false;
383 Intrinsic::ID IID = Intrinsic::not_intrinsic;
384 switch (BuiltinID) {
385 default: break;
386 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
387 Subtract = true;
388 [[fallthrough]];
389 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
390 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
391 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
392 IID = Intrinsic::x86_avx512fp16_vfmadd_ph_512;
393 break;
394 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
395 Subtract = true;
396 [[fallthrough]];
397 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
398 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
399 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
400 IID = Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
401 break;
402 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
403 Subtract = true;
404 [[fallthrough]];
405 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
406 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
407 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
408 IID = Intrinsic::x86_avx512_vfmadd_ps_512; break;
409 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
410 Subtract = true;
411 [[fallthrough]];
412 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
413 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
414 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
415 IID = Intrinsic::x86_avx512_vfmadd_pd_512; break;
416 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
417 Subtract = true;
418 [[fallthrough]];
419 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
420 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
421 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
422 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
423 break;
424 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
425 Subtract = true;
426 [[fallthrough]];
427 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
428 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
429 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
430 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
431 break;
432 }
433
434 Value *A = Ops[0];
435 Value *B = Ops[1];
436 Value *C = Ops[2];
437
438 if (Subtract)
439 C = CGF.Builder.CreateFNeg(C);
440
441 Value *Res;
442
443 // Only lower to generic IR for _MM_FROUND_CUR_DIRECTION/4 (no rounding).
444 if (IID != Intrinsic::not_intrinsic &&
445 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
446 IsAddSub)) {
447 Function *Intr = CGF.CGM.getIntrinsic(IID);
448 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
449 } else {
450 llvm::Type *Ty = A->getType();
451 Function *FMA;
452 if (CGF.Builder.getIsFPConstrained()) {
453 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
454 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
455 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
456 } else {
457 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
458 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
459 }
460 }
461
462 // Handle any required masking.
463 Value *MaskFalseVal = nullptr;
464 switch (BuiltinID) {
465 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
466 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
467 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
468 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
469 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
470 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
471 MaskFalseVal = Ops[0];
472 break;
473 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
474 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
475 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
476 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
477 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
478 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
479 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
480 break;
481 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
482 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
483 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
484 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
485 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
486 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
487 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
488 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
489 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
490 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
491 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
492 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
493 MaskFalseVal = Ops[2];
494 break;
495 }
496
497 if (MaskFalseVal)
498 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
499
500 return Res;
501}
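// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// With the default rounding immediate (_MM_FROUND_CUR_DIRECTION == 4) the masked
// FMA builtins above lower to the generic llvm.fma intrinsic followed by a
// select against the passthru operand:
#include <immintrin.h>
static inline __m512 editor_example_masked_fma(__m512 A, __mmask16 K, __m512 B,
                                               __m512 C) {
  return _mm512_mask_fmadd_ps(A, K, B, C); // fma + select, MaskFalseVal == A
}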
502
503static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
504 MutableArrayRef<Value *> Ops, Value *Upper,
505 bool ZeroMask = false, unsigned PTIdx = 0,
506 bool NegAcc = false) {
507 unsigned Rnd = 4;
508 if (Ops.size() > 4)
509 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
510
511 if (NegAcc)
512 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
513
514 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
515 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
516 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
517 Value *Res;
518 if (Rnd != 4) {
519 Intrinsic::ID IID;
520
521 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
522 case 16:
523 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
524 break;
525 case 32:
526 IID = Intrinsic::x86_avx512_vfmadd_f32;
527 break;
528 case 64:
529 IID = Intrinsic::x86_avx512_vfmadd_f64;
530 break;
531 default:
532 llvm_unreachable("Unexpected size");
533 }
534 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
535 {Ops[0], Ops[1], Ops[2], Ops[4]});
536 } else if (CGF.Builder.getIsFPConstrained()) {
537 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
538 Function *FMA = CGF.CGM.getIntrinsic(
539 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
540 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
541 } else {
542 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
543 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
544 }
545 // If we have more than 3 arguments, we need to do masking.
546 if (Ops.size() > 3) {
547 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
548 : Ops[PTIdx];
549
550 // If we negated the accumulator and it is the PassThru value, we need to
551 // bypass the negate. Conveniently, Upper should be the same thing in this
552 // case.
553 if (NegAcc && PTIdx == 2)
554 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
555
556 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
557 }
558 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
559}
560
561static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
562 ArrayRef<Value *> Ops) {
563 llvm::Type *Ty = Ops[0]->getType();
564 // Arguments have a vXi32 type so cast to vXi64.
565 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
566 Ty->getPrimitiveSizeInBits() / 64);
567 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
568 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
569
570 if (IsSigned) {
571 // Shift left then arithmetic shift right.
572 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
573 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
574 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
575 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
576 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
577 } else {
578 // Clear the upper bits.
579 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
580 LHS = CGF.Builder.CreateAnd(LHS, Mask);
581 RHS = CGF.Builder.CreateAnd(RHS, Mask);
582 }
583
584 return CGF.Builder.CreateMul(LHS, RHS);
585}
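// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// PMULDQ/PMULUDQ multiply the even 32-bit elements into 64-bit results; the
// helper above expresses that as a plain 64-bit multiply after sign- or
// zero-extending the inputs in place. The unsigned form from the user's side:
#include <immintrin.h>
static inline __m128i editor_example_widening_mul(__m128i A, __m128i B) {
  return _mm_mul_epu32(A, B); // elements 0 and 2, widened to i64
}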
586
587// Emit a masked pternlog intrinsic. This only exists because the header has to
588// use a macro and we aren't able to pass the input argument to a pternlog
589// builtin and a select builtin without evaluating it twice.
590static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
591 ArrayRef<Value *> Ops) {
592 llvm::Type *Ty = Ops[0]->getType();
593
594 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
595 unsigned EltWidth = Ty->getScalarSizeInBits();
596 Intrinsic::ID IID;
597 if (VecWidth == 128 && EltWidth == 32)
598 IID = Intrinsic::x86_avx512_pternlog_d_128;
599 else if (VecWidth == 256 && EltWidth == 32)
600 IID = Intrinsic::x86_avx512_pternlog_d_256;
601 else if (VecWidth == 512 && EltWidth == 32)
602 IID = Intrinsic::x86_avx512_pternlog_d_512;
603 else if (VecWidth == 128 && EltWidth == 64)
604 IID = Intrinsic::x86_avx512_pternlog_q_128;
605 else if (VecWidth == 256 && EltWidth == 64)
606 IID = Intrinsic::x86_avx512_pternlog_q_256;
607 else if (VecWidth == 512 && EltWidth == 64)
608 IID = Intrinsic::x86_avx512_pternlog_q_512;
609 else
610 llvm_unreachable("Unexpected intrinsic");
611
612 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
613 Ops.drop_back());
614 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
615 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
616}
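// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// The masked ternary-logic intrinsics pass the same source both as a pternlog
// operand and as the select passthru, which is why the header macro defers to
// this helper instead of composing two builtins. imm8 encodes the 3-input truth
// table.
#include <immintrin.h>
static inline __m512i editor_example_masked_ternlog(__m512i Src, __mmask16 K,
                                                    __m512i B, __m512i C) {
  return _mm512_mask_ternarylogic_epi32(Src, K, B, C, 0xCA);
}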
617
618static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
619 llvm::Type *DstTy) {
620 unsigned NumberOfElements =
621 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
622 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
623 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
624}
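// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// The cvtmask2* builtins handled later in this file go through this helper: each
// mask bit is sign-extended to a full element, e.g.
#include <immintrin.h>
static inline __m128i editor_example_mask_to_bytes(__mmask16 K) {
  return _mm_movm_epi8(K); // each bit of K becomes 0x00 or 0xFF
}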
625
626Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
627 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
628 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
629 return EmitX86CpuIs(CPUStr);
630}
631
632// Convert F16 halves to floats.
633static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
634 ArrayRef<Value *> Ops,
635 llvm::Type *DstTy) {
636 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
637 "Unknown cvtph2ps intrinsic");
638
639 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
640 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
641 Function *F =
642 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
643 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
644 }
645
646 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
647 Value *Src = Ops[0];
648
649 // Extract the subvector.
650 if (NumDstElts !=
651 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
652 assert(NumDstElts == 4 && "Unexpected vector size");
653 Src = CGF.Builder.CreateShuffleVector(Src, {0, 1, 2, 3});
654 }
655
656 // Bitcast from vXi16 to vXf16.
657 auto *HalfTy = llvm::FixedVectorType::get(
658 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
659 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
660
661 // Perform the fp-extension.
662 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
663
664 if (Ops.size() >= 3)
665 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
666 return Res;
667}
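// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// Assuming the F16C conversion builtins are routed through this helper, a
// 4-element _mm_cvtph_ps keeps the low half of the <8 x i16> source, bitcasts it
// to <4 x half>, and fpext's it to <4 x float>.
#include <immintrin.h>
static inline __m128 editor_example_cvtph(__m128i HalfBits) {
  return _mm_cvtph_ps(HalfBits);
}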
668
669Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
670
671 llvm::Type *Int32Ty = Builder.getInt32Ty();
672
673 // Matching the struct layout from the compiler-rt/libgcc structure that is
674 // filled in:
675 // unsigned int __cpu_vendor;
676 // unsigned int __cpu_type;
677 // unsigned int __cpu_subtype;
678 // unsigned int __cpu_features[1];
679 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
680 llvm::ArrayType::get(Int32Ty, 1));
681
682 // Grab the global __cpu_model.
683 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
684 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
685
686 // Calculate the index needed to access the correct field based on the
687 // range. Also adjust the expected value.
688 auto [Index, Value] = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
689#define X86_VENDOR(ENUM, STRING) \
690 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
691#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
692 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
693#define X86_CPU_TYPE(ENUM, STR) \
694 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
695#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
696 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
697#define X86_CPU_SUBTYPE(ENUM, STR) \
698 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
699#include "llvm/TargetParser/X86TargetParser.def"
700 .Default({0, 0});
701 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
702
703 // Grab the appropriate field from __cpu_model.
704 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
705 ConstantInt::get(Int32Ty, Index)};
706 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
707 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
708 CharUnits::fromQuantity(4));
709
710 // Check the value of the field against the requested value.
711 return Builder.CreateICmpEQ(CpuValue,
712 llvm::ConstantInt::get(Int32Ty, Value));
713}
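// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// __builtin_cpu_is compiles to a load of one __cpu_model field (vendor, type, or
// subtype, selected by the StringSwitch above) and an integer equality compare.
static inline int editor_example_cpu_is_haswell(void) {
  return __builtin_cpu_is("haswell");
}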
714
715Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
716 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
717 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
718 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
719 return Builder.getFalse();
720 return EmitX86CpuSupports(FeatureStr);
721}
722
723Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
724 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
725}
726
727llvm::Value *
728CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
729 Value *Result = Builder.getTrue();
730 if (FeatureMask[0] != 0) {
731 // Matching the struct layout from the compiler-rt/libgcc structure that is
732 // filled in:
733 // unsigned int __cpu_vendor;
734 // unsigned int __cpu_type;
735 // unsigned int __cpu_subtype;
736 // unsigned int __cpu_features[1];
737 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
738 llvm::ArrayType::get(Int32Ty, 1));
739
740 // Grab the global __cpu_model.
741 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
742 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
743
744 // Grab the first (0th) element from the field __cpu_features off of the
745 // global in the struct STy.
746 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
747 Builder.getInt32(0)};
748 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
749 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
750 CharUnits::fromQuantity(4));
751
752 // Check the value of the bit corresponding to the feature requested.
753 Value *Mask = Builder.getInt32(FeatureMask[0]);
754 Value *Bitset = Builder.CreateAnd(Features, Mask);
755 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
756 Result = Builder.CreateAnd(Result, Cmp);
757 }
758
759 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
760 llvm::Constant *CpuFeatures2 =
761 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
762 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
763 for (int i = 1; i != 4; ++i) {
764 const uint32_t M = FeatureMask[i];
765 if (!M)
766 continue;
767 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
768 Value *Features = Builder.CreateAlignedLoad(
769 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
770 CharUnits::fromQuantity(4));
771 // Check the value of the bit corresponding to the feature requested.
772 Value *Mask = Builder.getInt32(M);
773 Value *Bitset = Builder.CreateAnd(Features, Mask);
774 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
775 Result = Builder.CreateAnd(Result, Cmp);
776 }
777
778 return Result;
779}
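// ---- Editor's illustrative sketch; not part of X86.cpp, wrapper name invented. ----
// __builtin_cpu_supports tests the requested bits in __cpu_features[0] and, when
// needed, in __cpu_features2[]; __builtin_cpu_init must run first so the runtime
// support code has filled in __cpu_model.
static inline int editor_example_has_avx2(void) {
  __builtin_cpu_init();
  return __builtin_cpu_supports("avx2");
}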
780
781Value *CodeGenFunction::EmitX86CpuInit() {
782 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
783 /*Variadic*/ false);
784 llvm::FunctionCallee Func =
785 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
786 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
787 cast<llvm::GlobalValue>(Func.getCallee())
788 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
789 return Builder.CreateCall(Func);
790}
791
792
793Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
794 const CallExpr *E) {
795 if (BuiltinID == Builtin::BI__builtin_cpu_is)
796 return EmitX86CpuIs(E);
797 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
798 return EmitX86CpuSupports(E);
799 if (BuiltinID == Builtin::BI__builtin_cpu_init)
800 return EmitX86CpuInit();
801
802 // Handle MSVC intrinsics before argument evaluation to prevent double
803 // evaluation.
804 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
805 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
806
807 SmallVector<Value *, 4> Ops;
808 bool IsMaskFCmp = false;
809 bool IsConjFMA = false;
810
811 // Find out if any arguments are required to be integer constant expressions.
812 unsigned ICEArguments = 0;
813 ASTContext::GetBuiltinTypeError Error;
814 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
815 assert(Error == ASTContext::GE_None && "Should not codegen an error");
816
817 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
818 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
819 }
820
821 // These exist so that the builtin that takes an immediate can be bounds
822 // checked by clang to avoid passing bad immediates to the backend. Since
823 // AVX has a larger immediate than SSE we would need separate builtins to
824 // do the different bounds checking. Rather than create a clang-specific
825 // SSE-only builtin, this implements eight separate builtins to match the
826 // gcc implementation.
827 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
828 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
829 llvm::Function *F = CGM.getIntrinsic(ID);
830 return Builder.CreateCall(F, Ops);
831 };
832
833 // For the vector forms of FP comparisons, translate the builtins directly to
834 // IR.
835 // TODO: The builtins could be removed if the SSE header files used vector
836 // extension comparisons directly (vector ordered/unordered may need
837 // additional support via __builtin_isnan()).
838 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
839 bool IsSignaling) {
840 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
841 Value *Cmp;
842 if (IsSignaling)
843 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
844 else
845 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
846 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
847 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
848 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
849 return Builder.CreateBitCast(Sext, FPVecTy);
850 };
851
852 switch (BuiltinID) {
853 default: return nullptr;
854 case X86::BI_mm_prefetch: {
855 Value *Address = Ops[0];
856 ConstantInt *C = cast<ConstantInt>(Ops[1]);
857 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
858 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
859 Value *Data = ConstantInt::get(Int32Ty, 1);
860 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
861 return Builder.CreateCall(F, {Address, RW, Locality, Data});
862 }
863 case X86::BI_m_prefetch:
864 case X86::BI_m_prefetchw: {
865 Value *Address = Ops[0];
866 // The 'w' suffix implies write.
867 Value *RW =
868 ConstantInt::get(Int32Ty, BuiltinID == X86::BI_m_prefetchw ? 1 : 0);
869 Value *Locality = ConstantInt::get(Int32Ty, 0x3);
870 Value *Data = ConstantInt::get(Int32Ty, 1);
871 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
872 return Builder.CreateCall(F, {Address, RW, Locality, Data});
873 }
874 case X86::BI_mm_clflush: {
875 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
876 Ops[0]);
877 }
878 case X86::BI_mm_lfence: {
879 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
880 }
881 case X86::BI_mm_mfence: {
882 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
883 }
884 case X86::BI_mm_sfence: {
885 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
886 }
887 case X86::BI_mm_pause: {
888 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
889 }
890 case X86::BI__rdtsc: {
891 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
892 }
893 case X86::BI__builtin_ia32_rdtscp: {
894 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
895 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
896 Ops[0]);
897 return Builder.CreateExtractValue(Call, 0);
898 }
899 case X86::BI__builtin_ia32_roundps:
900 case X86::BI__builtin_ia32_roundpd:
901 case X86::BI__builtin_ia32_roundps256:
902 case X86::BI__builtin_ia32_roundpd256: {
903 unsigned M = cast<ConstantInt>(Ops[1])->getZExtValue();
904 unsigned MXCSRMask = 0b100;
905 unsigned FRoundNoExcMask = 0b1000;
906 unsigned UseMXCSR = MXCSRMask & M;
907 unsigned FRoundNoExc = FRoundNoExcMask & M;
908
909 if (UseMXCSR || !FRoundNoExc) {
910
911 Intrinsic::ID ID = Intrinsic::not_intrinsic;
912
913 switch (BuiltinID) {
914 case X86::BI__builtin_ia32_roundps:
915 ID = Intrinsic::x86_sse41_round_ps;
916 break;
917 case X86::BI__builtin_ia32_roundps256:
918 ID = Intrinsic::x86_avx_round_ps_256;
919 break;
920 case X86::BI__builtin_ia32_roundpd:
921 ID = Intrinsic::x86_sse41_round_pd;
922 break;
923 case X86::BI__builtin_ia32_roundpd256:
924 ID = Intrinsic::x86_avx_round_pd_256;
925 break;
926 default:
927 llvm_unreachable("must return from switch");
928 }
929
930 Function *F = CGM.getIntrinsic(ID);
931 return Builder.CreateCall(F, Ops);
932 }
933
934 return emitX86RoundImmediate(*this, Ops[0], M);
935 }
936 case X86::BI__builtin_ia32_roundss:
937 case X86::BI__builtin_ia32_roundsd: {
938 unsigned M = cast<ConstantInt>(Ops[2])->getZExtValue();
939 unsigned MXCSRMask = 0b100;
940 unsigned FRoundNoExcMask = 0b1000;
941 unsigned UseMXCSR = MXCSRMask & M;
942 unsigned FRoundNoExc = FRoundNoExcMask & M;
943
944 if (UseMXCSR || !FRoundNoExc) {
945
946 Intrinsic::ID ID = Intrinsic::not_intrinsic;
947
948 switch (BuiltinID) {
949 case X86::BI__builtin_ia32_roundss:
950 ID = Intrinsic::x86_sse41_round_ss;
951 break;
952 case X86::BI__builtin_ia32_roundsd:
953 ID = Intrinsic::x86_sse41_round_sd;
954 break;
955 default:
956 llvm_unreachable("must return from switch");
957 }
958
959 Function *F = CGM.getIntrinsic(ID);
960 return Builder.CreateCall(F, Ops);
961 }
962
963 Value *Idx = Builder.getInt32(0);
964 Value *ValAt0 = Builder.CreateExtractElement(Ops[1], Idx);
965 Value *RoundedAt0 = emitX86RoundImmediate(*this, ValAt0, M);
966
967 return Builder.CreateInsertElement(Ops[0], RoundedAt0, Idx);
968 }
969 case X86::BI__builtin_ia32_lzcnt_u16:
970 case X86::BI__builtin_ia32_lzcnt_u32:
971 case X86::BI__builtin_ia32_lzcnt_u64: {
972 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
973 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
974 }
975 case X86::BI__builtin_ia32_tzcnt_u16:
976 case X86::BI__builtin_ia32_tzcnt_u32:
977 case X86::BI__builtin_ia32_tzcnt_u64: {
978 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
979 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
980 }
981 case X86::BI__builtin_ia32_undef128:
982 case X86::BI__builtin_ia32_undef256:
983 case X86::BI__builtin_ia32_undef512:
984 // The x86 definition of "undef" is not the same as the LLVM definition
985 // (PR32176). We leave optimizing away an unnecessary zero constant to the
986 // IR optimizer and backend.
987 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
988 // value, we should use that here instead of a zero.
989 return llvm::Constant::getNullValue(ConvertType(E->getType()));
990 case X86::BI__builtin_ia32_vec_ext_v4hi:
991 case X86::BI__builtin_ia32_vec_ext_v16qi:
992 case X86::BI__builtin_ia32_vec_ext_v8hi:
993 case X86::BI__builtin_ia32_vec_ext_v4si:
994 case X86::BI__builtin_ia32_vec_ext_v4sf:
995 case X86::BI__builtin_ia32_vec_ext_v2di:
996 case X86::BI__builtin_ia32_vec_ext_v32qi:
997 case X86::BI__builtin_ia32_vec_ext_v16hi:
998 case X86::BI__builtin_ia32_vec_ext_v8si:
999 case X86::BI__builtin_ia32_vec_ext_v4di: {
1000 unsigned NumElts =
1001 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1002 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
1003 Index &= NumElts - 1;
1004 // These builtins exist so we can ensure the index is an ICE and in range.
1005 // Otherwise we could just do this in the header file.
1006 return Builder.CreateExtractElement(Ops[0], Index);
1007 }
1008 case X86::BI__builtin_ia32_vec_set_v4hi:
1009 case X86::BI__builtin_ia32_vec_set_v16qi:
1010 case X86::BI__builtin_ia32_vec_set_v8hi:
1011 case X86::BI__builtin_ia32_vec_set_v4si:
1012 case X86::BI__builtin_ia32_vec_set_v2di:
1013 case X86::BI__builtin_ia32_vec_set_v32qi:
1014 case X86::BI__builtin_ia32_vec_set_v16hi:
1015 case X86::BI__builtin_ia32_vec_set_v8si:
1016 case X86::BI__builtin_ia32_vec_set_v4di: {
1017 unsigned NumElts =
1018 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1019 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
1020 Index &= NumElts - 1;
1021 // These builtins exist so we can ensure the index is an ICE and in range.
1022 // Otherwise we could just do this in the header file.
1023 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
1024 }
1025 case X86::BI_mm_setcsr:
1026 case X86::BI__builtin_ia32_ldmxcsr: {
1027 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
1028 Builder.CreateStore(Ops[0], Tmp);
1029 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
1030 Tmp.getPointer());
1031 }
1032 case X86::BI_mm_getcsr:
1033 case X86::BI__builtin_ia32_stmxcsr: {
1034 RawAddress Tmp = CreateMemTemp(E->getType());
1035 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
1036 Tmp.getPointer());
1037 return Builder.CreateLoad(Tmp, "stmxcsr");
1038 }
1039 case X86::BI__builtin_ia32_xsave:
1040 case X86::BI__builtin_ia32_xsave64:
1041 case X86::BI__builtin_ia32_xrstor:
1042 case X86::BI__builtin_ia32_xrstor64:
1043 case X86::BI__builtin_ia32_xsaveopt:
1044 case X86::BI__builtin_ia32_xsaveopt64:
1045 case X86::BI__builtin_ia32_xrstors:
1046 case X86::BI__builtin_ia32_xrstors64:
1047 case X86::BI__builtin_ia32_xsavec:
1048 case X86::BI__builtin_ia32_xsavec64:
1049 case X86::BI__builtin_ia32_xsaves:
1050 case X86::BI__builtin_ia32_xsaves64:
1051 case X86::BI__builtin_ia32_xsetbv:
1052 case X86::BI_xsetbv: {
1053 Intrinsic::ID ID;
1054#define INTRINSIC_X86_XSAVE_ID(NAME) \
1055 case X86::BI__builtin_ia32_##NAME: \
1056 ID = Intrinsic::x86_##NAME; \
1057 break
1058 switch (BuiltinID) {
1059 default: llvm_unreachable("Unsupported intrinsic!");
1060 INTRINSIC_X86_XSAVE_ID(xsave);
1061 INTRINSIC_X86_XSAVE_ID(xsave64);
1062 INTRINSIC_X86_XSAVE_ID(xrstor);
1063 INTRINSIC_X86_XSAVE_ID(xrstor64);
1064 INTRINSIC_X86_XSAVE_ID(xsaveopt);
1065 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
1066 INTRINSIC_X86_XSAVE_ID(xrstors);
1067 INTRINSIC_X86_XSAVE_ID(xrstors64);
1068 INTRINSIC_X86_XSAVE_ID(xsavec);
1069 INTRINSIC_X86_XSAVE_ID(xsavec64);
1070 INTRINSIC_X86_XSAVE_ID(xsaves);
1071 INTRINSIC_X86_XSAVE_ID(xsaves64);
1072 INTRINSIC_X86_XSAVE_ID(xsetbv);
1073 case X86::BI_xsetbv:
1074 ID = Intrinsic::x86_xsetbv;
1075 break;
1076 }
1077#undef INTRINSIC_X86_XSAVE_ID
1078 Value *Mhi = Builder.CreateTrunc(
1079 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
1080 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
1081 Ops[1] = Mhi;
1082 Ops.push_back(Mlo);
1083 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
1084 }
1085 case X86::BI__builtin_ia32_xgetbv:
1086 case X86::BI_xgetbv:
1087 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
1088 case X86::BI__builtin_ia32_storedqudi128_mask:
1089 case X86::BI__builtin_ia32_storedqusi128_mask:
1090 case X86::BI__builtin_ia32_storedquhi128_mask:
1091 case X86::BI__builtin_ia32_storedquqi128_mask:
1092 case X86::BI__builtin_ia32_storeupd128_mask:
1093 case X86::BI__builtin_ia32_storeups128_mask:
1094 case X86::BI__builtin_ia32_storedqudi256_mask:
1095 case X86::BI__builtin_ia32_storedqusi256_mask:
1096 case X86::BI__builtin_ia32_storedquhi256_mask:
1097 case X86::BI__builtin_ia32_storedquqi256_mask:
1098 case X86::BI__builtin_ia32_storeupd256_mask:
1099 case X86::BI__builtin_ia32_storeups256_mask:
1100 case X86::BI__builtin_ia32_storedqudi512_mask:
1101 case X86::BI__builtin_ia32_storedqusi512_mask:
1102 case X86::BI__builtin_ia32_storedquhi512_mask:
1103 case X86::BI__builtin_ia32_storedquqi512_mask:
1104 case X86::BI__builtin_ia32_storeupd512_mask:
1105 case X86::BI__builtin_ia32_storeups512_mask:
1106 return EmitX86MaskedStore(*this, Ops, Align(1));
1107
1108 case X86::BI__builtin_ia32_storesbf16128_mask:
1109 case X86::BI__builtin_ia32_storesh128_mask:
1110 case X86::BI__builtin_ia32_storess128_mask:
1111 case X86::BI__builtin_ia32_storesd128_mask:
1112 return EmitX86MaskedStore(*this, Ops, Align(1));
1113
1114 case X86::BI__builtin_ia32_cvtmask2b128:
1115 case X86::BI__builtin_ia32_cvtmask2b256:
1116 case X86::BI__builtin_ia32_cvtmask2b512:
1117 case X86::BI__builtin_ia32_cvtmask2w128:
1118 case X86::BI__builtin_ia32_cvtmask2w256:
1119 case X86::BI__builtin_ia32_cvtmask2w512:
1120 case X86::BI__builtin_ia32_cvtmask2d128:
1121 case X86::BI__builtin_ia32_cvtmask2d256:
1122 case X86::BI__builtin_ia32_cvtmask2d512:
1123 case X86::BI__builtin_ia32_cvtmask2q128:
1124 case X86::BI__builtin_ia32_cvtmask2q256:
1125 case X86::BI__builtin_ia32_cvtmask2q512:
1126 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
1127
1128 case X86::BI__builtin_ia32_cvtb2mask128:
1129 case X86::BI__builtin_ia32_cvtb2mask256:
1130 case X86::BI__builtin_ia32_cvtb2mask512:
1131 case X86::BI__builtin_ia32_cvtw2mask128:
1132 case X86::BI__builtin_ia32_cvtw2mask256:
1133 case X86::BI__builtin_ia32_cvtw2mask512:
1134 case X86::BI__builtin_ia32_cvtd2mask128:
1135 case X86::BI__builtin_ia32_cvtd2mask256:
1136 case X86::BI__builtin_ia32_cvtd2mask512:
1137 case X86::BI__builtin_ia32_cvtq2mask128:
1138 case X86::BI__builtin_ia32_cvtq2mask256:
1139 case X86::BI__builtin_ia32_cvtq2mask512:
1140 return EmitX86ConvertToMask(*this, Ops[0]);
1141
1142 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
1143 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
1144 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
1145 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
1146 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
1147 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
1148 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
1149 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
1150 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
1151 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
1152 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
1153 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
1154 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
1155 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
1156
1157 case X86::BI__builtin_ia32_vfmaddsh3_mask:
1158 case X86::BI__builtin_ia32_vfmaddss3_mask:
1159 case X86::BI__builtin_ia32_vfmaddsd3_mask:
1160 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
1161 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
1162 case X86::BI__builtin_ia32_vfmaddss3_maskz:
1163 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
1164 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
1165 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
1166 case X86::BI__builtin_ia32_vfmaddss3_mask3:
1167 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
1168 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
1169 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
1170 case X86::BI__builtin_ia32_vfmsubss3_mask3:
1171 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
1172 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
1173 /*NegAcc*/ true);
1174 case X86::BI__builtin_ia32_vfmaddph512_mask:
1175 case X86::BI__builtin_ia32_vfmaddph512_maskz:
1176 case X86::BI__builtin_ia32_vfmaddph512_mask3:
1177 case X86::BI__builtin_ia32_vfmaddps512_mask:
1178 case X86::BI__builtin_ia32_vfmaddps512_maskz:
1179 case X86::BI__builtin_ia32_vfmaddps512_mask3:
1180 case X86::BI__builtin_ia32_vfmsubps512_mask3:
1181 case X86::BI__builtin_ia32_vfmaddpd512_mask:
1182 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
1183 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
1184 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
1185 case X86::BI__builtin_ia32_vfmsubph512_mask3:
1186 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
1187 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
1188 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
1189 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
1190 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
1191 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
1192 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
1193 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
1194 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
1195 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
1196 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
1197 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
1198 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
1199 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
1200
1201 case X86::BI__builtin_ia32_movdqa32store128_mask:
1202 case X86::BI__builtin_ia32_movdqa64store128_mask:
1203 case X86::BI__builtin_ia32_storeaps128_mask:
1204 case X86::BI__builtin_ia32_storeapd128_mask:
1205 case X86::BI__builtin_ia32_movdqa32store256_mask:
1206 case X86::BI__builtin_ia32_movdqa64store256_mask:
1207 case X86::BI__builtin_ia32_storeaps256_mask:
1208 case X86::BI__builtin_ia32_storeapd256_mask:
1209 case X86::BI__builtin_ia32_movdqa32store512_mask:
1210 case X86::BI__builtin_ia32_movdqa64store512_mask:
1211 case X86::BI__builtin_ia32_storeaps512_mask:
1212 case X86::BI__builtin_ia32_storeapd512_mask:
1213 return EmitX86MaskedStore(
1214 *this, Ops,
1215 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1216
1217 case X86::BI__builtin_ia32_loadups128_mask:
1218 case X86::BI__builtin_ia32_loadups256_mask:
1219 case X86::BI__builtin_ia32_loadups512_mask:
1220 case X86::BI__builtin_ia32_loadupd128_mask:
1221 case X86::BI__builtin_ia32_loadupd256_mask:
1222 case X86::BI__builtin_ia32_loadupd512_mask:
1223 case X86::BI__builtin_ia32_loaddquqi128_mask:
1224 case X86::BI__builtin_ia32_loaddquqi256_mask:
1225 case X86::BI__builtin_ia32_loaddquqi512_mask:
1226 case X86::BI__builtin_ia32_loaddquhi128_mask:
1227 case X86::BI__builtin_ia32_loaddquhi256_mask:
1228 case X86::BI__builtin_ia32_loaddquhi512_mask:
1229 case X86::BI__builtin_ia32_loaddqusi128_mask:
1230 case X86::BI__builtin_ia32_loaddqusi256_mask:
1231 case X86::BI__builtin_ia32_loaddqusi512_mask:
1232 case X86::BI__builtin_ia32_loaddqudi128_mask:
1233 case X86::BI__builtin_ia32_loaddqudi256_mask:
1234 case X86::BI__builtin_ia32_loaddqudi512_mask:
1235 return EmitX86MaskedLoad(*this, Ops, Align(1));
1236
1237 case X86::BI__builtin_ia32_loadsbf16128_mask:
1238 case X86::BI__builtin_ia32_loadsh128_mask:
1239 case X86::BI__builtin_ia32_loadss128_mask:
1240 case X86::BI__builtin_ia32_loadsd128_mask:
1241 return EmitX86MaskedLoad(*this, Ops, Align(1));
1242
1243 case X86::BI__builtin_ia32_loadaps128_mask:
1244 case X86::BI__builtin_ia32_loadaps256_mask:
1245 case X86::BI__builtin_ia32_loadaps512_mask:
1246 case X86::BI__builtin_ia32_loadapd128_mask:
1247 case X86::BI__builtin_ia32_loadapd256_mask:
1248 case X86::BI__builtin_ia32_loadapd512_mask:
1249 case X86::BI__builtin_ia32_movdqa32load128_mask:
1250 case X86::BI__builtin_ia32_movdqa32load256_mask:
1251 case X86::BI__builtin_ia32_movdqa32load512_mask:
1252 case X86::BI__builtin_ia32_movdqa64load128_mask:
1253 case X86::BI__builtin_ia32_movdqa64load256_mask:
1254 case X86::BI__builtin_ia32_movdqa64load512_mask:
1255 return EmitX86MaskedLoad(
1256 *this, Ops,
1257 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1258
1259 case X86::BI__builtin_ia32_expandloaddf128_mask:
1260 case X86::BI__builtin_ia32_expandloaddf256_mask:
1261 case X86::BI__builtin_ia32_expandloaddf512_mask:
1262 case X86::BI__builtin_ia32_expandloadsf128_mask:
1263 case X86::BI__builtin_ia32_expandloadsf256_mask:
1264 case X86::BI__builtin_ia32_expandloadsf512_mask:
1265 case X86::BI__builtin_ia32_expandloaddi128_mask:
1266 case X86::BI__builtin_ia32_expandloaddi256_mask:
1267 case X86::BI__builtin_ia32_expandloaddi512_mask:
1268 case X86::BI__builtin_ia32_expandloadsi128_mask:
1269 case X86::BI__builtin_ia32_expandloadsi256_mask:
1270 case X86::BI__builtin_ia32_expandloadsi512_mask:
1271 case X86::BI__builtin_ia32_expandloadhi128_mask:
1272 case X86::BI__builtin_ia32_expandloadhi256_mask:
1273 case X86::BI__builtin_ia32_expandloadhi512_mask:
1274 case X86::BI__builtin_ia32_expandloadqi128_mask:
1275 case X86::BI__builtin_ia32_expandloadqi256_mask:
1276 case X86::BI__builtin_ia32_expandloadqi512_mask:
1277 return EmitX86ExpandLoad(*this, Ops);
1278
1279 case X86::BI__builtin_ia32_compressstoredf128_mask:
1280 case X86::BI__builtin_ia32_compressstoredf256_mask:
1281 case X86::BI__builtin_ia32_compressstoredf512_mask:
1282 case X86::BI__builtin_ia32_compressstoresf128_mask:
1283 case X86::BI__builtin_ia32_compressstoresf256_mask:
1284 case X86::BI__builtin_ia32_compressstoresf512_mask:
1285 case X86::BI__builtin_ia32_compressstoredi128_mask:
1286 case X86::BI__builtin_ia32_compressstoredi256_mask:
1287 case X86::BI__builtin_ia32_compressstoredi512_mask:
1288 case X86::BI__builtin_ia32_compressstoresi128_mask:
1289 case X86::BI__builtin_ia32_compressstoresi256_mask:
1290 case X86::BI__builtin_ia32_compressstoresi512_mask:
1291 case X86::BI__builtin_ia32_compressstorehi128_mask:
1292 case X86::BI__builtin_ia32_compressstorehi256_mask:
1293 case X86::BI__builtin_ia32_compressstorehi512_mask:
1294 case X86::BI__builtin_ia32_compressstoreqi128_mask:
1295 case X86::BI__builtin_ia32_compressstoreqi256_mask:
1296 case X86::BI__builtin_ia32_compressstoreqi512_mask:
1297 return EmitX86CompressStore(*this, Ops);
1298
1299 case X86::BI__builtin_ia32_expanddf128_mask:
1300 case X86::BI__builtin_ia32_expanddf256_mask:
1301 case X86::BI__builtin_ia32_expanddf512_mask:
1302 case X86::BI__builtin_ia32_expandsf128_mask:
1303 case X86::BI__builtin_ia32_expandsf256_mask:
1304 case X86::BI__builtin_ia32_expandsf512_mask:
1305 case X86::BI__builtin_ia32_expanddi128_mask:
1306 case X86::BI__builtin_ia32_expanddi256_mask:
1307 case X86::BI__builtin_ia32_expanddi512_mask:
1308 case X86::BI__builtin_ia32_expandsi128_mask:
1309 case X86::BI__builtin_ia32_expandsi256_mask:
1310 case X86::BI__builtin_ia32_expandsi512_mask:
1311 case X86::BI__builtin_ia32_expandhi128_mask:
1312 case X86::BI__builtin_ia32_expandhi256_mask:
1313 case X86::BI__builtin_ia32_expandhi512_mask:
1314 case X86::BI__builtin_ia32_expandqi128_mask:
1315 case X86::BI__builtin_ia32_expandqi256_mask:
1316 case X86::BI__builtin_ia32_expandqi512_mask:
1317 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
1318
1319 case X86::BI__builtin_ia32_compressdf128_mask:
1320 case X86::BI__builtin_ia32_compressdf256_mask:
1321 case X86::BI__builtin_ia32_compressdf512_mask:
1322 case X86::BI__builtin_ia32_compresssf128_mask:
1323 case X86::BI__builtin_ia32_compresssf256_mask:
1324 case X86::BI__builtin_ia32_compresssf512_mask:
1325 case X86::BI__builtin_ia32_compressdi128_mask:
1326 case X86::BI__builtin_ia32_compressdi256_mask:
1327 case X86::BI__builtin_ia32_compressdi512_mask:
1328 case X86::BI__builtin_ia32_compresssi128_mask:
1329 case X86::BI__builtin_ia32_compresssi256_mask:
1330 case X86::BI__builtin_ia32_compresssi512_mask:
1331 case X86::BI__builtin_ia32_compresshi128_mask:
1332 case X86::BI__builtin_ia32_compresshi256_mask:
1333 case X86::BI__builtin_ia32_compresshi512_mask:
1334 case X86::BI__builtin_ia32_compressqi128_mask:
1335 case X86::BI__builtin_ia32_compressqi256_mask:
1336 case X86::BI__builtin_ia32_compressqi512_mask:
1337 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
1338
1339 case X86::BI__builtin_ia32_gather3div2df:
1340 case X86::BI__builtin_ia32_gather3div2di:
1341 case X86::BI__builtin_ia32_gather3div4df:
1342 case X86::BI__builtin_ia32_gather3div4di:
1343 case X86::BI__builtin_ia32_gather3div4sf:
1344 case X86::BI__builtin_ia32_gather3div4si:
1345 case X86::BI__builtin_ia32_gather3div8sf:
1346 case X86::BI__builtin_ia32_gather3div8si:
1347 case X86::BI__builtin_ia32_gather3siv2df:
1348 case X86::BI__builtin_ia32_gather3siv2di:
1349 case X86::BI__builtin_ia32_gather3siv4df:
1350 case X86::BI__builtin_ia32_gather3siv4di:
1351 case X86::BI__builtin_ia32_gather3siv4sf:
1352 case X86::BI__builtin_ia32_gather3siv4si:
1353 case X86::BI__builtin_ia32_gather3siv8sf:
1354 case X86::BI__builtin_ia32_gather3siv8si:
1355 case X86::BI__builtin_ia32_gathersiv8df:
1356 case X86::BI__builtin_ia32_gathersiv16sf:
1357 case X86::BI__builtin_ia32_gatherdiv8df:
1358 case X86::BI__builtin_ia32_gatherdiv16sf:
1359 case X86::BI__builtin_ia32_gathersiv8di:
1360 case X86::BI__builtin_ia32_gathersiv16si:
1361 case X86::BI__builtin_ia32_gatherdiv8di:
1362 case X86::BI__builtin_ia32_gatherdiv16si: {
1363 Intrinsic::ID IID;
1364 switch (BuiltinID) {
1365 default: llvm_unreachable("Unexpected builtin");
1366 case X86::BI__builtin_ia32_gather3div2df:
1367 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
1368 break;
1369 case X86::BI__builtin_ia32_gather3div2di:
1370 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
1371 break;
1372 case X86::BI__builtin_ia32_gather3div4df:
1373 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
1374 break;
1375 case X86::BI__builtin_ia32_gather3div4di:
1376 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
1377 break;
1378 case X86::BI__builtin_ia32_gather3div4sf:
1379 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
1380 break;
1381 case X86::BI__builtin_ia32_gather3div4si:
1382 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
1383 break;
1384 case X86::BI__builtin_ia32_gather3div8sf:
1385 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
1386 break;
1387 case X86::BI__builtin_ia32_gather3div8si:
1388 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
1389 break;
1390 case X86::BI__builtin_ia32_gather3siv2df:
1391 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
1392 break;
1393 case X86::BI__builtin_ia32_gather3siv2di:
1394 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
1395 break;
1396 case X86::BI__builtin_ia32_gather3siv4df:
1397 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
1398 break;
1399 case X86::BI__builtin_ia32_gather3siv4di:
1400 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
1401 break;
1402 case X86::BI__builtin_ia32_gather3siv4sf:
1403 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
1404 break;
1405 case X86::BI__builtin_ia32_gather3siv4si:
1406 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
1407 break;
1408 case X86::BI__builtin_ia32_gather3siv8sf:
1409 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
1410 break;
1411 case X86::BI__builtin_ia32_gather3siv8si:
1412 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
1413 break;
1414 case X86::BI__builtin_ia32_gathersiv8df:
1415 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
1416 break;
1417 case X86::BI__builtin_ia32_gathersiv16sf:
1418 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
1419 break;
1420 case X86::BI__builtin_ia32_gatherdiv8df:
1421 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
1422 break;
1423 case X86::BI__builtin_ia32_gatherdiv16sf:
1424 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
1425 break;
1426 case X86::BI__builtin_ia32_gathersiv8di:
1427 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
1428 break;
1429 case X86::BI__builtin_ia32_gathersiv16si:
1430 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
1431 break;
1432 case X86::BI__builtin_ia32_gatherdiv8di:
1433 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
1434 break;
1435 case X86::BI__builtin_ia32_gatherdiv16si:
1436 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
1437 break;
1438 }
1439
1440 unsigned MinElts = std::min(
1441 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
1442 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
1443 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
1444 Function *Intr = CGM.getIntrinsic(IID);
1445 return Builder.CreateCall(Intr, Ops);
1446 }
1447
1448 case X86::BI__builtin_ia32_scattersiv8df:
1449 case X86::BI__builtin_ia32_scattersiv16sf:
1450 case X86::BI__builtin_ia32_scatterdiv8df:
1451 case X86::BI__builtin_ia32_scatterdiv16sf:
1452 case X86::BI__builtin_ia32_scattersiv8di:
1453 case X86::BI__builtin_ia32_scattersiv16si:
1454 case X86::BI__builtin_ia32_scatterdiv8di:
1455 case X86::BI__builtin_ia32_scatterdiv16si:
1456 case X86::BI__builtin_ia32_scatterdiv2df:
1457 case X86::BI__builtin_ia32_scatterdiv2di:
1458 case X86::BI__builtin_ia32_scatterdiv4df:
1459 case X86::BI__builtin_ia32_scatterdiv4di:
1460 case X86::BI__builtin_ia32_scatterdiv4sf:
1461 case X86::BI__builtin_ia32_scatterdiv4si:
1462 case X86::BI__builtin_ia32_scatterdiv8sf:
1463 case X86::BI__builtin_ia32_scatterdiv8si:
1464 case X86::BI__builtin_ia32_scattersiv2df:
1465 case X86::BI__builtin_ia32_scattersiv2di:
1466 case X86::BI__builtin_ia32_scattersiv4df:
1467 case X86::BI__builtin_ia32_scattersiv4di:
1468 case X86::BI__builtin_ia32_scattersiv4sf:
1469 case X86::BI__builtin_ia32_scattersiv4si:
1470 case X86::BI__builtin_ia32_scattersiv8sf:
1471 case X86::BI__builtin_ia32_scattersiv8si: {
1472 Intrinsic::ID IID;
1473 switch (BuiltinID) {
1474 default: llvm_unreachable("Unexpected builtin");
1475 case X86::BI__builtin_ia32_scattersiv8df:
1476 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
1477 break;
1478 case X86::BI__builtin_ia32_scattersiv16sf:
1479 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
1480 break;
1481 case X86::BI__builtin_ia32_scatterdiv8df:
1482 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
1483 break;
1484 case X86::BI__builtin_ia32_scatterdiv16sf:
1485 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
1486 break;
1487 case X86::BI__builtin_ia32_scattersiv8di:
1488 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
1489 break;
1490 case X86::BI__builtin_ia32_scattersiv16si:
1491 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
1492 break;
1493 case X86::BI__builtin_ia32_scatterdiv8di:
1494 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
1495 break;
1496 case X86::BI__builtin_ia32_scatterdiv16si:
1497 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
1498 break;
1499 case X86::BI__builtin_ia32_scatterdiv2df:
1500 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
1501 break;
1502 case X86::BI__builtin_ia32_scatterdiv2di:
1503 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
1504 break;
1505 case X86::BI__builtin_ia32_scatterdiv4df:
1506 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
1507 break;
1508 case X86::BI__builtin_ia32_scatterdiv4di:
1509 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
1510 break;
1511 case X86::BI__builtin_ia32_scatterdiv4sf:
1512 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
1513 break;
1514 case X86::BI__builtin_ia32_scatterdiv4si:
1515 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
1516 break;
1517 case X86::BI__builtin_ia32_scatterdiv8sf:
1518 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
1519 break;
1520 case X86::BI__builtin_ia32_scatterdiv8si:
1521 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
1522 break;
1523 case X86::BI__builtin_ia32_scattersiv2df:
1524 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
1525 break;
1526 case X86::BI__builtin_ia32_scattersiv2di:
1527 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
1528 break;
1529 case X86::BI__builtin_ia32_scattersiv4df:
1530 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
1531 break;
1532 case X86::BI__builtin_ia32_scattersiv4di:
1533 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
1534 break;
1535 case X86::BI__builtin_ia32_scattersiv4sf:
1536 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
1537 break;
1538 case X86::BI__builtin_ia32_scattersiv4si:
1539 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
1540 break;
1541 case X86::BI__builtin_ia32_scattersiv8sf:
1542 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
1543 break;
1544 case X86::BI__builtin_ia32_scattersiv8si:
1545 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
1546 break;
1547 }
1548
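         // As with the gathers above, the mask length is the smaller of the
         // index vector (Ops[2]) and the data vector (Ops[3]) lengths.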
1549 unsigned MinElts = std::min(
1550 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
1551 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
1552 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
1553 Function *Intr = CGM.getIntrinsic(IID);
1554 return Builder.CreateCall(Intr, Ops);
1555 }
1556
1557 case X86::BI__builtin_ia32_vextractf128_pd256:
1558 case X86::BI__builtin_ia32_vextractf128_ps256:
1559 case X86::BI__builtin_ia32_vextractf128_si256:
1560 case X86::BI__builtin_ia32_extract128i256:
1561 case X86::BI__builtin_ia32_extractf64x4_mask:
1562 case X86::BI__builtin_ia32_extractf32x4_mask:
1563 case X86::BI__builtin_ia32_extracti64x4_mask:
1564 case X86::BI__builtin_ia32_extracti32x4_mask:
1565 case X86::BI__builtin_ia32_extractf32x8_mask:
1566 case X86::BI__builtin_ia32_extracti32x8_mask:
1567 case X86::BI__builtin_ia32_extractf32x4_256_mask:
1568 case X86::BI__builtin_ia32_extracti32x4_256_mask:
1569 case X86::BI__builtin_ia32_extractf64x2_256_mask:
1570 case X86::BI__builtin_ia32_extracti64x2_256_mask:
1571 case X86::BI__builtin_ia32_extractf64x2_512_mask:
1572 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
1573 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
1574 unsigned NumElts = DstTy->getNumElements();
1575 unsigned SrcNumElts =
1576 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1577 unsigned SubVectors = SrcNumElts / NumElts;
1578 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
1579 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1580 Index &= SubVectors - 1; // Remove any extra bits.
1581 Index *= NumElts;
1582
1583 int Indices[16];
1584 for (unsigned i = 0; i != NumElts; ++i)
1585 Indices[i] = i + Index;
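         // E.g. vextractf128_ps256 with Index = 1: NumElts = 4, SrcNumElts = 8,
         // so Indices = {4, 5, 6, 7}, the upper 128-bit half of the source.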
1586
1587 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1588 "extract");
1589
1590 if (Ops.size() == 4)
1591 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
1592
1593 return Res;
1594 }
1595 case X86::BI__builtin_ia32_vinsertf128_pd256:
1596 case X86::BI__builtin_ia32_vinsertf128_ps256:
1597 case X86::BI__builtin_ia32_vinsertf128_si256:
1598 case X86::BI__builtin_ia32_insert128i256:
1599 case X86::BI__builtin_ia32_insertf64x4:
1600 case X86::BI__builtin_ia32_insertf32x4:
1601 case X86::BI__builtin_ia32_inserti64x4:
1602 case X86::BI__builtin_ia32_inserti32x4:
1603 case X86::BI__builtin_ia32_insertf32x8:
1604 case X86::BI__builtin_ia32_inserti32x8:
1605 case X86::BI__builtin_ia32_insertf32x4_256:
1606 case X86::BI__builtin_ia32_inserti32x4_256:
1607 case X86::BI__builtin_ia32_insertf64x2_256:
1608 case X86::BI__builtin_ia32_inserti64x2_256:
1609 case X86::BI__builtin_ia32_insertf64x2_512:
1610 case X86::BI__builtin_ia32_inserti64x2_512: {
1611 unsigned DstNumElts =
1612 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1613 unsigned SrcNumElts =
1614 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
1615 unsigned SubVectors = DstNumElts / SrcNumElts;
1616 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
1617 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1618 Index &= SubVectors - 1; // Remove any extra bits.
1619 Index *= SrcNumElts;
1620
1621 int Indices[16];
1622 for (unsigned i = 0; i != DstNumElts; ++i)
1623 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
1624
1625 Value *Op1 = Builder.CreateShuffleVector(
1626 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
1627
1628 for (unsigned i = 0; i != DstNumElts; ++i) {
1629 if (i >= Index && i < (Index + SrcNumElts))
1630 Indices[i] = (i - Index) + DstNumElts;
1631 else
1632 Indices[i] = i;
1633 }
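         // E.g. vinsertf128_ps256 with Index = 1: the widened Op1 is referenced
         // by indices 8..15, and the final Indices are {0, 1, 2, 3, 8, 9, 10, 11},
         // keeping the low half of Ops[0] and inserting Op1 into the high half.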
1634
1635 return Builder.CreateShuffleVector(Ops[0], Op1,
1636 ArrayRef(Indices, DstNumElts), "insert");
1637 }
1638 case X86::BI__builtin_ia32_pmovqd512_mask:
1639 case X86::BI__builtin_ia32_pmovwb512_mask: {
1640 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1641 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
1642 }
1643 case X86::BI__builtin_ia32_pmovdb512_mask:
1644 case X86::BI__builtin_ia32_pmovdw512_mask:
1645 case X86::BI__builtin_ia32_pmovqw512_mask: {
1646 if (const auto *C = dyn_cast<Constant>(Ops[2]))
1647 if (C->isAllOnesValue())
1648 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1649
1650 Intrinsic::ID IID;
1651 switch (BuiltinID) {
1652 default: llvm_unreachable("Unsupported intrinsic!");
1653 case X86::BI__builtin_ia32_pmovdb512_mask:
1654 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
1655 break;
1656 case X86::BI__builtin_ia32_pmovdw512_mask:
1657 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
1658 break;
1659 case X86::BI__builtin_ia32_pmovqw512_mask:
1660 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
1661 break;
1662 }
1663
1664 Function *Intr = CGM.getIntrinsic(IID);
1665 return Builder.CreateCall(Intr, Ops);
1666 }
1667 case X86::BI__builtin_ia32_pblendw128:
1668 case X86::BI__builtin_ia32_blendpd:
1669 case X86::BI__builtin_ia32_blendps:
1670 case X86::BI__builtin_ia32_blendpd256:
1671 case X86::BI__builtin_ia32_blendps256:
1672 case X86::BI__builtin_ia32_pblendw256:
1673 case X86::BI__builtin_ia32_pblendd128:
1674 case X86::BI__builtin_ia32_pblendd256: {
1675 unsigned NumElts =
1676 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1677 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1678
1679 int Indices[16];
1680 // If there are more than 8 elements, the immediate is used twice so make
1681 // sure we handle that.
1682 for (unsigned i = 0; i != NumElts; ++i)
1683 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
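         // E.g. blendps with Imm = 0b0101 on 4 elements gives Indices =
         // {4, 1, 6, 3}: elements 0 and 2 come from Ops[1], 1 and 3 from Ops[0].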
1684
1685 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1686 ArrayRef(Indices, NumElts), "blend");
1687 }
1688 case X86::BI__builtin_ia32_pshuflw:
1689 case X86::BI__builtin_ia32_pshuflw256:
1690 case X86::BI__builtin_ia32_pshuflw512: {
1691 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1692 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1693 unsigned NumElts = Ty->getNumElements();
1694
1695    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
1696 Imm = (Imm & 0xff) * 0x01010101;
1697
1698 int Indices[32];
1699 for (unsigned l = 0; l != NumElts; l += 8) {
1700 for (unsigned i = 0; i != 4; ++i) {
1701 Indices[l + i] = l + (Imm & 3);
1702 Imm >>= 2;
1703 }
1704 for (unsigned i = 4; i != 8; ++i)
1705 Indices[l + i] = l + i;
1706 }
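         // E.g. a 128-bit pshuflw with Imm = 0x1B gives Indices =
         // {3, 2, 1, 0, 4, 5, 6, 7}: the low four words are reversed and the
         // high four pass through unchanged.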
1707
1708 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1709 "pshuflw");
1710 }
1711 case X86::BI__builtin_ia32_pshufhw:
1712 case X86::BI__builtin_ia32_pshufhw256:
1713 case X86::BI__builtin_ia32_pshufhw512: {
1714 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1715 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1716 unsigned NumElts = Ty->getNumElements();
1717
1718    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
1719 Imm = (Imm & 0xff) * 0x01010101;
1720
1721 int Indices[32];
1722 for (unsigned l = 0; l != NumElts; l += 8) {
1723 for (unsigned i = 0; i != 4; ++i)
1724 Indices[l + i] = l + i;
1725 for (unsigned i = 4; i != 8; ++i) {
1726 Indices[l + i] = l + 4 + (Imm & 3);
1727 Imm >>= 2;
1728 }
1729 }
1730
1731 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1732 "pshufhw");
1733 }
1734 case X86::BI__builtin_ia32_pshufd:
1735 case X86::BI__builtin_ia32_pshufd256:
1736 case X86::BI__builtin_ia32_pshufd512:
1737 case X86::BI__builtin_ia32_vpermilpd:
1738 case X86::BI__builtin_ia32_vpermilps:
1739 case X86::BI__builtin_ia32_vpermilpd256:
1740 case X86::BI__builtin_ia32_vpermilps256:
1741 case X86::BI__builtin_ia32_vpermilpd512:
1742 case X86::BI__builtin_ia32_vpermilps512: {
1743 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1744 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1745 unsigned NumElts = Ty->getNumElements();
1746 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1747 unsigned NumLaneElts = NumElts / NumLanes;
1748
1749    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
1750 Imm = (Imm & 0xff) * 0x01010101;
1751
1752 int Indices[16];
1753 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1754 for (unsigned i = 0; i != NumLaneElts; ++i) {
1755 Indices[i + l] = (Imm % NumLaneElts) + l;
1756 Imm /= NumLaneElts;
1757 }
1758 }
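         // E.g. pshufd with Imm = 0x1B on a 4 x i32 vector gives Indices =
         // {3, 2, 1, 0}, reversing the elements within the 128-bit lane.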
1759
1760 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1761 "permil");
1762 }
1763 case X86::BI__builtin_ia32_shufpd:
1764 case X86::BI__builtin_ia32_shufpd256:
1765 case X86::BI__builtin_ia32_shufpd512:
1766 case X86::BI__builtin_ia32_shufps:
1767 case X86::BI__builtin_ia32_shufps256:
1768 case X86::BI__builtin_ia32_shufps512: {
1769 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1770 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1771 unsigned NumElts = Ty->getNumElements();
1772 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1773 unsigned NumLaneElts = NumElts / NumLanes;
1774
1775    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
1776 Imm = (Imm & 0xff) * 0x01010101;
1777
1778 int Indices[16];
1779 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1780 for (unsigned i = 0; i != NumLaneElts; ++i) {
1781 unsigned Index = Imm % NumLaneElts;
1782 Imm /= NumLaneElts;
1783 if (i >= (NumLaneElts / 2))
1784 Index += NumElts;
1785 Indices[l + i] = l + Index;
1786 }
1787 }
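         // E.g. shufps with Imm = 0x4E gives Indices = {2, 3, 4, 5}, i.e.
         // {a2, a3, b0, b1}: the first half of each lane is selected from
         // Ops[0] and the second half from Ops[1].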
1788
1789 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1790 ArrayRef(Indices, NumElts), "shufp");
1791 }
1792 case X86::BI__builtin_ia32_permdi256:
1793 case X86::BI__builtin_ia32_permdf256:
1794 case X86::BI__builtin_ia32_permdi512:
1795 case X86::BI__builtin_ia32_permdf512: {
1796 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1797 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1798 unsigned NumElts = Ty->getNumElements();
1799
1800 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
1801 int Indices[8];
1802 for (unsigned l = 0; l != NumElts; l += 4)
1803 for (unsigned i = 0; i != 4; ++i)
1804 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
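         // E.g. vpermq with Imm = 0x1B gives Indices = {3, 2, 1, 0}, reversing
         // the four 64-bit elements within each 256-bit chunk.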
1805
1806 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1807 "perm");
1808 }
1809 case X86::BI__builtin_ia32_palignr128:
1810 case X86::BI__builtin_ia32_palignr256:
1811 case X86::BI__builtin_ia32_palignr512: {
1812 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1813
1814 unsigned NumElts =
1815 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1816 assert(NumElts % 16 == 0);
1817
1818 // If palignr is shifting the pair of vectors more than the size of two
1819 // lanes, emit zero.
1820 if (ShiftVal >= 32)
1821 return llvm::Constant::getNullValue(ConvertType(E->getType()));
1822
1823 // If palignr is shifting the pair of input vectors more than one lane,
1824 // but less than two lanes, convert to shifting in zeroes.
1825 if (ShiftVal > 16) {
1826 ShiftVal -= 16;
1827 Ops[1] = Ops[0];
1828 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
1829 }
1830
1831 int Indices[64];
1832    // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
1833 for (unsigned l = 0; l != NumElts; l += 16) {
1834 for (unsigned i = 0; i != 16; ++i) {
1835 unsigned Idx = ShiftVal + i;
1836 if (Idx >= 16)
1837 Idx += NumElts - 16; // End of lane, switch operand.
1838 Indices[l + i] = Idx + l;
1839 }
1840 }
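         // E.g. a 128-bit palignr with ShiftVal = 4 gives Indices = {4, ..., 19}:
         // bytes 4-15 of Ops[1] followed by bytes 0-3 of Ops[0].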
1841
1842 return Builder.CreateShuffleVector(Ops[1], Ops[0],
1843 ArrayRef(Indices, NumElts), "palignr");
1844 }
1845 case X86::BI__builtin_ia32_alignd128:
1846 case X86::BI__builtin_ia32_alignd256:
1847 case X86::BI__builtin_ia32_alignd512:
1848 case X86::BI__builtin_ia32_alignq128:
1849 case X86::BI__builtin_ia32_alignq256:
1850 case X86::BI__builtin_ia32_alignq512: {
1851 unsigned NumElts =
1852 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1853 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1854
1855    // Mask the shift amount to the width of the vector.
1856 ShiftVal &= NumElts - 1;
1857
1858 int Indices[16];
1859 for (unsigned i = 0; i != NumElts; ++i)
1860 Indices[i] = i + ShiftVal;
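         // E.g. valignd with ShiftVal = 1 on 4 x i32 gives Indices = {1, 2, 3, 4}:
         // elements 1-3 of Ops[1] followed by element 0 of Ops[0].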
1861
1862 return Builder.CreateShuffleVector(Ops[1], Ops[0],
1863 ArrayRef(Indices, NumElts), "valign");
1864 }
1865 case X86::BI__builtin_ia32_shuf_f32x4_256:
1866 case X86::BI__builtin_ia32_shuf_f64x2_256:
1867 case X86::BI__builtin_ia32_shuf_i32x4_256:
1868 case X86::BI__builtin_ia32_shuf_i64x2_256:
1869 case X86::BI__builtin_ia32_shuf_f32x4:
1870 case X86::BI__builtin_ia32_shuf_f64x2:
1871 case X86::BI__builtin_ia32_shuf_i32x4:
1872 case X86::BI__builtin_ia32_shuf_i64x2: {
1873 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1874 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1875 unsigned NumElts = Ty->getNumElements();
1876 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
1877 unsigned NumLaneElts = NumElts / NumLanes;
1878
1879 int Indices[16];
1880 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1881 unsigned Index = (Imm % NumLanes) * NumLaneElts;
1882 Imm /= NumLanes; // Discard the bits we just used.
1883 if (l >= (NumElts / 2))
1884 Index += NumElts; // Switch to other source.
1885 for (unsigned i = 0; i != NumLaneElts; ++i) {
1886 Indices[l + i] = Index + i;
1887 }
1888 }
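         // E.g. the 256-bit shuf_f64x2 with Imm = 0b10 gives Indices =
         // {0, 1, 6, 7}: lane 0 of Ops[0] in the low half and lane 1 of Ops[1]
         // in the high half.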
1889
1890 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1891 ArrayRef(Indices, NumElts), "shuf");
1892 }
1893
1894 case X86::BI__builtin_ia32_vperm2f128_pd256:
1895 case X86::BI__builtin_ia32_vperm2f128_ps256:
1896 case X86::BI__builtin_ia32_vperm2f128_si256:
1897 case X86::BI__builtin_ia32_permti256: {
1898 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1899 unsigned NumElts =
1900 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1901
1902 // This takes a very simple approach since there are two lanes and a
1903 // shuffle can have 2 inputs. So we reserve the first input for the first
1904 // lane and the second input for the second lane. This may result in
1905 // duplicate sources, but this can be dealt with in the backend.
1906
1907 Value *OutOps[2];
1908 int Indices[8];
1909 for (unsigned l = 0; l != 2; ++l) {
1910 // Determine the source for this lane.
1911 if (Imm & (1 << ((l * 4) + 3)))
1912 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
1913 else if (Imm & (1 << ((l * 4) + 1)))
1914 OutOps[l] = Ops[1];
1915 else
1916 OutOps[l] = Ops[0];
1917
1918 for (unsigned i = 0; i != NumElts/2; ++i) {
1919      // Start with the ith element of the source for this lane.
1920 unsigned Idx = (l * NumElts) + i;
1921 // If bit 0 of the immediate half is set, switch to the high half of
1922 // the source.
1923 if (Imm & (1 << (l * 4)))
1924 Idx += NumElts/2;
1925 Indices[(l * (NumElts/2)) + i] = Idx;
1926 }
1927 }
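         // E.g. Imm = 0x31 selects the high 128-bit half of Ops[0] for the low
         // lane and the high half of Ops[1] for the high lane.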
1928
1929 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
1930 ArrayRef(Indices, NumElts), "vperm");
1931 }
1932
1933 case X86::BI__builtin_ia32_pslldqi128_byteshift:
1934 case X86::BI__builtin_ia32_pslldqi256_byteshift:
1935 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
1936 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1937 auto *VecTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1938 // Builtin type is vXi8.
1939 unsigned NumElts = VecTy->getNumElements();
1940 Value *Zero = llvm::Constant::getNullValue(VecTy);
1941
1942 // If pslldq is shifting the vector more than 15 bytes, emit zero.
1943 if (ShiftVal >= 16)
1944 return Zero;
1945
1946 int Indices[64];
1947    // 256/512-bit pslldq operates on 128-bit lanes, so we need to handle that.
1948 for (unsigned l = 0; l != NumElts; l += 16) {
1949 for (unsigned i = 0; i != 16; ++i) {
1950 unsigned Idx = NumElts + i - ShiftVal;
1951 if (Idx < NumElts)
1952 Idx -= NumElts - 16; // end of lane, switch operand.
1953 Indices[l + i] = Idx + l;
1954 }
1955 }
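         // E.g. a 128-bit pslldq with ShiftVal = 3 gives Indices =
         // {13, 14, 15, 16, ..., 28}: the low three bytes come from Zero and the
         // remaining bytes from Ops[0].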
1956 return Builder.CreateShuffleVector(Zero, Ops[0], ArrayRef(Indices, NumElts),
1957 "pslldq");
1958 }
1959 case X86::BI__builtin_ia32_psrldqi128_byteshift:
1960 case X86::BI__builtin_ia32_psrldqi256_byteshift:
1961 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
1962 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1963 auto *VecTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1964 // Builtin type is vXi8.
1965 unsigned NumElts = VecTy->getNumElements();
1966 Value *Zero = llvm::Constant::getNullValue(VecTy);
1967
1968 // If psrldq is shifting the vector more than 15 bytes, emit zero.
1969 if (ShiftVal >= 16)
1970 return Zero;
1971
1972 int Indices[64];
1973    // 256/512-bit psrldq operates on 128-bit lanes, so we need to handle that.
1974 for (unsigned l = 0; l != NumElts; l += 16) {
1975 for (unsigned i = 0; i != 16; ++i) {
1976 unsigned Idx = i + ShiftVal;
1977 if (Idx >= 16)
1978 Idx += NumElts - 16; // end of lane, switch operand.
1979 Indices[l + i] = Idx + l;
1980 }
1981 }
1982 return Builder.CreateShuffleVector(Ops[0], Zero, ArrayRef(Indices, NumElts),
1983 "psrldq");
1984 }
1985 case X86::BI__builtin_ia32_kshiftliqi:
1986 case X86::BI__builtin_ia32_kshiftlihi:
1987 case X86::BI__builtin_ia32_kshiftlisi:
1988 case X86::BI__builtin_ia32_kshiftlidi: {
1989 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1990 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
1991
1992 if (ShiftVal >= NumElts)
1993 return llvm::Constant::getNullValue(Ops[0]->getType());
1994
1995 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
1996
1997 int Indices[64];
1998 for (unsigned i = 0; i != NumElts; ++i)
1999 Indices[i] = NumElts + i - ShiftVal;
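         // E.g. a 16-bit kshiftl with ShiftVal = 2 gives Indices =
         // {14, 15, 16, ..., 29}: the low two bits come from Zero and the rest
         // from In, i.e. the mask shifted up by two.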
2000
2001 Value *Zero = llvm::Constant::getNullValue(In->getType());
2002 Value *SV = Builder.CreateShuffleVector(
2003 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
2004 return Builder.CreateBitCast(SV, Ops[0]->getType());
2005 }
2006 case X86::BI__builtin_ia32_kshiftriqi:
2007 case X86::BI__builtin_ia32_kshiftrihi:
2008 case X86::BI__builtin_ia32_kshiftrisi:
2009 case X86::BI__builtin_ia32_kshiftridi: {
2010 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
2011 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2012
2013 if (ShiftVal >= NumElts)
2014 return llvm::Constant::getNullValue(Ops[0]->getType());
2015
2016 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
2017
2018 int Indices[64];
2019 for (unsigned i = 0; i != NumElts; ++i)
2020 Indices[i] = i + ShiftVal;
2021
2022 Value *Zero = llvm::Constant::getNullValue(In->getType());
2023 Value *SV = Builder.CreateShuffleVector(
2024 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
2025 return Builder.CreateBitCast(SV, Ops[0]->getType());
2026 }
2027 case X86::BI__builtin_ia32_movnti:
2028 case X86::BI__builtin_ia32_movnti64:
2029 case X86::BI__builtin_ia32_movntsd:
2030 case X86::BI__builtin_ia32_movntss: {
2031 llvm::MDNode *Node = llvm::MDNode::get(
2032 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
2033
2034 Value *Ptr = Ops[0];
2035 Value *Src = Ops[1];
2036
2037 // Extract the 0'th element of the source vector.
2038 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
2039 BuiltinID == X86::BI__builtin_ia32_movntss)
2040 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
2041
2042 // Unaligned nontemporal store of the scalar value.
2043 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
2044 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
2045 SI->setAlignment(llvm::Align(1));
2046 return SI;
2047 }
2048  // Rotate is a special case of funnel shift - the first two args are the same.
2049 case X86::BI__builtin_ia32_vprotbi:
2050 case X86::BI__builtin_ia32_vprotwi:
2051 case X86::BI__builtin_ia32_vprotdi:
2052 case X86::BI__builtin_ia32_vprotqi:
2053 case X86::BI__builtin_ia32_prold128:
2054 case X86::BI__builtin_ia32_prold256:
2055 case X86::BI__builtin_ia32_prold512:
2056 case X86::BI__builtin_ia32_prolq128:
2057 case X86::BI__builtin_ia32_prolq256:
2058 case X86::BI__builtin_ia32_prolq512:
2059 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
2060 case X86::BI__builtin_ia32_prord128:
2061 case X86::BI__builtin_ia32_prord256:
2062 case X86::BI__builtin_ia32_prord512:
2063 case X86::BI__builtin_ia32_prorq128:
2064 case X86::BI__builtin_ia32_prorq256:
2065 case X86::BI__builtin_ia32_prorq512:
2066 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
2067 case X86::BI__builtin_ia32_selectb_128:
2068 case X86::BI__builtin_ia32_selectb_256:
2069 case X86::BI__builtin_ia32_selectb_512:
2070 case X86::BI__builtin_ia32_selectw_128:
2071 case X86::BI__builtin_ia32_selectw_256:
2072 case X86::BI__builtin_ia32_selectw_512:
2073 case X86::BI__builtin_ia32_selectd_128:
2074 case X86::BI__builtin_ia32_selectd_256:
2075 case X86::BI__builtin_ia32_selectd_512:
2076 case X86::BI__builtin_ia32_selectq_128:
2077 case X86::BI__builtin_ia32_selectq_256:
2078 case X86::BI__builtin_ia32_selectq_512:
2079 case X86::BI__builtin_ia32_selectph_128:
2080 case X86::BI__builtin_ia32_selectph_256:
2081 case X86::BI__builtin_ia32_selectph_512:
2082 case X86::BI__builtin_ia32_selectpbf_128:
2083 case X86::BI__builtin_ia32_selectpbf_256:
2084 case X86::BI__builtin_ia32_selectpbf_512:
2085 case X86::BI__builtin_ia32_selectps_128:
2086 case X86::BI__builtin_ia32_selectps_256:
2087 case X86::BI__builtin_ia32_selectps_512:
2088 case X86::BI__builtin_ia32_selectpd_128:
2089 case X86::BI__builtin_ia32_selectpd_256:
2090 case X86::BI__builtin_ia32_selectpd_512:
2091 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
2092 case X86::BI__builtin_ia32_selectsh_128:
2093 case X86::BI__builtin_ia32_selectsbf_128:
2094 case X86::BI__builtin_ia32_selectss_128:
2095 case X86::BI__builtin_ia32_selectsd_128: {
2096 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2097 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2098 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
2099 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
2100 }
2101 case X86::BI__builtin_ia32_cmpb128_mask:
2102 case X86::BI__builtin_ia32_cmpb256_mask:
2103 case X86::BI__builtin_ia32_cmpb512_mask:
2104 case X86::BI__builtin_ia32_cmpw128_mask:
2105 case X86::BI__builtin_ia32_cmpw256_mask:
2106 case X86::BI__builtin_ia32_cmpw512_mask:
2107 case X86::BI__builtin_ia32_cmpd128_mask:
2108 case X86::BI__builtin_ia32_cmpd256_mask:
2109 case X86::BI__builtin_ia32_cmpd512_mask:
2110 case X86::BI__builtin_ia32_cmpq128_mask:
2111 case X86::BI__builtin_ia32_cmpq256_mask:
2112 case X86::BI__builtin_ia32_cmpq512_mask: {
2113 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
2114 return EmitX86MaskedCompare(*this, CC, true, Ops);
2115 }
2116 case X86::BI__builtin_ia32_ucmpb128_mask:
2117 case X86::BI__builtin_ia32_ucmpb256_mask:
2118 case X86::BI__builtin_ia32_ucmpb512_mask:
2119 case X86::BI__builtin_ia32_ucmpw128_mask:
2120 case X86::BI__builtin_ia32_ucmpw256_mask:
2121 case X86::BI__builtin_ia32_ucmpw512_mask:
2122 case X86::BI__builtin_ia32_ucmpd128_mask:
2123 case X86::BI__builtin_ia32_ucmpd256_mask:
2124 case X86::BI__builtin_ia32_ucmpd512_mask:
2125 case X86::BI__builtin_ia32_ucmpq128_mask:
2126 case X86::BI__builtin_ia32_ucmpq256_mask:
2127 case X86::BI__builtin_ia32_ucmpq512_mask: {
2128 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
2129 return EmitX86MaskedCompare(*this, CC, false, Ops);
2130 }
2131 case X86::BI__builtin_ia32_vpcomb:
2132 case X86::BI__builtin_ia32_vpcomw:
2133 case X86::BI__builtin_ia32_vpcomd:
2134 case X86::BI__builtin_ia32_vpcomq:
2135 return EmitX86vpcom(*this, Ops, true);
2136 case X86::BI__builtin_ia32_vpcomub:
2137 case X86::BI__builtin_ia32_vpcomuw:
2138 case X86::BI__builtin_ia32_vpcomud:
2139 case X86::BI__builtin_ia32_vpcomuq:
2140 return EmitX86vpcom(*this, Ops, false);
2141
2142 case X86::BI__builtin_ia32_kortestcqi:
2143 case X86::BI__builtin_ia32_kortestchi:
2144 case X86::BI__builtin_ia32_kortestcsi:
2145 case X86::BI__builtin_ia32_kortestcdi: {
2146 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2147 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
2148 Value *Cmp = Builder.CreateICmpEQ(Or, C);
2149 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2150 }
2151 case X86::BI__builtin_ia32_kortestzqi:
2152 case X86::BI__builtin_ia32_kortestzhi:
2153 case X86::BI__builtin_ia32_kortestzsi:
2154 case X86::BI__builtin_ia32_kortestzdi: {
2155 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2156 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
2157 Value *Cmp = Builder.CreateICmpEQ(Or, C);
2158 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2159 }
2160
2161 case X86::BI__builtin_ia32_ktestcqi:
2162 case X86::BI__builtin_ia32_ktestzqi:
2163 case X86::BI__builtin_ia32_ktestchi:
2164 case X86::BI__builtin_ia32_ktestzhi:
2165 case X86::BI__builtin_ia32_ktestcsi:
2166 case X86::BI__builtin_ia32_ktestzsi:
2167 case X86::BI__builtin_ia32_ktestcdi:
2168 case X86::BI__builtin_ia32_ktestzdi: {
2169 Intrinsic::ID IID;
2170 switch (BuiltinID) {
2171 default: llvm_unreachable("Unsupported intrinsic!");
2172 case X86::BI__builtin_ia32_ktestcqi:
2173 IID = Intrinsic::x86_avx512_ktestc_b;
2174 break;
2175 case X86::BI__builtin_ia32_ktestzqi:
2176 IID = Intrinsic::x86_avx512_ktestz_b;
2177 break;
2178 case X86::BI__builtin_ia32_ktestchi:
2179 IID = Intrinsic::x86_avx512_ktestc_w;
2180 break;
2181 case X86::BI__builtin_ia32_ktestzhi:
2182 IID = Intrinsic::x86_avx512_ktestz_w;
2183 break;
2184 case X86::BI__builtin_ia32_ktestcsi:
2185 IID = Intrinsic::x86_avx512_ktestc_d;
2186 break;
2187 case X86::BI__builtin_ia32_ktestzsi:
2188 IID = Intrinsic::x86_avx512_ktestz_d;
2189 break;
2190 case X86::BI__builtin_ia32_ktestcdi:
2191 IID = Intrinsic::x86_avx512_ktestc_q;
2192 break;
2193 case X86::BI__builtin_ia32_ktestzdi:
2194 IID = Intrinsic::x86_avx512_ktestz_q;
2195 break;
2196 }
2197
2198 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2199 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2200 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2201 Function *Intr = CGM.getIntrinsic(IID);
2202 return Builder.CreateCall(Intr, {LHS, RHS});
2203 }
2204
2205 case X86::BI__builtin_ia32_kaddqi:
2206 case X86::BI__builtin_ia32_kaddhi:
2207 case X86::BI__builtin_ia32_kaddsi:
2208 case X86::BI__builtin_ia32_kadddi: {
2209 Intrinsic::ID IID;
2210 switch (BuiltinID) {
2211 default: llvm_unreachable("Unsupported intrinsic!");
2212 case X86::BI__builtin_ia32_kaddqi:
2213 IID = Intrinsic::x86_avx512_kadd_b;
2214 break;
2215 case X86::BI__builtin_ia32_kaddhi:
2216 IID = Intrinsic::x86_avx512_kadd_w;
2217 break;
2218 case X86::BI__builtin_ia32_kaddsi:
2219 IID = Intrinsic::x86_avx512_kadd_d;
2220 break;
2221 case X86::BI__builtin_ia32_kadddi:
2222 IID = Intrinsic::x86_avx512_kadd_q;
2223 break;
2224 }
2225
2226 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2227 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2228 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2229 Function *Intr = CGM.getIntrinsic(IID);
2230 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
2231 return Builder.CreateBitCast(Res, Ops[0]->getType());
2232 }
2233 case X86::BI__builtin_ia32_kandqi:
2234 case X86::BI__builtin_ia32_kandhi:
2235 case X86::BI__builtin_ia32_kandsi:
2236 case X86::BI__builtin_ia32_kanddi:
2237 return EmitX86MaskLogic(*this, Instruction::And, Ops);
2238 case X86::BI__builtin_ia32_kandnqi:
2239 case X86::BI__builtin_ia32_kandnhi:
2240 case X86::BI__builtin_ia32_kandnsi:
2241 case X86::BI__builtin_ia32_kandndi:
2242 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
2243 case X86::BI__builtin_ia32_korqi:
2244 case X86::BI__builtin_ia32_korhi:
2245 case X86::BI__builtin_ia32_korsi:
2246 case X86::BI__builtin_ia32_kordi:
2247 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
2248 case X86::BI__builtin_ia32_kxnorqi:
2249 case X86::BI__builtin_ia32_kxnorhi:
2250 case X86::BI__builtin_ia32_kxnorsi:
2251 case X86::BI__builtin_ia32_kxnordi:
2252 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
2253 case X86::BI__builtin_ia32_kxorqi:
2254 case X86::BI__builtin_ia32_kxorhi:
2255 case X86::BI__builtin_ia32_kxorsi:
2256 case X86::BI__builtin_ia32_kxordi:
2257 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
2258 case X86::BI__builtin_ia32_knotqi:
2259 case X86::BI__builtin_ia32_knothi:
2260 case X86::BI__builtin_ia32_knotsi:
2261 case X86::BI__builtin_ia32_knotdi: {
2262 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2263 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2264 return Builder.CreateBitCast(Builder.CreateNot(Res),
2265 Ops[0]->getType());
2266 }
2267 case X86::BI__builtin_ia32_kmovb:
2268 case X86::BI__builtin_ia32_kmovw:
2269 case X86::BI__builtin_ia32_kmovd:
2270 case X86::BI__builtin_ia32_kmovq: {
2271 // Bitcast to vXi1 type and then back to integer. This gets the mask
2272 // register type into the IR, but might be optimized out depending on
2273 // what's around it.
2274 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2275 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2276 return Builder.CreateBitCast(Res, Ops[0]->getType());
2277 }
2278
2279 case X86::BI__builtin_ia32_kunpckdi:
2280 case X86::BI__builtin_ia32_kunpcksi:
2281 case X86::BI__builtin_ia32_kunpckhi: {
2282 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2283 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2284 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2285 int Indices[64];
2286 for (unsigned i = 0; i != NumElts; ++i)
2287 Indices[i] = i;
2288
2289 // First extract half of each vector. This gives better codegen than
2290 // doing it in a single shuffle.
2291 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2292 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2293 // Concat the vectors.
2294 // NOTE: Operands are swapped to match the intrinsic definition.
2295 Value *Res =
2296 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2297 return Builder.CreateBitCast(Res, Ops[0]->getType());
2298 }
2299
2300 case X86::BI__builtin_ia32_sqrtsh_round_mask:
2301 case X86::BI__builtin_ia32_sqrtsd_round_mask:
2302 case X86::BI__builtin_ia32_sqrtss_round_mask: {
2303 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
2304    // Only lower to generic IR if the rounding mode is 4 (AKA CUR_DIRECTION);
2305    // otherwise keep the target-specific intrinsic.
2306 if (CC != 4) {
2307 Intrinsic::ID IID;
2308
2309 switch (BuiltinID) {
2310 default:
2311 llvm_unreachable("Unsupported intrinsic!");
2312 case X86::BI__builtin_ia32_sqrtsh_round_mask:
2313 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
2314 break;
2315 case X86::BI__builtin_ia32_sqrtsd_round_mask:
2316 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
2317 break;
2318 case X86::BI__builtin_ia32_sqrtss_round_mask:
2319 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
2320 break;
2321 }
2322 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2323 }
2324 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2325 Function *F;
2326 if (Builder.getIsFPConstrained()) {
2327 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2328 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2329 A->getType());
2330 A = Builder.CreateConstrainedFPCall(F, A);
2331 } else {
2332 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
2333 A = Builder.CreateCall(F, A);
2334 }
2335 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2336 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
2337 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
2338 }
2339 case X86::BI__builtin_ia32_sqrtph512:
2340 case X86::BI__builtin_ia32_sqrtps512:
2341 case X86::BI__builtin_ia32_sqrtpd512: {
2342 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
2343    // Only lower to generic IR if the rounding mode is 4 (AKA CUR_DIRECTION);
2344    // otherwise keep the target-specific intrinsic.
2345 if (CC != 4) {
2346 Intrinsic::ID IID;
2347
2348 switch (BuiltinID) {
2349 default:
2350 llvm_unreachable("Unsupported intrinsic!");
2351 case X86::BI__builtin_ia32_sqrtph512:
2352 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
2353 break;
2354 case X86::BI__builtin_ia32_sqrtps512:
2355 IID = Intrinsic::x86_avx512_sqrt_ps_512;
2356 break;
2357 case X86::BI__builtin_ia32_sqrtpd512:
2358 IID = Intrinsic::x86_avx512_sqrt_pd_512;
2359 break;
2360 }
2361 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2362 }
2363 if (Builder.getIsFPConstrained()) {
2364 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2365 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2366 Ops[0]->getType());
2367 return Builder.CreateConstrainedFPCall(F, Ops[0]);
2368 } else {
2369 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
2370 return Builder.CreateCall(F, Ops[0]);
2371 }
2372 }
2373
2374 case X86::BI__builtin_ia32_pmuludq128:
2375 case X86::BI__builtin_ia32_pmuludq256:
2376 case X86::BI__builtin_ia32_pmuludq512:
2377 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
2378
2379 case X86::BI__builtin_ia32_pmuldq128:
2380 case X86::BI__builtin_ia32_pmuldq256:
2381 case X86::BI__builtin_ia32_pmuldq512:
2382 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
2383
2384 case X86::BI__builtin_ia32_pternlogd512_mask:
2385 case X86::BI__builtin_ia32_pternlogq512_mask:
2386 case X86::BI__builtin_ia32_pternlogd128_mask:
2387 case X86::BI__builtin_ia32_pternlogd256_mask:
2388 case X86::BI__builtin_ia32_pternlogq128_mask:
2389 case X86::BI__builtin_ia32_pternlogq256_mask:
2390 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
2391
2392 case X86::BI__builtin_ia32_pternlogd512_maskz:
2393 case X86::BI__builtin_ia32_pternlogq512_maskz:
2394 case X86::BI__builtin_ia32_pternlogd128_maskz:
2395 case X86::BI__builtin_ia32_pternlogd256_maskz:
2396 case X86::BI__builtin_ia32_pternlogq128_maskz:
2397 case X86::BI__builtin_ia32_pternlogq256_maskz:
2398 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
2399
2400 case X86::BI__builtin_ia32_vpshldd128:
2401 case X86::BI__builtin_ia32_vpshldd256:
2402 case X86::BI__builtin_ia32_vpshldd512:
2403 case X86::BI__builtin_ia32_vpshldq128:
2404 case X86::BI__builtin_ia32_vpshldq256:
2405 case X86::BI__builtin_ia32_vpshldq512:
2406 case X86::BI__builtin_ia32_vpshldw128:
2407 case X86::BI__builtin_ia32_vpshldw256:
2408 case X86::BI__builtin_ia32_vpshldw512:
2409 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
2410
2411 case X86::BI__builtin_ia32_vpshrdd128:
2412 case X86::BI__builtin_ia32_vpshrdd256:
2413 case X86::BI__builtin_ia32_vpshrdd512:
2414 case X86::BI__builtin_ia32_vpshrdq128:
2415 case X86::BI__builtin_ia32_vpshrdq256:
2416 case X86::BI__builtin_ia32_vpshrdq512:
2417 case X86::BI__builtin_ia32_vpshrdw128:
2418 case X86::BI__builtin_ia32_vpshrdw256:
2419 case X86::BI__builtin_ia32_vpshrdw512:
2420 // Ops 0 and 1 are swapped.
2421 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
2422
2423 // Reductions
2424 case X86::BI__builtin_ia32_reduce_fadd_pd512:
2425 case X86::BI__builtin_ia32_reduce_fadd_ps512:
2426 case X86::BI__builtin_ia32_reduce_fadd_ph512:
2427 case X86::BI__builtin_ia32_reduce_fadd_ph256:
2428 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
2429 Function *F =
2430 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
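         // vector_reduce_fadd is a strictly ordered reduction unless the reassoc
         // fast-math flag is set, so set it to permit an unordered expansion.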
2431 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2432 Builder.getFastMathFlags().setAllowReassoc();
2433 return Builder.CreateCall(F, {Ops[0], Ops[1]});
2434 }
2435 case X86::BI__builtin_ia32_reduce_fmul_pd512:
2436 case X86::BI__builtin_ia32_reduce_fmul_ps512:
2437 case X86::BI__builtin_ia32_reduce_fmul_ph512:
2438 case X86::BI__builtin_ia32_reduce_fmul_ph256:
2439 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
2440 Function *F =
2441 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
2442 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2443 Builder.getFastMathFlags().setAllowReassoc();
2444 return Builder.CreateCall(F, {Ops[0], Ops[1]});
2445 }
2446 case X86::BI__builtin_ia32_reduce_fmax_pd512:
2447 case X86::BI__builtin_ia32_reduce_fmax_ps512:
2448 case X86::BI__builtin_ia32_reduce_fmax_ph512:
2449 case X86::BI__builtin_ia32_reduce_fmax_ph256:
2450 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
2451 Function *F =
2452 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
2453 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2454 Builder.getFastMathFlags().setNoNaNs();
2455 return Builder.CreateCall(F, {Ops[0]});
2456 }
2457 case X86::BI__builtin_ia32_reduce_fmin_pd512:
2458 case X86::BI__builtin_ia32_reduce_fmin_ps512:
2459 case X86::BI__builtin_ia32_reduce_fmin_ph512:
2460 case X86::BI__builtin_ia32_reduce_fmin_ph256:
2461 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
2462 Function *F =
2463 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
2464 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2465 Builder.getFastMathFlags().setNoNaNs();
2466 return Builder.CreateCall(F, {Ops[0]});
2467 }
2468
2469 case X86::BI__builtin_ia32_rdrand16_step:
2470 case X86::BI__builtin_ia32_rdrand32_step:
2471 case X86::BI__builtin_ia32_rdrand64_step:
2472 case X86::BI__builtin_ia32_rdseed16_step:
2473 case X86::BI__builtin_ia32_rdseed32_step:
2474 case X86::BI__builtin_ia32_rdseed64_step: {
2475 Intrinsic::ID ID;
2476 switch (BuiltinID) {
2477 default: llvm_unreachable("Unsupported intrinsic!");
2478 case X86::BI__builtin_ia32_rdrand16_step:
2479 ID = Intrinsic::x86_rdrand_16;
2480 break;
2481 case X86::BI__builtin_ia32_rdrand32_step:
2482 ID = Intrinsic::x86_rdrand_32;
2483 break;
2484 case X86::BI__builtin_ia32_rdrand64_step:
2485 ID = Intrinsic::x86_rdrand_64;
2486 break;
2487 case X86::BI__builtin_ia32_rdseed16_step:
2488 ID = Intrinsic::x86_rdseed_16;
2489 break;
2490 case X86::BI__builtin_ia32_rdseed32_step:
2491 ID = Intrinsic::x86_rdseed_32;
2492 break;
2493 case X86::BI__builtin_ia32_rdseed64_step:
2494 ID = Intrinsic::x86_rdseed_64;
2495 break;
2496 }
2497
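         // The rdrand/rdseed intrinsics return {value, i32 flag}: store the
         // random value through the pointer operand and return the flag
         // (1 on success).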
2498 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
2499 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
2500 Ops[0]);
2501 return Builder.CreateExtractValue(Call, 1);
2502 }
2503 case X86::BI__builtin_ia32_addcarryx_u32:
2504 case X86::BI__builtin_ia32_addcarryx_u64:
2505 case X86::BI__builtin_ia32_subborrow_u32:
2506 case X86::BI__builtin_ia32_subborrow_u64: {
2507 Intrinsic::ID IID;
2508 switch (BuiltinID) {
2509 default: llvm_unreachable("Unsupported intrinsic!");
2510 case X86::BI__builtin_ia32_addcarryx_u32:
2511 IID = Intrinsic::x86_addcarry_32;
2512 break;
2513 case X86::BI__builtin_ia32_addcarryx_u64:
2514 IID = Intrinsic::x86_addcarry_64;
2515 break;
2516 case X86::BI__builtin_ia32_subborrow_u32:
2517 IID = Intrinsic::x86_subborrow_32;
2518 break;
2519 case X86::BI__builtin_ia32_subborrow_u64:
2520 IID = Intrinsic::x86_subborrow_64;
2521 break;
2522 }
2523
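         // The addcarry/subborrow intrinsics return {i8 carry-out, result}:
         // store the result through the out pointer (Ops[3]) and return the
         // carry/borrow flag.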
2524 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
2525 { Ops[0], Ops[1], Ops[2] });
2526 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
2527 Ops[3]);
2528 return Builder.CreateExtractValue(Call, 0);
2529 }
2530
2531 case X86::BI__builtin_ia32_fpclassps128_mask:
2532 case X86::BI__builtin_ia32_fpclassps256_mask:
2533 case X86::BI__builtin_ia32_fpclassps512_mask:
2534 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2535 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2536 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2537 case X86::BI__builtin_ia32_fpclassph128_mask:
2538 case X86::BI__builtin_ia32_fpclassph256_mask:
2539 case X86::BI__builtin_ia32_fpclassph512_mask:
2540 case X86::BI__builtin_ia32_fpclasspd128_mask:
2541 case X86::BI__builtin_ia32_fpclasspd256_mask:
2542 case X86::BI__builtin_ia32_fpclasspd512_mask: {
2543 unsigned NumElts =
2544 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2545 Value *MaskIn = Ops[2];
2546 Ops.erase(&Ops[2]);
2547
2548 Intrinsic::ID ID;
2549 switch (BuiltinID) {
2550 default: llvm_unreachable("Unsupported intrinsic!");
2551 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2552 ID = Intrinsic::x86_avx10_fpclass_bf16_128;
2553 break;
2554 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2555 ID = Intrinsic::x86_avx10_fpclass_bf16_256;
2556 break;
2557 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2558 ID = Intrinsic::x86_avx10_fpclass_bf16_512;
2559 break;
2560 case X86::BI__builtin_ia32_fpclassph128_mask:
2561 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
2562 break;
2563 case X86::BI__builtin_ia32_fpclassph256_mask:
2564 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
2565 break;
2566 case X86::BI__builtin_ia32_fpclassph512_mask:
2567 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
2568 break;
2569 case X86::BI__builtin_ia32_fpclassps128_mask:
2570 ID = Intrinsic::x86_avx512_fpclass_ps_128;
2571 break;
2572 case X86::BI__builtin_ia32_fpclassps256_mask:
2573 ID = Intrinsic::x86_avx512_fpclass_ps_256;
2574 break;
2575 case X86::BI__builtin_ia32_fpclassps512_mask:
2576 ID = Intrinsic::x86_avx512_fpclass_ps_512;
2577 break;
2578 case X86::BI__builtin_ia32_fpclasspd128_mask:
2579 ID = Intrinsic::x86_avx512_fpclass_pd_128;
2580 break;
2581 case X86::BI__builtin_ia32_fpclasspd256_mask:
2582 ID = Intrinsic::x86_avx512_fpclass_pd_256;
2583 break;
2584 case X86::BI__builtin_ia32_fpclasspd512_mask:
2585 ID = Intrinsic::x86_avx512_fpclass_pd_512;
2586 break;
2587 }
2588
2589 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2590 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
2591 }
2592
2593 case X86::BI__builtin_ia32_vp2intersect_q_512:
2594 case X86::BI__builtin_ia32_vp2intersect_q_256:
2595 case X86::BI__builtin_ia32_vp2intersect_q_128:
2596 case X86::BI__builtin_ia32_vp2intersect_d_512:
2597 case X86::BI__builtin_ia32_vp2intersect_d_256:
2598 case X86::BI__builtin_ia32_vp2intersect_d_128: {
2599 unsigned NumElts =
2600 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2601 Intrinsic::ID ID;
2602
2603 switch (BuiltinID) {
2604 default: llvm_unreachable("Unsupported intrinsic!");
2605 case X86::BI__builtin_ia32_vp2intersect_q_512:
2606 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
2607 break;
2608 case X86::BI__builtin_ia32_vp2intersect_q_256:
2609 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
2610 break;
2611 case X86::BI__builtin_ia32_vp2intersect_q_128:
2612 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
2613 break;
2614 case X86::BI__builtin_ia32_vp2intersect_d_512:
2615 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
2616 break;
2617 case X86::BI__builtin_ia32_vp2intersect_d_256:
2618 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
2619 break;
2620 case X86::BI__builtin_ia32_vp2intersect_d_128:
2621 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
2622 break;
2623 }
2624
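         // The intrinsic returns a pair of vXi1 masks; convert each to the
         // builtin's integer mask type and store them through the two mask
         // pointers (Ops[2] and Ops[3]).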
2625 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
2626 Value *Result = Builder.CreateExtractValue(Call, 0);
2627 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2628 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
2629
2630 Result = Builder.CreateExtractValue(Call, 1);
2631 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2632 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
2633 }
2634
2635 case X86::BI__builtin_ia32_vpmultishiftqb128:
2636 case X86::BI__builtin_ia32_vpmultishiftqb256:
2637 case X86::BI__builtin_ia32_vpmultishiftqb512: {
2638 Intrinsic::ID ID;
2639 switch (BuiltinID) {
2640 default: llvm_unreachable("Unsupported intrinsic!");
2641 case X86::BI__builtin_ia32_vpmultishiftqb128:
2642 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
2643 break;
2644 case X86::BI__builtin_ia32_vpmultishiftqb256:
2645 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
2646 break;
2647 case X86::BI__builtin_ia32_vpmultishiftqb512:
2648 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
2649 break;
2650 }
2651
2652 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2653 }
2654
2655 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2656 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2657 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
2658 unsigned NumElts =
2659 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2660 Value *MaskIn = Ops[2];
2661 Ops.erase(&Ops[2]);
2662
2663 Intrinsic::ID ID;
2664 switch (BuiltinID) {
2665 default: llvm_unreachable("Unsupported intrinsic!");
2666 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2667 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2668 break;
2669 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2670 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2671 break;
2672 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
2673 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2674 break;
2675 }
2676
2677 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2678 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
2679 }
2680
2681 // packed comparison intrinsics
2682 case X86::BI__builtin_ia32_cmpeqps:
2683 case X86::BI__builtin_ia32_cmpeqpd:
2684 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
2685 case X86::BI__builtin_ia32_cmpltps:
2686 case X86::BI__builtin_ia32_cmpltpd:
2687 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
2688 case X86::BI__builtin_ia32_cmpleps:
2689 case X86::BI__builtin_ia32_cmplepd:
2690 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
2691 case X86::BI__builtin_ia32_cmpunordps:
2692 case X86::BI__builtin_ia32_cmpunordpd:
2693 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
2694 case X86::BI__builtin_ia32_cmpneqps:
2695 case X86::BI__builtin_ia32_cmpneqpd:
2696 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
2697 case X86::BI__builtin_ia32_cmpnltps:
2698 case X86::BI__builtin_ia32_cmpnltpd:
2699 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
2700 case X86::BI__builtin_ia32_cmpnleps:
2701 case X86::BI__builtin_ia32_cmpnlepd:
2702 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
2703 case X86::BI__builtin_ia32_cmpordps:
2704 case X86::BI__builtin_ia32_cmpordpd:
2705 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
2706 case X86::BI__builtin_ia32_cmpph128_mask:
2707 case X86::BI__builtin_ia32_cmpph256_mask:
2708 case X86::BI__builtin_ia32_cmpph512_mask:
2709 case X86::BI__builtin_ia32_cmpps128_mask:
2710 case X86::BI__builtin_ia32_cmpps256_mask:
2711 case X86::BI__builtin_ia32_cmpps512_mask:
2712 case X86::BI__builtin_ia32_cmppd128_mask:
2713 case X86::BI__builtin_ia32_cmppd256_mask:
2714 case X86::BI__builtin_ia32_cmppd512_mask:
2715 case X86::BI__builtin_ia32_vcmpbf16512_mask:
2716 case X86::BI__builtin_ia32_vcmpbf16256_mask:
2717 case X86::BI__builtin_ia32_vcmpbf16128_mask:
2718 IsMaskFCmp = true;
2719 [[fallthrough]];
2720 case X86::BI__builtin_ia32_cmpps:
2721 case X86::BI__builtin_ia32_cmpps256:
2722 case X86::BI__builtin_ia32_cmppd:
2723 case X86::BI__builtin_ia32_cmppd256: {
2724    // Lower vector comparisons to fcmp instructions, while ignoring the
2725    // requested signalling behaviour and the requested rounding mode.
2726    // This is only possible if the fp-model is not strict and FENV_ACCESS is
2727    // off.
2728
2729    // The third argument is the comparison condition, an integer in the
2730    // range [0, 31].
2731 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
2732
2733    // Lower to an IR fcmp instruction, ignoring the requested signaling
2734    // behaviour; e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to
2735    // FCMP_OGT.
2736 FCmpInst::Predicate Pred;
2737 bool IsSignaling;
2738 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
2739 // behavior is inverted. We'll handle that after the switch.
2740 switch (CC & 0xf) {
2741 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
2742 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
2743 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
2744 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
2745 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
2746 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
2747 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
2748 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
2749 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
2750 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
2751 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
2752 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
2753 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
2754 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
2755 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
2756 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
2757 default: llvm_unreachable("Unhandled CC");
2758 }
2759
2760 // Invert the signalling behavior for 16-31.
2761 if (CC & 0x10)
2762 IsSignaling = !IsSignaling;
2763
2764    // If the predicate is true or false and we're using constrained intrinsics,
2765    // there is no constrained compare intrinsic we can use, so fall back to the
2766    // legacy X86-specific intrinsic.
2767    // Likewise, if the builtin is mask-enabled and we're using constrained
2768    // intrinsics, use the legacy X86-specific intrinsic.
2769 if (Builder.getIsFPConstrained() &&
2770 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
2771 IsMaskFCmp)) {
2772
2773 Intrinsic::ID IID;
2774 switch (BuiltinID) {
2775 default: llvm_unreachable("Unexpected builtin");
2776 case X86::BI__builtin_ia32_cmpps:
2777 IID = Intrinsic::x86_sse_cmp_ps;
2778 break;
2779 case X86::BI__builtin_ia32_cmpps256:
2780 IID = Intrinsic::x86_avx_cmp_ps_256;
2781 break;
2782 case X86::BI__builtin_ia32_cmppd:
2783 IID = Intrinsic::x86_sse2_cmp_pd;
2784 break;
2785 case X86::BI__builtin_ia32_cmppd256:
2786 IID = Intrinsic::x86_avx_cmp_pd_256;
2787 break;
2788 case X86::BI__builtin_ia32_cmpph128_mask:
2789 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
2790 break;
2791 case X86::BI__builtin_ia32_cmpph256_mask:
2792 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
2793 break;
2794 case X86::BI__builtin_ia32_cmpph512_mask:
2795 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
2796 break;
2797 case X86::BI__builtin_ia32_cmpps512_mask:
2798 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2799 break;
2800 case X86::BI__builtin_ia32_cmppd512_mask:
2801 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2802 break;
2803 case X86::BI__builtin_ia32_cmpps128_mask:
2804 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2805 break;
2806 case X86::BI__builtin_ia32_cmpps256_mask:
2807 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2808 break;
2809 case X86::BI__builtin_ia32_cmppd128_mask:
2810 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2811 break;
2812 case X86::BI__builtin_ia32_cmppd256_mask:
2813 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2814 break;
2815 }
2816
2817 Function *Intr = CGM.getIntrinsic(IID);
2818 if (IsMaskFCmp) {
2819 unsigned NumElts =
2820 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2821 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
2822 Value *Cmp = Builder.CreateCall(Intr, Ops);
2823 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
2824 }
2825
2826 return Builder.CreateCall(Intr, Ops);
2827 }
2828
2829 // Builtins without the _mask suffix return a vector of integers
2830 // of the same width as the input vectors
2831 if (IsMaskFCmp) {
2832 // We ignore SAE if strict FP is disabled. We only keep precise
2833 // exception behavior under strict FP.
2834      // NOTE: If strict FP ever does go through here, a CGFPOptionsRAII
2835      // object will be required.
2836 unsigned NumElts =
2837 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2838 Value *Cmp;
2839 if (IsSignaling)
2840 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
2841 else
2842 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
2843 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
2844 }
2845
2846 return getVectorFCmpIR(Pred, IsSignaling);
2847 }
2848
2849 // SSE scalar comparison intrinsics
2850 case X86::BI__builtin_ia32_cmpeqss:
2851 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
2852 case X86::BI__builtin_ia32_cmpltss:
2853 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
2854 case X86::BI__builtin_ia32_cmpless:
2855 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
2856 case X86::BI__builtin_ia32_cmpunordss:
2857 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
2858 case X86::BI__builtin_ia32_cmpneqss:
2859 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
2860 case X86::BI__builtin_ia32_cmpnltss:
2861 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
2862 case X86::BI__builtin_ia32_cmpnless:
2863 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
2864 case X86::BI__builtin_ia32_cmpordss:
2865 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
2866 case X86::BI__builtin_ia32_cmpeqsd:
2867 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
2868 case X86::BI__builtin_ia32_cmpltsd:
2869 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
2870 case X86::BI__builtin_ia32_cmplesd:
2871 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
2872 case X86::BI__builtin_ia32_cmpunordsd:
2873 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
2874 case X86::BI__builtin_ia32_cmpneqsd:
2875 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
2876 case X86::BI__builtin_ia32_cmpnltsd:
2877 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
2878 case X86::BI__builtin_ia32_cmpnlesd:
2879 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
2880 case X86::BI__builtin_ia32_cmpordsd:
2881 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
2882
2883 // f16c half2float intrinsics
2884 case X86::BI__builtin_ia32_vcvtph2ps_mask:
2885 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
2886 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
2887 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2888 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
2889 }
2890
2891 // AVX512 bf16 intrinsics
2892 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
2893 Ops[2] = getMaskVecValue(
2894 *this, Ops[2],
2895 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
2896 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
2897 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2898 }
2899
2900 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2901 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
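    // Unlike the 128-bit form above, which folds the mask into the intrinsic
    // itself, the 256/512-bit conversions call the unmasked intrinsic and then
    // apply the mask (Ops[2]) as a select against the passthrough value (Ops[1]).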
2902 Intrinsic::ID IID;
2903 switch (BuiltinID) {
2904 default: llvm_unreachable("Unsupported intrinsic!");
2905 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2906 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
2907 break;
2908 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
2909 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
2910 break;
2911 }
2912 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
2913 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
2914 }
2915
2916 case X86::BI__cpuid:
2917 case X86::BI__cpuidex: {
2918 Value *FuncId = EmitScalarExpr(E->getArg(1));
2919 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
2920 ? EmitScalarExpr(E->getArg(2))
2921 : llvm::ConstantInt::get(Int32Ty, 0);
2922
2923 llvm::StructType *CpuidRetTy =
2924 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
2925 llvm::FunctionType *FTy =
2926 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
2927
2928 StringRef Asm, Constraints;
2929 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2930 Asm = "cpuid";
2931 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
2932 } else {
2933 // x86-64 uses %rbx as the base register, so preserve it.
2934 Asm = "xchgq %rbx, ${1:q}\n"
2935 "cpuid\n"
2936 "xchgq %rbx, ${1:q}";
2937 Constraints = "={ax},=r,={cx},={dx},0,2";
2938 }
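    // For example, on x86-64 __cpuid(info, 1) is expected to produce roughly
    //   call { i32, i32, i32, i32 } asm "xchgq %rbx, ${1:q}; cpuid; xchgq %rbx, ${1:q}",
    //        "={ax},=r,={cx},={dx},0,2"(i32 1, i32 0)
    // followed by four i32 stores of the extracted results into info[0..3].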
2939
2940 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
2941 /*hasSideEffects=*/false);
2942 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
2943 Value *BasePtr = EmitScalarExpr(E->getArg(0));
2944 Value *Store = nullptr;
2945 for (unsigned i = 0; i < 4; i++) {
2946 Value *Extracted = Builder.CreateExtractValue(IACall, i);
2947 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
2948 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
2949 }
2950
2951 // Return the last store instruction to signal that we have emitted
2952 // the intrinsic.
2953 return Store;
2954 }
2955
2956 case X86::BI__emul:
2957 case X86::BI__emulu: {
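    // __emul sign-extends its two 32-bit arguments to 64 bits and returns the
    // full 64-bit product, roughly (long long)a * (long long)b; __emulu does
    // the same with zero-extension.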
2958 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
2959 bool isSigned = (BuiltinID == X86::BI__emul);
2960 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
2961 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
2962 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
2963 }
2964 case X86::BI__mulh:
2965 case X86::BI__umulh:
2966 case X86::BI_mul128:
2967 case X86::BI_umul128: {
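    // __mulh/__umulh return only the high 64 bits of the 128-bit product,
    // while _mul128/_umul128 additionally store the high half through their
    // third argument and return the low half, i.e. roughly
    //   low  = (int64_t)((__int128)a * b);
    //   high = (int64_t)(((__int128)a * b) >> 64);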
2968 llvm::Type *ResType = ConvertType(E->getType());
2969 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
2970
2971 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
2972 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
2973 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
2974
2975 Value *MulResult, *HigherBits;
2976 if (IsSigned) {
2977 MulResult = Builder.CreateNSWMul(LHS, RHS);
2978 HigherBits = Builder.CreateAShr(MulResult, 64);
2979 } else {
2980 MulResult = Builder.CreateNUWMul(LHS, RHS);
2981 HigherBits = Builder.CreateLShr(MulResult, 64);
2982 }
2983 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
2984
2985 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
2986 return HigherBits;
2987
2988 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
2989 Builder.CreateStore(HigherBits, HighBitsAddress);
2990 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
2991 }
2992
2993 case X86::BI__faststorefence: {
2994 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
2995 llvm::SyncScope::System);
2996 }
2997 case X86::BI__shiftleft128:
2998 case X86::BI__shiftright128: {
2999 llvm::Function *F = CGM.getIntrinsic(
3000 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
3001 Int64Ty);
3002 // Flip low/high ops and zero-extend amount to matching type.
3003 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
3004 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
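    // (llvm.fshl.i64(High, Low, Amt) yields the top 64 bits of the 128-bit
    // value High:Low shifted left by Amt mod 64, which matches the documented
    // result of __shiftleft128; fshr mirrors this for the right shift.)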
3005 std::swap(Ops[0], Ops[1]);
3006 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
3007 return Builder.CreateCall(F, Ops);
3008 }
3009 case X86::BI_ReadWriteBarrier:
3010 case X86::BI_ReadBarrier:
3011 case X86::BI_WriteBarrier: {
3012 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
3013 llvm::SyncScope::SingleThread);
3014 }
3015
3016 case X86::BI_AddressOfReturnAddress: {
3017 Function *F =
3018 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
3019 return Builder.CreateCall(F);
3020 }
3021 case X86::BI__stosb: {
3022 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
3023 // instruction, but it will create a memset that won't be optimized away.
3024 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
3025 }
3026 case X86::BI__ud2:
3027 // llvm.trap lowers to a ud2a instruction on x86.
3028 return EmitTrapCall(Intrinsic::trap);
3029 case X86::BI__int2c: {
3030 // This syscall signals a driver assertion failure in x86 NT kernels.
3031 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
3032 llvm::InlineAsm *IA =
3033 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
3034 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
3035 getLLVMContext(), llvm::AttributeList::FunctionIndex,
3036 llvm::Attribute::NoReturn);
3037 llvm::CallInst *CI = Builder.CreateCall(IA);
3038 CI->setAttributes(NoReturnAttr);
3039 return CI;
3040 }
3041 case X86::BI__readfsbyte:
3042 case X86::BI__readfsword:
3043 case X86::BI__readfsdword:
3044 case X86::BI__readfsqword: {
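    // Address space 257 is how the x86 backend models %fs-relative addressing
    // (256 is %gs, used by __readgs* below), so the volatile load should
    // become a plain fs:-relative move of the requested width.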
3045 llvm::Type *IntTy = ConvertType(E->getType());
3046 Value *Ptr = Builder.CreateIntToPtr(
3047 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
3048 LoadInst *Load = Builder.CreateAlignedLoad(
3049 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
3050 Load->setVolatile(true);
3051 return Load;
3052 }
3053 case X86::BI__readgsbyte:
3054 case X86::BI__readgsword:
3055 case X86::BI__readgsdword:
3056 case X86::BI__readgsqword: {
3057 llvm::Type *IntTy = ConvertType(E->getType());
3058 Value *Ptr = Builder.CreateIntToPtr(
3059 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
3060 LoadInst *Load = Builder.CreateAlignedLoad(
3061 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
3062 Load->setVolatile(true);
3063 return Load;
3064 }
3065 case X86::BI__builtin_ia32_encodekey128_u32: {
3066 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
3067
3068 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
3069
3070 for (int i = 0; i < 3; ++i) {
3071 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3072 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
3073 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
3074 }
3075
3076 return Builder.CreateExtractValue(Call, 0);
3077 }
3078 case X86::BI__builtin_ia32_encodekey256_u32: {
3079 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
3080
3081 Value *Call =
3082 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
3083
3084 for (int i = 0; i < 4; ++i) {
3085 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3086 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
3087 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
3088 }
3089
3090 return Builder.CreateExtractValue(Call, 0);
3091 }
3092 case X86::BI__builtin_ia32_aesenc128kl_u8:
3093 case X86::BI__builtin_ia32_aesdec128kl_u8:
3094 case X86::BI__builtin_ia32_aesenc256kl_u8:
3095 case X86::BI__builtin_ia32_aesdec256kl_u8: {
3096 Intrinsic::ID IID;
3097 StringRef BlockName;
3098 switch (BuiltinID) {
3099 default:
3100 llvm_unreachable("Unexpected builtin");
3101 case X86::BI__builtin_ia32_aesenc128kl_u8:
3102 IID = Intrinsic::x86_aesenc128kl;
3103 BlockName = "aesenc128kl";
3104 break;
3105 case X86::BI__builtin_ia32_aesdec128kl_u8:
3106 IID = Intrinsic::x86_aesdec128kl;
3107 BlockName = "aesdec128kl";
3108 break;
3109 case X86::BI__builtin_ia32_aesenc256kl_u8:
3110 IID = Intrinsic::x86_aesenc256kl;
3111 BlockName = "aesenc256kl";
3112 break;
3113 case X86::BI__builtin_ia32_aesdec256kl_u8:
3114 IID = Intrinsic::x86_aesdec256kl;
3115 BlockName = "aesdec256kl";
3116 break;
3117 }
3118
3119 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
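    // The intrinsic returns { flag, data }: the low bit of the flag selects
    // the "_no_error" path below, where the processed block is stored to
    // Ops[0]; on the error path zeros are stored instead. The flag byte
    // itself is what the builtin returns to the caller.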
3120
3121 BasicBlock *NoError =
3122 createBasicBlock(BlockName + "_no_error", this->CurFn);
3123 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
3124 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
3125
3126 Value *Ret = Builder.CreateExtractValue(Call, 0);
3127 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3128 Value *Out = Builder.CreateExtractValue(Call, 1);
3129 Builder.CreateCondBr(Succ, NoError, Error);
3130
3131 Builder.SetInsertPoint(NoError);
3132 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
3133 Builder.CreateBr(End);
3134
3135 Builder.SetInsertPoint(Error);
3136 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
3137 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
3138 Builder.CreateBr(End);
3139
3140 Builder.SetInsertPoint(End);
3141 return Builder.CreateExtractValue(Call, 0);
3142 }
3143 case X86::BI__builtin_ia32_aesencwide128kl_u8:
3144 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3145 case X86::BI__builtin_ia32_aesencwide256kl_u8:
3146 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
3147 Intrinsic::ID IID;
3148 StringRef BlockName;
3149 switch (BuiltinID) {
3150 case X86::BI__builtin_ia32_aesencwide128kl_u8:
3151 IID = Intrinsic::x86_aesencwide128kl;
3152 BlockName = "aesencwide128kl";
3153 break;
3154 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3155 IID = Intrinsic::x86_aesdecwide128kl;
3156 BlockName = "aesdecwide128kl";
3157 break;
3158 case X86::BI__builtin_ia32_aesencwide256kl_u8:
3159 IID = Intrinsic::x86_aesencwide256kl;
3160 BlockName = "aesencwide256kl";
3161 break;
3162 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
3163 IID = Intrinsic::x86_aesdecwide256kl;
3164 BlockName = "aesdecwide256kl";
3165 break;
3166 }
3167
3168 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
3169 Value *InOps[9];
3170 InOps[0] = Ops[2];
3171 for (int i = 0; i != 8; ++i) {
3172 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
3173 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
3174 }
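    // InOps now holds the key handle (Ops[2]) followed by the eight 128-bit
    // data blocks loaded from the source buffer, in the operand order the
    // wide key locker intrinsics expect.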
3175
3176 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
3177
3178 BasicBlock *NoError =
3179 createBasicBlock(BlockName + "_no_error", this->CurFn);
3180 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
3181 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
3182
3183 Value *Ret = Builder.CreateExtractValue(Call, 0);
3184 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3185 Builder.CreateCondBr(Succ, NoError, Error);
3186
3187 Builder.SetInsertPoint(NoError);
3188 for (int i = 0; i != 8; ++i) {
3189 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3190 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
3191 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
3192 }
3193 Builder.CreateBr(End);
3194
3195 Builder.SetInsertPoint(Error);
3196 for (int i = 0; i != 8; ++i) {
3197 Value *Out = Builder.CreateExtractValue(Call, i + 1);
3198 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
3199 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
3200 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
3201 }
3202 Builder.CreateBr(End);
3203
3204 Builder.SetInsertPoint(End);
3205 return Builder.CreateExtractValue(Call, 0);
3206 }
3207 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
3208 IsConjFMA = true;
3209 [[fallthrough]];
3210 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
3211 Intrinsic::ID IID = IsConjFMA
3212 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
3213 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
3214 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3215 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
3216 }
3217 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
3218 IsConjFMA = true;
3219 [[fallthrough]];
3220 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
3221 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3222 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3223 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3224 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
3225 return EmitX86Select(*this, And, Call, Ops[0]);
3226 }
3227 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
3228 IsConjFMA = true;
3229 [[fallthrough]];
3230 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
3231 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3232 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3233 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3234 static constexpr int Mask[] = {0, 5, 6, 7};
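    // With the 4-element vectors used here, indices {0, 5, 6, 7} take lane 0
    // from the intrinsic result and lanes 1-3 from Ops[2], so only the scalar
    // lane is replaced by the computed value.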
3235 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
3236 }
3237 case X86::BI__builtin_ia32_prefetchi:
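    // This maps onto the generic llvm.prefetch intrinsic: the second argument
    // (0) requests a read prefetch, Ops[1] carries the locality hint, and the
    // trailing 0 selects the instruction cache rather than the data cache.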
3238 return Builder.CreateCall(
3239 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
3240 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
3241 llvm::ConstantInt::get(Int32Ty, 0)});
3242 }
3243}