clang 23.0.0git
X86.cpp
Go to the documentation of this file.
1//===---------- X86.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
15#include "llvm/IR/InlineAsm.h"
16#include "llvm/IR/IntrinsicsX86.h"
17#include "llvm/TargetParser/X86TargetParser.h"
18
19using namespace clang;
20using namespace CodeGen;
21using namespace llvm;
22
23static std::optional<CodeGenFunction::MSVCIntrin>
24translateX86ToMsvcIntrin(unsigned BuiltinID) {
25 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
26 switch (BuiltinID) {
27 default:
28 return std::nullopt;
29 case clang::X86::BI_BitScanForward:
30 case clang::X86::BI_BitScanForward64:
31 return MSVCIntrin::_BitScanForward;
32 case clang::X86::BI_BitScanReverse:
33 case clang::X86::BI_BitScanReverse64:
34 return MSVCIntrin::_BitScanReverse;
35 case clang::X86::BI_InterlockedAnd64:
36 return MSVCIntrin::_InterlockedAnd;
37 case clang::X86::BI_InterlockedCompareExchange128:
38 return MSVCIntrin::_InterlockedCompareExchange128;
39 case clang::X86::BI_InterlockedExchange64:
40 return MSVCIntrin::_InterlockedExchange;
41 case clang::X86::BI_InterlockedExchangeAdd64:
42 return MSVCIntrin::_InterlockedExchangeAdd;
43 case clang::X86::BI_InterlockedExchangeSub64:
44 return MSVCIntrin::_InterlockedExchangeSub;
45 case clang::X86::BI_InterlockedOr64:
46 return MSVCIntrin::_InterlockedOr;
47 case clang::X86::BI_InterlockedXor64:
48 return MSVCIntrin::_InterlockedXor;
49 case clang::X86::BI_InterlockedDecrement64:
50 return MSVCIntrin::_InterlockedDecrement;
51 case clang::X86::BI_InterlockedIncrement64:
52 return MSVCIntrin::_InterlockedIncrement;
53 }
54 llvm_unreachable("must return from switch");
55}
56
57// Convert the mask from an integer type to a vector of i1.
59 unsigned NumElts) {
60
61 auto *MaskTy = llvm::FixedVectorType::get(
62 CGF.Builder.getInt1Ty(),
63 cast<IntegerType>(Mask->getType())->getBitWidth());
64 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
65
66 // If we have less than 8 elements, then the starting mask was an i8 and
67 // we need to extract down to the right number of elements.
68 if (NumElts < 8) {
69 int Indices[4];
70 for (unsigned i = 0; i != NumElts; ++i)
71 Indices[i] = i;
72 MaskVec = CGF.Builder.CreateShuffleVector(
73 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
74 }
75 return MaskVec;
76}
77
78/// Emit rounding for the value \p X according to the rounding \p
79/// RoundingControl based on bits 0 and 1.
81 unsigned RoundingControl) {
82 unsigned RoundingMask = 0b11;
83 unsigned RoundingMode = RoundingControl & RoundingMask;
84
85 Intrinsic::ID ID = Intrinsic::not_intrinsic;
86 LLVMContext &Ctx = CGF.CGM.getLLVMContext();
87 if (CGF.Builder.getIsFPConstrained()) {
88
89 Value *ExceptMode =
90 MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore"));
91
92 switch (RoundingMode) {
93 case 0b00:
94 ID = Intrinsic::experimental_constrained_roundeven;
95 break;
96 case 0b01:
97 ID = Intrinsic::experimental_constrained_floor;
98 break;
99 case 0b10:
100 ID = Intrinsic::experimental_constrained_ceil;
101 break;
102 case 0b11:
103 ID = Intrinsic::experimental_constrained_trunc;
104 break;
105 default:
106 llvm_unreachable("Invalid rounding mode");
107 }
108
109 Function *F = CGF.CGM.getIntrinsic(ID, X->getType());
110 return CGF.Builder.CreateCall(F, {X, ExceptMode});
111 }
112
113 switch (RoundingMode) {
114 case 0b00:
115 ID = Intrinsic::roundeven;
116 break;
117 case 0b01:
118 ID = Intrinsic::floor;
119 break;
120 case 0b10:
121 ID = Intrinsic::ceil;
122 break;
123 case 0b11:
124 ID = Intrinsic::trunc;
125 break;
126 default:
127 llvm_unreachable("Invalid rounding mode");
128 }
129
130 Function *F = CGF.CGM.getIntrinsic(ID, X->getType());
131 return CGF.Builder.CreateCall(F, {X});
132}
133
// Emits a masked vector store. Ops[0] is the destination pointer, Ops[1] the
// vector being stored and Ops[2] the integer mask, which getMaskVecValue
// converts to a vector of i1 with one lane per element of Ops[1].
// Returns the result of IRBuilder::CreateMaskedStore.
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` / `Ops` parameters) is missing from this listing --
// confirm the declaration against upstream.
135 Align Alignment) {
136 Value *Ptr = Ops[0];
137
138 Value *MaskVec = getMaskVecValue(
139 CGF, Ops[2],
140 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
141
142 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
143}
144
// Emits a masked vector load. Ops[0] is the source pointer, Ops[1] supplies
// both the result type and the pass-through value, and Ops[2] is the integer
// mask, which getMaskVecValue converts to a vector of i1.
// Returns the result of IRBuilder::CreateMaskedLoad.
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` / `Ops` parameters) is missing from this listing --
// confirm the declaration against upstream.
146 Align Alignment) {
147 llvm::Type *Ty = Ops[1]->getType();
148 Value *Ptr = Ops[0];
149
150 Value *MaskVec = getMaskVecValue(
151 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
152
153 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
154}
155
// Emits a masked expand-load. Ops[0] is the source pointer, Ops[1] supplies
// both the result vector type and the pass-through value, and Ops[2] is the
// integer mask (converted to a vector of i1). No alignment is specified for
// the load (a default-constructed MaybeAlign is passed).
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` parameter) is missing from this listing -- confirm
// the declaration against upstream.
157 ArrayRef<Value *> Ops) {
158 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
159 Value *Ptr = Ops[0];
160
161 Value *MaskVec = getMaskVecValue(
162 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
163
164 return CGF.Builder.CreateMaskedExpandLoad(ResultTy, Ptr, MaybeAlign(),
165 MaskVec, Ops[1]);
166}
167
// Emits a call to x86_avx512_mask_compress (IsCompress == true) or
// x86_avx512_mask_expand (IsCompress == false), overloaded on the type of
// Ops[1]. The call receives Ops[0], Ops[1] and the integer mask Ops[2]
// converted to a vector of i1.
// NOTE(review): the first lines of this signature (return type, function
// name and the leading `CGF` / `Ops` parameters) are missing from this
// listing -- confirm the declaration against upstream.
170 bool IsCompress) {
171 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
172
173 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
174
175 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
176 : Intrinsic::x86_avx512_mask_expand;
177 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
178 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
179}
180
// Emits a masked compress-store. Ops[0] is the destination pointer, Ops[1]
// the vector being stored and Ops[2] the integer mask (converted to a vector
// of i1). No alignment is specified for the store (a default-constructed
// MaybeAlign is passed).
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` parameter) is missing from this listing -- confirm
// the declaration against upstream.
182 ArrayRef<Value *> Ops) {
183 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
184 Value *Ptr = Ops[0];
185
186 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
187
188 return CGF.Builder.CreateMaskedCompressStore(Ops[1], Ptr, MaybeAlign(),
189 MaskVec);
190}
191
192static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
194 bool InvertLHS = false) {
195 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
196 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
197 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
198
199 if (InvertLHS)
200 LHS = CGF.Builder.CreateNot(LHS);
201
202 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
203 Ops[0]->getType());
204}
205
// Emits a call to the generic fshr (IsRight == true) or fshl intrinsic on
// Op0 and Op1, overloaded on the type of Op0. If Amt does not already have
// that type it is integer-cast (zero-extending) to the element type and
// splatted across all lanes first.
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` / `Op0` / `Op1` parameters) is missing from this
// listing -- confirm the declaration against upstream.
207 Value *Amt, bool IsRight) {
208 llvm::Type *Ty = Op0->getType();
209
210 // Amount may be scalar immediate, in which case create a splat vector.
211 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
212 // we only care about the lowest log2 bits anyway.
213 if (Amt->getType() != Ty) {
214 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
215 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
216 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
217 }
218
219 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
220 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
221 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
222}
223
// Lowers an XOP vpcom/vpcomu style comparison (per the unreachable message
// below) to IR. Ops[0] and Ops[1] are the operands and Ops[2] is a constant
// immediate whose low three bits select the predicate:
//   0 = LT, 1 = LE, 2 = GT, 3 = GE (signed or unsigned per IsSigned),
//   4 = EQ, 5 = NE, 6 = always false, 7 = always true.
// The i1 comparison result is sign-extended to the operand type, so each
// lane is all-ones or all-zeros.
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` / `Ops` parameters) is missing from this listing --
// confirm the declaration against upstream.
225 bool IsSigned) {
226 Value *Op0 = Ops[0];
227 Value *Op1 = Ops[1];
228 llvm::Type *Ty = Op0->getType();
229 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
230
231 CmpInst::Predicate Pred;
232 switch (Imm) {
233 case 0x0:
234 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
235 break;
236 case 0x1:
237 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
238 break;
239 case 0x2:
240 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
241 break;
242 case 0x3:
243 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
244 break;
245 case 0x4:
246 Pred = ICmpInst::ICMP_EQ;
247 break;
248 case 0x5:
249 Pred = ICmpInst::ICMP_NE;
250 break;
251 case 0x6:
252 return llvm::Constant::getNullValue(Ty); // FALSE
253 case 0x7:
254 return llvm::Constant::getAllOnesValue(Ty); // TRUE
255 default:
256 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
257 }
258
259 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
260 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
261 return Res;
262}
263
265 Value *Mask, Value *Op0, Value *Op1) {
266
267 // If the mask is all ones just return first argument.
268 if (const auto *C = dyn_cast<Constant>(Mask))
269 if (C->isAllOnesValue())
270 return Op0;
271
272 Mask = getMaskVecValue(
273 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
274
275 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
276}
277
279 Value *Mask, Value *Op0, Value *Op1) {
280 // If the mask is all ones just return first argument.
281 if (const auto *C = dyn_cast<Constant>(Mask))
282 if (C->isAllOnesValue())
283 return Op0;
284
285 auto *MaskTy = llvm::FixedVectorType::get(
286 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
287 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
288 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
289 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
290}
291
293 unsigned NumElts, Value *MaskIn) {
294 if (MaskIn) {
295 const auto *C = dyn_cast<Constant>(MaskIn);
296 if (!C || !C->isAllOnesValue())
297 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
298 }
299
300 if (NumElts < 8) {
301 int Indices[8];
302 for (unsigned i = 0; i != NumElts; ++i)
303 Indices[i] = i;
304 for (unsigned i = NumElts; i != 8; ++i)
305 Indices[i] = i % NumElts + NumElts;
306 Cmp = CGF.Builder.CreateShuffleVector(
307 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
308 }
309
310 return CGF.Builder.CreateBitCast(Cmp,
311 IntegerType::get(CGF.getLLVMContext(),
312 std::max(NumElts, 8U)));
313}
314
316 bool Signed, ArrayRef<Value *> Ops) {
317 assert((Ops.size() == 2 || Ops.size() == 4) &&
318 "Unexpected number of arguments");
319 unsigned NumElts =
320 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
321 Value *Cmp;
322
323 if (CC == 3) {
324 Cmp = Constant::getNullValue(
325 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
326 } else if (CC == 7) {
327 Cmp = Constant::getAllOnesValue(
328 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
329 } else {
330 ICmpInst::Predicate Pred;
331 switch (CC) {
332 default: llvm_unreachable("Unknown condition code");
333 case 0: Pred = ICmpInst::ICMP_EQ; break;
334 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
335 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
336 case 4: Pred = ICmpInst::ICMP_NE; break;
337 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
338 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
339 }
340 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
341 }
342
343 Value *MaskIn = nullptr;
344 if (Ops.size() == 4)
345 MaskIn = Ops[3];
346
347 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
348}
349
// Builds an integer mask from `In` by comparing it against a zero value of
// the same type with condition code 1 and Signed == true, which
// EmitX86MaskedCompare lowers to a signed less-than; i.e. a mask bit is set
// for each lane that is negative.
// NOTE(review): the signature line (return type, function name, the `CGF` and
// `In` parameters and the opening brace) is missing from this listing --
// confirm the declaration against upstream.
351 Value *Zero = Constant::getNullValue(In->getType());
352 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
353}
354
// Lowers a masked integer-to-floating-point vector conversion. Ops[0] is the
// integer source, Ops[1] the pass-through value (which also fixes the result
// type), Ops[2] the integer mask and Ops[3] a constant rounding immediate.
// When the rounding immediate is 4 a plain sitofp/uitofp is emitted under the
// FP options of expression E; any other value calls the
// x86_avx512_{s,u}itofp_round intrinsic with that immediate. The converted
// value is then blended with Ops[1] under the mask via EmitX86Select.
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` / `E` parameters) is missing from this listing --
// confirm the declaration against upstream.
356 ArrayRef<Value *> Ops, bool IsSigned) {
357 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
358 llvm::Type *Ty = Ops[1]->getType();
359
360 Value *Res;
361 if (Rnd != 4) {
362 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
363 : Intrinsic::x86_avx512_uitofp_round;
364 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
365 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
366 } else {
367 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
368 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
369 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
370 }
371
372 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
373}
374
375// Lowers X86 FMA intrinsics to IR.
//
// Ops[0..2] are the three multiply-add operands (A, B, C) and Ops.back() is a
// constant rounding immediate. The work is done in three steps:
//   1. The first switch picks the 512-bit target intrinsic for the known
//      builtins; for the fmsub/fmsubadd *_mask3 builtins it also sets
//      Subtract so that C is negated.
//   2. The target intrinsic is called when one was selected and either the
//      rounding immediate is not 4 or IsAddSub is set; otherwise the generic
//      fma (or its constrained variant) is emitted.
//   3. For *_mask / *_maskz / *_mask3 builtins the result is blended under
//      the mask in Ops[3] with, respectively, Ops[0], zero, or Ops[2].
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` / `E` parameters) is missing from this listing --
// confirm the declaration against upstream.
377 ArrayRef<Value *> Ops, unsigned BuiltinID,
378 bool IsAddSub) {
379
380 bool Subtract = false;
381 Intrinsic::ID IID = Intrinsic::not_intrinsic;
382 switch (BuiltinID) {
383 default: break;
384 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
385 Subtract = true;
386 [[fallthrough]];
387 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
388 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
389 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
390 IID = Intrinsic::x86_avx512fp16_vfmadd_ph_512;
391 break;
392 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
393 Subtract = true;
394 [[fallthrough]];
395 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
396 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
397 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
398 IID = Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
399 break;
400 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
401 Subtract = true;
402 [[fallthrough]];
403 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
404 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
405 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
406 IID = Intrinsic::x86_avx512_vfmadd_ps_512; break;
407 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
408 Subtract = true;
409 [[fallthrough]];
410 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
411 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
412 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
413 IID = Intrinsic::x86_avx512_vfmadd_pd_512; break;
414 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
415 Subtract = true;
416 [[fallthrough]];
417 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
418 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
419 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
420 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
421 break;
422 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
423 Subtract = true;
424 [[fallthrough]];
425 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
426 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
427 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
428 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
429 break;
430 }
431
432 Value *A = Ops[0];
433 Value *B = Ops[1];
434 Value *C = Ops[2];
435
// The subtract forms are expressed as an add of the negated third operand.
436 if (Subtract)
437 C = CGF.Builder.CreateFNeg(C);
438
439 Value *Res;
440
441 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
442 if (IID != Intrinsic::not_intrinsic &&
443 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
444 IsAddSub)) {
445 Function *Intr = CGF.CGM.getIntrinsic(IID);
446 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
447 } else {
448 llvm::Type *Ty = A->getType();
449 Function *FMA;
450 if (CGF.Builder.getIsFPConstrained()) {
451 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
452 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
453 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
454 } else {
455 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
456 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
457 }
458 }
459
460 // Handle any required masking.
461 Value *MaskFalseVal = nullptr;
462 switch (BuiltinID) {
// *_mask: masked-off lanes keep the first operand.
463 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
464 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
465 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
466 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
467 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
468 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
469 MaskFalseVal = Ops[0];
470 break;
// *_maskz: masked-off lanes are zeroed.
471 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
472 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
473 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
474 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
475 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
476 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
477 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
478 break;
// *_mask3: masked-off lanes keep the original (un-negated) third operand.
479 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
480 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
481 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
482 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
483 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
484 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
485 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
486 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
487 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
488 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
489 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
490 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
491 MaskFalseVal = Ops[2];
492 break;
493 }
494
495 if (MaskFalseVal)
496 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
497
498 return Res;
499}
500
// Lowers a scalar (element 0) fused multiply-add. Element 0 of Ops[0], Ops[1]
// and Ops[2] is extracted (Ops is modified in place), combined, and inserted
// into element 0 of Upper, which is returned.
//   - If Ops has more than four entries, Ops[4] is a constant rounding
//     immediate; any value other than 4 selects the x86 vfmadd_f16/f32/f64
//     intrinsic by element size. Otherwise the generic fma (or its
//     constrained variant) is emitted.
//   - If Ops has more than three entries, Ops[3] is a mask and the result is
//     selected against a pass-through: zero when ZeroMask is set, otherwise
//     Ops[PTIdx] (already reduced to element 0).
//   - NegAcc negates Ops[2] before the operation.
// NOTE(review): the first two lines of this signature (return type, function
// name and the leading `CGF` / `E` / `Ops` / `Upper` parameters) are missing
// from this listing -- confirm the declaration against upstream.
503 bool ZeroMask = false, unsigned PTIdx = 0,
504 bool NegAcc = false) {
505 unsigned Rnd = 4;
506 if (Ops.size() > 4)
507 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
508
509 if (NegAcc)
510 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
511
512 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
513 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
514 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
515 Value *Res;
516 if (Rnd != 4) {
517 Intrinsic::ID IID;
518
519 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
520 case 16:
521 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
522 break;
523 case 32:
524 IID = Intrinsic::x86_avx512_vfmadd_f32;
525 break;
526 case 64:
527 IID = Intrinsic::x86_avx512_vfmadd_f64;
528 break;
529 default:
530 llvm_unreachable("Unexpected size");
531 }
532 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
533 {Ops[0], Ops[1], Ops[2], Ops[4]});
534 } else if (CGF.Builder.getIsFPConstrained()) {
535 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
536 Function *FMA = CGF.CGM.getIntrinsic(
537 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
538 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
539 } else {
540 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
541 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
542 }
543 // If we have more than 3 arguments, we need to do masking.
544 if (Ops.size() > 3) {
545 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
546 : Ops[PTIdx];
547
548 // If we negated the accumulator and the its the PassThru value we need to
549 // bypass the negate. Conveniently Upper should be the same thing in this
550 // case.
551 if (NegAcc && PTIdx == 2)
552 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
553
554 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
555 }
556 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
557}
558
559static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
560 ArrayRef<Value *> Ops) {
561 llvm::Type *Ty = Ops[0]->getType();
562 // Arguments have a vXi32 type so cast to vXi64.
563 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
564 Ty->getPrimitiveSizeInBits() / 64);
565 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
566 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
567
568 if (IsSigned) {
569 // Shift left then arithmetic shift right.
570 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
571 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
572 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
573 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
574 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
575 } else {
576 // Clear the upper bits.
577 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
578 LHS = CGF.Builder.CreateAnd(LHS, Mask);
579 RHS = CGF.Builder.CreateAnd(RHS, Mask);
580 }
581
582 return CGF.Builder.CreateMul(LHS, RHS);
583}
584
585// Emit a masked pternlog intrinsic. This only exists because the header has to
586// use a macro and we aren't able to pass the input argument to a pternlog
587// builtin and a select builtin without evaluating it twice.
588static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
589 ArrayRef<Value *> Ops) {
590 llvm::Type *Ty = Ops[0]->getType();
591
592 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
593 unsigned EltWidth = Ty->getScalarSizeInBits();
594 Intrinsic::ID IID;
595 if (VecWidth == 128 && EltWidth == 32)
596 IID = Intrinsic::x86_avx512_pternlog_d_128;
597 else if (VecWidth == 256 && EltWidth == 32)
598 IID = Intrinsic::x86_avx512_pternlog_d_256;
599 else if (VecWidth == 512 && EltWidth == 32)
600 IID = Intrinsic::x86_avx512_pternlog_d_512;
601 else if (VecWidth == 128 && EltWidth == 64)
602 IID = Intrinsic::x86_avx512_pternlog_q_128;
603 else if (VecWidth == 256 && EltWidth == 64)
604 IID = Intrinsic::x86_avx512_pternlog_q_256;
605 else if (VecWidth == 512 && EltWidth == 64)
606 IID = Intrinsic::x86_avx512_pternlog_q_512;
607 else
608 llvm_unreachable("Unexpected intrinsic");
609
610 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
611 Ops.drop_back());
612 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
613 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
614}
615
// Expands an integer mask into a vector of type DstTy: the mask `Op` is
// converted to a vector of i1 with one lane per element of DstTy and then
// sign-extended, so each lane becomes all-ones or all-zeros.
// NOTE(review): the first line of this signature (return type, function name
// and the leading `CGF` / `Op` parameters) is missing from this listing --
// confirm the declaration against upstream.
617 llvm::Type *DstTy) {
618 unsigned NumberOfElements =
619 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
620 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
621 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
622}
623
624Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
625 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
626 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
627 return EmitX86CpuIs(CPUStr);
628}
629
630// Convert F16 halfs to floats.
//
// Ops has 1, 3 or 4 entries: Ops[0] is the source vector of 16-bit integers;
// with three or more operands Ops[1] is the pass-through value and Ops[2] the
// integer mask; with four operands Ops[3] is a constant rounding/SAE
// immediate. A four-operand call whose immediate is not 4 is forwarded to the
// x86_avx512_mask_vcvtph2ps_512 intrinsic unchanged. Otherwise the source is
// (if wider than the destination) narrowed to its low four lanes, bitcast to
// a half vector, fp-extended to DstTy and, when a mask is present, blended
// with the pass-through via EmitX86Select.
// NOTE(review): the first two lines of this signature (return type, function
// name and the leading `CGF` / `Ops` parameters) are missing from this
// listing -- confirm the declaration against upstream.
633 llvm::Type *DstTy) {
634 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
635 "Unknown cvtph2ps intrinsic");
636
637 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
638 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
639 Function *F =
640 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
641 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
642 }
643
644 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
645 Value *Src = Ops[0];
646
647 // Extract the subvector.
648 if (NumDstElts !=
649 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
650 assert(NumDstElts == 4 && "Unexpected vector size");
651 Src = CGF.Builder.CreateShuffleVector(Src, {0, 1, 2, 3});
652 }
653
654 // Bitcast from vXi16 to vXf16.
655 auto *HalfTy = llvm::FixedVectorType::get(
656 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
657 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
658
659 // Perform the fp-extension.
660 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
661
662 if (Ops.size() >= 3)
663 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
664 return Res;
665}
666
667Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
668
669 llvm::Type *Int32Ty = Builder.getInt32Ty();
670
671 // Matching the struct layout from the compiler-rt/libgcc structure that is
672 // filled in:
673 // unsigned int __cpu_vendor;
674 // unsigned int __cpu_type;
675 // unsigned int __cpu_subtype;
676 // unsigned int __cpu_features[1];
677 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
678 llvm::ArrayType::get(Int32Ty, 1));
679
680 // Grab the global __cpu_model.
681 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
682 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
683
684 // Calculate the index needed to access the correct field based on the
685 // range. Also adjust the expected value.
686 auto [Index, Value] = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
687#define X86_VENDOR(ENUM, STRING) \
688 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
689#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
690 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
691#define X86_CPU_TYPE(ENUM, STR) \
692 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
694 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
695#define X86_CPU_SUBTYPE(ENUM, STR) \
696 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
697#include "llvm/TargetParser/X86TargetParser.def"
698 .Default({0, 0});
699 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
700
701 // Grab the appropriate field from __cpu_model.
702 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
703 ConstantInt::get(Int32Ty, Index)};
704 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
705 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
707
708 // Check the value of the field against the requested value.
709 return Builder.CreateICmpEQ(CpuValue,
710 llvm::ConstantInt::get(Int32Ty, Value));
711}
712
713Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
714 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
715 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
716 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
717 return Builder.getFalse();
718 return EmitX86CpuSupports(FeatureStr);
719}
720
721Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
722 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
723}
724
725llvm::Value *
726CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
727 Value *Result = Builder.getTrue();
728 if (FeatureMask[0] != 0) {
729 // Matching the struct layout from the compiler-rt/libgcc structure that is
730 // filled in:
731 // unsigned int __cpu_vendor;
732 // unsigned int __cpu_type;
733 // unsigned int __cpu_subtype;
734 // unsigned int __cpu_features[1];
735 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
736 llvm::ArrayType::get(Int32Ty, 1));
737
738 // Grab the global __cpu_model.
739 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
740 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
741
742 // Grab the first (0th) element from the field __cpu_features off of the
743 // global in the struct STy.
744 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
745 Builder.getInt32(0)};
746 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
747 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
749
750 // Check the value of the bit corresponding to the feature requested.
751 Value *Mask = Builder.getInt32(FeatureMask[0]);
752 Value *Bitset = Builder.CreateAnd(Features, Mask);
753 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
754 Result = Builder.CreateAnd(Result, Cmp);
755 }
756
757 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
758 llvm::Constant *CpuFeatures2 =
759 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
760 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
761 for (int i = 1; i != 4; ++i) {
762 const uint32_t M = FeatureMask[i];
763 if (!M)
764 continue;
765 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
766 Value *Features = Builder.CreateAlignedLoad(
767 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
769 // Check the value of the bit corresponding to the feature requested.
770 Value *Mask = Builder.getInt32(M);
771 Value *Bitset = Builder.CreateAnd(Features, Mask);
772 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
773 Result = Builder.CreateAnd(Result, Cmp);
774 }
775
776 return Result;
777}
778
779Value *CodeGenFunction::EmitX86CpuInit() {
780 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
781 /*Variadic*/ false);
782 llvm::FunctionCallee Func =
783 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
784 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
785 cast<llvm::GlobalValue>(Func.getCallee())
786 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
787 return Builder.CreateCall(Func);
788}
789
790
792 const CallExpr *E) {
793 if (BuiltinID == Builtin::BI__builtin_cpu_is)
794 return EmitX86CpuIs(E);
795 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
796 return EmitX86CpuSupports(E);
797 if (BuiltinID == Builtin::BI__builtin_cpu_init)
798 return EmitX86CpuInit();
799
800 // Handle MSVC intrinsics before argument evaluation to prevent double
801 // evaluation.
802 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
803 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
804
806 bool IsMaskFCmp = false;
807 bool IsConjFMA = false;
808
809 // Find out if any arguments are required to be integer constant expressions.
810 unsigned ICEArguments = 0;
812 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
813 assert(Error == ASTContext::GE_None && "Should not codegen an error");
814
815 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
816 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
817 }
818
819 // These exist so that the builtin that takes an immediate can be bounds
820 // checked by clang to avoid passing bad immediates to the backend. Since
821 // AVX has a larger immediate than SSE we would need separate builtins to
822 // do the different bounds checking. Rather than create a clang specific
823 // SSE only builtin, this implements eight separate builtins to match gcc
824 // implementation.
825 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
826 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
827 llvm::Function *F = CGM.getIntrinsic(ID);
828 return Builder.CreateCall(F, Ops);
829 };
830
831 // For the vector forms of FP comparisons, translate the builtins directly to
832 // IR.
833 // TODO: The builtins could be removed if the SSE header files used vector
834 // extension comparisons directly (vector ordered/unordered may need
835 // additional support via __builtin_isnan()).
836 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
837 bool IsSignaling) {
838 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
839 Value *Cmp;
840 if (IsSignaling)
841 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
842 else
843 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
844 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
845 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
846 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
847 return Builder.CreateBitCast(Sext, FPVecTy);
848 };
849
850 switch (BuiltinID) {
851 default: return nullptr;
852 case X86::BI_mm_prefetch: {
853 Value *Address = Ops[0];
854 ConstantInt *C = cast<ConstantInt>(Ops[1]);
855 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
856 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
857 Value *Data = ConstantInt::get(Int32Ty, 1);
858 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
859 return Builder.CreateCall(F, {Address, RW, Locality, Data});
860 }
861 case X86::BI_m_prefetch:
862 case X86::BI_m_prefetchw: {
863 Value *Address = Ops[0];
864 // The 'w' suffix implies write.
865 Value *RW =
866 ConstantInt::get(Int32Ty, BuiltinID == X86::BI_m_prefetchw ? 1 : 0);
867 Value *Locality = ConstantInt::get(Int32Ty, 0x3);
868 Value *Data = ConstantInt::get(Int32Ty, 1);
869 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
870 return Builder.CreateCall(F, {Address, RW, Locality, Data});
871 }
872 case X86::BI_mm_clflush: {
873 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
874 Ops[0]);
875 }
876 case X86::BI_mm_lfence: {
877 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
878 }
879 case X86::BI_mm_mfence: {
880 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
881 }
882 case X86::BI_mm_sfence: {
883 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
884 }
885 case X86::BI_mm_pause: {
886 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
887 }
888 case X86::BI__rdtsc: {
889 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
890 }
891 case X86::BI__builtin_ia32_rdtscp: {
892 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
893 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
894 Ops[0]);
895 return Builder.CreateExtractValue(Call, 0);
896 }
897 case X86::BI__builtin_ia32_roundps:
898 case X86::BI__builtin_ia32_roundpd:
899 case X86::BI__builtin_ia32_roundps256:
900 case X86::BI__builtin_ia32_roundpd256: {
901 unsigned M = cast<ConstantInt>(Ops[1])->getZExtValue();
902 unsigned MXCSRMask = 0b100;
903 unsigned FRoundNoExcMask = 0b1000;
904 unsigned UseMXCSR = MXCSRMask & M;
905 unsigned FRoundNoExc = FRoundNoExcMask & M;
906
907 if (UseMXCSR || !FRoundNoExc) {
908
909 Intrinsic::ID ID = Intrinsic::not_intrinsic;
910
911 switch (BuiltinID) {
912 case X86::BI__builtin_ia32_roundps:
913 ID = Intrinsic::x86_sse41_round_ps;
914 break;
915 case X86::BI__builtin_ia32_roundps256:
916 ID = Intrinsic::x86_avx_round_ps_256;
917 break;
918 case X86::BI__builtin_ia32_roundpd:
919 ID = Intrinsic::x86_sse41_round_pd;
920 break;
921 case X86::BI__builtin_ia32_roundpd256:
922 ID = Intrinsic::x86_avx_round_pd_256;
923 break;
924 default:
925 llvm_unreachable("must return from switch");
926 }
927
928 Function *F = CGM.getIntrinsic(ID);
929 return Builder.CreateCall(F, Ops);
930 }
931
932 return emitX86RoundImmediate(*this, Ops[0], M);
933 }
934 case X86::BI__builtin_ia32_roundss:
935 case X86::BI__builtin_ia32_roundsd: {
936 unsigned M = cast<ConstantInt>(Ops[2])->getZExtValue();
937 unsigned MXCSRMask = 0b100;
938 unsigned FRoundNoExcMask = 0b1000;
939 unsigned UseMXCSR = MXCSRMask & M;
940 unsigned FRoundNoExc = FRoundNoExcMask & M;
941
942 if (UseMXCSR || !FRoundNoExc) {
943
944 Intrinsic::ID ID = Intrinsic::not_intrinsic;
945
946 switch (BuiltinID) {
947 case X86::BI__builtin_ia32_roundss:
948 ID = Intrinsic::x86_sse41_round_ss;
949 break;
950 case X86::BI__builtin_ia32_roundsd:
951 ID = Intrinsic::x86_sse41_round_sd;
952 break;
953 default:
954 llvm_unreachable("must return from switch");
955 }
956
957 Function *F = CGM.getIntrinsic(ID);
958 return Builder.CreateCall(F, Ops);
959 }
960
961 Value *Idx = Builder.getInt32(0);
962 Value *ValAt0 = Builder.CreateExtractElement(Ops[1], Idx);
963 Value *RoundedAt0 = emitX86RoundImmediate(*this, ValAt0, M);
964
965 return Builder.CreateInsertElement(Ops[0], RoundedAt0, Idx);
966 }
967 case X86::BI__builtin_ia32_lzcnt_u16:
968 case X86::BI__builtin_ia32_lzcnt_u32:
969 case X86::BI__builtin_ia32_lzcnt_u64: {
970 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
971 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
972 }
973 case X86::BI__builtin_ia32_tzcnt_u16:
974 case X86::BI__builtin_ia32_tzcnt_u32:
975 case X86::BI__builtin_ia32_tzcnt_u64: {
976 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
977 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
978 }
979 case X86::BI__builtin_ia32_pdep_si:
980 case X86::BI__builtin_ia32_pdep_di: {
981 Function *F = CGM.getIntrinsic(Intrinsic::pdep, Ops[0]->getType());
982 return Builder.CreateCall(F, Ops);
983 }
984 case X86::BI__builtin_ia32_pext_si:
985 case X86::BI__builtin_ia32_pext_di: {
986 Function *F = CGM.getIntrinsic(Intrinsic::pext, Ops[0]->getType());
987 return Builder.CreateCall(F, Ops);
988 }
989 case X86::BI__builtin_ia32_undef128:
990 case X86::BI__builtin_ia32_undef256:
991 case X86::BI__builtin_ia32_undef512:
992 // The x86 definition of "undef" is not the same as the LLVM definition
993 // (PR32176). We leave optimizing away an unnecessary zero constant to the
994 // IR optimizer and backend.
995 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
996 // value, we should use that here instead of a zero.
997 return llvm::Constant::getNullValue(ConvertType(E->getType()));
998 case X86::BI__builtin_ia32_vec_ext_v4hi:
999 case X86::BI__builtin_ia32_vec_ext_v16qi:
1000 case X86::BI__builtin_ia32_vec_ext_v8hi:
1001 case X86::BI__builtin_ia32_vec_ext_v4si:
1002 case X86::BI__builtin_ia32_vec_ext_v4sf:
1003 case X86::BI__builtin_ia32_vec_ext_v2di:
1004 case X86::BI__builtin_ia32_vec_ext_v32qi:
1005 case X86::BI__builtin_ia32_vec_ext_v16hi:
1006 case X86::BI__builtin_ia32_vec_ext_v8si:
1007 case X86::BI__builtin_ia32_vec_ext_v4di: {
1008 unsigned NumElts =
1009 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1010 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
1011 Index &= NumElts - 1;
1012 // These builtins exist so we can ensure the index is an ICE and in range.
1013 // Otherwise we could just do this in the header file.
1014 return Builder.CreateExtractElement(Ops[0], Index);
1015 }
1016 case X86::BI__builtin_ia32_vec_set_v4hi:
1017 case X86::BI__builtin_ia32_vec_set_v16qi:
1018 case X86::BI__builtin_ia32_vec_set_v8hi:
1019 case X86::BI__builtin_ia32_vec_set_v4si:
1020 case X86::BI__builtin_ia32_vec_set_v2di:
1021 case X86::BI__builtin_ia32_vec_set_v32qi:
1022 case X86::BI__builtin_ia32_vec_set_v16hi:
1023 case X86::BI__builtin_ia32_vec_set_v8si:
1024 case X86::BI__builtin_ia32_vec_set_v4di: {
1025 unsigned NumElts =
1026 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1027 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
1028 Index &= NumElts - 1;
1029 // These builtins exist so we can ensure the index is an ICE and in range.
1030 // Otherwise we could just do this in the header file.
1031 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
1032 }
1033 case X86::BI_mm_setcsr:
1034 case X86::BI__builtin_ia32_ldmxcsr: {
1036 Builder.CreateStore(Ops[0], Tmp);
1037 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
1038 Tmp.getPointer());
1039 }
1040 case X86::BI_mm_getcsr:
1041 case X86::BI__builtin_ia32_stmxcsr: {
1043 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
1044 Tmp.getPointer());
1045 return Builder.CreateLoad(Tmp, "stmxcsr");
1046 }
1047 case X86::BI__builtin_ia32_xsave:
1048 case X86::BI__builtin_ia32_xsave64:
1049 case X86::BI__builtin_ia32_xrstor:
1050 case X86::BI__builtin_ia32_xrstor64:
1051 case X86::BI__builtin_ia32_xsaveopt:
1052 case X86::BI__builtin_ia32_xsaveopt64:
1053 case X86::BI__builtin_ia32_xrstors:
1054 case X86::BI__builtin_ia32_xrstors64:
1055 case X86::BI__builtin_ia32_xsavec:
1056 case X86::BI__builtin_ia32_xsavec64:
1057 case X86::BI__builtin_ia32_xsaves:
1058 case X86::BI__builtin_ia32_xsaves64:
1059 case X86::BI__builtin_ia32_xsetbv:
1060 case X86::BI_xsetbv: {
1061 Intrinsic::ID ID;
// Local helper for the switch that follows: expands to a `case` label for
// X86::BI__builtin_ia32_<NAME> that assigns Intrinsic::x86_<NAME> to ID and
// breaks. The expansion ends without a ';' so each use site supplies its own.
// The macro is #undef'd again once that switch is closed.
1062#define INTRINSIC_X86_XSAVE_ID(NAME) \
1063 case X86::BI__builtin_ia32_##NAME: \
1064 ID = Intrinsic::x86_##NAME; \
1065 break
1066 switch (BuiltinID) {
1067 default: llvm_unreachable("Unsupported intrinsic!");
1069 INTRINSIC_X86_XSAVE_ID(xsave64);
1070 INTRINSIC_X86_XSAVE_ID(xrstor);
1071 INTRINSIC_X86_XSAVE_ID(xrstor64);
1072 INTRINSIC_X86_XSAVE_ID(xsaveopt);
1073 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
1074 INTRINSIC_X86_XSAVE_ID(xrstors);
1075 INTRINSIC_X86_XSAVE_ID(xrstors64);
1076 INTRINSIC_X86_XSAVE_ID(xsavec);
1077 INTRINSIC_X86_XSAVE_ID(xsavec64);
1078 INTRINSIC_X86_XSAVE_ID(xsaves);
1079 INTRINSIC_X86_XSAVE_ID(xsaves64);
1080 INTRINSIC_X86_XSAVE_ID(xsetbv);
1081 case X86::BI_xsetbv:
1082 ID = Intrinsic::x86_xsetbv;
1083 break;
1084 }
1085#undef INTRINSIC_X86_XSAVE_ID
1086 Value *Mhi = Builder.CreateTrunc(
1087 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
1088 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
1089 Ops[1] = Mhi;
1090 Ops.push_back(Mlo);
1091 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
1092 }
1093 case X86::BI__builtin_ia32_xgetbv:
1094 case X86::BI_xgetbv:
1095 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
1096 case X86::BI__builtin_ia32_storedqudi128_mask:
1097 case X86::BI__builtin_ia32_storedqusi128_mask:
1098 case X86::BI__builtin_ia32_storedquhi128_mask:
1099 case X86::BI__builtin_ia32_storedquqi128_mask:
1100 case X86::BI__builtin_ia32_storeupd128_mask:
1101 case X86::BI__builtin_ia32_storeups128_mask:
1102 case X86::BI__builtin_ia32_storedqudi256_mask:
1103 case X86::BI__builtin_ia32_storedqusi256_mask:
1104 case X86::BI__builtin_ia32_storedquhi256_mask:
1105 case X86::BI__builtin_ia32_storedquqi256_mask:
1106 case X86::BI__builtin_ia32_storeupd256_mask:
1107 case X86::BI__builtin_ia32_storeups256_mask:
1108 case X86::BI__builtin_ia32_storedqudi512_mask:
1109 case X86::BI__builtin_ia32_storedqusi512_mask:
1110 case X86::BI__builtin_ia32_storedquhi512_mask:
1111 case X86::BI__builtin_ia32_storedquqi512_mask:
1112 case X86::BI__builtin_ia32_storeupd512_mask:
1113 case X86::BI__builtin_ia32_storeups512_mask:
1114 return EmitX86MaskedStore(*this, Ops, Align(1));
1115
1116 case X86::BI__builtin_ia32_storesbf16128_mask:
1117 case X86::BI__builtin_ia32_storesh128_mask:
1118 case X86::BI__builtin_ia32_storess128_mask:
1119 case X86::BI__builtin_ia32_storesd128_mask:
1120 return EmitX86MaskedStore(*this, Ops, Align(1));
1121
1122 case X86::BI__builtin_ia32_cvtmask2b128:
1123 case X86::BI__builtin_ia32_cvtmask2b256:
1124 case X86::BI__builtin_ia32_cvtmask2b512:
1125 case X86::BI__builtin_ia32_cvtmask2w128:
1126 case X86::BI__builtin_ia32_cvtmask2w256:
1127 case X86::BI__builtin_ia32_cvtmask2w512:
1128 case X86::BI__builtin_ia32_cvtmask2d128:
1129 case X86::BI__builtin_ia32_cvtmask2d256:
1130 case X86::BI__builtin_ia32_cvtmask2d512:
1131 case X86::BI__builtin_ia32_cvtmask2q128:
1132 case X86::BI__builtin_ia32_cvtmask2q256:
1133 case X86::BI__builtin_ia32_cvtmask2q512:
1134 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
1135
1136 case X86::BI__builtin_ia32_cvtb2mask128:
1137 case X86::BI__builtin_ia32_cvtb2mask256:
1138 case X86::BI__builtin_ia32_cvtb2mask512:
1139 case X86::BI__builtin_ia32_cvtw2mask128:
1140 case X86::BI__builtin_ia32_cvtw2mask256:
1141 case X86::BI__builtin_ia32_cvtw2mask512:
1142 case X86::BI__builtin_ia32_cvtd2mask128:
1143 case X86::BI__builtin_ia32_cvtd2mask256:
1144 case X86::BI__builtin_ia32_cvtd2mask512:
1145 case X86::BI__builtin_ia32_cvtq2mask128:
1146 case X86::BI__builtin_ia32_cvtq2mask256:
1147 case X86::BI__builtin_ia32_cvtq2mask512:
1148 return EmitX86ConvertToMask(*this, Ops[0]);
1149
1150 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
1151 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
1152 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
1153 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
1154 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
1155 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
1156 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
1157 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
1158 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
1159 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
1160 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
1161 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
1162 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
1163 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
1164
1165 case X86::BI__builtin_ia32_vfmaddsh3_mask:
1166 case X86::BI__builtin_ia32_vfmaddss3_mask:
1167 case X86::BI__builtin_ia32_vfmaddsd3_mask:
1168 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
1169 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
1170 case X86::BI__builtin_ia32_vfmaddss3_maskz:
1171 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
1172 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
1173 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
1174 case X86::BI__builtin_ia32_vfmaddss3_mask3:
1175 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
1176 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
1177 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
1178 case X86::BI__builtin_ia32_vfmsubss3_mask3:
1179 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
1180 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
1181 /*NegAcc*/ true);
1182 case X86::BI__builtin_ia32_vfmaddph512_mask:
1183 case X86::BI__builtin_ia32_vfmaddph512_maskz:
1184 case X86::BI__builtin_ia32_vfmaddph512_mask3:
1185 case X86::BI__builtin_ia32_vfmaddps512_mask:
1186 case X86::BI__builtin_ia32_vfmaddps512_maskz:
1187 case X86::BI__builtin_ia32_vfmaddps512_mask3:
1188 case X86::BI__builtin_ia32_vfmsubps512_mask3:
1189 case X86::BI__builtin_ia32_vfmaddpd512_mask:
1190 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
1191 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
1192 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
1193 case X86::BI__builtin_ia32_vfmsubph512_mask3:
1194 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
1195 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
1196 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
1197 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
1198 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
1199 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
1200 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
1201 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
1202 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
1203 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
1204 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
1205 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
1206 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
1207 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
1208
1209 case X86::BI__builtin_ia32_movdqa32store128_mask:
1210 case X86::BI__builtin_ia32_movdqa64store128_mask:
1211 case X86::BI__builtin_ia32_storeaps128_mask:
1212 case X86::BI__builtin_ia32_storeapd128_mask:
1213 case X86::BI__builtin_ia32_movdqa32store256_mask:
1214 case X86::BI__builtin_ia32_movdqa64store256_mask:
1215 case X86::BI__builtin_ia32_storeaps256_mask:
1216 case X86::BI__builtin_ia32_storeapd256_mask:
1217 case X86::BI__builtin_ia32_movdqa32store512_mask:
1218 case X86::BI__builtin_ia32_movdqa64store512_mask:
1219 case X86::BI__builtin_ia32_storeaps512_mask:
1220 case X86::BI__builtin_ia32_storeapd512_mask:
1221 return EmitX86MaskedStore(
1222 *this, Ops,
1223 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1224
1225 case X86::BI__builtin_ia32_loadups128_mask:
1226 case X86::BI__builtin_ia32_loadups256_mask:
1227 case X86::BI__builtin_ia32_loadups512_mask:
1228 case X86::BI__builtin_ia32_loadupd128_mask:
1229 case X86::BI__builtin_ia32_loadupd256_mask:
1230 case X86::BI__builtin_ia32_loadupd512_mask:
1231 case X86::BI__builtin_ia32_loaddquqi128_mask:
1232 case X86::BI__builtin_ia32_loaddquqi256_mask:
1233 case X86::BI__builtin_ia32_loaddquqi512_mask:
1234 case X86::BI__builtin_ia32_loaddquhi128_mask:
1235 case X86::BI__builtin_ia32_loaddquhi256_mask:
1236 case X86::BI__builtin_ia32_loaddquhi512_mask:
1237 case X86::BI__builtin_ia32_loaddqusi128_mask:
1238 case X86::BI__builtin_ia32_loaddqusi256_mask:
1239 case X86::BI__builtin_ia32_loaddqusi512_mask:
1240 case X86::BI__builtin_ia32_loaddqudi128_mask:
1241 case X86::BI__builtin_ia32_loaddqudi256_mask:
1242 case X86::BI__builtin_ia32_loaddqudi512_mask:
1243 return EmitX86MaskedLoad(*this, Ops, Align(1));
1244
1245 case X86::BI__builtin_ia32_loadsbf16128_mask:
1246 case X86::BI__builtin_ia32_loadsh128_mask:
1247 case X86::BI__builtin_ia32_loadss128_mask:
1248 case X86::BI__builtin_ia32_loadsd128_mask:
1249 return EmitX86MaskedLoad(*this, Ops, Align(1));
1250
1251 case X86::BI__builtin_ia32_loadaps128_mask:
1252 case X86::BI__builtin_ia32_loadaps256_mask:
1253 case X86::BI__builtin_ia32_loadaps512_mask:
1254 case X86::BI__builtin_ia32_loadapd128_mask:
1255 case X86::BI__builtin_ia32_loadapd256_mask:
1256 case X86::BI__builtin_ia32_loadapd512_mask:
1257 case X86::BI__builtin_ia32_movdqa32load128_mask:
1258 case X86::BI__builtin_ia32_movdqa32load256_mask:
1259 case X86::BI__builtin_ia32_movdqa32load512_mask:
1260 case X86::BI__builtin_ia32_movdqa64load128_mask:
1261 case X86::BI__builtin_ia32_movdqa64load256_mask:
1262 case X86::BI__builtin_ia32_movdqa64load512_mask:
1263 return EmitX86MaskedLoad(
1264 *this, Ops,
1265 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1266
1267 case X86::BI__builtin_ia32_expandloaddf128_mask:
1268 case X86::BI__builtin_ia32_expandloaddf256_mask:
1269 case X86::BI__builtin_ia32_expandloaddf512_mask:
1270 case X86::BI__builtin_ia32_expandloadsf128_mask:
1271 case X86::BI__builtin_ia32_expandloadsf256_mask:
1272 case X86::BI__builtin_ia32_expandloadsf512_mask:
1273 case X86::BI__builtin_ia32_expandloaddi128_mask:
1274 case X86::BI__builtin_ia32_expandloaddi256_mask:
1275 case X86::BI__builtin_ia32_expandloaddi512_mask:
1276 case X86::BI__builtin_ia32_expandloadsi128_mask:
1277 case X86::BI__builtin_ia32_expandloadsi256_mask:
1278 case X86::BI__builtin_ia32_expandloadsi512_mask:
1279 case X86::BI__builtin_ia32_expandloadhi128_mask:
1280 case X86::BI__builtin_ia32_expandloadhi256_mask:
1281 case X86::BI__builtin_ia32_expandloadhi512_mask:
1282 case X86::BI__builtin_ia32_expandloadqi128_mask:
1283 case X86::BI__builtin_ia32_expandloadqi256_mask:
1284 case X86::BI__builtin_ia32_expandloadqi512_mask:
1285 return EmitX86ExpandLoad(*this, Ops);
1286
1287 case X86::BI__builtin_ia32_compressstoredf128_mask:
1288 case X86::BI__builtin_ia32_compressstoredf256_mask:
1289 case X86::BI__builtin_ia32_compressstoredf512_mask:
1290 case X86::BI__builtin_ia32_compressstoresf128_mask:
1291 case X86::BI__builtin_ia32_compressstoresf256_mask:
1292 case X86::BI__builtin_ia32_compressstoresf512_mask:
1293 case X86::BI__builtin_ia32_compressstoredi128_mask:
1294 case X86::BI__builtin_ia32_compressstoredi256_mask:
1295 case X86::BI__builtin_ia32_compressstoredi512_mask:
1296 case X86::BI__builtin_ia32_compressstoresi128_mask:
1297 case X86::BI__builtin_ia32_compressstoresi256_mask:
1298 case X86::BI__builtin_ia32_compressstoresi512_mask:
1299 case X86::BI__builtin_ia32_compressstorehi128_mask:
1300 case X86::BI__builtin_ia32_compressstorehi256_mask:
1301 case X86::BI__builtin_ia32_compressstorehi512_mask:
1302 case X86::BI__builtin_ia32_compressstoreqi128_mask:
1303 case X86::BI__builtin_ia32_compressstoreqi256_mask:
1304 case X86::BI__builtin_ia32_compressstoreqi512_mask:
1305 return EmitX86CompressStore(*this, Ops);
1306
1307 case X86::BI__builtin_ia32_expanddf128_mask:
1308 case X86::BI__builtin_ia32_expanddf256_mask:
1309 case X86::BI__builtin_ia32_expanddf512_mask:
1310 case X86::BI__builtin_ia32_expandsf128_mask:
1311 case X86::BI__builtin_ia32_expandsf256_mask:
1312 case X86::BI__builtin_ia32_expandsf512_mask:
1313 case X86::BI__builtin_ia32_expanddi128_mask:
1314 case X86::BI__builtin_ia32_expanddi256_mask:
1315 case X86::BI__builtin_ia32_expanddi512_mask:
1316 case X86::BI__builtin_ia32_expandsi128_mask:
1317 case X86::BI__builtin_ia32_expandsi256_mask:
1318 case X86::BI__builtin_ia32_expandsi512_mask:
1319 case X86::BI__builtin_ia32_expandhi128_mask:
1320 case X86::BI__builtin_ia32_expandhi256_mask:
1321 case X86::BI__builtin_ia32_expandhi512_mask:
1322 case X86::BI__builtin_ia32_expandqi128_mask:
1323 case X86::BI__builtin_ia32_expandqi256_mask:
1324 case X86::BI__builtin_ia32_expandqi512_mask:
1325 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
1326
1327 case X86::BI__builtin_ia32_compressdf128_mask:
1328 case X86::BI__builtin_ia32_compressdf256_mask:
1329 case X86::BI__builtin_ia32_compressdf512_mask:
1330 case X86::BI__builtin_ia32_compresssf128_mask:
1331 case X86::BI__builtin_ia32_compresssf256_mask:
1332 case X86::BI__builtin_ia32_compresssf512_mask:
1333 case X86::BI__builtin_ia32_compressdi128_mask:
1334 case X86::BI__builtin_ia32_compressdi256_mask:
1335 case X86::BI__builtin_ia32_compressdi512_mask:
1336 case X86::BI__builtin_ia32_compresssi128_mask:
1337 case X86::BI__builtin_ia32_compresssi256_mask:
1338 case X86::BI__builtin_ia32_compresssi512_mask:
1339 case X86::BI__builtin_ia32_compresshi128_mask:
1340 case X86::BI__builtin_ia32_compresshi256_mask:
1341 case X86::BI__builtin_ia32_compresshi512_mask:
1342 case X86::BI__builtin_ia32_compressqi128_mask:
1343 case X86::BI__builtin_ia32_compressqi256_mask:
1344 case X86::BI__builtin_ia32_compressqi512_mask:
1345 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
1346
1347 case X86::BI__builtin_ia32_gather3div2df:
1348 case X86::BI__builtin_ia32_gather3div2di:
1349 case X86::BI__builtin_ia32_gather3div4df:
1350 case X86::BI__builtin_ia32_gather3div4di:
1351 case X86::BI__builtin_ia32_gather3div4sf:
1352 case X86::BI__builtin_ia32_gather3div4si:
1353 case X86::BI__builtin_ia32_gather3div8sf:
1354 case X86::BI__builtin_ia32_gather3div8si:
1355 case X86::BI__builtin_ia32_gather3siv2df:
1356 case X86::BI__builtin_ia32_gather3siv2di:
1357 case X86::BI__builtin_ia32_gather3siv4df:
1358 case X86::BI__builtin_ia32_gather3siv4di:
1359 case X86::BI__builtin_ia32_gather3siv4sf:
1360 case X86::BI__builtin_ia32_gather3siv4si:
1361 case X86::BI__builtin_ia32_gather3siv8sf:
1362 case X86::BI__builtin_ia32_gather3siv8si:
1363 case X86::BI__builtin_ia32_gathersiv8df:
1364 case X86::BI__builtin_ia32_gathersiv16sf:
1365 case X86::BI__builtin_ia32_gatherdiv8df:
1366 case X86::BI__builtin_ia32_gatherdiv16sf:
1367 case X86::BI__builtin_ia32_gathersiv8di:
1368 case X86::BI__builtin_ia32_gathersiv16si:
1369 case X86::BI__builtin_ia32_gatherdiv8di:
1370 case X86::BI__builtin_ia32_gatherdiv16si: {
1371 Intrinsic::ID IID;
1372 switch (BuiltinID) {
1373 default: llvm_unreachable("Unexpected builtin");
1374 case X86::BI__builtin_ia32_gather3div2df:
1375 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
1376 break;
1377 case X86::BI__builtin_ia32_gather3div2di:
1378 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
1379 break;
1380 case X86::BI__builtin_ia32_gather3div4df:
1381 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
1382 break;
1383 case X86::BI__builtin_ia32_gather3div4di:
1384 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
1385 break;
1386 case X86::BI__builtin_ia32_gather3div4sf:
1387 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
1388 break;
1389 case X86::BI__builtin_ia32_gather3div4si:
1390 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
1391 break;
1392 case X86::BI__builtin_ia32_gather3div8sf:
1393 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
1394 break;
1395 case X86::BI__builtin_ia32_gather3div8si:
1396 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
1397 break;
1398 case X86::BI__builtin_ia32_gather3siv2df:
1399 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
1400 break;
1401 case X86::BI__builtin_ia32_gather3siv2di:
1402 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
1403 break;
1404 case X86::BI__builtin_ia32_gather3siv4df:
1405 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
1406 break;
1407 case X86::BI__builtin_ia32_gather3siv4di:
1408 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
1409 break;
1410 case X86::BI__builtin_ia32_gather3siv4sf:
1411 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
1412 break;
1413 case X86::BI__builtin_ia32_gather3siv4si:
1414 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
1415 break;
1416 case X86::BI__builtin_ia32_gather3siv8sf:
1417 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
1418 break;
1419 case X86::BI__builtin_ia32_gather3siv8si:
1420 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
1421 break;
1422 case X86::BI__builtin_ia32_gathersiv8df:
1423 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
1424 break;
1425 case X86::BI__builtin_ia32_gathersiv16sf:
1426 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
1427 break;
1428 case X86::BI__builtin_ia32_gatherdiv8df:
1429 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
1430 break;
1431 case X86::BI__builtin_ia32_gatherdiv16sf:
1432 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
1433 break;
1434 case X86::BI__builtin_ia32_gathersiv8di:
1435 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
1436 break;
1437 case X86::BI__builtin_ia32_gathersiv16si:
1438 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
1439 break;
1440 case X86::BI__builtin_ia32_gatherdiv8di:
1441 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
1442 break;
1443 case X86::BI__builtin_ia32_gatherdiv16si:
1444 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
1445 break;
1446 }
1447
1448 unsigned MinElts = std::min(
1449 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
1450 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
1451 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
1452 Function *Intr = CGM.getIntrinsic(IID);
1453 return Builder.CreateCall(Intr, Ops);
1454 }
1455
1456 case X86::BI__builtin_ia32_scattersiv8df:
1457 case X86::BI__builtin_ia32_scattersiv16sf:
1458 case X86::BI__builtin_ia32_scatterdiv8df:
1459 case X86::BI__builtin_ia32_scatterdiv16sf:
1460 case X86::BI__builtin_ia32_scattersiv8di:
1461 case X86::BI__builtin_ia32_scattersiv16si:
1462 case X86::BI__builtin_ia32_scatterdiv8di:
1463 case X86::BI__builtin_ia32_scatterdiv16si:
1464 case X86::BI__builtin_ia32_scatterdiv2df:
1465 case X86::BI__builtin_ia32_scatterdiv2di:
1466 case X86::BI__builtin_ia32_scatterdiv4df:
1467 case X86::BI__builtin_ia32_scatterdiv4di:
1468 case X86::BI__builtin_ia32_scatterdiv4sf:
1469 case X86::BI__builtin_ia32_scatterdiv4si:
1470 case X86::BI__builtin_ia32_scatterdiv8sf:
1471 case X86::BI__builtin_ia32_scatterdiv8si:
1472 case X86::BI__builtin_ia32_scattersiv2df:
1473 case X86::BI__builtin_ia32_scattersiv2di:
1474 case X86::BI__builtin_ia32_scattersiv4df:
1475 case X86::BI__builtin_ia32_scattersiv4di:
1476 case X86::BI__builtin_ia32_scattersiv4sf:
1477 case X86::BI__builtin_ia32_scattersiv4si:
1478 case X86::BI__builtin_ia32_scattersiv8sf:
1479 case X86::BI__builtin_ia32_scattersiv8si: {
1480 Intrinsic::ID IID;
1481 switch (BuiltinID) {
1482 default: llvm_unreachable("Unexpected builtin");
1483 case X86::BI__builtin_ia32_scattersiv8df:
1484 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
1485 break;
1486 case X86::BI__builtin_ia32_scattersiv16sf:
1487 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
1488 break;
1489 case X86::BI__builtin_ia32_scatterdiv8df:
1490 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
1491 break;
1492 case X86::BI__builtin_ia32_scatterdiv16sf:
1493 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
1494 break;
1495 case X86::BI__builtin_ia32_scattersiv8di:
1496 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
1497 break;
1498 case X86::BI__builtin_ia32_scattersiv16si:
1499 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
1500 break;
1501 case X86::BI__builtin_ia32_scatterdiv8di:
1502 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
1503 break;
1504 case X86::BI__builtin_ia32_scatterdiv16si:
1505 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
1506 break;
1507 case X86::BI__builtin_ia32_scatterdiv2df:
1508 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
1509 break;
1510 case X86::BI__builtin_ia32_scatterdiv2di:
1511 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
1512 break;
1513 case X86::BI__builtin_ia32_scatterdiv4df:
1514 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
1515 break;
1516 case X86::BI__builtin_ia32_scatterdiv4di:
1517 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
1518 break;
1519 case X86::BI__builtin_ia32_scatterdiv4sf:
1520 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
1521 break;
1522 case X86::BI__builtin_ia32_scatterdiv4si:
1523 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
1524 break;
1525 case X86::BI__builtin_ia32_scatterdiv8sf:
1526 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
1527 break;
1528 case X86::BI__builtin_ia32_scatterdiv8si:
1529 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
1530 break;
1531 case X86::BI__builtin_ia32_scattersiv2df:
1532 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
1533 break;
1534 case X86::BI__builtin_ia32_scattersiv2di:
1535 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
1536 break;
1537 case X86::BI__builtin_ia32_scattersiv4df:
1538 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
1539 break;
1540 case X86::BI__builtin_ia32_scattersiv4di:
1541 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
1542 break;
1543 case X86::BI__builtin_ia32_scattersiv4sf:
1544 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
1545 break;
1546 case X86::BI__builtin_ia32_scattersiv4si:
1547 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
1548 break;
1549 case X86::BI__builtin_ia32_scattersiv8sf:
1550 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
1551 break;
1552 case X86::BI__builtin_ia32_scattersiv8si:
1553 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
1554 break;
1555 }
1556
1557 unsigned MinElts = std::min(
1558 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
1559 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
1560 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
1561 Function *Intr = CGM.getIntrinsic(IID);
1562 return Builder.CreateCall(Intr, Ops);
1563 }
1564
1565 case X86::BI__builtin_ia32_vextractf128_pd256:
1566 case X86::BI__builtin_ia32_vextractf128_ps256:
1567 case X86::BI__builtin_ia32_vextractf128_si256:
1568 case X86::BI__builtin_ia32_extract128i256:
1569 case X86::BI__builtin_ia32_extractf64x4_mask:
1570 case X86::BI__builtin_ia32_extractf32x4_mask:
1571 case X86::BI__builtin_ia32_extracti64x4_mask:
1572 case X86::BI__builtin_ia32_extracti32x4_mask:
1573 case X86::BI__builtin_ia32_extractf32x8_mask:
1574 case X86::BI__builtin_ia32_extracti32x8_mask:
1575 case X86::BI__builtin_ia32_extractf32x4_256_mask:
1576 case X86::BI__builtin_ia32_extracti32x4_256_mask:
1577 case X86::BI__builtin_ia32_extractf64x2_256_mask:
1578 case X86::BI__builtin_ia32_extracti64x2_256_mask:
1579 case X86::BI__builtin_ia32_extractf64x2_512_mask:
1580 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
1581 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
1582 unsigned NumElts = DstTy->getNumElements();
1583 unsigned SrcNumElts =
1584 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1585 unsigned SubVectors = SrcNumElts / NumElts;
1586 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
1587 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1588 Index &= SubVectors - 1; // Remove any extra bits.
1589 Index *= NumElts;
1590
1591 int Indices[16];
1592 for (unsigned i = 0; i != NumElts; ++i)
1593 Indices[i] = i + Index;
1594
1595 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1596 "extract");
1597
1598 if (Ops.size() == 4)
1599 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
1600
1601 return Res;
1602 }
1603 case X86::BI__builtin_ia32_vinsertf128_pd256:
1604 case X86::BI__builtin_ia32_vinsertf128_ps256:
1605 case X86::BI__builtin_ia32_vinsertf128_si256:
1606 case X86::BI__builtin_ia32_insert128i256:
1607 case X86::BI__builtin_ia32_insertf64x4:
1608 case X86::BI__builtin_ia32_insertf32x4:
1609 case X86::BI__builtin_ia32_inserti64x4:
1610 case X86::BI__builtin_ia32_inserti32x4:
1611 case X86::BI__builtin_ia32_insertf32x8:
1612 case X86::BI__builtin_ia32_inserti32x8:
1613 case X86::BI__builtin_ia32_insertf32x4_256:
1614 case X86::BI__builtin_ia32_inserti32x4_256:
1615 case X86::BI__builtin_ia32_insertf64x2_256:
1616 case X86::BI__builtin_ia32_inserti64x2_256:
1617 case X86::BI__builtin_ia32_insertf64x2_512:
1618 case X86::BI__builtin_ia32_inserti64x2_512: {
1619 unsigned DstNumElts =
1620 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1621 unsigned SrcNumElts =
1622 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
1623 unsigned SubVectors = DstNumElts / SrcNumElts;
1624 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
1625 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1626 Index &= SubVectors - 1; // Remove any extra bits.
1627 Index *= SrcNumElts;
1628
1629 int Indices[16];
1630 for (unsigned i = 0; i != DstNumElts; ++i)
1631 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
1632
1633 Value *Op1 = Builder.CreateShuffleVector(
1634 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
1635
1636 for (unsigned i = 0; i != DstNumElts; ++i) {
1637 if (i >= Index && i < (Index + SrcNumElts))
1638 Indices[i] = (i - Index) + DstNumElts;
1639 else
1640 Indices[i] = i;
1641 }
1642
1643 return Builder.CreateShuffleVector(Ops[0], Op1,
1644 ArrayRef(Indices, DstNumElts), "insert");
1645 }
1646 case X86::BI__builtin_ia32_pmovqd512_mask:
1647 case X86::BI__builtin_ia32_pmovwb512_mask: {
1648 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1649 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
1650 }
1651 case X86::BI__builtin_ia32_pmovdb512_mask:
1652 case X86::BI__builtin_ia32_pmovdw512_mask:
1653 case X86::BI__builtin_ia32_pmovqw512_mask: {
1654 if (const auto *C = dyn_cast<Constant>(Ops[2]))
1655 if (C->isAllOnesValue())
1656 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1657
1658 Intrinsic::ID IID;
1659 switch (BuiltinID) {
1660 default: llvm_unreachable("Unsupported intrinsic!");
1661 case X86::BI__builtin_ia32_pmovdb512_mask:
1662 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
1663 break;
1664 case X86::BI__builtin_ia32_pmovdw512_mask:
1665 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
1666 break;
1667 case X86::BI__builtin_ia32_pmovqw512_mask:
1668 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
1669 break;
1670 }
1671
1672 Function *Intr = CGM.getIntrinsic(IID);
1673 return Builder.CreateCall(Intr, Ops);
1674 }
1675 case X86::BI__builtin_ia32_pblendw128:
1676 case X86::BI__builtin_ia32_blendpd:
1677 case X86::BI__builtin_ia32_blendps:
1678 case X86::BI__builtin_ia32_blendpd256:
1679 case X86::BI__builtin_ia32_blendps256:
1680 case X86::BI__builtin_ia32_pblendw256:
1681 case X86::BI__builtin_ia32_pblendd128:
1682 case X86::BI__builtin_ia32_pblendd256: {
1683 unsigned NumElts =
1684 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1685 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1686
1687 int Indices[16];
1688 // If there are more than 8 elements, the immediate is used twice so make
1689 // sure we handle that.
1690 for (unsigned i = 0; i != NumElts; ++i)
1691 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
1692
1693 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1694 ArrayRef(Indices, NumElts), "blend");
1695 }
1696 case X86::BI__builtin_ia32_pshuflw:
1697 case X86::BI__builtin_ia32_pshuflw256:
1698 case X86::BI__builtin_ia32_pshuflw512: {
1699 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1700 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1701 unsigned NumElts = Ty->getNumElements();
1702
1703 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1704 Imm = (Imm & 0xff) * 0x01010101;
1705
1706 int Indices[32];
1707 for (unsigned l = 0; l != NumElts; l += 8) {
1708 for (unsigned i = 0; i != 4; ++i) {
1709 Indices[l + i] = l + (Imm & 3);
1710 Imm >>= 2;
1711 }
1712 for (unsigned i = 4; i != 8; ++i)
1713 Indices[l + i] = l + i;
1714 }
1715
1716 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1717 "pshuflw");
1718 }
1719 case X86::BI__builtin_ia32_pshufhw:
1720 case X86::BI__builtin_ia32_pshufhw256:
1721 case X86::BI__builtin_ia32_pshufhw512: {
1722 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1723 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1724 unsigned NumElts = Ty->getNumElements();
1725
1726 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1727 Imm = (Imm & 0xff) * 0x01010101;
1728
1729 int Indices[32];
1730 for (unsigned l = 0; l != NumElts; l += 8) {
1731 for (unsigned i = 0; i != 4; ++i)
1732 Indices[l + i] = l + i;
1733 for (unsigned i = 4; i != 8; ++i) {
1734 Indices[l + i] = l + 4 + (Imm & 3);
1735 Imm >>= 2;
1736 }
1737 }
1738
1739 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1740 "pshufhw");
1741 }
1742 case X86::BI__builtin_ia32_pshufd:
1743 case X86::BI__builtin_ia32_pshufd256:
1744 case X86::BI__builtin_ia32_pshufd512:
1745 case X86::BI__builtin_ia32_vpermilpd:
1746 case X86::BI__builtin_ia32_vpermilps:
1747 case X86::BI__builtin_ia32_vpermilpd256:
1748 case X86::BI__builtin_ia32_vpermilps256:
1749 case X86::BI__builtin_ia32_vpermilpd512:
1750 case X86::BI__builtin_ia32_vpermilps512: {
1751 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1752 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1753 unsigned NumElts = Ty->getNumElements();
1754 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1755 unsigned NumLaneElts = NumElts / NumLanes;
1756
1757 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1758 Imm = (Imm & 0xff) * 0x01010101;
1759
1760 int Indices[16];
1761 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1762 for (unsigned i = 0; i != NumLaneElts; ++i) {
1763 Indices[i + l] = (Imm % NumLaneElts) + l;
1764 Imm /= NumLaneElts;
1765 }
1766 }
1767
1768 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1769 "permil");
1770 }
1771 case X86::BI__builtin_ia32_shufpd:
1772 case X86::BI__builtin_ia32_shufpd256:
1773 case X86::BI__builtin_ia32_shufpd512:
1774 case X86::BI__builtin_ia32_shufps:
1775 case X86::BI__builtin_ia32_shufps256:
1776 case X86::BI__builtin_ia32_shufps512: {
1777 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1778 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1779 unsigned NumElts = Ty->getNumElements();
1780 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1781 unsigned NumLaneElts = NumElts / NumLanes;
1782
1783 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1784 Imm = (Imm & 0xff) * 0x01010101;
1785
1786 int Indices[16];
1787 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1788 for (unsigned i = 0; i != NumLaneElts; ++i) {
1789 unsigned Index = Imm % NumLaneElts;
1790 Imm /= NumLaneElts;
1791 if (i >= (NumLaneElts / 2))
1792 Index += NumElts;
1793 Indices[l + i] = l + Index;
1794 }
1795 }
1796
1797 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1798 ArrayRef(Indices, NumElts), "shufp");
1799 }
1800 case X86::BI__builtin_ia32_permdi256:
1801 case X86::BI__builtin_ia32_permdf256:
1802 case X86::BI__builtin_ia32_permdi512:
1803 case X86::BI__builtin_ia32_permdf512: {
1804 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1805 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1806 unsigned NumElts = Ty->getNumElements();
1807
1808 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
1809 int Indices[8];
1810 for (unsigned l = 0; l != NumElts; l += 4)
1811 for (unsigned i = 0; i != 4; ++i)
1812 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
1813
1814 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1815 "perm");
1816 }
1817 case X86::BI__builtin_ia32_palignr128:
1818 case X86::BI__builtin_ia32_palignr256:
1819 case X86::BI__builtin_ia32_palignr512: {
1820 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1821
1822 unsigned NumElts =
1823 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1824 assert(NumElts % 16 == 0);
1825
1826 // If palignr is shifting the pair of vectors more than the size of two
1827 // lanes, emit zero.
1828 if (ShiftVal >= 32)
1829 return llvm::Constant::getNullValue(ConvertType(E->getType()));
1830
1831 // If palignr is shifting the pair of input vectors more than one lane,
1832 // but less than two lanes, convert to shifting in zeroes.
1833 if (ShiftVal > 16) {
1834 ShiftVal -= 16;
1835 Ops[1] = Ops[0];
1836 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
1837 }
1838
1839 int Indices[64];
1840 // 256/512-bit palignr operates on 128-bit lanes so we need to handle that
1841 for (unsigned l = 0; l != NumElts; l += 16) {
1842 for (unsigned i = 0; i != 16; ++i) {
1843 unsigned Idx = ShiftVal + i;
1844 if (Idx >= 16)
1845 Idx += NumElts - 16; // End of lane, switch operand.
1846 Indices[l + i] = Idx + l;
1847 }
1848 }
1849
1850 return Builder.CreateShuffleVector(Ops[1], Ops[0],
1851 ArrayRef(Indices, NumElts), "palignr");
1852 }
1853 case X86::BI__builtin_ia32_alignd128:
1854 case X86::BI__builtin_ia32_alignd256:
1855 case X86::BI__builtin_ia32_alignd512:
1856 case X86::BI__builtin_ia32_alignq128:
1857 case X86::BI__builtin_ia32_alignq256:
1858 case X86::BI__builtin_ia32_alignq512: {
1859 unsigned NumElts =
1860 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1861 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1862
1863 // Mask the shift amount to width of a vector.
1864 ShiftVal &= NumElts - 1;
1865
1866 int Indices[16];
1867 for (unsigned i = 0; i != NumElts; ++i)
1868 Indices[i] = i + ShiftVal;
1869
1870 return Builder.CreateShuffleVector(Ops[1], Ops[0],
1871 ArrayRef(Indices, NumElts), "valign");
1872 }
1873 case X86::BI__builtin_ia32_shuf_f32x4_256:
1874 case X86::BI__builtin_ia32_shuf_f64x2_256:
1875 case X86::BI__builtin_ia32_shuf_i32x4_256:
1876 case X86::BI__builtin_ia32_shuf_i64x2_256:
1877 case X86::BI__builtin_ia32_shuf_f32x4:
1878 case X86::BI__builtin_ia32_shuf_f64x2:
1879 case X86::BI__builtin_ia32_shuf_i32x4:
1880 case X86::BI__builtin_ia32_shuf_i64x2: {
1881 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1882 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1883 unsigned NumElts = Ty->getNumElements();
1884 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
1885 unsigned NumLaneElts = NumElts / NumLanes;
1886
1887 int Indices[16];
1888 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1889 unsigned Index = (Imm % NumLanes) * NumLaneElts;
1890 Imm /= NumLanes; // Discard the bits we just used.
1891 if (l >= (NumElts / 2))
1892 Index += NumElts; // Switch to other source.
1893 for (unsigned i = 0; i != NumLaneElts; ++i) {
1894 Indices[l + i] = Index + i;
1895 }
1896 }
1897
1898 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1899 ArrayRef(Indices, NumElts), "shuf");
1900 }
1901
1902 case X86::BI__builtin_ia32_vperm2f128_pd256:
1903 case X86::BI__builtin_ia32_vperm2f128_ps256:
1904 case X86::BI__builtin_ia32_vperm2f128_si256:
1905 case X86::BI__builtin_ia32_permti256: {
1906 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1907 unsigned NumElts =
1908 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1909
1910 // This takes a very simple approach since there are two lanes and a
1911 // shuffle can have 2 inputs. So we reserve the first input for the first
1912 // lane and the second input for the second lane. This may result in
1913 // duplicate sources, but this can be dealt with in the backend.
1914
1915 Value *OutOps[2];
1916 int Indices[8];
1917 for (unsigned l = 0; l != 2; ++l) {
1918 // Determine the source for this lane.
1919 if (Imm & (1 << ((l * 4) + 3)))
1920 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
1921 else if (Imm & (1 << ((l * 4) + 1)))
1922 OutOps[l] = Ops[1];
1923 else
1924 OutOps[l] = Ops[0];
1925
1926 for (unsigned i = 0; i != NumElts/2; ++i) {
1927 // Start with ith element of the source for this lane.
1928 unsigned Idx = (l * NumElts) + i;
1929 // If bit 0 of the immediate half is set, switch to the high half of
1930 // the source.
1931 if (Imm & (1 << (l * 4)))
1932 Idx += NumElts/2;
1933 Indices[(l * (NumElts/2)) + i] = Idx;
1934 }
1935 }
1936
1937 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
1938 ArrayRef(Indices, NumElts), "vperm");
1939 }
1940
1941 case X86::BI__builtin_ia32_pslldqi128_byteshift:
1942 case X86::BI__builtin_ia32_pslldqi256_byteshift:
1943 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
1944 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1945 auto *VecTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1946 // Builtin type is vXi8.
1947 unsigned NumElts = VecTy->getNumElements();
1948 Value *Zero = llvm::Constant::getNullValue(VecTy);
1949
1950 // If pslldq is shifting the vector more than 15 bytes, emit zero.
1951 if (ShiftVal >= 16)
1952 return Zero;
1953
1954 int Indices[64];
1955 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
1956 for (unsigned l = 0; l != NumElts; l += 16) {
1957 for (unsigned i = 0; i != 16; ++i) {
1958 unsigned Idx = NumElts + i - ShiftVal;
1959 if (Idx < NumElts)
1960 Idx -= NumElts - 16; // end of lane, switch operand.
1961 Indices[l + i] = Idx + l;
1962 }
1963 }
1964 return Builder.CreateShuffleVector(Zero, Ops[0], ArrayRef(Indices, NumElts),
1965 "pslldq");
1966 }
1967 case X86::BI__builtin_ia32_psrldqi128_byteshift:
1968 case X86::BI__builtin_ia32_psrldqi256_byteshift:
1969 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
1970 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1971 auto *VecTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1972 // Builtin type is vXi8.
1973 unsigned NumElts = VecTy->getNumElements();
1974 Value *Zero = llvm::Constant::getNullValue(VecTy);
1975
1976 // If psrldq is shifting the vector more than 15 bytes, emit zero.
1977 if (ShiftVal >= 16)
1978 return Zero;
1979
1980 int Indices[64];
1981 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
1982 for (unsigned l = 0; l != NumElts; l += 16) {
1983 for (unsigned i = 0; i != 16; ++i) {
1984 unsigned Idx = i + ShiftVal;
1985 if (Idx >= 16)
1986 Idx += NumElts - 16; // end of lane, switch operand.
1987 Indices[l + i] = Idx + l;
1988 }
1989 }
1990 return Builder.CreateShuffleVector(Ops[0], Zero, ArrayRef(Indices, NumElts),
1991 "psrldq");
1992 }
1993 case X86::BI__builtin_ia32_kshiftliqi:
1994 case X86::BI__builtin_ia32_kshiftlihi:
1995 case X86::BI__builtin_ia32_kshiftlisi:
1996 case X86::BI__builtin_ia32_kshiftlidi: {
1997 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1998 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
1999
2000 if (ShiftVal >= NumElts)
2001 return llvm::Constant::getNullValue(Ops[0]->getType());
2002
2003 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
2004
2005 int Indices[64];
2006 for (unsigned i = 0; i != NumElts; ++i)
2007 Indices[i] = NumElts + i - ShiftVal;
2008
2009 Value *Zero = llvm::Constant::getNullValue(In->getType());
2010 Value *SV = Builder.CreateShuffleVector(
2011 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
2012 return Builder.CreateBitCast(SV, Ops[0]->getType());
2013 }
2014 case X86::BI__builtin_ia32_kshiftriqi:
2015 case X86::BI__builtin_ia32_kshiftrihi:
2016 case X86::BI__builtin_ia32_kshiftrisi:
2017 case X86::BI__builtin_ia32_kshiftridi: {
2018 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
2019 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2020
2021 if (ShiftVal >= NumElts)
2022 return llvm::Constant::getNullValue(Ops[0]->getType());
2023
2024 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
2025
2026 int Indices[64];
2027 for (unsigned i = 0; i != NumElts; ++i)
2028 Indices[i] = i + ShiftVal;
2029
2030 Value *Zero = llvm::Constant::getNullValue(In->getType());
2031 Value *SV = Builder.CreateShuffleVector(
2032 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
2033 return Builder.CreateBitCast(SV, Ops[0]->getType());
2034 }
2035 case X86::BI__builtin_ia32_movnti:
2036 case X86::BI__builtin_ia32_movnti64:
2037 case X86::BI__builtin_ia32_movntsd:
2038 case X86::BI__builtin_ia32_movntss: {
2039 llvm::MDNode *Node = llvm::MDNode::get(
2040 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
2041
2042 Value *Ptr = Ops[0];
2043 Value *Src = Ops[1];
2044
2045 // Extract the 0'th element of the source vector.
2046 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
2047 BuiltinID == X86::BI__builtin_ia32_movntss)
2048 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
2049
2050 // Unaligned nontemporal store of the scalar value.
2051 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
2052 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
2053 SI->setAlignment(llvm::Align(1));
2054 return SI;
2055 }
2056 // Rotate is a special case of funnel shift - 1st 2 args are the same.
2057 case X86::BI__builtin_ia32_vprotbi:
2058 case X86::BI__builtin_ia32_vprotwi:
2059 case X86::BI__builtin_ia32_vprotdi:
2060 case X86::BI__builtin_ia32_vprotqi:
2061 case X86::BI__builtin_ia32_prold128:
2062 case X86::BI__builtin_ia32_prold256:
2063 case X86::BI__builtin_ia32_prold512:
2064 case X86::BI__builtin_ia32_prolq128:
2065 case X86::BI__builtin_ia32_prolq256:
2066 case X86::BI__builtin_ia32_prolq512:
2067 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
2068 case X86::BI__builtin_ia32_prord128:
2069 case X86::BI__builtin_ia32_prord256:
2070 case X86::BI__builtin_ia32_prord512:
2071 case X86::BI__builtin_ia32_prorq128:
2072 case X86::BI__builtin_ia32_prorq256:
2073 case X86::BI__builtin_ia32_prorq512:
2074 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
2075 case X86::BI__builtin_ia32_selectb_128:
2076 case X86::BI__builtin_ia32_selectb_256:
2077 case X86::BI__builtin_ia32_selectb_512:
2078 case X86::BI__builtin_ia32_selectw_128:
2079 case X86::BI__builtin_ia32_selectw_256:
2080 case X86::BI__builtin_ia32_selectw_512:
2081 case X86::BI__builtin_ia32_selectd_128:
2082 case X86::BI__builtin_ia32_selectd_256:
2083 case X86::BI__builtin_ia32_selectd_512:
2084 case X86::BI__builtin_ia32_selectq_128:
2085 case X86::BI__builtin_ia32_selectq_256:
2086 case X86::BI__builtin_ia32_selectq_512:
2087 case X86::BI__builtin_ia32_selectph_128:
2088 case X86::BI__builtin_ia32_selectph_256:
2089 case X86::BI__builtin_ia32_selectph_512:
2090 case X86::BI__builtin_ia32_selectpbf_128:
2091 case X86::BI__builtin_ia32_selectpbf_256:
2092 case X86::BI__builtin_ia32_selectpbf_512:
2093 case X86::BI__builtin_ia32_selectps_128:
2094 case X86::BI__builtin_ia32_selectps_256:
2095 case X86::BI__builtin_ia32_selectps_512:
2096 case X86::BI__builtin_ia32_selectpd_128:
2097 case X86::BI__builtin_ia32_selectpd_256:
2098 case X86::BI__builtin_ia32_selectpd_512:
2099 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
2100 case X86::BI__builtin_ia32_selectsh_128:
2101 case X86::BI__builtin_ia32_selectsbf_128:
2102 case X86::BI__builtin_ia32_selectss_128:
2103 case X86::BI__builtin_ia32_selectsd_128: {
2104 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2105 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2106 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
2107 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
2108 }
2109 case X86::BI__builtin_ia32_cmpb128_mask:
2110 case X86::BI__builtin_ia32_cmpb256_mask:
2111 case X86::BI__builtin_ia32_cmpb512_mask:
2112 case X86::BI__builtin_ia32_cmpw128_mask:
2113 case X86::BI__builtin_ia32_cmpw256_mask:
2114 case X86::BI__builtin_ia32_cmpw512_mask:
2115 case X86::BI__builtin_ia32_cmpd128_mask:
2116 case X86::BI__builtin_ia32_cmpd256_mask:
2117 case X86::BI__builtin_ia32_cmpd512_mask:
2118 case X86::BI__builtin_ia32_cmpq128_mask:
2119 case X86::BI__builtin_ia32_cmpq256_mask:
2120 case X86::BI__builtin_ia32_cmpq512_mask: {
2121 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
2122 return EmitX86MaskedCompare(*this, CC, true, Ops);
2123 }
2124 case X86::BI__builtin_ia32_ucmpb128_mask:
2125 case X86::BI__builtin_ia32_ucmpb256_mask:
2126 case X86::BI__builtin_ia32_ucmpb512_mask:
2127 case X86::BI__builtin_ia32_ucmpw128_mask:
2128 case X86::BI__builtin_ia32_ucmpw256_mask:
2129 case X86::BI__builtin_ia32_ucmpw512_mask:
2130 case X86::BI__builtin_ia32_ucmpd128_mask:
2131 case X86::BI__builtin_ia32_ucmpd256_mask:
2132 case X86::BI__builtin_ia32_ucmpd512_mask:
2133 case X86::BI__builtin_ia32_ucmpq128_mask:
2134 case X86::BI__builtin_ia32_ucmpq256_mask:
2135 case X86::BI__builtin_ia32_ucmpq512_mask: {
2136 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
2137 return EmitX86MaskedCompare(*this, CC, false, Ops);
2138 }
2139 case X86::BI__builtin_ia32_vpcomb:
2140 case X86::BI__builtin_ia32_vpcomw:
2141 case X86::BI__builtin_ia32_vpcomd:
2142 case X86::BI__builtin_ia32_vpcomq:
2143 return EmitX86vpcom(*this, Ops, true);
2144 case X86::BI__builtin_ia32_vpcomub:
2145 case X86::BI__builtin_ia32_vpcomuw:
2146 case X86::BI__builtin_ia32_vpcomud:
2147 case X86::BI__builtin_ia32_vpcomuq:
2148 return EmitX86vpcom(*this, Ops, false);
2149
2150 case X86::BI__builtin_ia32_kortestcqi:
2151 case X86::BI__builtin_ia32_kortestchi:
2152 case X86::BI__builtin_ia32_kortestcsi:
2153 case X86::BI__builtin_ia32_kortestcdi: {
2154 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2155 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
2156 Value *Cmp = Builder.CreateICmpEQ(Or, C);
2157 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2158 }
2159 case X86::BI__builtin_ia32_kortestzqi:
2160 case X86::BI__builtin_ia32_kortestzhi:
2161 case X86::BI__builtin_ia32_kortestzsi:
2162 case X86::BI__builtin_ia32_kortestzdi: {
2163 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2164 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
2165 Value *Cmp = Builder.CreateICmpEQ(Or, C);
2166 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2167 }
2168
2169 case X86::BI__builtin_ia32_ktestcqi:
2170 case X86::BI__builtin_ia32_ktestzqi:
2171 case X86::BI__builtin_ia32_ktestchi:
2172 case X86::BI__builtin_ia32_ktestzhi:
2173 case X86::BI__builtin_ia32_ktestcsi:
2174 case X86::BI__builtin_ia32_ktestzsi:
2175 case X86::BI__builtin_ia32_ktestcdi:
2176 case X86::BI__builtin_ia32_ktestzdi: {
2177 Intrinsic::ID IID;
2178 switch (BuiltinID) {
2179 default: llvm_unreachable("Unsupported intrinsic!");
2180 case X86::BI__builtin_ia32_ktestcqi:
2181 IID = Intrinsic::x86_avx512_ktestc_b;
2182 break;
2183 case X86::BI__builtin_ia32_ktestzqi:
2184 IID = Intrinsic::x86_avx512_ktestz_b;
2185 break;
2186 case X86::BI__builtin_ia32_ktestchi:
2187 IID = Intrinsic::x86_avx512_ktestc_w;
2188 break;
2189 case X86::BI__builtin_ia32_ktestzhi:
2190 IID = Intrinsic::x86_avx512_ktestz_w;
2191 break;
2192 case X86::BI__builtin_ia32_ktestcsi:
2193 IID = Intrinsic::x86_avx512_ktestc_d;
2194 break;
2195 case X86::BI__builtin_ia32_ktestzsi:
2196 IID = Intrinsic::x86_avx512_ktestz_d;
2197 break;
2198 case X86::BI__builtin_ia32_ktestcdi:
2199 IID = Intrinsic::x86_avx512_ktestc_q;
2200 break;
2201 case X86::BI__builtin_ia32_ktestzdi:
2202 IID = Intrinsic::x86_avx512_ktestz_q;
2203 break;
2204 }
2205
2206 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2207 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2208 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2209 Function *Intr = CGM.getIntrinsic(IID);
2210 return Builder.CreateCall(Intr, {LHS, RHS});
2211 }
2212
2213 case X86::BI__builtin_ia32_kaddqi:
2214 case X86::BI__builtin_ia32_kaddhi:
2215 case X86::BI__builtin_ia32_kaddsi:
2216 case X86::BI__builtin_ia32_kadddi: {
2217 Intrinsic::ID IID;
2218 switch (BuiltinID) {
2219 default: llvm_unreachable("Unsupported intrinsic!");
2220 case X86::BI__builtin_ia32_kaddqi:
2221 IID = Intrinsic::x86_avx512_kadd_b;
2222 break;
2223 case X86::BI__builtin_ia32_kaddhi:
2224 IID = Intrinsic::x86_avx512_kadd_w;
2225 break;
2226 case X86::BI__builtin_ia32_kaddsi:
2227 IID = Intrinsic::x86_avx512_kadd_d;
2228 break;
2229 case X86::BI__builtin_ia32_kadddi:
2230 IID = Intrinsic::x86_avx512_kadd_q;
2231 break;
2232 }
2233
2234 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2235 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2236 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2237 Function *Intr = CGM.getIntrinsic(IID);
2238 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
2239 return Builder.CreateBitCast(Res, Ops[0]->getType());
2240 }
2241 case X86::BI__builtin_ia32_kandqi:
2242 case X86::BI__builtin_ia32_kandhi:
2243 case X86::BI__builtin_ia32_kandsi:
2244 case X86::BI__builtin_ia32_kanddi:
2245 return EmitX86MaskLogic(*this, Instruction::And, Ops);
2246 case X86::BI__builtin_ia32_kandnqi:
2247 case X86::BI__builtin_ia32_kandnhi:
2248 case X86::BI__builtin_ia32_kandnsi:
2249 case X86::BI__builtin_ia32_kandndi:
2250 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
2251 case X86::BI__builtin_ia32_korqi:
2252 case X86::BI__builtin_ia32_korhi:
2253 case X86::BI__builtin_ia32_korsi:
2254 case X86::BI__builtin_ia32_kordi:
2255 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
2256 case X86::BI__builtin_ia32_kxnorqi:
2257 case X86::BI__builtin_ia32_kxnorhi:
2258 case X86::BI__builtin_ia32_kxnorsi:
2259 case X86::BI__builtin_ia32_kxnordi:
2260 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
2261 case X86::BI__builtin_ia32_kxorqi:
2262 case X86::BI__builtin_ia32_kxorhi:
2263 case X86::BI__builtin_ia32_kxorsi:
2264 case X86::BI__builtin_ia32_kxordi:
2265 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
2266 case X86::BI__builtin_ia32_knotqi:
2267 case X86::BI__builtin_ia32_knothi:
2268 case X86::BI__builtin_ia32_knotsi:
2269 case X86::BI__builtin_ia32_knotdi: {
2270 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2271 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2272 return Builder.CreateBitCast(Builder.CreateNot(Res),
2273 Ops[0]->getType());
2274 }
2275 case X86::BI__builtin_ia32_kmovb:
2276 case X86::BI__builtin_ia32_kmovw:
2277 case X86::BI__builtin_ia32_kmovd:
2278 case X86::BI__builtin_ia32_kmovq: {
2279 // Bitcast to vXi1 type and then back to integer. This gets the mask
2280 // register type into the IR, but might be optimized out depending on
2281 // what's around it.
2282 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2283 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2284 return Builder.CreateBitCast(Res, Ops[0]->getType());
2285 }
2286
2287 case X86::BI__builtin_ia32_kunpckdi:
2288 case X86::BI__builtin_ia32_kunpcksi:
2289 case X86::BI__builtin_ia32_kunpckhi: {
2290 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2291 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2292 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2293 int Indices[64];
2294 for (unsigned i = 0; i != NumElts; ++i)
2295 Indices[i] = i;
2296
2297 // First extract half of each vector. This gives better codegen than
2298 // doing it in a single shuffle.
2299 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2300 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2301 // Concat the vectors.
2302 // NOTE: Operands are swapped to match the intrinsic definition.
2303 Value *Res =
2304 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2305 return Builder.CreateBitCast(Res, Ops[0]->getType());
2306 }
2307
2308 case X86::BI__builtin_ia32_sqrtsh_round_mask:
2309 case X86::BI__builtin_ia32_sqrtsd_round_mask:
2310 case X86::BI__builtin_ia32_sqrtss_round_mask: {
2311 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
2312 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2313 // otherwise keep the intrinsic.
2314 if (CC != 4) {
2315 Intrinsic::ID IID;
2316
2317 switch (BuiltinID) {
2318 default:
2319 llvm_unreachable("Unsupported intrinsic!");
2320 case X86::BI__builtin_ia32_sqrtsh_round_mask:
2321 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
2322 break;
2323 case X86::BI__builtin_ia32_sqrtsd_round_mask:
2324 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
2325 break;
2326 case X86::BI__builtin_ia32_sqrtss_round_mask:
2327 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
2328 break;
2329 }
2330 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2331 }
2332 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2333 Function *F;
2334 if (Builder.getIsFPConstrained()) {
2335 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2336 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2337 A->getType());
2338 A = Builder.CreateConstrainedFPCall(F, A);
2339 } else {
2340 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
2341 A = Builder.CreateCall(F, A);
2342 }
2343 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2344 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
2345 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
2346 }
2347 case X86::BI__builtin_ia32_sqrtph512:
2348 case X86::BI__builtin_ia32_sqrtps512:
2349 case X86::BI__builtin_ia32_sqrtpd512: {
2350 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
2351 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2352 // otherwise keep the intrinsic.
2353 if (CC != 4) {
2354 Intrinsic::ID IID;
2355
2356 switch (BuiltinID) {
2357 default:
2358 llvm_unreachable("Unsupported intrinsic!");
2359 case X86::BI__builtin_ia32_sqrtph512:
2360 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
2361 break;
2362 case X86::BI__builtin_ia32_sqrtps512:
2363 IID = Intrinsic::x86_avx512_sqrt_ps_512;
2364 break;
2365 case X86::BI__builtin_ia32_sqrtpd512:
2366 IID = Intrinsic::x86_avx512_sqrt_pd_512;
2367 break;
2368 }
2369 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2370 }
2371 if (Builder.getIsFPConstrained()) {
2372 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2373 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2374 Ops[0]->getType());
2375 return Builder.CreateConstrainedFPCall(F, Ops[0]);
2376 } else {
2377 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
2378 return Builder.CreateCall(F, Ops[0]);
2379 }
2380 }
2381
2382 case X86::BI__builtin_ia32_pmuludq128:
2383 case X86::BI__builtin_ia32_pmuludq256:
2384 case X86::BI__builtin_ia32_pmuludq512:
2385 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
2386
2387 case X86::BI__builtin_ia32_pmuldq128:
2388 case X86::BI__builtin_ia32_pmuldq256:
2389 case X86::BI__builtin_ia32_pmuldq512:
2390 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
2391
2392 case X86::BI__builtin_ia32_pternlogd512_mask:
2393 case X86::BI__builtin_ia32_pternlogq512_mask:
2394 case X86::BI__builtin_ia32_pternlogd128_mask:
2395 case X86::BI__builtin_ia32_pternlogd256_mask:
2396 case X86::BI__builtin_ia32_pternlogq128_mask:
2397 case X86::BI__builtin_ia32_pternlogq256_mask:
2398 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
2399
2400 case X86::BI__builtin_ia32_pternlogd512_maskz:
2401 case X86::BI__builtin_ia32_pternlogq512_maskz:
2402 case X86::BI__builtin_ia32_pternlogd128_maskz:
2403 case X86::BI__builtin_ia32_pternlogd256_maskz:
2404 case X86::BI__builtin_ia32_pternlogq128_maskz:
2405 case X86::BI__builtin_ia32_pternlogq256_maskz:
2406 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
2407
2408 case X86::BI__builtin_ia32_vpshldd128:
2409 case X86::BI__builtin_ia32_vpshldd256:
2410 case X86::BI__builtin_ia32_vpshldd512:
2411 case X86::BI__builtin_ia32_vpshldq128:
2412 case X86::BI__builtin_ia32_vpshldq256:
2413 case X86::BI__builtin_ia32_vpshldq512:
2414 case X86::BI__builtin_ia32_vpshldw128:
2415 case X86::BI__builtin_ia32_vpshldw256:
2416 case X86::BI__builtin_ia32_vpshldw512:
2417 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
2418
2419 case X86::BI__builtin_ia32_vpshrdd128:
2420 case X86::BI__builtin_ia32_vpshrdd256:
2421 case X86::BI__builtin_ia32_vpshrdd512:
2422 case X86::BI__builtin_ia32_vpshrdq128:
2423 case X86::BI__builtin_ia32_vpshrdq256:
2424 case X86::BI__builtin_ia32_vpshrdq512:
2425 case X86::BI__builtin_ia32_vpshrdw128:
2426 case X86::BI__builtin_ia32_vpshrdw256:
2427 case X86::BI__builtin_ia32_vpshrdw512:
2428 // Ops 0 and 1 are swapped.
2429 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
2430
2431 // Reductions
2432 case X86::BI__builtin_ia32_reduce_fadd_pd512:
2433 case X86::BI__builtin_ia32_reduce_fadd_ps512:
2434 case X86::BI__builtin_ia32_reduce_fadd_ph512:
2435 case X86::BI__builtin_ia32_reduce_fadd_ph256:
2436 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
2437 Function *F =
2438 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
2439 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2440 Builder.getFastMathFlags().setAllowReassoc();
2441 return Builder.CreateCall(F, {Ops[0], Ops[1]});
2442 }
2443 case X86::BI__builtin_ia32_reduce_fmul_pd512:
2444 case X86::BI__builtin_ia32_reduce_fmul_ps512:
2445 case X86::BI__builtin_ia32_reduce_fmul_ph512:
2446 case X86::BI__builtin_ia32_reduce_fmul_ph256:
2447 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
2448 Function *F =
2449 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
2450 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2451 Builder.getFastMathFlags().setAllowReassoc();
2452 return Builder.CreateCall(F, {Ops[0], Ops[1]});
2453 }
2454 case X86::BI__builtin_ia32_reduce_fmax_pd512:
2455 case X86::BI__builtin_ia32_reduce_fmax_ps512:
2456 case X86::BI__builtin_ia32_reduce_fmax_ph512:
2457 case X86::BI__builtin_ia32_reduce_fmax_ph256:
2458 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
2459 Function *F =
2460 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
2461 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2462 Builder.getFastMathFlags().setNoNaNs();
2463 return Builder.CreateCall(F, {Ops[0]});
2464 }
2465 case X86::BI__builtin_ia32_reduce_fmin_pd512:
2466 case X86::BI__builtin_ia32_reduce_fmin_ps512:
2467 case X86::BI__builtin_ia32_reduce_fmin_ph512:
2468 case X86::BI__builtin_ia32_reduce_fmin_ph256:
2469 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
2470 Function *F =
2471 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
2472 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2473 Builder.getFastMathFlags().setNoNaNs();
2474 return Builder.CreateCall(F, {Ops[0]});
2475 }
2476
2477 case X86::BI__builtin_ia32_rdrand16_step:
2478 case X86::BI__builtin_ia32_rdrand32_step:
2479 case X86::BI__builtin_ia32_rdrand64_step:
2480 case X86::BI__builtin_ia32_rdseed16_step:
2481 case X86::BI__builtin_ia32_rdseed32_step:
2482 case X86::BI__builtin_ia32_rdseed64_step: {
2483 Intrinsic::ID ID;
2484 switch (BuiltinID) {
2485 default: llvm_unreachable("Unsupported intrinsic!");
2486 case X86::BI__builtin_ia32_rdrand16_step:
2487 ID = Intrinsic::x86_rdrand_16;
2488 break;
2489 case X86::BI__builtin_ia32_rdrand32_step:
2490 ID = Intrinsic::x86_rdrand_32;
2491 break;
2492 case X86::BI__builtin_ia32_rdrand64_step:
2493 ID = Intrinsic::x86_rdrand_64;
2494 break;
2495 case X86::BI__builtin_ia32_rdseed16_step:
2496 ID = Intrinsic::x86_rdseed_16;
2497 break;
2498 case X86::BI__builtin_ia32_rdseed32_step:
2499 ID = Intrinsic::x86_rdseed_32;
2500 break;
2501 case X86::BI__builtin_ia32_rdseed64_step:
2502 ID = Intrinsic::x86_rdseed_64;
2503 break;
2504 }
2505
2506 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
2507 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
2508 Ops[0]);
2509 return Builder.CreateExtractValue(Call, 1);
2510 }
2511 case X86::BI__builtin_ia32_addcarryx_u32:
2512 case X86::BI__builtin_ia32_addcarryx_u64:
2513 case X86::BI__builtin_ia32_subborrow_u32:
2514 case X86::BI__builtin_ia32_subborrow_u64: {
2515 Intrinsic::ID IID;
2516 switch (BuiltinID) {
2517 default: llvm_unreachable("Unsupported intrinsic!");
2518 case X86::BI__builtin_ia32_addcarryx_u32:
2519 IID = Intrinsic::x86_addcarry_32;
2520 break;
2521 case X86::BI__builtin_ia32_addcarryx_u64:
2522 IID = Intrinsic::x86_addcarry_64;
2523 break;
2524 case X86::BI__builtin_ia32_subborrow_u32:
2525 IID = Intrinsic::x86_subborrow_32;
2526 break;
2527 case X86::BI__builtin_ia32_subborrow_u64:
2528 IID = Intrinsic::x86_subborrow_64;
2529 break;
2530 }
2531
2532 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
2533 { Ops[0], Ops[1], Ops[2] });
2534 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
2535 Ops[3]);
2536 return Builder.CreateExtractValue(Call, 0);
2537 }
2538
2539 case X86::BI__builtin_ia32_fpclassps128_mask:
2540 case X86::BI__builtin_ia32_fpclassps256_mask:
2541 case X86::BI__builtin_ia32_fpclassps512_mask:
2542 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2543 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2544 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2545 case X86::BI__builtin_ia32_fpclassph128_mask:
2546 case X86::BI__builtin_ia32_fpclassph256_mask:
2547 case X86::BI__builtin_ia32_fpclassph512_mask:
2548 case X86::BI__builtin_ia32_fpclasspd128_mask:
2549 case X86::BI__builtin_ia32_fpclasspd256_mask:
2550 case X86::BI__builtin_ia32_fpclasspd512_mask: {
2551 unsigned NumElts =
2552 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2553 Value *MaskIn = Ops[2];
2554 Ops.erase(&Ops[2]);
2555
2556 Intrinsic::ID ID;
2557 switch (BuiltinID) {
2558 default: llvm_unreachable("Unsupported intrinsic!");
2559 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2560 ID = Intrinsic::x86_avx10_fpclass_bf16_128;
2561 break;
2562 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2563 ID = Intrinsic::x86_avx10_fpclass_bf16_256;
2564 break;
2565 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2566 ID = Intrinsic::x86_avx10_fpclass_bf16_512;
2567 break;
2568 case X86::BI__builtin_ia32_fpclassph128_mask:
2569 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
2570 break;
2571 case X86::BI__builtin_ia32_fpclassph256_mask:
2572 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
2573 break;
2574 case X86::BI__builtin_ia32_fpclassph512_mask:
2575 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
2576 break;
2577 case X86::BI__builtin_ia32_fpclassps128_mask:
2578 ID = Intrinsic::x86_avx512_fpclass_ps_128;
2579 break;
2580 case X86::BI__builtin_ia32_fpclassps256_mask:
2581 ID = Intrinsic::x86_avx512_fpclass_ps_256;
2582 break;
2583 case X86::BI__builtin_ia32_fpclassps512_mask:
2584 ID = Intrinsic::x86_avx512_fpclass_ps_512;
2585 break;
2586 case X86::BI__builtin_ia32_fpclasspd128_mask:
2587 ID = Intrinsic::x86_avx512_fpclass_pd_128;
2588 break;
2589 case X86::BI__builtin_ia32_fpclasspd256_mask:
2590 ID = Intrinsic::x86_avx512_fpclass_pd_256;
2591 break;
2592 case X86::BI__builtin_ia32_fpclasspd512_mask:
2593 ID = Intrinsic::x86_avx512_fpclass_pd_512;
2594 break;
2595 }
2596
2597 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2598 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
2599 }
2600
2601 case X86::BI__builtin_ia32_vp2intersect_q_512:
2602 case X86::BI__builtin_ia32_vp2intersect_q_256:
2603 case X86::BI__builtin_ia32_vp2intersect_q_128:
2604 case X86::BI__builtin_ia32_vp2intersect_d_512:
2605 case X86::BI__builtin_ia32_vp2intersect_d_256:
2606 case X86::BI__builtin_ia32_vp2intersect_d_128: {
2607 unsigned NumElts =
2608 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2609 Intrinsic::ID ID;
2610
2611 switch (BuiltinID) {
2612 default: llvm_unreachable("Unsupported intrinsic!");
2613 case X86::BI__builtin_ia32_vp2intersect_q_512:
2614 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
2615 break;
2616 case X86::BI__builtin_ia32_vp2intersect_q_256:
2617 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
2618 break;
2619 case X86::BI__builtin_ia32_vp2intersect_q_128:
2620 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
2621 break;
2622 case X86::BI__builtin_ia32_vp2intersect_d_512:
2623 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
2624 break;
2625 case X86::BI__builtin_ia32_vp2intersect_d_256:
2626 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
2627 break;
2628 case X86::BI__builtin_ia32_vp2intersect_d_128:
2629 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
2630 break;
2631 }
2632
2633 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
2634 Value *Result = Builder.CreateExtractValue(Call, 0);
2635 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2636 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
2637
2638 Result = Builder.CreateExtractValue(Call, 1);
2639 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2640 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
2641 }
2642
2643 case X86::BI__builtin_ia32_vpmultishiftqb128:
2644 case X86::BI__builtin_ia32_vpmultishiftqb256:
2645 case X86::BI__builtin_ia32_vpmultishiftqb512: {
2646 Intrinsic::ID ID;
2647 switch (BuiltinID) {
2648 default: llvm_unreachable("Unsupported intrinsic!");
2649 case X86::BI__builtin_ia32_vpmultishiftqb128:
2650 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
2651 break;
2652 case X86::BI__builtin_ia32_vpmultishiftqb256:
2653 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
2654 break;
2655 case X86::BI__builtin_ia32_vpmultishiftqb512:
2656 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
2657 break;
2658 }
2659
2660 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2661 }
2662
2663 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2664 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2665 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
2666 unsigned NumElts =
2667 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2668 Value *MaskIn = Ops[2];
2669 Ops.erase(&Ops[2]);
2670
2671 Intrinsic::ID ID;
2672 switch (BuiltinID) {
2673 default: llvm_unreachable("Unsupported intrinsic!");
2674 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2675 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2676 break;
2677 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2678 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2679 break;
2680 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
2681 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2682 break;
2683 }
2684
2685 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2686 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
2687 }
2688
2689 // packed comparison intrinsics
2690 case X86::BI__builtin_ia32_cmpeqps:
2691 case X86::BI__builtin_ia32_cmpeqpd:
2692 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
2693 case X86::BI__builtin_ia32_cmpltps:
2694 case X86::BI__builtin_ia32_cmpltpd:
2695 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
2696 case X86::BI__builtin_ia32_cmpleps:
2697 case X86::BI__builtin_ia32_cmplepd:
2698 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
2699 case X86::BI__builtin_ia32_cmpunordps:
2700 case X86::BI__builtin_ia32_cmpunordpd:
2701 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
2702 case X86::BI__builtin_ia32_cmpneqps:
2703 case X86::BI__builtin_ia32_cmpneqpd:
2704 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
2705 case X86::BI__builtin_ia32_cmpnltps:
2706 case X86::BI__builtin_ia32_cmpnltpd:
2707 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
2708 case X86::BI__builtin_ia32_cmpnleps:
2709 case X86::BI__builtin_ia32_cmpnlepd:
2710 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
2711 case X86::BI__builtin_ia32_cmpordps:
2712 case X86::BI__builtin_ia32_cmpordpd:
2713 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
2714 case X86::BI__builtin_ia32_cmpph128_mask:
2715 case X86::BI__builtin_ia32_cmpph256_mask:
2716 case X86::BI__builtin_ia32_cmpph512_mask:
2717 case X86::BI__builtin_ia32_cmpps128_mask:
2718 case X86::BI__builtin_ia32_cmpps256_mask:
2719 case X86::BI__builtin_ia32_cmpps512_mask:
2720 case X86::BI__builtin_ia32_cmppd128_mask:
2721 case X86::BI__builtin_ia32_cmppd256_mask:
2722 case X86::BI__builtin_ia32_cmppd512_mask:
2723 case X86::BI__builtin_ia32_vcmpbf16512_mask:
2724 case X86::BI__builtin_ia32_vcmpbf16256_mask:
2725 case X86::BI__builtin_ia32_vcmpbf16128_mask:
2726 IsMaskFCmp = true;
2727 [[fallthrough]];
2728 case X86::BI__builtin_ia32_cmpps:
2729 case X86::BI__builtin_ia32_cmpps256:
2730 case X86::BI__builtin_ia32_cmppd:
2731 case X86::BI__builtin_ia32_cmppd256: {
2732 // Lowering vector comparisons to fcmp instructions, while
2733 // ignoring signalling behaviour requested
2734 // ignoring rounding mode requested
2735 // This is only possible if fp-model is not strict and FENV_ACCESS is off.
2736
2737 // The third argument is the comparison condition, an integer in the
2738 // range [0, 31]
2739 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
2740
2741 // Lowering to IR fcmp instruction.
2742 // Ignoring requested signaling behaviour,
2743 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
2744 FCmpInst::Predicate Pred;
2745 bool IsSignaling;
2746 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
2747 // behavior is inverted. We'll handle that after the switch.
2748 switch (CC & 0xf) {
2749 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
2750 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
2751 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
2752 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
2753 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
2754 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
2755 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
2756 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
2757 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
2758 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
2759 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
2760 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
2761 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
2762 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
2763 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
2764 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
2765 default: llvm_unreachable("Unhandled CC");
2766 }
2767
2768 // Invert the signalling behavior for 16-31.
2769 if (CC & 0x10)
2770 IsSignaling = !IsSignaling;
2771
2772 // If the predicate is true or false and we're using constrained intrinsics,
2773 // we don't have a compare intrinsic we can use. Just use the legacy X86
2774 // specific intrinsic.
2775 // If the intrinsic is mask enabled and we're using constrained intrinsics,
2776 // use the legacy X86 specific intrinsic.
2777 if (Builder.getIsFPConstrained() &&
2778 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
2779 IsMaskFCmp)) {
2780
2781 Intrinsic::ID IID;
2782 switch (BuiltinID) {
2783 default: llvm_unreachable("Unexpected builtin");
2784 case X86::BI__builtin_ia32_cmpps:
2785 IID = Intrinsic::x86_sse_cmp_ps;
2786 break;
2787 case X86::BI__builtin_ia32_cmpps256:
2788 IID = Intrinsic::x86_avx_cmp_ps_256;
2789 break;
2790 case X86::BI__builtin_ia32_cmppd:
2791 IID = Intrinsic::x86_sse2_cmp_pd;
2792 break;
2793 case X86::BI__builtin_ia32_cmppd256:
2794 IID = Intrinsic::x86_avx_cmp_pd_256;
2795 break;
2796 case X86::BI__builtin_ia32_cmpph128_mask:
2797 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
2798 break;
2799 case X86::BI__builtin_ia32_cmpph256_mask:
2800 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
2801 break;
2802 case X86::BI__builtin_ia32_cmpph512_mask:
2803 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
2804 break;
2805 case X86::BI__builtin_ia32_cmpps512_mask:
2806 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2807 break;
2808 case X86::BI__builtin_ia32_cmppd512_mask:
2809 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2810 break;
2811 case X86::BI__builtin_ia32_cmpps128_mask:
2812 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2813 break;
2814 case X86::BI__builtin_ia32_cmpps256_mask:
2815 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2816 break;
2817 case X86::BI__builtin_ia32_cmppd128_mask:
2818 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2819 break;
2820 case X86::BI__builtin_ia32_cmppd256_mask:
2821 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2822 break;
2823 }
2824
2825 Function *Intr = CGM.getIntrinsic(IID);
2826 if (IsMaskFCmp) {
2827 unsigned NumElts =
2828 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2829 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
2830 Value *Cmp = Builder.CreateCall(Intr, Ops);
2831 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
2832 }
2833
2834 return Builder.CreateCall(Intr, Ops);
2835 }
2836
2837 // Builtins without the _mask suffix return a vector of integers
2838 // of the same width as the input vectors
2839 if (IsMaskFCmp) {
2840 // We ignore SAE if strict FP is disabled. We only keep precise
2841 // exception behavior under strict FP.
2842 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
2843 // object will be required.
2844 unsigned NumElts =
2845 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2846 Value *Cmp;
2847 if (IsSignaling)
2848 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
2849 else
2850 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
2851 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
2852 }
2853
2854 return getVectorFCmpIR(Pred, IsSignaling);
2855 }
2856
2857 // SSE scalar comparison intrinsics
2858 case X86::BI__builtin_ia32_cmpeqss:
2859 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
2860 case X86::BI__builtin_ia32_cmpltss:
2861 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
2862 case X86::BI__builtin_ia32_cmpless:
2863 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
2864 case X86::BI__builtin_ia32_cmpunordss:
2865 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
2866 case X86::BI__builtin_ia32_cmpneqss:
2867 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
2868 case X86::BI__builtin_ia32_cmpnltss:
2869 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
2870 case X86::BI__builtin_ia32_cmpnless:
2871 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
2872 case X86::BI__builtin_ia32_cmpordss:
2873 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
2874 case X86::BI__builtin_ia32_cmpeqsd:
2875 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
2876 case X86::BI__builtin_ia32_cmpltsd:
2877 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
2878 case X86::BI__builtin_ia32_cmplesd:
2879 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
2880 case X86::BI__builtin_ia32_cmpunordsd:
2881 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
2882 case X86::BI__builtin_ia32_cmpneqsd:
2883 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
2884 case X86::BI__builtin_ia32_cmpnltsd:
2885 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
2886 case X86::BI__builtin_ia32_cmpnlesd:
2887 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
2888 case X86::BI__builtin_ia32_cmpordsd:
2889 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
2890
2891 // f16c half2float intrinsics
2892 case X86::BI__builtin_ia32_vcvtph2ps_mask:
2893 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
2894 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
2895 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2896 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
2897 }
2898
2899 // AVX512 bf16 intrinsics
2900 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
2901 Ops[2] = getMaskVecValue(
2902 *this, Ops[2],
2903 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
2904 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
2905 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2906 }
2907
2908 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2909 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
2910 Intrinsic::ID IID;
2911 switch (BuiltinID) {
2912 default: llvm_unreachable("Unsupported intrinsic!");
2913 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2914 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
2915 break;
2916 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
2917 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
2918 break;
2919 }
2920 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
2921 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
2922 }
2923
2924 case X86::BI__cpuid:
2925 case X86::BI__cpuidex: {
2926 Value *FuncId = EmitScalarExpr(E->getArg(1));
2927 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
2928 ? EmitScalarExpr(E->getArg(2))
2929 : llvm::ConstantInt::get(Int32Ty, 0);
2930
2931 llvm::StructType *CpuidRetTy =
2932 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
2933 llvm::FunctionType *FTy =
2934 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
2935
2936 StringRef Asm, Constraints;
2937 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2938 Asm = "cpuid";
2939 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
2940 } else {
2941 // x86-64 uses %rbx as the base register, so preserve it.
2942 Asm = "xchgq %rbx, ${1:q}\n"
2943 "cpuid\n"
2944 "xchgq %rbx, ${1:q}";
2945 Constraints = "={ax},=r,={cx},={dx},0,2";
2946 }
2947
2948 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
2949 /*hasSideEffects=*/false);
2950 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
2951 Value *BasePtr = EmitScalarExpr(E->getArg(0));
2952 Value *Store = nullptr;
2953 for (unsigned i = 0; i < 4; i++) {
2954 Value *Extracted = Builder.CreateExtractValue(IACall, i);
2955 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
2956 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
2957 }
2958
2959 // Return the last store instruction to signal that we have emitted the
2960 // intrinsic.
2961 return Store;
2962 }
2963
2964 case X86::BI__emul:
2965 case X86::BI__emulu: {
2966 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
2967 bool isSigned = (BuiltinID == X86::BI__emul);
2968 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
2969 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
2970 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
2971 }
2972 case X86::BI__mulh:
2973 case X86::BI__umulh:
2974 case X86::BI_mul128:
2975 case X86::BI_umul128: {
2976 llvm::Type *ResType = ConvertType(E->getType());
2977 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
2978
2979 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
2980 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
2981 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
2982
2983 Value *MulResult, *HigherBits;
2984 if (IsSigned) {
2985 MulResult = Builder.CreateNSWMul(LHS, RHS);
2986 HigherBits = Builder.CreateAShr(MulResult, 64);
2987 } else {
2988 MulResult = Builder.CreateNUWMul(LHS, RHS);
2989 HigherBits = Builder.CreateLShr(MulResult, 64);
2990 }
2991 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
2992
2993 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
2994 return HigherBits;
2995
2996 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
2997 Builder.CreateStore(HigherBits, HighBitsAddress);
2998 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
2999 }
3000
3001 case X86::BI__faststorefence: {
3002 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
3003 llvm::SyncScope::System);
3004 }
3005 case X86::BI__shiftleft128:
3006 case X86::BI__shiftright128: {
3007 llvm::Function *F = CGM.getIntrinsic(
3008 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
3009 Int64Ty);
3010 // Flip low/high ops and zero-extend amount to matching type.
3011 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
3012 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
3013 std::swap(Ops[0], Ops[1]);
3014 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
3015 return Builder.CreateCall(F, Ops);
3016 }
3017 case X86::BI_ReadWriteBarrier:
3018 case X86::BI_ReadBarrier:
3019 case X86::BI_WriteBarrier: {
3020 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
3021 llvm::SyncScope::SingleThread);
3022 }
3023
3024 case X86::BI_AddressOfReturnAddress: {
3025 Function *F =
3026 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
3027 return Builder.CreateCall(F);
3028 }
3029 case X86::BI__stosb: {
3030 // We treat __stosb as a volatile memset - it may not generate "rep stosb"
3031 // instruction, but it will create a memset that won't be optimized away.
3032 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
3033 }
3034 case X86::BI__ud2:
3035 // llvm.trap makes a ud2a instruction on x86.
3036 return EmitTrapCall(Intrinsic::trap);
3037 case X86::BI__int2c: {
3038 // This syscall signals a driver assertion failure in x86 NT kernels.
3039 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
3040 llvm::InlineAsm *IA =
3041 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
3042 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
3043 getLLVMContext(), llvm::AttributeList::FunctionIndex,
3044 llvm::Attribute::NoReturn);
3045 llvm::CallInst *CI = Builder.CreateCall(IA);
3046 CI->setAttributes(NoReturnAttr);
3047 return CI;
3048 }
3049 case X86::BI__readfsbyte:
3050 case X86::BI__readfsword:
3051 case X86::BI__readfsdword:
3052 case X86::BI__readfsqword: {
3053 llvm::Type *IntTy = ConvertType(E->getType());
3054 Value *Ptr = Builder.CreateIntToPtr(
3055 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
3056 LoadInst *Load = Builder.CreateAlignedLoad(
3057 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
3058 Load->setVolatile(true);
3059 return Load;
3060 }
3061 case X86::BI__readgsbyte:
3062 case X86::BI__readgsword:
3063 case X86::BI__readgsdword:
3064 case X86::BI__readgsqword: {
3065 llvm::Type *IntTy = ConvertType(E->getType());
3066 Value *Ptr = Builder.CreateIntToPtr(
3067 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
3068 LoadInst *Load = Builder.CreateAlignedLoad(
3069 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
3070 Load->setVolatile(true);
3071 return Load;
3072 }
3073 case X86::BI__builtin_ia32_encodekey128_u32: {
3074 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
3075
3076 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
3077
3078 for (int i = 0; i < 3; ++i) {
3079 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3080 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
3081 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
3082 }
3083
3084 return Builder.CreateExtractValue(Call, 0);
3085 }
3086 case X86::BI__builtin_ia32_encodekey256_u32: {
3087 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
3088
3089 Value *Call =
3090 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
3091
3092 for (int i = 0; i < 4; ++i) {
3093 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3094 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
3095 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
3096 }
3097
3098 return Builder.CreateExtractValue(Call, 0);
3099 }
3100 case X86::BI__builtin_ia32_aesenc128kl_u8:
3101 case X86::BI__builtin_ia32_aesdec128kl_u8:
3102 case X86::BI__builtin_ia32_aesenc256kl_u8:
3103 case X86::BI__builtin_ia32_aesdec256kl_u8: {
3104 Intrinsic::ID IID;
3105 StringRef BlockName;
3106 switch (BuiltinID) {
3107 default:
3108 llvm_unreachable("Unexpected builtin");
3109 case X86::BI__builtin_ia32_aesenc128kl_u8:
3110 IID = Intrinsic::x86_aesenc128kl;
3111 BlockName = "aesenc128kl";
3112 break;
3113 case X86::BI__builtin_ia32_aesdec128kl_u8:
3114 IID = Intrinsic::x86_aesdec128kl;
3115 BlockName = "aesdec128kl";
3116 break;
3117 case X86::BI__builtin_ia32_aesenc256kl_u8:
3118 IID = Intrinsic::x86_aesenc256kl;
3119 BlockName = "aesenc256kl";
3120 break;
3121 case X86::BI__builtin_ia32_aesdec256kl_u8:
3122 IID = Intrinsic::x86_aesdec256kl;
3123 BlockName = "aesdec256kl";
3124 break;
3125 }
3126
3127 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
3128
3129 BasicBlock *NoError =
3130 createBasicBlock(BlockName + "_no_error", this->CurFn);
3131 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
3132 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
3133
3134 Value *Ret = Builder.CreateExtractValue(Call, 0);
3135 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3136 Value *Out = Builder.CreateExtractValue(Call, 1);
3137 Builder.CreateCondBr(Succ, NoError, Error);
3138
3139 Builder.SetInsertPoint(NoError);
3140 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
3141 Builder.CreateBr(End);
3142
3143 Builder.SetInsertPoint(Error);
3144 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
3145 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
3146 Builder.CreateBr(End);
3147
3148 Builder.SetInsertPoint(End);
3149 return Builder.CreateExtractValue(Call, 0);
3150 }
3151 case X86::BI__builtin_ia32_aesencwide128kl_u8:
3152 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3153 case X86::BI__builtin_ia32_aesencwide256kl_u8:
3154 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
3155 Intrinsic::ID IID;
3156 StringRef BlockName;
3157 switch (BuiltinID) {
3158 case X86::BI__builtin_ia32_aesencwide128kl_u8:
3159 IID = Intrinsic::x86_aesencwide128kl;
3160 BlockName = "aesencwide128kl";
3161 break;
3162 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3163 IID = Intrinsic::x86_aesdecwide128kl;
3164 BlockName = "aesdecwide128kl";
3165 break;
3166 case X86::BI__builtin_ia32_aesencwide256kl_u8:
3167 IID = Intrinsic::x86_aesencwide256kl;
3168 BlockName = "aesencwide256kl";
3169 break;
3170 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
3171 IID = Intrinsic::x86_aesdecwide256kl;
3172 BlockName = "aesdecwide256kl";
3173 break;
3174 }
3175
3176 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
3177 Value *InOps[9];
3178 InOps[0] = Ops[2];
3179 for (int i = 0; i != 8; ++i) {
3180 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
3181 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
3182 }
3183
3184 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
3185
3186 BasicBlock *NoError =
3187 createBasicBlock(BlockName + "_no_error", this->CurFn);
3188 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
3189 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
3190
3191 Value *Ret = Builder.CreateExtractValue(Call, 0);
3192 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3193 Builder.CreateCondBr(Succ, NoError, Error);
3194
3195 Builder.SetInsertPoint(NoError);
3196 for (int i = 0; i != 8; ++i) {
3197 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3198 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[0], i);
3199 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
3200 }
3201 Builder.CreateBr(End);
3202
3203 Builder.SetInsertPoint(Error);
3204 for (int i = 0; i != 8; ++i) {
3205 Constant *Zero = llvm::Constant::getNullValue(Ty);
3206 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[0], i);
3207 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
3208 }
3209 Builder.CreateBr(End);
3210
3211 Builder.SetInsertPoint(End);
3212 return Builder.CreateExtractValue(Call, 0);
3213 }
3214 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
3215 IsConjFMA = true;
3216 [[fallthrough]];
3217 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
3218 Intrinsic::ID IID = IsConjFMA
3219 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
3220 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
3221 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3222 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
3223 }
3224 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
3225 IsConjFMA = true;
3226 [[fallthrough]];
3227 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
3228 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3229 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3230 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3231 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
3232 return EmitX86Select(*this, And, Call, Ops[0]);
3233 }
3234 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
3235 IsConjFMA = true;
3236 [[fallthrough]];
3237 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
3238 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3239 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3240 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3241 static constexpr int Mask[] = {0, 5, 6, 7};
3242 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
3243 }
3244 case X86::BI__builtin_ia32_prefetchi:
3245 return Builder.CreateCall(
3246 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
3247 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
3248 llvm::ConstantInt::get(Int32Ty, 0)});
3249 }
3250}
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value mask, unsigned numElems)
#define INTRINSIC_X86_XSAVE_ID(NAME)
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
Definition X86.cpp:168
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
Definition X86.cpp:315
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
Definition X86.cpp:501
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
Definition X86.cpp:156
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
Definition X86.cpp:145
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition X86.cpp:24
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
Definition X86.cpp:134
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
Definition X86.cpp:559
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
Definition X86.cpp:631
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
Definition X86.cpp:616
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
Definition X86.cpp:206
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
Definition X86.cpp:192
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
Definition X86.cpp:264
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
Definition X86.cpp:376
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
Definition X86.cpp:58
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
Definition X86.cpp:292
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
Definition X86.cpp:181
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
Definition X86.cpp:224
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
Definition X86.cpp:350
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
Definition X86.cpp:588
static Value * emitX86RoundImmediate(CodeGenFunction &CGF, Value *X, unsigned RoundingControl)
Emit rounding for the value X according to the rounding RoundingControl based on bits 0 and 1.
Definition X86.cpp:80
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
Definition X86.cpp:355
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
Definition X86.cpp:278
TokenType getType() const
Returns the token's type, e.g.
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition Value.h:97
#define ENUM(NAME, LIT)
static StringRef getTriple(const Command &Job)
Enumerates target-specific builtins in their own namespaces within namespace clang.
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
@ GE_None
No error.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2949
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3153
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3140
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
llvm::Type * ConvertType(QualType T)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment without...
Definition CGExpr.cpp:232
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition X86.cpp:791
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1599
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
Definition CGExpr.cpp:4606
llvm::LLVMContext & getLLVMContext()
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Value * getPointer() const
Definition Address.h:66
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3104
QualType getType() const
Definition Expr.h:144
QualType getType() const
Definition Value.cpp:238
The JSON file list parser is used to communicate input to InstallAPI.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64